# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Cudnn RNN models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import collections
import functools
import itertools
import os
import sys
import unittest

import numpy as np

from tensorflow.contrib.cudnn_rnn.python.layers import cudnn_rnn
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
from tensorflow.contrib.rnn.python.ops import rnn as contrib_rnn_lib
from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import gradients_impl as gradients
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import rnn as rnn_lib
from tensorflow.python.ops import rnn_cell_impl
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops import variables
from tensorflow.python.ops.losses import losses
from tensorflow.python.platform import googletest
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training import adagrad
from tensorflow.python.training import adam
from tensorflow.python.training import gradient_descent
from tensorflow.python.training import momentum
from tensorflow.python.training import rmsprop
from tensorflow.python.training import saver as saver_lib
from tensorflow.python.training.tracking import util as trackable_utils


CUDNN_LSTM = cudnn_rnn_ops.CUDNN_LSTM
CUDNN_GRU = cudnn_rnn_ops.CUDNN_GRU
CUDNN_RNN_RELU = cudnn_rnn_ops.CUDNN_RNN_RELU
CUDNN_RNN_TANH = cudnn_rnn_ops.CUDNN_RNN_TANH
CUDNN_RNN_UNIDIRECTION = cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION
CUDNN_RNN_BIDIRECTION = cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION

CUDNN_LSTM_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_LSTM_PARAMS_PER_LAYER
CUDNN_GRU_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_GRU_PARAMS_PER_LAYER
CUDNN_RNN_TANH_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_RNN_TANH_PARAMS_PER_LAYER
CUDNN_RNN_RELU_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_RNN_RELU_PARAMS_PER_LAYER


class CudnnTestModel(object):
78  """Model with convenient APIs for easier building and running test graph.
79
80  The graph built is used by all tests below to avoid repeatedly building
81  similar test graphs.
82  """

  def __init__(self,
               rnn_mode,
               num_layers,
               num_units,
               input_size,
               direction=CUDNN_RNN_UNIDIRECTION,
               dropout=0.,
               dtype=dtypes.float32,
               training=False,
               seed=None,
               kernel_initializer=None,
               bias_initializer=None):
    if dtype not in (dtypes.float16, dtypes.float32, dtypes.float64):
      raise ValueError("Invalid dtype: %s" % dtype)
    self._dtype = dtype

    self._inputs = array_ops.placeholder(
        dtype=dtype, shape=[None, None, input_size], name="inputs")
    h = array_ops.placeholder(
        dtype=dtype, shape=[None, None, num_units], name="h")
    c = array_ops.placeholder(
        dtype=dtype, shape=[None, None, num_units], name="c")
    if rnn_mode == CUDNN_LSTM:
      model_fn = cudnn_rnn.CudnnLSTM
      self._initial_state = (h, c)
    elif rnn_mode == CUDNN_GRU:
      model_fn = cudnn_rnn.CudnnGRU
      self._initial_state = (h,)
    elif rnn_mode == CUDNN_RNN_TANH:
      model_fn = cudnn_rnn.CudnnRNNTanh
      self._initial_state = (h,)
    elif rnn_mode == CUDNN_RNN_RELU:
      model_fn = cudnn_rnn.CudnnRNNRelu
      self._initial_state = (h,)
    else:
      raise ValueError("Invalid rnn_mode: %s" % rnn_mode)
    self._rnn = model_fn(
        num_layers,
        num_units,
        direction=direction,
        dropout=dropout,
        dtype=dtype,
        seed=seed,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer)
    self._rnn.build([None, None, input_size])

    self._outputs, self._output_state = self._rnn(
        self._inputs, initial_state=self._initial_state, training=training)

  def _AddUp(self, outputs, output_state):
    total = math_ops.reduce_sum(outputs)
    for s in output_state:
      total += math_ops.reduce_sum(s)
    return total

  @property
  def inputs(self):
    return self._inputs

  @property
  def initial_state(self):
    return self._initial_state

  @property
  def outputs(self):
    return self._outputs

  @property
  def output_state(self):
    return self._output_state

  @property
  def rnn(self):
    return self._rnn

  @property
  def total_sum(self):
    return self._AddUp(self.outputs, self.output_state)

  def SynthesizeInput(self, seq_length, batch_size, seed=1234):
    """Synthesizes input and initial state values for testing."""
    np.random.seed(seed)
    num_layers = self._rnn.num_layers
    dir_count = self._rnn.num_dirs
    num_units = self._rnn.num_units
    input_size = self._rnn.input_size

    np_dtype = np.float32 if self._dtype == dtypes.float32 else np.float64
    inputs = np.random.randn(seq_length, batch_size,
                             input_size).astype(np_dtype)
    input_h = np.random.randn(num_layers * dir_count, batch_size,
                              num_units).astype(np_dtype)
    if self._rnn.rnn_mode == CUDNN_LSTM:
      input_c = np.random.randn(num_layers * dir_count, batch_size,
                                num_units).astype(np_dtype)
      initial_state = (input_h, input_c)
    else:
      initial_state = (input_h,)
    return inputs, initial_state

  def ZeroState(self, batch_size):
    num_layers = self._rnn.num_layers
    dir_count = self._rnn.num_dirs
    num_units = self._rnn.num_units

    np_dtype = np.float32 if self._dtype == dtypes.float32 else np.float64
    input_h = np.zeros((num_layers * dir_count, batch_size,
                        num_units)).astype(np_dtype)
    if self._rnn.rnn_mode == CUDNN_LSTM:
      input_c = np.zeros((num_layers * dir_count, batch_size,
                          num_units)).astype(np_dtype)
      initial_state = (input_h, input_c)
    else:
      initial_state = (input_h,)
    return initial_state

  def FProp(self, inputs_t, initial_state_t, training):
202    """Builds additional subgraph with given inputs and state.
203
204    Args:
205      inputs_t: a tensor.
206      initial_state_t: a tensor.
207      training: boolean, true if training mode.
208    Returns:
209      A tensor of the forward pass output of the model.
210    """
    outputs, output_state = self._rnn(
        inputs_t, initial_state=initial_state_t, training=training)
    return self._AddUp(outputs, output_state)

  def Feed(self, sess, inputs, initial_state=None, return_sum=True):
    """Runs graph with given inputs and initial state."""
    batch_size = inputs.shape[1]
    if initial_state is None:
      initial_state = self.ZeroState(batch_size)
    if return_sum:
      return sess.run(
          self.total_sum,
          feed_dict={self.inputs: inputs,
                     self.initial_state: initial_state})
    else:
      return sess.run(
          [self.outputs, self.output_state],
          feed_dict={self.inputs: inputs,
                     self.initial_state: initial_state})


def _CreateCudnnCompatibleCanonicalRNN(rnn, inputs, is_bidi=False, scope=None):
  mode = rnn.rnn_mode
  num_units = rnn.num_units
  num_layers = rnn.num_layers

  # To reuse cuDNN-trained models, must use cudnn compatible rnn cells.
  if mode == CUDNN_LSTM:
    single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleLSTMCell(num_units)
  elif mode == CUDNN_GRU:
    single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleGRUCell(num_units)
  elif mode == CUDNN_RNN_TANH:
    single_cell = (lambda: rnn_cell_impl.BasicRNNCell(num_units, math_ops.tanh))
  elif mode == CUDNN_RNN_RELU:
    single_cell = (
        lambda: rnn_cell_impl.BasicRNNCell(num_units, gen_nn_ops.relu))
  else:
    raise ValueError("%s is not supported!" % mode)

  if not is_bidi:
    cell = rnn_cell_impl.MultiRNNCell(
        [single_cell() for _ in range(num_layers)])
    return rnn_lib.dynamic_rnn(
        cell, inputs, dtype=dtypes.float32, time_major=True, scope=scope)
  else:
    cells_fw = [single_cell() for _ in range(num_layers)]
    cells_bw = [single_cell() for _ in range(num_layers)]

    (outputs, output_state_fw,
     output_state_bw) = contrib_rnn_lib.stack_bidirectional_dynamic_rnn(
         cells_fw,
         cells_bw,
         inputs,
         dtype=dtypes.float32,
         time_major=True,
         scope=scope)
    return outputs, (output_state_fw, output_state_bw)


class CudnnRNNTestBasic(test_util.TensorFlowTestCase):

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testLayerBasic(self):
    num_layers = 4
    num_units = 2
    batch_size = 8
    direction = CUDNN_RNN_UNIDIRECTION
    dir_count = 1

    with vs.variable_scope("main"):
      kernel_initializer = init_ops.constant_initializer(0.)
      bias_initializer = init_ops.constant_initializer(0.)
      inputs = random_ops.random_uniform([
          num_layers * dir_count, batch_size, num_units], dtype=dtypes.float32)

      lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units,
                                 direction=direction,
                                 kernel_initializer=kernel_initializer,
                                 bias_initializer=bias_initializer,
                                 name="awesome_lstm")

      # Build the layer
      outputs1, _ = lstm(inputs)
      # Reuse the layer
      outputs2, _ = lstm(inputs)

      total_sum1 = math_ops.reduce_sum(outputs1)
      total_sum2 = math_ops.reduce_sum(outputs2)

    with vs.variable_scope("main", reuse=True):
      lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units,
                                 direction=direction,
                                 kernel_initializer=kernel_initializer,
                                 bias_initializer=bias_initializer,
                                 name="awesome_lstm")

      # Reuse the layer
      outputs3, _ = lstm(inputs)
      total_sum3 = math_ops.reduce_sum(outputs3)

    self.assertEqual(1, len(variables.trainable_variables()))
    self.assertEqual(1, len(ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS)))
    self.assertEqual("main/awesome_lstm/opaque_kernel",
                     variables.trainable_variables()[0].op.name)

    with self.test_session(use_gpu=True) as sess:
      sess.run(variables.global_variables_initializer())
      (total_sum1_v, total_sum2_v, total_sum3_v) = sess.run(
          [total_sum1, total_sum2, total_sum3])
      self.assertEqual(0, total_sum1_v)
      self.assertEqual(0, total_sum2_v)
      self.assertEqual(0, total_sum3_v)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testOptimizersSupport(self):
    for opt in ("adagrad", "adam", "rmsprop", "momentum", "sgd"):
      self._TestOptimizerSupportHelper(opt)

  def _GetOptimizer(self, opt):
    if opt == "adagrad":
      return adagrad.AdagradOptimizer(learning_rate=1e-2)
    elif opt == "adam":
      return adam.AdamOptimizer(learning_rate=1e-2)
    elif opt == "rmsprop":
      return rmsprop.RMSPropOptimizer(learning_rate=1e-2)
    elif opt == "momentum":
      return momentum.MomentumOptimizer(learning_rate=1e-2, momentum=0.9)
    elif opt == "sgd":
      return gradient_descent.GradientDescentOptimizer(learning_rate=1e-2)
    else:
      raise ValueError("Unsupported optimizer: %s" % opt)

  def _TestOptimizerSupportHelper(self, opt):
    num_layers = 4
    num_units = 2
    batch_size = 8
    direction = CUDNN_RNN_UNIDIRECTION
    dir_count = 1

    with ops.Graph().as_default() as g:
      kernel_initializer = init_ops.constant_initializer(0.)
      bias_initializer = init_ops.constant_initializer(0.)
      inputs = random_ops.random_uniform([
          num_layers * dir_count, batch_size, num_units], dtype=dtypes.float32)

      lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units,
                                 direction=direction,
                                 kernel_initializer=kernel_initializer,
                                 bias_initializer=bias_initializer,
                                 name="awesome_lstm")
      outputs, _ = lstm(inputs)
      loss = math_ops.reduce_sum(outputs)
      optimizer = self._GetOptimizer(opt)
      train_op = optimizer.minimize(loss)

    with self.test_session(use_gpu=True, graph=g) as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(train_op)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSaveableGraphDeviceAssignment(self):
    num_layers = 4
    num_units = 2
    batch_size = 8
    direction = CUDNN_RNN_UNIDIRECTION
    dir_count = 1

    def DeviceFn(op):
      if op.type in ("Variable", "VariableV2"):
        return "/cpu:0"
      else:
        return "/gpu:0"

    with ops.Graph().as_default() as g:
      with ops.device(DeviceFn):
        with vs.variable_scope("main"):
          kernel_initializer = init_ops.constant_initializer(3.14)
          bias_initializer = init_ops.constant_initializer(1.59)
          inputs = random_ops.random_uniform(
              [num_layers * dir_count, batch_size, num_units],
              dtype=dtypes.float32)

          lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units,
                                     direction=direction,
                                     kernel_initializer=kernel_initializer,
                                     bias_initializer=bias_initializer,
                                     name="awesome_lstm")
          outputs = lstm(inputs)

        # saver is created in the scope of DeviceFn.
        saver = saver_lib.Saver()

    with self.test_session(use_gpu=True, graph=g) as sess:
      save_path = os.path.join(self.get_temp_dir(),
                               "test-saveable-device-assignment")
      sess.run(variables.global_variables_initializer())

      saver.save(sess, save_path)
      saver.restore(sess, save_path)
      sess.run(outputs)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testDifferentShapesEager(self):
    # Checks that kernel caching does not cause sharing of temporary storage
    # across different input shapes when executing eagerly.
    with context.eager_mode():
      with ops.device("gpu:0"):
        first_output, _ = cudnn_rnn.CudnnGRU(1, 100)(
            array_ops.zeros([28, 100, 28]))
        second_output, _ = cudnn_rnn.CudnnGRU(1, 100)(
            array_ops.zeros([28, 100, 100]))
        self.assertAllEqual([28, 100, 100], first_output.shape)
        self.assertAllEqual([28, 100, 100], second_output.shape)

        def _LossFunc():
          first_output, _ = cudnn_rnn.CudnnGRU(1, 100)(
              array_ops.zeros([28, 100, 28]))
          second_output, _ = cudnn_rnn.CudnnGRU(1, 100)(
              array_ops.zeros([28, 100, 100]))
          return (math_ops.reduce_sum(first_output) +
                  math_ops.reduce_sum(second_output))

        backprop.implicit_grad(_LossFunc)()

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testDifferentShapesGraph(self):
    # Tests that a single kernel instance presented with multiple input shapes
    # does not crash with graph execution.
    with ops.device("gpu:0"):
      layer = cudnn_rnn.CudnnGRU(1, 100)
      layer(array_ops.zeros([28, 100, 100]))

      def _Cond(index, accumulation):
        del accumulation  # unused
        return math_ops.less(index, 4)

      def _Body(index, accumulation):
        layer_input = accumulation[:, :, 10 * (1 + index % 2):]
        output, _ = layer(layer_input)
        return index + 1, accumulation + output

      original_input = array_ops.zeros([28, 100, 100])
      _, accumulation = control_flow_ops.while_loop(_Cond, _Body,
                                                    [0, original_input])
      grad, = gradients.gradients(
          math_ops.reduce_sum(accumulation), (original_input,))
    init_op = variables.global_variables_initializer()
    with self.cached_session() as sess:
      sess.run(init_op)
      accumulation_eval, grad_eval = sess.run((accumulation, grad))
      self.assertAllEqual([28, 100, 100], accumulation_eval.shape)
      self.assertAllEqual([28, 100, 100], grad_eval.shape)


# TODO(jamesqin): Transform to parameterized test after it is included in the
# TF open source codebase.
class CudnnRNNTestSaveRestore(test_util.TensorFlowTestCase):

  def _CompareWeights(self, lhs, rhs):
    self.assertEqual(len(lhs), len(rhs))
    for lw, rw in zip(lhs, rhs):
      self.assertAllEqual(lw, rw)

  def _CompareBiases(self, lhs, rhs, rnn_mode, num_layers, direction):
    self.assertEqual(len(lhs), len(rhs))
    if rnn_mode == CUDNN_LSTM:
      num_params_per_layer = CUDNN_LSTM_PARAMS_PER_LAYER
    elif rnn_mode == CUDNN_GRU:
      num_params_per_layer = CUDNN_GRU_PARAMS_PER_LAYER
    elif rnn_mode == CUDNN_RNN_TANH:
      num_params_per_layer = CUDNN_RNN_TANH_PARAMS_PER_LAYER
    else:
      num_params_per_layer = CUDNN_RNN_RELU_PARAMS_PER_LAYER
    num_dirs = 1 if direction == CUDNN_RNN_UNIDIRECTION else 2
    num_params_per_layer *= num_dirs
    self.assertEqual(num_params_per_layer * num_layers, len(lhs))

    for i in range(num_layers):
      layer_lhs = lhs[i * num_params_per_layer: (i+1) * num_params_per_layer]
      layer_rhs = rhs[i * num_params_per_layer: (i+1) * num_params_per_layer]
      if direction == CUDNN_RNN_UNIDIRECTION:
        self._CompareSingleLayerBiases(layer_lhs, layer_rhs)
      else:
        size = len(layer_lhs)
        fw_lhs, bw_lhs = layer_lhs[:size//2], layer_lhs[size//2:]
        fw_rhs, bw_rhs = layer_rhs[:size//2], layer_rhs[size//2:]
        self._CompareSingleLayerBiases(fw_lhs, fw_rhs)
        self._CompareSingleLayerBiases(bw_lhs, bw_rhs)

  def _CompareSingleLayerBiases(self, lhs, rhs):
    self.assertEqual(len(lhs), len(rhs))

    lf_lhs, rt_lhs = lhs[:len(lhs)//2], lhs[len(lhs)//2:]
    lf_rhs, rt_rhs = rhs[:len(rhs)//2], rhs[len(rhs)//2:]
    self.assertEqual(len(lf_lhs), len(rt_lhs))
    self.assertEqual(len(lf_rhs), len(rt_rhs))

    sum_lhs, sum_rhs = [], []
    for lf, rt in zip(lf_lhs, rt_lhs):
      sum_lhs.append(lf + rt)
    for lf, rt in zip(lf_rhs, rt_rhs):
      sum_rhs.append(lf + rt)
    self.assertEqual(len(sum_lhs), len(sum_rhs))
    for lf, rt in zip(sum_lhs, sum_rhs):
      self.assertAllEqual(lf, rt)

  def _TestSaveRestoreVariable(self, rnn_mode, direction, dtype):
    input_size = 3
    num_layers = 2
    num_units = 7
    with ops.Graph().as_default() as g:
      random_seed.set_random_seed(1234)
      model = CudnnTestModel(
          rnn_mode,
          num_layers,
          num_units,
          input_size,
          direction=direction,
          dtype=dtype)
      rnn = model.rnn
      save_path = os.path.join(self.get_temp_dir(),
                               "save-restore-variable-test")
      saver = saver_lib.Saver()
      weights, biases = (
          model.rnn.saveable.format_converter._opaque_to_cu_canonical(
              model.rnn.saveable._variables))
      opaque_params = rnn.trainable_variables[0]
      # CudnnTestModel() creates CudnnOpaqueParamsSaveable that helps saver save
      # Cudnn vars in canonical format.
      reset_op = state_ops.assign(
          opaque_params,
          array_ops.zeros(array_ops.shape(opaque_params), dtype=dtype))
      # Passing graph explicitly, otherwise an old sess would be reused.
      with self.test_session(use_gpu=True, graph=g) as sess:
        sess.run(variables.global_variables_initializer())
        val = saver.save(sess, save_path)
        self.assertEqual(save_path, val)
        weights_v, biases_v = sess.run([weights, biases])

        # Reset opaque param
        sess.run(reset_op)
        saver.restore(sess, save_path)
        weights_v_restored, biases_v_restored = sess.run([weights, biases])

        self._CompareWeights(weights_v, weights_v_restored)
        self._CompareBiases(biases_v, biases_v_restored, rnn_mode, num_layers,
                            direction)

  def _TestSaveRestoreTwoVariables(self, rnn_mode, direction, dtype):
    input_size = 3
    num_layers = 2
    num_units = 7
    with ops.Graph().as_default() as g:
      random_seed.set_random_seed(1234)
      with vs.variable_scope("m1"):
        model1 = CudnnTestModel(
            rnn_mode,
            num_layers,
            num_units,
            input_size,
            direction=direction,
            dtype=dtype)
      with vs.variable_scope("m2"):
        model2 = CudnnTestModel(
            rnn_mode,
            num_layers,
            num_units,
            input_size,
            direction=direction,
            dtype=dtype)
      opaque_params = (model1.rnn.trainable_variables[0],
                       model2.rnn.trainable_variables[0])
      saveable1 = model1.rnn.saveable
      weights1, biases1 = saveable1.format_converter._opaque_to_cu_canonical(
          saveable1._variables)
      saveable2 = model2.rnn.saveable
      weights2, biases2 = saveable2.format_converter._opaque_to_cu_canonical(
          saveable2._variables)
      reset_params = [
          state_ops.assign(params,
                           array_ops.zeros_like(params, dtype=dtype))
          for params in opaque_params
      ]
      reset_op = control_flow_ops.group(*reset_params)
      save_path = os.path.join(self.get_temp_dir(),
                               "save-restore-variable-test2")
      saver = saver_lib.Saver()
      # Passing graph explicitly, otherwise an old sess would be reused.
      with self.test_session(use_gpu=True, graph=g) as sess:
        sess.run(variables.global_variables_initializer())
        val = saver.save(sess, save_path)
        self.assertEqual(save_path, val)

        weights1_v, biases1_v = sess.run([weights1, biases1])
        weights2_v, biases2_v = sess.run([weights2, biases2])

        sess.run(reset_op)
        saver.restore(sess, save_path)
        weights1_v_restored, biases1_v_restored = sess.run([weights1, biases1])
        weights2_v_restored, biases2_v_restored = sess.run([weights2, biases2])

        self._CompareWeights(weights1_v, weights1_v_restored)
        self._CompareWeights(weights2_v, weights2_v_restored)
        self._CompareBiases(biases1_v, biases1_v_restored, rnn_mode, num_layers,
                            direction)
        self._CompareBiases(biases2_v, biases2_v_restored, rnn_mode, num_layers,
                            direction)

  def _TestSaveRestoreOutput(self, rnn_mode, direction, dtype):
    with ops.Graph().as_default() as g:
      num_layers = 2
      num_units = 7
      input_size = 7
      seq_length = 8
      batch_size = 4
      model = CudnnTestModel(
          rnn_mode,
          num_layers,
          num_units,
          input_size,
          direction=direction,
          dtype=dtype,
          training=False)
      rnn = model.rnn

      save_path = os.path.join(self.get_temp_dir(), "save-restore-output-test")
      saver = saver_lib.Saver()

      # Only one opaque var in a cudnn layer.
      assert len(rnn.trainable_variables) == 1
      reset_params = state_ops.assign(
          rnn.trainable_variables[0],
          array_ops.zeros(
              array_ops.shape(rnn.trainable_variables[0]), dtype=dtype))

      # Passing graph explicitly, otherwise an old sess would be reused.
      with self.test_session(use_gpu=True, graph=g) as sess:
        sess.run(variables.global_variables_initializer())
        inputs, initial_state = model.SynthesizeInput(seq_length, batch_size)
        total_sum_v = model.Feed(sess, inputs, initial_state)
        val = saver.save(sess, save_path)
        self.assertEqual(save_path, val)

        sess.run(reset_params)
        saver.restore(sess, save_path)
        total_sum_v_restored = model.Feed(sess, inputs, initial_state)
        self.assertAllClose(total_sum_v, total_sum_v_restored, atol=1e-5)

  def _TestSaveRestoreHelper(self, rnn_mode):
    directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION]
    dtype_list = [dtypes.float16, dtypes.float32, dtypes.float64]
    for direction, dtype in itertools.product(directions, dtype_list):
      self._TestSaveRestoreVariable(rnn_mode, direction, dtype)
      self._TestSaveRestoreTwoVariables(rnn_mode, direction, dtype)
      self._TestSaveRestoreOutput(rnn_mode, direction, dtype)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSaveRestoreRepeatedlyCreateCustomSaveable(self):
    input_size = 3
    num_layers = 2
    num_units = 7
    with ops.Graph().as_default():
      random_seed.set_random_seed(1234)
      model = CudnnTestModel(
          CUDNN_LSTM,
          num_layers,
          num_units,
          input_size,
          direction=CUDNN_RNN_UNIDIRECTION,
          dtype=dtypes.float32)
      with self.assertRaisesRegexp(RuntimeError,
                                   "Cudnn saveable already created"):
        model.rnn._create_saveable()

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSaveRestoreLSTM(self):
    self._TestSaveRestoreHelper(CUDNN_LSTM)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSaveRestoreGRU(self):
    self._TestSaveRestoreHelper(CUDNN_GRU)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSaveRestoreRNNTanh(self):
    self._TestSaveRestoreHelper(CUDNN_RNN_TANH)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testSaveRestoreRNNRelu(self):
    self._TestSaveRestoreHelper(CUDNN_RNN_RELU)


class CudnnRNNTestSaveRestoreTrackable(test_util.TensorFlowTestCase):

  def _VerifyCheckpoint(
      self, checkpoint_path, compatible_cell_fn, cudnn_cell_fn,
      num_layers, input_size, expected_variable_values, num_applications=3):
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    with ops.device("gpu:0"):
      cudnn_layer = cudnn_cell_fn()
      cudnn_checkpoint = trackable_utils.Checkpoint(cell=cudnn_layer)
      status = cudnn_checkpoint.restore(checkpoint_path)
      inputs = 3. * array_ops.ones([num_applications, num_layers, input_size],
                                   dtype=dtypes.float32)
      cudnn_output, _ = cudnn_layer(inputs)
      status.run_restore_ops()
    second_save_path = cudnn_checkpoint.save(checkpoint_prefix)
    restore_layer = compatible_cell_fn()
    restore_layer_checkpoint = trackable_utils.Checkpoint(
        cell=restore_layer)
    status = restore_layer_checkpoint.restore(second_save_path)
    current_state = restore_layer.zero_state(1, dtypes.float32)
    for _ in range(num_applications):
      restore_layer_output, current_state = restore_layer(
          inputs=3. * array_ops.ones([1, input_size]),
          state=current_state)
    status.run_restore_ops()
    self.assertTrue(restore_layer.variables)
    for variable, expected_value in zip(
        restore_layer.variables, expected_variable_values):
      self.assertAllClose(expected_value, self.evaluate(variable))
    self.assertAllClose(self.evaluate(restore_layer_output),
                        self.evaluate(cudnn_output)[-1, -1:, ...])

  def _TrackableSingleCellUnidirectionalTestTemplate(
      self, single_cell_fn, cudnn_cell_fn):
    # Single-layer cuDNN cells with object-based checkpointing should be
    # checkpoint compatible with either single CudnnCompatible cells or
    # MultiRnnCells with one cell.
    input_size = 3
    save_cell_layer = single_cell_fn()
    save_cell_layer(
        inputs=array_ops.ones([1, input_size]),
        state=save_cell_layer.zero_state(1, dtypes.float32))
    self.assertTrue(save_cell_layer.variables)
    expected_values = []
    np.random.seed(10)
    for variable in save_cell_layer.variables:
      value = np.random.normal(size=variable.shape)
      expected_values.append(value)
      self.evaluate(variable.assign(value))
    save_checkpoint = trackable_utils.Checkpoint(cell=save_cell_layer)
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    first_save_path = save_checkpoint.save(checkpoint_prefix)
    self._VerifyCheckpoint(
        checkpoint_path=first_save_path,
        compatible_cell_fn=
        lambda: rnn_cell_impl.MultiRNNCell([single_cell_fn()]),
        cudnn_cell_fn=cudnn_cell_fn,
        num_layers=1,
        expected_variable_values=expected_values,
        input_size=input_size)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  @test_util.run_in_graph_and_eager_modes
  def testLSTMTrackableSingleLayer(self):
    num_units = 2
    direction = CUDNN_RNN_UNIDIRECTION
    self._TrackableSingleCellUnidirectionalTestTemplate(
        single_cell_fn=functools.partial(
            cudnn_rnn_ops.CudnnCompatibleLSTMCell, num_units=num_units),
        cudnn_cell_fn=functools.partial(
            cudnn_rnn.CudnnLSTM, num_layers=1, num_units=num_units,
            direction=direction, name="awesome_lstm"))

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  @test_util.run_in_graph_and_eager_modes
  def testGRUTrackableSingleLayer(self):
    num_units = 2
    direction = CUDNN_RNN_UNIDIRECTION
    with self.assertRaises(NotImplementedError):
      # TODO(allenl): Implement object-based saving for GRUs and other cells.
      self._TrackableSingleCellUnidirectionalTestTemplate(
          single_cell_fn=functools.partial(
              cudnn_rnn_ops.CudnnCompatibleGRUCell, num_units=num_units),
          cudnn_cell_fn=functools.partial(
              cudnn_rnn.CudnnGRU, num_layers=1, num_units=num_units,
              direction=direction, name="awesome_gru"))

  def _TrackableMultiLayerTestTemplate(
      self, single_cell_fn, cudnn_cell_fn, num_layers):

    def _MultiCellFn():
      return rnn_cell_impl.MultiRNNCell(
          [single_cell_fn() for _ in range(num_layers)])
    input_size = 3
    save_graph = ops.Graph()
    with save_graph.as_default(), self.session(graph=save_graph):
      save_layer = _MultiCellFn()
      save_layer(inputs=array_ops.ones([1, input_size]),
                 state=save_layer.zero_state(1, dtypes.float32))
      self.assertTrue(save_layer.variables)
      expected_values = []
      np.random.seed(10)
      for variable in save_layer.variables:
        value = np.random.normal(size=variable.shape)
        expected_values.append(value)
        self.evaluate(variable.assign(value))
      save_checkpoint = trackable_utils.Checkpoint(cell=save_layer)
      checkpoint_directory = self.get_temp_dir()
      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
      first_save_path = save_checkpoint.save(checkpoint_prefix)
    self._VerifyCheckpoint(
        checkpoint_path=first_save_path,
        compatible_cell_fn=_MultiCellFn, cudnn_cell_fn=cudnn_cell_fn,
        num_layers=num_layers,
        expected_variable_values=expected_values,
        input_size=input_size)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  @test_util.run_in_graph_and_eager_modes
  def testCudnnCompatibleLSTMTrackableMultiLayer(self):
    num_units = 2
    num_layers = 3
    direction = CUDNN_RNN_UNIDIRECTION
    self._TrackableMultiLayerTestTemplate(
        single_cell_fn=functools.partial(
            cudnn_rnn_ops.CudnnCompatibleLSTMCell, num_units=num_units),
        cudnn_cell_fn=functools.partial(
            cudnn_rnn.CudnnLSTM, num_layers=num_layers, num_units=num_units,
            direction=direction, name="awesome_lstm"),
        num_layers=num_layers)


# TODO(jamesqin): Transform to parameterized test after it is included in the
# TF open source codebase.
class CudnnRNNTestCompatibleRNNCells(test_util.TensorFlowTestCase):

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testCudnnCompatibleLSTM(self):
    self._TestCudnnCompatibleRnnCellsHelper(CUDNN_LSTM)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testCudnnCompatibleGRU(self):
    self._TestCudnnCompatibleRnnCellsHelper(CUDNN_GRU)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testCudnnCompatibleRNNTanh(self):
    self._TestCudnnCompatibleRnnCellsHelper(CUDNN_RNN_TANH)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testCudnnCompatibleRNNRelu(self):
    self._TestCudnnCompatibleRnnCellsHelper(CUDNN_RNN_RELU)

  def _TestCudnnCompatibleRnnCellsHelper(self, rnn_mode):
    configs = [
        {
            "num_layers": 1,
            "seq_length": 3,
            "num_units": 4,
            "input_size": 5,
            "batch_size": 6,
        },
        {
            "num_layers": 2,
            "seq_length": 8,
            "num_units": 4,
            "input_size": 8,
            "batch_size": 16,
        },
        {
            "num_layers": 2,
            "seq_length": 3,
            "num_units": 4,
            "input_size": 5,
            "batch_size": 6,
        },
        {
            "num_layers": 1,
            "seq_length": 2,
            "num_units": 2,
            "input_size": 4,
            "batch_size": 1,
        },
    ]
    directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION]
    for cfg, direction in zip(configs, directions):
      self._TestCudnnCompatibleRnnCells(cfg["num_layers"], cfg["seq_length"],
                                        cfg["num_units"], cfg["input_size"],
                                        cfg["batch_size"], rnn_mode, direction)

  def _TestCudnnCompatibleRnnCells(self, num_layers, seq_length, num_units,
                                   input_size, batch_size, rnn_mode, direction):
    dtype = dtypes.float32
    # Train graph
    with ops.Graph().as_default() as g:
      model = CudnnTestModel(
          rnn_mode,
          num_layers,
          num_units,
          input_size,
          direction=direction,
          dtype=dtype,
          training=True)
      target_output = array_ops.placeholder(dtype=dtype)
      loss_op = losses.log_loss(
          labels=target_output, predictions=model.total_sum)
      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1e-2)
      train_op = optimizer.minimize(loss_op)

      saver = saver_lib.Saver()

      # Train Cudnn model
      seed = 0
      with self.test_session(use_gpu=True, graph=g) as sess:
        sess.run(variables.global_variables_initializer())
        # Train 128 steps
        num_steps = 128
        for _ in range(num_steps):
          inputs, _ = model.SynthesizeInput(seq_length, batch_size, seed)
          targets = np.random.rand()
          sess.run(
              train_op,
              feed_dict={
                  model.inputs: inputs,
                  model.initial_state: model.ZeroState(batch_size),
                  target_output: targets
              })
          seed += 1

        save_path = os.path.join(self.get_temp_dir(),
                                 ("cudnn-rnn-%s-test" % rnn_mode))
        save_v = saver.save(sess, save_path)
        self.assertEqual(save_path, save_v)

    # Cudnn inference graph
    with ops.Graph().as_default() as g:
      model = CudnnTestModel(
          rnn_mode,
          num_layers,
          num_units,
          input_size,
          direction=direction,
          dtype=dtype,
          training=False)
      rnn = model.rnn
      saver = saver_lib.Saver()

      inference_input = np.random.rand(seq_length, batch_size,
                                       input_size).astype(np.float32)
      with self.test_session(use_gpu=True, graph=g) as sess:
        sess.run(variables.global_variables_initializer())
        saver.restore(sess, save_path)

        # Cudnn inference
        cudnn_outputs_v, cudnn_output_states_v = model.Feed(
            sess, inference_input, return_sum=False)

    # Canonical RNN inference graph
    with ops.Graph().as_default() as g:
      cell_inputs = array_ops.placeholder(
          dtype, shape=[seq_length, batch_size, input_size])
      if direction == CUDNN_RNN_UNIDIRECTION:
        # outputs is one tensor, states are num_layer tuples, each 2 tensors
        (outputs, states) = _CreateCudnnCompatibleCanonicalRNN(rnn, cell_inputs)
        if rnn_mode == CUDNN_LSTM:
          output_h = array_ops.stack([s.h for s in states])
          output_c = array_ops.stack([s.c for s in states])
        else:
          output_state = array_ops.stack([s for s in states])
      else:
        # outputs is one tensor.
        # states is a tuple of 2 tuples:
        # each sub tuple is num_layer tuples, each with 2 tensors.
        (outputs, states) = _CreateCudnnCompatibleCanonicalRNN(
            rnn, cell_inputs, is_bidi=True)
        output_state_fw, output_state_bw = states
        if rnn_mode == CUDNN_LSTM:
          output_h, output_c = [], []
          for s_fw, s_bw in zip(output_state_fw, output_state_bw):
            output_h.append(array_ops.stack([s_fw.h, s_bw.h]))
            output_c.append(array_ops.stack([s_fw.c, s_bw.c]))
          output_h = array_ops.concat(output_h, axis=0)
          output_c = array_ops.concat(output_c, axis=0)
        else:
          output_state = []
          for s_fw, s_bw in zip(output_state_fw, output_state_bw):
            output_state.append(array_ops.stack([s_fw, s_bw]))
          output_state = array_ops.concat(output_state, axis=0)
      saver = saver_lib.Saver()

      with self.test_session(use_gpu=True, graph=g) as sess:
        saver.restore(sess, save_path)

        # BlockCell inference
        if rnn_mode == CUDNN_LSTM:
          outputs_v, output_h_v, output_c_v = sess.run(
              [outputs, output_h, output_c],
              feed_dict={cell_inputs: inference_input})
          self.assertAllClose(cudnn_outputs_v, outputs_v)
          cudnn_output_h_v, cudnn_output_c_v = cudnn_output_states_v
          self.assertAllClose(cudnn_output_h_v, output_h_v)
          self.assertAllClose(cudnn_output_c_v, output_c_v)
        else:
          outputs_v, output_state_v = sess.run(
              [outputs, output_state],
              feed_dict={cell_inputs: inference_input})
          self.assertAllClose(cudnn_outputs_v, outputs_v, atol=1e-4, rtol=2e-4)
          (cudnn_output_h_v,) = cudnn_output_states_v
          self.assertAllClose(cudnn_output_h_v, output_state_v, atol=2e-5,
                              rtol=2e-5)


class CudnnRNNTestParamsSize(test_util.TensorFlowTestCase):

  def _TestOpaqueParamsSize(self, rnn_mode, num_layers, num_units, input_size,
                            dtype, direction):
1036    logging.info("Testing one lstm param size with config: %s", locals())
    model = CudnnTestModel(
        rnn_mode,
        num_layers,
        num_units,
        input_size,
        dtype=dtype,
        direction=direction)
    rnn = model.rnn

    # Min param size estimate = sum(weights.size) + sum(biases.size)
    min_params_size = (
        sum(map(np.prod, rnn.canonical_weight_shapes)) +
        sum(sp[0] for sp in rnn.canonical_bias_shapes))

    opaque_params = rnn.trainable_variables[0]
    with self.test_session(use_gpu=True, graph=ops.get_default_graph()):
      variables.global_variables_initializer().run()
      opaque_params_size_v = opaque_params.eval().size
      self.assertLessEqual(min_params_size, opaque_params_size_v)

  @unittest.skipUnless(test.is_built_with_cuda(),
                       "Test only applicable when running on GPUs")
  def testOpaqueParamsSize(self):
    test_configs = [
        [4, 200, 200],
        [4, 200, 300],
        [4, 200, 100],
        [1, 100, 200],
        [2, 200, 100],
        [3, 200, 400],
    ]
    directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION]
    dtype_list = [dtypes.float16, dtypes.float32, dtypes.float64]
    rnns = [CUDNN_LSTM, CUDNN_GRU, CUDNN_RNN_RELU, CUDNN_RNN_TANH]
    for (rnn, config, dtype, direction) in itertools.product(
        rnns, test_configs, dtype_list, directions):
      num_layers, num_units, input_size = config
      with ops.Graph().as_default():
        self._TestOpaqueParamsSize(rnn, num_layers, num_units, input_size,
                                   dtype, direction)


class CudnnRNNTestTraining(test_util.TensorFlowTestCase):

  def setUp(self):
    super(CudnnRNNTestTraining, self).setUp()
    self._reset_rnd_gen_state = os.environ.get("TF_CUDNN_RESET_RND_GEN_STATE",
                                               str(False))
    self._rnn_use_v2 = os.environ.get("TF_CUDNN_RNN_USE_V2", "0")

  def tearDown(self):
    super(CudnnRNNTestTraining, self).tearDown()
    os.environ["TF_CUDNN_RESET_RND_GEN_STATE"] = self._reset_rnd_gen_state
    os.environ["TF_CUDNN_RNN_USE_V2"] = self._rnn_use_v2

  def _ComputeNumericGrad(self, sess, y, x, delta=1e-4, step=1):
1093    """Compute the numeric gradient of y wrt to x.
1094
1095    Args:
1096      sess: The TF session constructed with a graph containing x and y.
1097      y: A scalar TF Tensor in the graph constructed in sess.
1098      x: A TF Tensor in the graph constructed in sess.
1099      delta: Gradient checker's small perturbation of x[i].
1100      step: Only compute numerical gradients for a subset of x values.
1101        I.e. dy/dx[i] is computed if i % step == 0.
1102    Returns:
1103      A Tensor of the same shape and dtype as x. If x[i] is not chosen
1104      to compute the numerical gradient dy/x[i], the corresponding
1105      value is set to 0.
1106    """

    x_data = sess.run(x)
    x_size = x_data.size
    x_shape = x_data.shape

    numeric_grad = np.zeros(x_size, dtype=x_data.dtype)

    for i in range(0, x_size, step):
      x_pos = x_data.copy()
      if x_size == 1:
        x_pos += delta
      else:
        x_pos.flat[i] += delta
      y_pos_feed_dict = dict([(x.name, x_pos)])
      y_pos = sess.run(y, feed_dict=y_pos_feed_dict)

      x_neg = x_data.copy()
      if x_size == 1:
        x_neg -= delta
      else:
        x_neg.flat[i] -= delta
      y_neg_feed_dict = dict([(x.name, x_neg)])
      y_neg = sess.run(y, feed_dict=y_neg_feed_dict)
      numeric_grad[i] = (y_pos - y_neg) / (2 * delta)
    return numeric_grad.reshape(x_shape)

  def _GetShape(self, sess, inputs):
    if not isinstance(inputs, collections.Iterable):
      return sess.run(array_ops.shape(inputs))
    else:
      return sess.run([array_ops.shape(x) for x in inputs])

  def _GradientCheckFp16(self, sess, y, xs, num_samples,
                         tolerance=1e-6, delta=1e-4):
1141    """Gradient check for Fp16.
1142
1143    Fp16 numerical gradients end up being zeros. Use a new way to check
1144    gradients:
1145
1146    Given multi-variant function:
1147    y = f(x1, x2, ... xn)
1148    delta_y = f(x1 + delta_x1, x2+delta_x2, ..., xn+delta_xn) -
1149              f(x1, x2, ..., xn)
1150            = f'(x1) * delta_x1 + f'(x2) * delta_x2 + .. + f'(xn) * delta_xn
1151    where:
1152      delta_xi are very small disturbance.
1153      f'(xi) is the gradient of y w.r.t xi.
1154
1155    The gradient check verifies the expected delta_y calculated by the above
1156    equation is close to the actual delta_y.
1157    Args:
1158      sess: tf.Session object.
1159      y: output tensor.
1160      xs: a tensor or a list of input tensors.
1161      num_samples: number of test samples to run.
1162      tolerance: error tolerance.
1163      delta: the order of magnititued of input disturbance to apply to calculate
1164        the output change w.r.t inputs.
1165    """
    sym_grads = self._ComputeSymGrads(sess, y, xs)
    xs_shapes = self._GetShape(sess, xs)

    x_vals = [sess.run(x) for x in xs]
    for _ in range(num_samples):
      delta_xs = [delta * np.random.rand(*shape.tolist())
                  for shape in xs_shapes]

      feed_dict = {}
      for x, x_val, delta_x in zip(xs, x_vals, delta_xs):
        feed_dict[x] = x_val + delta_x
      actual_delta_y = (float(sess.run(y, feed_dict=feed_dict)) -
                        float(sess.run(y)))

      expected_delta_y = 0.
      for sym_grad, delta_x in zip(sym_grads, delta_xs):
        expected_delta_y += np.dot(
            sym_grad.astype(np.float32).flatten(),
            delta_x.astype(np.float32).flatten())
      self.assertAllClose(expected_delta_y, actual_delta_y,
                          atol=tolerance, rtol=tolerance)

  def _GradientCheck(self, sess, y, xs, tolerance=1e-6, delta=1e-4):
    sym_grads = self._ComputeSymGrads(sess, y, xs)

    num_grads = [self._ComputeNumericGrad(sess, y, x, delta) for x in xs]
    self.assertEqual(len(sym_grads), len(num_grads))
    for x, sym, num in zip(xs, sym_grads, num_grads):
      logging.info("Comparing gradients for input: %s", x.name)
      self.assertFalse(np.any(np.isnan(sym)))
      self.assertFalse(np.any(np.isnan(num)))
      self.assertAllClose(sym, num, atol=tolerance, rtol=tolerance)

  def _ComputeSymGrads(self, sess, y, xs):
    sym_grads_t = gradients.gradients(y, xs)
    return sess.run(sym_grads_t)

  def _TestOneSimpleTraining(self, rnn_mode, num_layers, num_units, input_size,
                             batch_size, seq_length, dir_count, dropout, dtype,
                             use_v2, delta, tolerance):
    # Gradient checking runs two forward ops with almost the same input. Need to
    # make sure the dropout patterns across the two runs are the same.
1208    logging.info("Training test with config: %s", locals())
1209    os.environ["TF_CUDNN_RESET_RND_GEN_STATE"] = str(True)
1210
1211    np.random.seed(1234)
1212    random_seed.set_random_seed(5678)
1213    has_input_c = (rnn_mode == CUDNN_LSTM)
1214    direction = (CUDNN_RNN_UNIDIRECTION
1215                 if dir_count == 1 else CUDNN_RNN_BIDIRECTION)
1216    if use_v2:
1217      os.environ["TF_CUDNN_RNN_USE_V2"] = "1"
1218    else:
1219      os.environ["TF_CUDNN_RNN_USE_V2"] = "0"
1220    model = CudnnTestModel(
1221        rnn_mode,
1222        num_layers,
1223        num_units,
1224        input_size,
1225        direction=direction,
1226        dropout=dropout,
1227        dtype=dtype,
1228        training=True,
1229        bias_initializer=init_ops.random_normal_initializer(
1230            mean=1., dtype=dtype))
1231    rnn = model.rnn
1232    params = rnn.trainable_variables[0]
1233
1234    inputs = variables.Variable(
1235        random_ops.random_uniform([seq_length, batch_size, input_size],
1236                                  dtype=dtype),
1237        dtype=dtype).read_value()
1238    input_h = variables.Variable(
1239        random_ops.random_uniform(
1240            [num_layers * dir_count, batch_size, num_units], dtype=dtype),
1241        dtype=dtype).read_value()
1242    if has_input_c:
1243      input_c = variables.Variable(
1244          random_ops.random_uniform(
1245              [num_layers * dir_count, batch_size, num_units], dtype=dtype),
1246          dtype=dtype).read_value()
1247      initial_state = (input_h, input_c)
1248    else:
1249      initial_state = (input_h,)
1250    total_sum = model.FProp(inputs, initial_state, training=True)
1251
1252    with self.test_session(use_gpu=True, graph=ops.get_default_graph()) as sess:
1253      sess.run(variables.global_variables_initializer())
1254      all_inputs = [inputs, params]
1255      for s in initial_state:
1256        all_inputs.append(s)
1257      if dtype == dtypes.float16:
1258        self._GradientCheckFp16(
1259            sess, total_sum, all_inputs,
1260            num_samples=FLAGS.grad_check_num_samples,
1261            tolerance=tolerance, delta=delta)
1262      else:
1263        for _ in range(FLAGS.grad_check_num_samples):
1264          # Each time choose a different set of inputs.
1265          sess.run(variables.global_variables_initializer())
1266          self._GradientCheck(
1267              sess, total_sum, all_inputs,
1268              tolerance=tolerance, delta=delta)
1269
1270  def _TestSimpleTrainingHelper(self, rnn_mode, test_configs):
1271    dropouts = [0, 0.5, 1.]
1272    v2_options = [False, True]
1273    for config, dropout, use_v2 in itertools.product(test_configs, dropouts,
1274                                                     v2_options):
1275      dtype = config.get("dtype", dtypes.float32)
1276      delta = config.get("delta", 1e-4)
1277      tolerance = config.get("tolerance", 1e-6)
1278      dir_count = config.get("dir_count", 1)
1279      shape = config["shape"]
1280      if dtype == dtypes.float64:
1281        # TODO(jamesqin): b/117848763
1282        use_v2 = False
1283      with ops.Graph().as_default():
1284        self._TestOneSimpleTraining(
1285            rnn_mode, shape["num_layers"], shape["num_units"],
1286            shape["input_size"], shape["batch_size"], shape["seq_length"],
1287            dir_count, dropout, dtype, use_v2, delta, tolerance)
1288
1289  @unittest.skipUnless(test.is_built_with_cuda(),
1290                       "Test only applicable when running on GPUs")
1291  def testSimpleTrainingLSTMFp64(self):
1292    test_configs = [
1293        {
1294            "dtype": dtypes.float64,
1295            "tolerance": 5e-6,
1296            "shape": {
1297                "num_layers": 2,
1298                "num_units": 3,
1299                "input_size": 4,
1300                "batch_size": 3,
1301                "seq_length": 4,
1302            },
1303        },
1304    ]
1305    self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs)
1306
1307  @unittest.skipUnless(test.is_built_with_cuda(),
1308                       "Test only applicable when running on GPUs")
1309  def testSimpleTrainingLSTMFp32(self):
1310    test_configs = [
1311        {
1312            "dtype": dtypes.float32,
1313            "delta": 1e-4,
1314            "tolerance": 9e-2,
1315            "shape": {
1316                "num_layers": 2,
1317                "num_units": 3,
1318                "input_size": 4,
1319                "batch_size": 3,
1320                "seq_length": 4,
1321            },
1322        },
1323    ]
1324    self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs)
1325
1326  @unittest.skipUnless(test.is_built_with_cuda(),
1327                       "Test only applicable when running on GPUs")
1328  def testSimpleTrainingLSTMFp16(self):
1329    test_configs = [
1330        {
1331            "dtype": dtypes.float16,
1332            "delta": 1e-3,
1333            "tolerance": 9e-2,
1334            "shape": {
1335                "num_layers": 2,
1336                "num_units": 3,
1337                "input_size": 4,
1338                "batch_size": 3,
1339                "seq_length": 4,
1340            },
1341        },
1342        {
1343            "dtype": dtypes.float16,
1344            "delta": 1e-2,
1345            "tolerance": 9e-2,
1346            "shape": {
1347                "num_layers": 2,
1348                "num_units": 6,
1349                "input_size": 8,
1350                "batch_size": 6,
1351                "seq_length": 4,
1352            },
1353        },
1354    ]
1355    self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs)
1356
1357  @unittest.skipUnless(test.is_built_with_cuda(),
1358                       "Test only applicable when running on GPUs")
1359  def testSimpleTrainingGRUFp64(self):
1360    test_configs = [
1361        {
1362            "dtype": dtypes.float64,
1363            "tolerance": 5e-6,
1364            "shape": {
1365                "num_layers": 2,
1366                "num_units": 3,
1367                "input_size": 4,
1368                "batch_size": 3,
1369                "seq_length": 4,
1370            }
1371        },
1372    ]
1373    self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs)
1374
1375  @unittest.skipUnless(test.is_built_with_cuda(),
1376                       "Test only applicable when running on GPUs")
1377  def testSimpleTrainingGRUFp32(self):
1378    test_configs = [
1379        {
1380            "dtype": dtypes.float32,
1381            "delta": 1e-3,
1382            "tolerance": 4e-3,
1383            "shape": {
1384                "num_layers": 2,
1385                "num_units": 3,
1386                "input_size": 4,
1387                "batch_size": 3,
1388                "seq_length": 4,
1389            },
1390        },
1391    ]
1392    self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs)
1393
1394  @unittest.skipUnless(test.is_built_with_cuda(),
1395                       "Test only applicable when running on GPUs")
1396  def testSimpleTrainingGRUFp16(self):
1397    test_configs = [
1398        {
1399            "dtype": dtypes.float16,
1400            "delta": 2e-3,
1401            "tolerance": 6e-2,
1402            "shape": {
1403                "num_layers": 2,
1404                "num_units": 3,
1405                "input_size": 4,
1406                "batch_size": 3,
1407                "seq_length": 4,
1408            },
1409        },
1410    ]
1411    self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs)
1412
1413  @unittest.skipUnless(test.is_built_with_cuda(),
1414                       "Test only applicable when running on GPUs")
1415  def testSimpleTrainingRNNTanhFp64(self):
1416    test_configs = [
1417        {
1418            "dtype": dtypes.float64,
1419            "tolerance": 5e-6,
1420            "shape": {
1421                "num_layers": 2,
1422                "num_units": 3,
1423                "input_size": 4,
1424                "batch_size": 3,
1425                "seq_length": 4,
1426            },
1427        },
1428    ]
1429    self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs)
1430
1431  @unittest.skipUnless(test.is_built_with_cuda(),
1432                       "Test only applicable when running on GPUs")
1433  def testSimpleTrainingRNNTanhFp32(self):
1434    test_configs = [
1435        {
1436            "dtype": dtypes.float32,
1437            "delta": 1e-3,
1438            "tolerance": 5e-3,
1439            "shape": {
1440                "num_layers": 2,
1441                "num_units": 3,
1442                "input_size": 4,
1443                "batch_size": 3,
1444                "seq_length": 4,
1445            },
1446        },
1447    ]
1448    self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs)
1449
1450  @unittest.skipUnless(test.is_built_with_cuda(),
1451                       "Test only applicable when running on GPUs")
1452  def testSimpleTrainingRNNTanhFp16(self):
1453    test_configs = [
1454        {
1455            "dtype": dtypes.float16,
1456            "delta": 1e-3,
1457            "tolerance": 5e-2,
1458            "shape": {
1459                "num_layers": 2,
1460                "num_units": 3,
1461                "input_size": 4,
1462                "batch_size": 3,
1463                "seq_length": 4,
1464            },
1465        },
1466    ]
1467    self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs)
1468
1469  @unittest.skipUnless(test.is_built_with_cuda(),
1470                       "Test only applicable when running on GPUs")
1471  def testSimpleTrainingRNNReluFp64(self):
1472    test_configs = [
1473        {
1474            "dtype": dtypes.float64,
1475            "tolerance": 5e-6,
1476            "shape": {
1477                "num_layers": 2,
1478                "num_units": 3,
1479                "input_size": 4,
1480                "batch_size": 3,
1481                "seq_length": 4,
1482            },
1483        },
1484    ]
1485    self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs)
1486
1487  @unittest.skipUnless(test.is_built_with_cuda(),
1488                       "Test only applicable when running on GPUs")
1489  def testSimpleTrainingRNNReluFp32(self):
1490    test_configs = [
1491        {
1492            "dtype": dtypes.float32,
1493            "delta": 1e-4,
1494            "tolerance": 3e-1,
1495            "shape": {
1496                "num_layers": 2,
1497                "num_units": 3,
1498                "input_size": 4,
1499                "batch_size": 3,
1500                "seq_length": 4,
1501            },
1502        },
1503    ]
1504    self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs)
1505
1506  @unittest.skipUnless(test.is_built_with_cuda(),
1507                       "Test only applicable when running on GPUs")
1508  def testSimpleTrainingRNNReluFp16(self):
1509    test_configs = [
1510        {
1511            "dtype": dtypes.float16,
1512            "delta": 1e-3,
1513            "tolerance": 7e-2,
1514            "shape": {
1515                "num_layers": 2,
1516                "num_units": 3,
1517                "input_size": 4,
1518                "batch_size": 3,
1519                "seq_length": 4,
1520            },
1521        },
1522    ]
1523    self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs)
1524
1525
1526if __name__ == "__main__":
1527  argv0 = sys.argv[0]
1528  parser = argparse.ArgumentParser()
1529  parser.add_argument(
1530      "--grad_check_num_samples",
1531      type=int,
1532      default=1,
1533      help="Number of samples to run for gradient check.")
1534  FLAGS, unparsed = parser.parse_known_args()
1535  sys.argv = [argv0] + unparsed
1536  googletest.main()
1537