# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for cudnn recurrent layers."""

import os
import tempfile

from absl.testing import parameterized
import numpy as np

from tensorflow.python import keras
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.keras import combinations
from tensorflow.python.keras import keras_parameterized
from tensorflow.python.keras import testing_utils
from tensorflow.python.keras.optimizer_v2.rmsprop import RMSprop
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import test
from tensorflow.python.training import gradient_descent


@keras_parameterized.run_all_keras_modes
class CuDNNTest(keras_parameterized.TestCase):
  """CuDNNGRU / CuDNNLSTM tests decorated with `run_all_keras_modes`."""

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM],
          return_sequences=[True, False]))
  @test_util.run_gpu_only
  def test_cudnn_rnn_return_sequence(self, layer_class, return_sequences):
    """Runs `layer_test` on the layer for each `return_sequences` value."""
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    testing_utils.layer_test(
        layer_class,
        kwargs={'units': units,
                'return_sequences': return_sequences},
        input_shape=(num_samples, timesteps, input_size))

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM],
          go_backwards=[True, False]))
  @test_util.run_gpu_only
  def test_cudnn_rnn_go_backward(self, layer_class, go_backwards):
    """Runs `layer_test` on the layer for each `go_backwards` value."""
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    testing_utils.layer_test(
        layer_class,
        kwargs={'units': units,
                'go_backwards': go_backwards},
        input_shape=(num_samples, timesteps, input_size))

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  def test_return_state(self, layer_class):
    """Checks the first returned state matches `layer.states[0]`."""
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

    inputs = keras.Input(batch_shape=(num_samples, timesteps, input_size))
    layer = layer_class(units, return_state=True, stateful=True)
    outputs = layer(inputs)
    # outputs[0] is the layer output; everything after it is state.
    state = outputs[1:]
    self.assertEqual(len(state), num_states)
    model = keras.models.Model(inputs, state[0])
    model.run_eagerly = testing_utils.should_run_eagerly()

    input_data = np.random.random((num_samples, timesteps, input_size))
    predicted_state = model.predict(input_data)
    np.testing.assert_allclose(
        keras.backend.eval(layer.states[0]), predicted_state, atol=1e-4)

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  def test_time_major_input(self, layer_class):
    """Fits and predicts with a `time_major=True` layer between transposes."""
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32

    model = keras.models.Sequential()
    # Swap the first two axes before the time-major layer ...
    model.add(
        keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
    layer = layer_class(units, time_major=True, return_sequences=True)
    model.add(layer)
    # ... and swap them back afterwards.
    model.add(
        keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
    # NOTE(review): unlike test_specify_initial_state_keras_tensor, this
    # compile() call does not pass run_eagerly=testing_utils.should_run_eagerly()
    # -- confirm whether that omission is intentional.
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=0.001))
    model.fit(
        np.ones((num_samples, timesteps, input_size)),
        np.ones((num_samples, timesteps, units)))
    out = model.predict(np.ones((num_samples, timesteps, input_size)))
    self.assertEqual(out.shape, (num_samples, timesteps, units))

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  def test_specify_initial_state_keras_tensor(self, layer_class):
    """Checks Keras tensors passed as `initial_state` become layer inputs."""
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

    inputs = keras.Input((timesteps, input_size))
    initial_state = [keras.Input((units,)) for _ in range(num_states)]
    layer = layer_class(units)
    if len(initial_state) == 1:
      output = layer(inputs, initial_state=initial_state[0])
    else:
      output = layer(inputs, initial_state=initial_state)
    self.assertTrue(
        any(initial_state[0] is t
            for t in layer._inbound_nodes[0].input_tensors))

    model = keras.models.Model([inputs] + initial_state, output)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=RMSprop(learning_rate=0.001),
        run_eagerly=testing_utils.should_run_eagerly())

    input_data = np.random.random((num_samples, timesteps, input_size))
    state_data = [
        np.random.random((num_samples, units)) for _ in range(num_states)
    ]
    targets = np.random.random((num_samples, units))
    model.fit([input_data] + state_data, targets)


class CuDNNGraphOnlyTest(keras_parameterized.TestCase):
  """CuDNN layer tests that set up their own graph or cached session."""

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  def test_regularizer(self, layer_class):
    """Checks the number of losses created by weight/activity regularizers."""
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    with ops.Graph().as_default():
      # Kernel, recurrent and bias regularizers: one loss each.
      layer = layer_class(
          units,
          return_sequences=False,
          input_shape=(timesteps, input_size),
          kernel_regularizer=keras.regularizers.l1(0.01),
          recurrent_regularizer=keras.regularizers.l1(0.01),
          bias_regularizer='l2')
      layer.build((None, None, input_size))
      self.assertEqual(len(layer.losses), 3)

      # Activity regularizer: one loss tied to the input it was called on.
      layer = layer_class(
          units,
          return_sequences=False,
          input_shape=(timesteps, input_size),
          activity_regularizer='l2')
      self.assertTrue(layer.activity_regularizer)
      x = keras.backend.variable(
          np.ones((num_samples, timesteps, input_size)))
      layer(x)
      self.assertEqual(len(layer.get_losses_for(x)), 1)

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  @test_util.run_v1_only('b/120941292')
  def test_statefulness(self, layer_class):
    """Checks a stateful layer's outputs across training and state resets."""
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32

    with self.cached_session():
      model = keras.models.Sequential()
      model.add(
          keras.layers.Embedding(
              10,
              input_size,
              input_length=timesteps,
              batch_input_shape=(num_samples, timesteps)))
      layer = layer_class(
          units, return_sequences=False, stateful=True, weights=None)
      model.add(layer)
      model.compile(optimizer=gradient_descent.GradientDescentOptimizer(0.01),
                    loss='mse')
      out1 = model.predict(np.ones((num_samples, timesteps)))
      self.assertEqual(out1.shape, (num_samples, units))

      # train once so that the states change
      model.train_on_batch(
          np.ones((num_samples, timesteps)), np.ones((num_samples, units)))
      out2 = model.predict(np.ones((num_samples, timesteps)))

      # if the state is not reset, output should be different
      self.assertNotEqual(out1.max(), out2.max())

      # check that output changes after states are reset
      # (even though the model itself didn't change)
      layer.reset_states()
      out3 = model.predict(np.ones((num_samples, timesteps)))
      self.assertNotEqual(out2.max(), out3.max())

      # check that container-level reset_states() works
      model.reset_states()
      out4 = model.predict(np.ones((num_samples, timesteps)))
      self.assertAllClose(out3, out4, atol=1e-5)

      # check that the call to `predict` updated the states
      out5 = model.predict(np.ones((num_samples, timesteps)))
      self.assertNotEqual(out4.max(), out5.max())


@combinations.generate(combinations.combine(mode=['graph', 'eager']))
class CuDNNV1OnlyTest(keras_parameterized.TestCase):
  """CuDNN layer tests generated for both 'graph' and 'eager' modes."""

  @test_util.run_gpu_only
  def test_trainability(self):
    """Checks toggling `trainable` moves the 3 weights between the lists."""
    input_size = 10
    units = 2
    for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]:
      layer = layer_class(units)
      layer.build((None, None, input_size))
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.trainable_weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 0)
      layer.trainable = False
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 3)
      self.assertEqual(len(layer.trainable_weights), 0)
      layer.trainable = True
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.trainable_weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 0)

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False],
          bidirectional=[True, False], implementation=[1, 2],
          model_nest_level=[1, 2], model_type=['seq', 'func']))
  @test_util.run_v1_only('b/120911602, b/112083752')
  @test_util.run_gpu_only
  def test_load_weights_between_noncudnn_rnn(self, rnn_type, to_cudnn,
                                             bidirectional, implementation,
                                             model_nest_level, model_type):
    """Checks predictions match after moving weights to/from a CuDNN layer."""
    input_size = 10
    timesteps = 6
    input_shape = (timesteps, input_size)
    units = 2
    num_samples = 32
    inputs = np.random.random((num_samples, timesteps, input_size))

    rnn_layer_kwargs = {
        'recurrent_activation': 'sigmoid',
        # ensure biases are non-zero and properly converted
        'bias_initializer': 'random_uniform',
        'implementation': implementation
    }
    if rnn_type == 'LSTM':
      rnn_layer_class = keras.layers.LSTM
      cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
    else:
      rnn_layer_class = keras.layers.GRU
      cudnn_rnn_layer_class = keras.layers.CuDNNGRU
      rnn_layer_kwargs['reset_after'] = True

    layer = rnn_layer_class(units, **rnn_layer_kwargs)
    if bidirectional:
      layer = keras.layers.Bidirectional(layer)

    cudnn_layer = cudnn_rnn_layer_class(units)
    if bidirectional:
      cudnn_layer = keras.layers.Bidirectional(cudnn_layer)

    model = self._make_nested_model(input_shape, layer, model_nest_level,
                                    model_type)
    cudnn_model = self._make_nested_model(input_shape, cudnn_layer,
                                          model_nest_level, model_type)

    if to_cudnn:
      self._convert_model_weights(model, cudnn_model)
    else:
      self._convert_model_weights(cudnn_model, model)

    self.assertAllClose(model.predict(inputs), cudnn_model.predict(inputs),
                        atol=1e-4)

  def _make_nested_model(self, input_shape, layer, level=1, model_type='func'):
    """Wraps `layer` in `level` nested Keras models.

    Args:
      input_shape: Input shape tuple, without the batch dimension.
      layer: The layer to wrap.
      level: Number of nested models to build around `layer`.
      model_type: 'func' for functional `Model`s, 'seq' for `Sequential`s.

    Returns:
      The outermost model.

    Raises:
      ValueError: If `model_type` is neither 'func' nor 'seq'.
    """
    # example: make_nested_seq_model((1,), Dense(10), level=2).summary()
    def make_nested_seq_model(input_shape, layer, level=1):
      model = layer
      for i in range(1, level + 1):
        # Only the innermost Sequential gets an explicit InputLayer.
        layers = [keras.layers.InputLayer(input_shape),
                  model] if (i == 1) else [model]
        model = keras.models.Sequential(layers)
        if i > 1:
          model.build((None,) + input_shape)
      return model

    # example: make_nested_func_model((1,), Dense(10), level=2).summary()
    def make_nested_func_model(input_shape, layer, level=1):
      model_input = keras.layers.Input(input_shape)
      model = layer
      for _ in range(level):
        model = keras.models.Model(model_input, model(model_input))
      return model

    if model_type == 'func':
      return make_nested_func_model(input_shape, layer, level)
    elif model_type == 'seq':
      return make_nested_seq_model(input_shape, layer, level)
    # Fail loudly instead of silently returning None.
    raise ValueError('Unknown model_type: %r' % (model_type,))

  def _convert_model_weights(self, source_model, target_model):
    """Copies weights from `source_model` to `target_model` via an .h5 file.

    Args:
      source_model: Model whose weights are saved.
      target_model: Model that loads the saved weights.
    """
    fd, fname = tempfile.mkstemp('.h5')
    # mkstemp returns an already-open OS-level descriptor; close it so it is
    # not leaked (only the path is needed below).
    os.close(fd)
    try:
      source_model.save_weights(fname)
      target_model.load_weights(fname)
    finally:
      # Remove the temp file even if saving or loading raises.
      os.remove(fname)

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False]))
  @test_util.run_v1_only('b/120911602')
  @test_util.run_gpu_only
  def test_load_weights_between_noncudnn_rnn_time_distributed(self, rnn_type,
                                                              to_cudnn):
    """Same weight-transfer check, with layers wrapped in TimeDistributed."""
    # Similar test as test_load_weights_between_noncudnn_rnn() but has different
    # rank of input due to usage of TimeDistributed. Issue: #10356.
    input_size = 10
    steps = 6
    timesteps = 6
    input_shape = (timesteps, steps, input_size)
    units = 2
    num_samples = 32
    inputs = np.random.random((num_samples, timesteps, steps, input_size))

    rnn_layer_kwargs = {
        'recurrent_activation': 'sigmoid',
        # ensure biases are non-zero and properly converted
        'bias_initializer': 'random_uniform',
    }
    if rnn_type == 'LSTM':
      rnn_layer_class = keras.layers.LSTM
      cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
    else:
      rnn_layer_class = keras.layers.GRU
      cudnn_rnn_layer_class = keras.layers.CuDNNGRU
      rnn_layer_kwargs['reset_after'] = True

    layer = rnn_layer_class(units, **rnn_layer_kwargs)
    layer = keras.layers.TimeDistributed(layer)

    cudnn_layer = cudnn_rnn_layer_class(units)
    cudnn_layer = keras.layers.TimeDistributed(cudnn_layer)

    model = self._make_nested_model(input_shape, layer)
    cudnn_model = self._make_nested_model(input_shape, cudnn_layer)

    if to_cudnn:
      self._convert_model_weights(model, cudnn_model)
    else:
      self._convert_model_weights(cudnn_model, model)

    self.assertAllClose(model.predict(inputs), cudnn_model.predict(inputs),
                        atol=1e-4)

  @test_util.run_gpu_only
  def test_cudnnrnn_bidirectional(self):
    """Fits CuDNNGRU inside `Bidirectional` in several model setups."""
    rnn = keras.layers.CuDNNGRU
    samples = 2
    dim = 2
    timesteps = 2
    output_dim = 2
    mode = 'concat'

    x = np.random.random((samples, timesteps, dim))
    target_dim = 2 * output_dim if mode == 'concat' else output_dim
    y = np.random.random((samples, target_dim))

    # test with Sequential model
    model = keras.Sequential()
    model.add(
        keras.layers.Bidirectional(
            rnn(output_dim), merge_mode=mode, input_shape=(None, dim)))
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit(x, y, epochs=1, batch_size=1)

    # test config
    model.get_config()
    model = keras.models.model_from_json(model.to_json())
    model.summary()

    # test stacked bidirectional layers
    model = keras.Sequential()
    model.add(
        keras.layers.Bidirectional(
            rnn(output_dim, return_sequences=True),
            merge_mode=mode,
            input_shape=(None, dim)))
    model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode))
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit(x, y, epochs=1, batch_size=1)

    # test with functional API
    inputs = keras.Input((timesteps, dim))
    outputs = keras.layers.Bidirectional(
        rnn(output_dim), merge_mode=mode)(
            inputs)
    model = keras.Model(inputs, outputs)
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit(x, y, epochs=1, batch_size=1)

    # Bidirectional and stateful
    inputs = keras.Input(batch_shape=(1, timesteps, dim))
    outputs = keras.layers.Bidirectional(
        rnn(output_dim, stateful=True), merge_mode=mode)(
            inputs)
    model = keras.Model(inputs, outputs)
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit(x, y, epochs=1, batch_size=1)

  @test_util.run_gpu_only
  def test_preprocess_weights_for_loading_gru_incompatible(self):
    """Test loading weights between incompatible layers.

    Should fail fast with an exception.
    """
    input_shape = (3, 5)

    def gru(cudnn=False, **kwargs):
      layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRUV1
      return layer_class(2, input_shape=input_shape, **kwargs)

    def get_layer_weights(layer):
      layer.build(input_shape=input_shape)
      return layer.get_weights()

    def assert_not_compatible(src, dest, message):
      with self.assertRaises(ValueError) as ex:
        keras.saving.hdf5_format.preprocess_weights_for_loading(
            dest,
            get_layer_weights(src))
      # Checked after the `with` so it runs once the exception is captured.
      self.assertIn(message, str(ex.exception))

    assert_not_compatible(
        gru(),
        gru(cudnn=True),
        'GRU(reset_after=False) is not compatible with CuDNNGRU')
    assert_not_compatible(
        gru(cudnn=True),
        gru(),
        'CuDNNGRU is not compatible with GRU(reset_after=False)')
    assert_not_compatible(
        gru(),
        gru(reset_after=True),
        'GRU(reset_after=False) is not compatible with '
        'GRU(reset_after=True)')
    assert_not_compatible(
        gru(reset_after=True),
        gru(),
        'GRU(reset_after=True) is not compatible with '
        'GRU(reset_after=False)')


if __name__ == '__main__':
  test.main()