# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Grappler LayoutOptimizer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import device_properties_pb2
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.core.protobuf import saver_pb2
from tensorflow.python.client import session
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import test_util
from tensorflow.python.grappler import cluster as gcluster
from tensorflow.python.grappler import tf_optimizer
from tensorflow.python.layers import convolutional as conv_layers
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import map_fn
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import gradient_descent
from tensorflow.python.training import saver as saver_lib


def _weight(shape):
  """Generates a weight of a given shape."""
  return random_ops.truncated_normal(shape, seed=0, stddev=0.1)


def _bias(shape):
  """Generates a bias of a given shape."""
  return constant_op.constant(0.1, shape=shape)


def _conv2d(x, w):
  """Returns a 2d convolution layer with full stride."""
  return nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')


def _max_pool_2x2(x):
  """Downsamples a feature map by 2X."""
  return nn.max_pool(
      x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


# Taken from tensorflow/examples/tutorials/mnist/mnist_deep.py
def _two_layer_model(x):
  """Builds two conv + bias + relu + 2x2 max-pool stages on top of `x`.

  Args:
    x: tensor that is reshaped to `[-1, 28, 28, 1]` before the first layer.

  Returns:
    The output of the second max-pool.
  """
  x_image = array_ops.reshape(x, [-1, 28, 28, 1])
  w_conv1 = _weight([5, 5, 1, 32])
  b_conv1 = _bias([32])
  h_conv1 = nn.relu(_conv2d(x_image, w_conv1) + b_conv1)
  h_pool1 = _max_pool_2x2(h_conv1)
  w_conv2 = _weight([5, 5, 32, 64])
  b_conv2 = _bias([64])
  h_conv2 = nn.relu(_conv2d(h_pool1, w_conv2) + b_conv2)
  h_pool2 = _max_pool_2x2(h_conv2)
  return h_pool2


def _model_with_second_port():
  """Builds a graph that consumes two outputs of one fused batch norm.

  Returns:
    identity(y + mean), where `y` and `mean` are the first and second outputs
    of `nn.fused_batch_norm` applied to a seeded random [2, 5, 5, 4] input.
  """
  random_seed.set_random_seed(0)
  x = random_ops.truncated_normal([2, 5, 5, 4], seed=0)
  scale = constant_op.constant(0.1, shape=[4])
  offset = constant_op.constant(0.3, shape=[4])
  y, mean, _ = nn.fused_batch_norm(x, scale, offset)
  add = math_ops.add(y, mean)
  output = array_ops.identity(add)
  return output


def _model_with_branch(x):
  """Returns the sum of two independent convolutions of the same input."""
  x_image = array_ops.reshape(x, [-1, 28, 28, 1])
  w_conv1 = _weight([5, 5, 1, 32])
  w_conv2 = _weight([5, 5, 1, 32])
  c_conv1 = _conv2d(x_image, w_conv1)
  c_conv2 = _conv2d(x_image, w_conv2)
  add = math_ops.add(c_conv1, c_conv2)
  return add


def _model_with_vec_and_4d(x):
  """Returns a convolution output added to a constant vector of shape [32]."""
  x_image = array_ops.reshape(x, [-1, 28, 28, 1])
  w_conv1 = _weight([5, 5, 1, 32])
  c_conv1 = _conv2d(x_image, w_conv1)
  vector = constant_op.constant(6.4, shape=[32])
  add = math_ops.add(c_conv1, vector)
  return add


def _map_model_over_random_inputs(model_fn):
  """Maps `model_fn` over a tuple of four seeded random [1, 784] tensors.

  Shared by the `_loop*` builders, which differ only in the mapped model.

  Args:
    model_fn: callable handed to `map_fn.map_fn` as the per-element function.

  Returns:
    The result of `map_fn.map_fn` with `dtype=dtypes.float32`.
  """
  random_seed.set_random_seed(0)
  # The four inputs are created in order so op creation order is unchanged.
  elems = tuple(
      random_ops.truncated_normal([1, 784], seed=0) for _ in range(4))
  outputs = map_fn.map_fn(model_fn, elems, dtype=dtypes.float32)
  return outputs


def _loop():
  """Returns `_two_layer_model` mapped over four random inputs."""
  return _map_model_over_random_inputs(_two_layer_model)


def _loop_with_branch():
  """Returns `_model_with_branch` mapped over four random inputs."""
  return _map_model_over_random_inputs(_model_with_branch)


def _loop_with_vec_and_4d():
  """Returns `_model_with_vec_and_4d` mapped over four random inputs."""
  return _map_model_over_random_inputs(_model_with_vec_and_4d)


def _get_config(layout_optimizer=True):
  """Builds a session config with the layout optimizer switched on or off.

  Args:
    layout_optimizer: whether the Grappler layout optimizer is enabled.

  Returns:
    A `ConfigProto` with the arithmetic optimizer off, `min_graph_nodes` set
    to -1, cost-model building enabled and `opt_level` set to -1.
  """
  if layout_optimizer:
    layout_toggle = rewriter_config_pb2.RewriterConfig.ON
  else:
    layout_toggle = rewriter_config_pb2.RewriterConfig.OFF
  rewrite_options = rewriter_config_pb2.RewriterConfig(
      layout_optimizer=layout_toggle,
      # do not remove duplicated nodes
      arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF)
  rewrite_options.min_graph_nodes = -1
  graph_options = config_pb2.GraphOptions(
      rewrite_options=rewrite_options, build_cost_model=1)
  config = config_pb2.ConfigProto(graph_options=graph_options)
  config.graph_options.optimizer_options.opt_level = -1
  return config


def _simple_metagraph(depthwise=False):
  """Exports a meta graph for a two-conv-layer model with a train op.

  Args:
    depthwise: if True the layers are `separable_conv2d`, otherwise `conv2d`.

  Returns:
    The meta graph exported from the default graph, whose 'train_op'
    collection holds the gradient-descent minimize op.
  """
  random_seed.set_random_seed(0)
  x = variables.Variable(random_ops.truncated_normal([1, 200, 200, 3], seed=0))
  conv = conv_layers.separable_conv2d if depthwise else conv_layers.conv2d
  y = conv(x, 32, [3, 3])
  z = conv(y, 32, [3, 3])
  optimizer = gradient_descent.GradientDescentOptimizer(1e-4)
  loss = math_ops.reduce_mean(z)
  train_op = optimizer.minimize(loss)
  graph = ops.get_default_graph()
  graph.add_to_collection('train_op', train_op)
  meta_graph = saver_lib.export_meta_graph(graph_def=graph.as_graph_def())
  return meta_graph


def _get_cluster():
  """Returns a Grappler cluster made of a single described '/GPU:0' device."""
  named_device = device_properties_pb2.NamedDevice()
  named_device.name = '/GPU:0'
  named_device.properties.type = 'GPU'
  named_device.properties.num_cores = 24
  named_device.properties.frequency = 1000
  named_device.properties.environment['architecture'] = '4'
  cluster = gcluster.Cluster(devices=[named_device])
  return cluster


def _is_transpose(node):
  """Returns True if the node name ends with a layout-optimizer transpose."""
  return node.endswith(('TransposeNHWCToNCHW-LayoutOptimizer',
                        'TransposeNCHWToNHWC-LayoutOptimizer'))


def _is_permute(node):
  """Returns True if the node name ends with a layout-optimizer VecPermute."""
  return node.endswith(('VecPermuteNHWCToNCHW-LayoutOptimizer',
                        'VecPermuteNCHWToNHWC-LayoutOptimizer'))


def _collect_node_names(metadata):
  """Summarizes the cost graph recorded in `metadata`.

  Args:
    metadata: a `config_pb2.RunMetadata` filled in by `Session.run`.

  Returns:
    A `(nodes, num_transposes)` tuple: the names of all cost-graph nodes in
    order, and how many of those names satisfy `_is_transpose`.
  """
  nodes = [node.name for node in metadata.cost_graph.node]
  num_transposes = sum(1 for name in nodes if _is_transpose(name))
  return nodes, num_transposes


class LayoutOptimizerTest(test.TestCase):
  """Tests the Grappler layout optimizer."""

  def _assert_trans_nchw_to_nhwc(self, name, nodes):
    """Asserts `nodes` has the NCHW-to-NHWC transpose node for `name`."""
    self.assertIn(name + '-TransposeNCHWToNHWC-LayoutOptimizer', nodes)

  def _assert_trans_nhwc_to_nchw(self, name, nodes):
    """Asserts `nodes` has the NHWC-to-NCHW transpose node for `name`."""
    self.assertIn(name + '-TransposeNHWCToNCHW-LayoutOptimizer', nodes)

  def _assert_map_nhwc_to_nchw(self, name, nodes):
    """Asserts `nodes` has the NHWC-to-NCHW DimMap node for `name`."""
    self.assertIn(name + '-DimMapNHWCToNCHW-LayoutOptimizer', nodes)

  def _assert_vec_nchw_to_nhwc(self, name, nodes):
    """Asserts `nodes` has the NCHW-to-NHWC VecPermute node for `name`."""
    self.assertIn(name + '-VecPermuteNCHWToNHWC-LayoutOptimizer', nodes)

  def _assert_vec_nhwc_to_nchw(self, name, nodes):
    """Asserts `nodes` has the NHWC-to-NCHW VecPermute node for `name`."""
    self.assertIn(name + '-VecPermuteNHWCToNCHW-LayoutOptimizer', nodes)

  def _train(self, checkpoint_path, layout_optimizer=False, restore=False):
    """Runs two training steps of a small two-conv model in a fresh graph.

    Args:
      checkpoint_path: path the checkpoint is restored from or saved to.
      layout_optimizer: passed to `_get_config` for the training session.
      restore: if True, variables are restored from `checkpoint_path` before
        training; otherwise they are initialized and saved after training.

    Returns:
      The values of all global variables when `restore` is True, else None.
    """
    ops.reset_default_graph()
    graph = ops.get_default_graph()
    with session.Session(
        config=_get_config(layout_optimizer), graph=graph) as sess:
      batch = 2
      height = 6
      width = 7
      input_channels = 3
      shape = [batch, height, width, input_channels]
      image = array_ops.placeholder(dtype='float32', shape=shape)
      conv1 = conv_layers.conv2d(image, 32, [3, 3])
      conv2 = conv_layers.conv2d(conv1, 32, [3, 3])
      optimizer = gradient_descent.GradientDescentOptimizer(0.01)
      loss = math_ops.reduce_mean(conv2)
      train_op = optimizer.minimize(loss)
      saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

      if restore:
        saver.restore(sess, checkpoint_path)
      else:
        self.evaluate(variables.global_variables_initializer())

      # Fixed numpy seed so both the save and the restore run see the same
      # sequence of input images.
      np.random.seed(0)
      for _ in range(2):
        image_val = np.random.rand(*shape).astype(np.float32)
        sess.run([loss, train_op], feed_dict={image: image_val})

      if restore:
        all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        all_vars_values = [var.eval(session=sess) for var in all_vars]
        return all_vars_values
      else:
        saver.save(sess, checkpoint_path)

  @test_util.deprecated_graph_mode_only
  def testTwoConvLayers(self):
    """Two conv layers: one transpose in, one transpose out after Relu_1."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      output = _two_layer_model(x)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Relu_1-0-0', nodes)

      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testSplitWithNonConstAxis(self):
    """Split with a fed axis: expects a DimMap node named for 'split-0'."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dim = array_ops.placeholder(dtype='int32')
      split = array_ops.split(conv, 2, axis=dim)
      scale = constant_op.constant(0.1, shape=[32])
      offset = constant_op.constant(0.3, shape=[32])
      bn0 = nn.fused_batch_norm(split[0], scale, offset)
      bn1 = nn.fused_batch_norm(split[1], scale, offset)
      add = bn0[0] + bn1[0]
      output = array_ops.identity(add)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={dim: 3})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})

      nodes, num_transposes = _collect_node_names(metadata)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('add_2-0-0', nodes)
      self._assert_map_nhwc_to_nchw('split-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testSplitVWithNonConstAxis(self):
    """SplitV with a fed axis: expects a DimMap node named for 'SplitV-2'."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dim = array_ops.placeholder(dtype='int32')
      sizes = constant_op.constant([50, 10, 4], shape=[3])
      split = gen_array_ops.split_v(
          value=conv, size_splits=sizes, axis=dim, num_split=3)
      output = math_ops.reduce_sum(split[0])

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={dim: 3})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})

      nodes, num_transposes = _collect_node_names(metadata)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('SplitV-0-0', nodes)
      self._assert_map_nhwc_to_nchw('SplitV-2', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testPadWithConstPaddings(self):
    """Pad with constant paddings: expects a 'Pad-1-LayoutOptimizer' node."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
      paddings = constant_op.constant(
          paddings_val, dtype='int32', name='PaddingsConst')
      pad = array_ops.pad(conv, paddings)
      output = array_ops.identity(pad)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes)
      self.assertIn('Pad-1-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testReduceSum(self):
    """Full reduce_sum after the model: only the input transpose remains."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv)
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testCast(self):
    """Cast to bool after the model: expects a transpose out at 'Cast-0-0'."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      cast = math_ops.cast(conv, dtype='bool')
      output = array_ops.identity(cast)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Cast-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testSqueeze(self):
    """Squeeze of a sum over axes [1, 2]: only the input transpose remains."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2])
      squeeze = array_ops.squeeze(reduce_sum)
      output = array_ops.identity(squeeze)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testSqueezeAlongHW(self):
    """Squeeze along axes [1, 2] of a keepdims sum: one transpose remains."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2], keepdims=True)
      squeeze = array_ops.squeeze(reduce_sum, axis=[1, 2])
      output = array_ops.identity(squeeze)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testSqueezeAlongNHW(self):
    """Squeeze along axes [0, 1, 2] of a keepdims sum: one transpose remains."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2], keepdims=True)
      squeeze = array_ops.squeeze(reduce_sum, axis=[0, 1, 2])
      output = array_ops.identity(squeeze)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testReduceSumAlongHWC(self):
    """reduce_sum over axes [1, 2, 3]: only the input transpose remains."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2, 3])
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testReduceSumAlongNHW(self):
    """reduce_sum over axes [0, 1, 2]: only the input transpose remains."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2])
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testReduceSumAlongC(self):
    """reduce_sum over axis [3]: only the input transpose remains."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[3])
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testReduceSumAlongCKeepDims(self):
    """Keepdims sum over axis [3]: expects a transpose out at 'Sum-0-0'."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[3], keepdims=True)
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Sum-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testReduceSumAlongHKeepDims(self):
    """Keepdims sum over axis [2]: expects two transposes in total."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[2], keepdims=True)
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testReduceSumAlongWCKeepDims(self):
    """Keepdims sum over axes [2, 3]: expects two transposes in total."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[2, 3], keepdims=True)
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testConcatWithControlDependency(self):
    """Concat under a control dependency: expects 'concat-2-LayoutOptimizer'."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      axis = constant_op.constant(3)
      var = variables.Variable(3)
      assign = state_ops.assign(var, 6)
      with ops.control_dependencies([assign]):
        concat = array_ops.concat([conv, conv], axis)
      output = array_ops.identity(concat)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('concat-0-0', nodes)
      self.assertIn('concat-2-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testFill(self):
    """Fill shaped like the model output: no VecPermute nodes may remain."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = array_ops.placeholder(dtype='float32')
      conv = _two_layer_model(x)
      shape = array_ops.shape(conv)
      scalar = array_ops.constant(5.7)
      fill = array_ops.fill(shape, scalar)
      output = array_ops.identity(fill)

      x_val = [3.4] * 784
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={x: x_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                x: x_val
            })

      nodes, num_transposes = _collect_node_names(metadata)
      num_vec_permute = sum(1 for name in nodes if _is_permute(name))

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      # Two vector permute nodes were initially added in the Expand phase of
      # LayoutOptimizer; they cancelled out each other in the Collapse phase.
      expected_vec_permute = 0
      self.assertEqual(expected_vec_permute, num_vec_permute)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Fill-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testTile(self):
    """Tile with fed multiples: expects a VecPermute node for 'Tile-1'."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      multiple = array_ops.placeholder(dtype='int32')
      tile = array_ops.tile(conv, multiple)
      output = array_ops.identity(tile)

      multiple_val = [2, 3, 4, 1]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={multiple: multiple_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                multiple: multiple_val
            })

      nodes, num_transposes = _collect_node_names(metadata)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Tile-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('Tile-1', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testReverseWithConstDims(self):
    """Reverse with constant dims: expects 'ReverseV2-1-LayoutOptimizer'."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dims = constant_op.constant([3, 1], name='DimsConst')
      reverse = array_ops.reverse(conv, dims)
      output = array_ops.identity(reverse)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes, num_transposes = _collect_node_names(metadata)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
      self.assertIn('ReverseV2-1-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testReverseWithNonConstDims(self):
    """Reverse with fed dims, run with and without the layout optimizer."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dims = array_ops.placeholder(dtype='int32')
      reverse = array_ops.reverse(conv, dims)
      output = array_ops.identity(reverse)

      dims_val = [2, 3]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={dims: dims_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                dims: dims_val
            })

      nodes, num_transposes = _collect_node_names(metadata)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
908 expected_num_transposes = 2 909 self.assertEqual(expected_num_transposes, num_transposes) 910 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 911 self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes) 912 self._assert_map_nhwc_to_nchw('ReverseV2-1', nodes) 913 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 914 915 @test_util.deprecated_graph_mode_only 916 def testSelectOp(self): 917 if test.is_gpu_available(cuda_only=True): 918 random_seed.set_random_seed(0) 919 x = random_ops.truncated_normal([1, 784], seed=0) 920 conv = _two_layer_model(x) 921 add = math_ops.add(conv, conv) 922 mean = math_ops.reduce_mean(conv) 923 condition = math_ops.less(conv, mean) 924 select = gen_math_ops.select(condition, conv, add) 925 output = array_ops.identity(select) 926 927 with session.Session(config=_get_config(False)) as sess: 928 output_val_ref = self.evaluate(output) 929 930 with session.Session(config=_get_config()) as sess: 931 metadata = config_pb2.RunMetadata() 932 output_val = sess.run(output, run_metadata=metadata) 933 934 nodes = [] 935 num_transposes = 0 936 for node in metadata.cost_graph.node: 937 if _is_transpose(node.name): 938 num_transposes += 1 939 nodes.append(node.name) 940 941 expected_num_transposes = 2 942 self.assertEqual(expected_num_transposes, num_transposes) 943 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 944 self._assert_trans_nchw_to_nhwc('Select-0-0', nodes) 945 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 946 947 @test_util.deprecated_graph_mode_only 948 def testSelectOpConditionUnknownShape(self): 949 if test.is_gpu_available(cuda_only=True): 950 random_seed.set_random_seed(0) 951 x = random_ops.truncated_normal([1, 784], seed=0) 952 conv = _two_layer_model(x) 953 add = math_ops.add(conv, conv) 954 condition = array_ops.placeholder(dtype='bool') 955 select = gen_math_ops.select(condition, conv, add) 956 output = array_ops.identity(select) 957 958 condition_val = np.zeros((1, 7, 7, 64)) 959 with 
session.Session(config=_get_config(False)) as sess: 960 output_val_ref = sess.run(output, feed_dict={condition: condition_val}) 961 962 with session.Session(config=_get_config()) as sess: 963 metadata = config_pb2.RunMetadata() 964 output_val = sess.run( 965 output, run_metadata=metadata, feed_dict={condition: condition_val}) 966 967 nodes = [] 968 num_transposes = 0 969 for node in metadata.cost_graph.node: 970 if _is_transpose(node.name): 971 num_transposes += 1 972 nodes.append(node.name) 973 974 expected_num_transposes = 3 975 self.assertEqual(expected_num_transposes, num_transposes) 976 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 977 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 978 979 @test_util.deprecated_graph_mode_only 980 def testSelectOpScalarCondition(self): 981 if test.is_gpu_available(cuda_only=True): 982 random_seed.set_random_seed(0) 983 x = random_ops.truncated_normal([1, 784], seed=0) 984 conv = _two_layer_model(x) 985 add = math_ops.add(conv, conv) 986 condition = constant_op.constant(True) 987 select = gen_math_ops.select(condition, conv, add) 988 output = array_ops.identity(select) 989 990 with session.Session(config=_get_config(False)) as sess: 991 output_val_ref = self.evaluate(output) 992 993 with session.Session(config=_get_config()) as sess: 994 metadata = config_pb2.RunMetadata() 995 output_val = sess.run(output, run_metadata=metadata) 996 997 nodes = [] 998 num_transposes = 0 999 for node in metadata.cost_graph.node: 1000 if _is_transpose(node.name): 1001 num_transposes += 1 1002 nodes.append(node.name) 1003 1004 expected_num_transposes = 2 1005 self.assertEqual(expected_num_transposes, num_transposes) 1006 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1007 self._assert_trans_nchw_to_nhwc('Select-0-0', nodes) 1008 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1009 1010 @test_util.deprecated_graph_mode_only 1011 def testPadWithNonConstPaddings(self): 1012 if test.is_gpu_available(cuda_only=True): 
1013 random_seed.set_random_seed(0) 1014 x = random_ops.truncated_normal([1, 784], seed=0) 1015 conv = _two_layer_model(x) 1016 paddings = array_ops.placeholder(dtype='int32') 1017 pad = array_ops.pad(conv, paddings) 1018 output = array_ops.identity(pad) 1019 1020 paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]] 1021 with session.Session(config=_get_config(False)) as sess: 1022 output_val_ref = sess.run(output, feed_dict={paddings: paddings_val}) 1023 1024 with session.Session(config=_get_config()) as sess: 1025 metadata = config_pb2.RunMetadata() 1026 output_val = sess.run( 1027 output, run_metadata=metadata, feed_dict={ 1028 paddings: paddings_val 1029 }) 1030 1031 nodes = [] 1032 num_transposes = 0 1033 for node in metadata.cost_graph.node: 1034 if _is_transpose(node.name): 1035 num_transposes += 1 1036 nodes.append(node.name) 1037 1038 # Four transposes were initially added in the Expand phase of 1039 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 1040 expected_num_transposes = 2 1041 self.assertEqual(expected_num_transposes, num_transposes) 1042 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1043 self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes) 1044 self._assert_vec_nhwc_to_nchw('Pad-1', nodes) 1045 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1046 1047 @test_util.deprecated_graph_mode_only 1048 def testMaxPoolV2(self): 1049 if test.is_gpu_available(cuda_only=True): 1050 random_seed.set_random_seed(0) 1051 x = random_ops.truncated_normal([1, 784], seed=0) 1052 conv = _two_layer_model(x) 1053 ksize = constant_op.constant([1, 2, 3, 1], shape=[4]) 1054 strides = array_ops.placeholder(dtype='int32', shape=[4]) 1055 max_pool = gen_nn_ops.max_pool_v2(conv, ksize, strides, 'VALID') 1056 output = array_ops.identity(max_pool) 1057 1058 strides_val = [1, 3, 2, 1] 1059 with session.Session(config=_get_config(False)) as sess: 1060 output_val_ref = sess.run(output, feed_dict={strides: strides_val}) 1061 1062 with 
session.Session(config=_get_config()) as sess: 1063 metadata = config_pb2.RunMetadata() 1064 output_val = sess.run( 1065 output, run_metadata=metadata, feed_dict={ 1066 strides: strides_val 1067 }) 1068 1069 nodes = [] 1070 num_transposes = 0 1071 for node in metadata.cost_graph.node: 1072 if _is_transpose(node.name): 1073 num_transposes += 1 1074 nodes.append(node.name) 1075 1076 expected_num_transposes = 2 1077 self.assertEqual(expected_num_transposes, num_transposes) 1078 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1079 self._assert_trans_nchw_to_nhwc('MaxPoolV2-0-0', nodes) 1080 self._assert_vec_nhwc_to_nchw('MaxPoolV2-2', nodes) 1081 self.assertIn('MaxPoolV2-1-LayoutOptimizer', nodes) 1082 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1083 1084 @test_util.deprecated_graph_mode_only 1085 def testMaxPoolGradV2(self): 1086 if test.is_gpu_available(cuda_only=True): 1087 random_seed.set_random_seed(0) 1088 x = random_ops.truncated_normal([1, 784], seed=0) 1089 conv = _two_layer_model(x) 1090 ksize = constant_op.constant([1, 2, 3, 1], shape=[4]) 1091 strides = array_ops.placeholder(dtype='int32', shape=[4]) 1092 max_pool_grad = gen_nn_ops.max_pool_grad_v2(conv, conv, conv, ksize, 1093 strides, 'VALID') 1094 output = array_ops.identity(max_pool_grad) 1095 1096 strides_val = [1, 3, 2, 1] 1097 with session.Session(config=_get_config(False)) as sess: 1098 output_val_ref = sess.run(output, feed_dict={strides: strides_val}) 1099 1100 with session.Session(config=_get_config()) as sess: 1101 metadata = config_pb2.RunMetadata() 1102 output_val = sess.run( 1103 output, run_metadata=metadata, feed_dict={ 1104 strides: strides_val 1105 }) 1106 1107 nodes = [] 1108 num_transposes = 0 1109 for node in metadata.cost_graph.node: 1110 if _is_transpose(node.name): 1111 num_transposes += 1 1112 nodes.append(node.name) 1113 1114 expected_num_transposes = 2 1115 self.assertEqual(expected_num_transposes, num_transposes) 1116 self._assert_trans_nhwc_to_nchw('Conv2D-0', 
nodes) 1117 self._assert_trans_nchw_to_nhwc('MaxPoolGradV2-0-0', nodes) 1118 self._assert_vec_nhwc_to_nchw('MaxPoolGradV2-4', nodes) 1119 self.assertIn('MaxPoolGradV2-3-LayoutOptimizer', nodes) 1120 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1121 1122 @test_util.deprecated_graph_mode_only 1123 def testSliceWithNonConstAxis(self): 1124 if test.is_gpu_available(cuda_only=True): 1125 random_seed.set_random_seed(0) 1126 x = random_ops.truncated_normal([1, 784], seed=0) 1127 conv = _two_layer_model(x) 1128 size = array_ops.placeholder(dtype='int32') 1129 s = array_ops.slice(conv, [0, 0, 0, 0], size) 1130 output = array_ops.identity(s) 1131 1132 size_val = [1, 2, 3, 4] 1133 with session.Session(config=_get_config(False)) as sess: 1134 output_val_ref = sess.run(output, feed_dict={size: size_val}) 1135 1136 with session.Session(config=_get_config()) as sess: 1137 metadata = config_pb2.RunMetadata() 1138 output_val = sess.run( 1139 output, run_metadata=metadata, feed_dict={ 1140 size: size_val 1141 }) 1142 1143 nodes = [] 1144 num_transposes = 0 1145 for node in metadata.cost_graph.node: 1146 if _is_transpose(node.name): 1147 num_transposes += 1 1148 nodes.append(node.name) 1149 1150 # Four transposes were initially added in the Expand phase of 1151 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
1152 expected_num_transposes = 2 1153 self.assertEqual(expected_num_transposes, num_transposes) 1154 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1155 self._assert_trans_nchw_to_nhwc('Slice-0-0', nodes) 1156 self._assert_vec_nhwc_to_nchw('Slice-2', nodes) 1157 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1158 1159 @test_util.deprecated_graph_mode_only 1160 def testStridedSliceWithNonConstAxis(self): 1161 if test.is_gpu_available(cuda_only=True): 1162 random_seed.set_random_seed(0) 1163 x = random_ops.truncated_normal([1, 784], seed=0) 1164 conv = _two_layer_model(x) 1165 end = array_ops.placeholder(dtype='int32') 1166 s = array_ops.strided_slice(conv, [0, 0, 0, 0], end, strides=[1, 2, 3, 1]) 1167 output = array_ops.identity(s) 1168 1169 end_val = [1, 2, 3, 4] 1170 with session.Session(config=_get_config(False)) as sess: 1171 output_val_ref = sess.run(output, feed_dict={end: end_val}) 1172 1173 with session.Session(config=_get_config()) as sess: 1174 metadata = config_pb2.RunMetadata() 1175 output_val = sess.run( 1176 output, run_metadata=metadata, feed_dict={ 1177 end: end_val 1178 }) 1179 1180 nodes = [] 1181 num_transposes = 0 1182 for node in metadata.cost_graph.node: 1183 if _is_transpose(node.name): 1184 num_transposes += 1 1185 nodes.append(node.name) 1186 1187 # Four transposes were initially added in the Expand phase of 1188 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
1189 expected_num_transposes = 2 1190 self.assertEqual(expected_num_transposes, num_transposes) 1191 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1192 self._assert_trans_nchw_to_nhwc('StridedSlice-0-0', nodes) 1193 self._assert_vec_nhwc_to_nchw('StridedSlice-2', nodes) 1194 self.assertIn('StridedSlice-1-LayoutOptimizer', nodes) 1195 self.assertIn('StridedSlice-3-LayoutOptimizer', nodes) 1196 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1197 1198 @test_util.deprecated_graph_mode_only 1199 def testStridedSliceWithMask1011(self): 1200 if test.is_gpu_available(cuda_only=True): 1201 random_seed.set_random_seed(0) 1202 x = random_ops.truncated_normal([1, 784], seed=0) 1203 conv = _two_layer_model(x) 1204 # This will generate a StridedSlice op with begin mask and 1205 # end mask 11(1011). 1206 s = conv[:, :, 1:-1, :] 1207 output = array_ops.identity(s) 1208 1209 with session.Session(config=_get_config(False)) as sess: 1210 output_val_ref = self.evaluate(output) 1211 1212 with session.Session(config=_get_config()) as sess: 1213 metadata = config_pb2.RunMetadata() 1214 output_val = sess.run(output, run_metadata=metadata) 1215 1216 nodes = [] 1217 num_transposes = 0 1218 for node in metadata.cost_graph.node: 1219 if _is_transpose(node.name): 1220 num_transposes += 1 1221 nodes.append(node.name) 1222 1223 # Four transposes were initially added in the Expand phase of 1224 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
1225 expected_num_transposes = 2 1226 self.assertEqual(expected_num_transposes, num_transposes) 1227 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1228 self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes) 1229 self.assertIn('strided_slice-1-LayoutOptimizer', nodes) 1230 self.assertIn('strided_slice-2-LayoutOptimizer', nodes) 1231 self.assertIn('strided_slice-3-LayoutOptimizer', nodes) 1232 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1233 1234 @test_util.deprecated_graph_mode_only 1235 def testStridedSliceWithMask0111(self): 1236 if test.is_gpu_available(cuda_only=True): 1237 random_seed.set_random_seed(0) 1238 x = random_ops.truncated_normal([1, 784], seed=0) 1239 conv = _two_layer_model(x) 1240 # This will generate a StridedSlice op with begin mask and 1241 # end mask 7(0111). 1242 s = conv[:, :, :, 1:-1] 1243 output = array_ops.identity(s) 1244 1245 with session.Session(config=_get_config(False)) as sess: 1246 output_val_ref = self.evaluate(output) 1247 1248 with session.Session(config=_get_config()) as sess: 1249 metadata = config_pb2.RunMetadata() 1250 output_val = sess.run(output, run_metadata=metadata) 1251 1252 nodes = [] 1253 num_transposes = 0 1254 for node in metadata.cost_graph.node: 1255 if _is_transpose(node.name): 1256 num_transposes += 1 1257 nodes.append(node.name) 1258 1259 # Four transposes were initially added in the Expand phase of 1260 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
1261 expected_num_transposes = 2 1262 self.assertEqual(expected_num_transposes, num_transposes) 1263 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1264 self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes) 1265 self.assertIn('strided_slice-1-LayoutOptimizer', nodes) 1266 self.assertIn('strided_slice-2-LayoutOptimizer', nodes) 1267 self.assertIn('strided_slice-3-LayoutOptimizer', nodes) 1268 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1269 1270 @test_util.deprecated_graph_mode_only 1271 def testStridedSliceGradWithNonConstAxis(self): 1272 if test.is_gpu_available(cuda_only=True): 1273 random_seed.set_random_seed(0) 1274 x = random_ops.truncated_normal([1, 784], seed=0) 1275 conv = _two_layer_model(x) 1276 end = array_ops.placeholder(dtype='int32') 1277 shape = array_ops.shape(conv) 1278 end_val = [1, 2, 3, 4] 1279 s = array_ops.strided_slice( 1280 conv, [0, 0, 0, 0], end_val, strides=[1, 2, 3, 1]) 1281 s_grad = array_ops.strided_slice_grad(shape, [0, 0, 0, 0], end, 1282 [1, 2, 3, 1], s) 1283 output = array_ops.identity(s_grad) 1284 1285 with session.Session(config=_get_config(False)) as sess: 1286 output_val_ref = sess.run(output, feed_dict={end: end_val}) 1287 1288 with session.Session(config=_get_config()) as sess: 1289 metadata = config_pb2.RunMetadata() 1290 output_val = sess.run( 1291 output, run_metadata=metadata, feed_dict={ 1292 end: end_val 1293 }) 1294 1295 nodes = [] 1296 num_transposes = 0 1297 for node in metadata.cost_graph.node: 1298 if _is_transpose(node.name): 1299 num_transposes += 1 1300 nodes.append(node.name) 1301 1302 # Four transposes were initially added in the Expand phase of 1303 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
1304 expected_num_transposes = 2 1305 self.assertEqual(expected_num_transposes, num_transposes) 1306 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1307 self._assert_trans_nchw_to_nhwc('StridedSliceGrad-0-0', nodes) 1308 self._assert_vec_nhwc_to_nchw('StridedSliceGrad-2', nodes) 1309 self.assertIn('StridedSlice-1-LayoutOptimizer', nodes) 1310 self.assertIn('StridedSlice-2-LayoutOptimizer', nodes) 1311 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1312 1313 @test_util.deprecated_graph_mode_only 1314 def testShapeN(self): 1315 if test.is_gpu_available(cuda_only=True): 1316 x = array_ops.placeholder(dtype='float32') 1317 conv = _two_layer_model(x) 1318 shapen = array_ops.shape_n([conv, conv]) 1319 output = math_ops.add(shapen[0], shapen[1]) 1320 1321 x_val = [1.7] * 784 1322 with session.Session(config=_get_config(False)) as sess: 1323 output_val_ref = sess.run(output, feed_dict={x: x_val}) 1324 1325 with session.Session(config=_get_config()) as sess: 1326 metadata = config_pb2.RunMetadata() 1327 output_val = sess.run( 1328 output, run_metadata=metadata, feed_dict={ 1329 x: x_val 1330 }) 1331 1332 nodes = [] 1333 num_transposes = 0 1334 for node in metadata.cost_graph.node: 1335 if _is_transpose(node.name): 1336 num_transposes += 1 1337 nodes.append(node.name) 1338 1339 expected_num_transposes = 1 1340 self.assertEqual(expected_num_transposes, num_transposes) 1341 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1342 self._assert_vec_nchw_to_nhwc('ShapeN-0-0', nodes) 1343 self.assertAllEqual(output_val_ref, output_val) 1344 1345 @test_util.deprecated_graph_mode_only 1346 def testShapeNFollowedByNotConvertibleNodeReshape(self): 1347 if test.is_gpu_available(cuda_only=True): 1348 x = array_ops.placeholder(dtype='float32') 1349 conv = _two_layer_model(x) 1350 conv_reshape = array_ops.reshape(conv, [1, 1, 1, -1]) 1351 shapen = array_ops.shape_n([conv, conv_reshape]) 1352 shape = array_ops.identity(shapen[1]) 1353 ones = array_ops.ones(shape) 1354 output 
= math_ops.add_n([conv_reshape, ones]) 1355 1356 x_val = [1.7] * 784 1357 with session.Session(config=_get_config(False)) as sess: 1358 output_val_ref = sess.run(output, feed_dict={x: x_val}) 1359 1360 with session.Session(config=_get_config()) as sess: 1361 metadata = config_pb2.RunMetadata() 1362 output_val = sess.run( 1363 output, run_metadata=metadata, feed_dict={x: x_val}) 1364 1365 nodes = [] 1366 num_transposes = 0 1367 for node in metadata.cost_graph.node: 1368 if _is_transpose(node.name): 1369 num_transposes += 1 1370 nodes.append(node.name) 1371 1372 expected_num_transposes = 2 1373 self.assertEqual(expected_num_transposes, num_transposes) 1374 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1375 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1376 1377 @test_util.deprecated_graph_mode_only 1378 def testLoop(self): 1379 if test.is_gpu_available(cuda_only=True): 1380 output = _loop() 1381 1382 with session.Session(config=_get_config(False)) as sess: 1383 output_val_ref = self.evaluate(output) 1384 1385 with session.Session(config=_get_config()) as sess: 1386 metadata = config_pb2.RunMetadata() 1387 output_val = sess.run(output, run_metadata=metadata) 1388 1389 nodes = [] 1390 num_transposes = 0 1391 for node in metadata.cost_graph.node: 1392 if _is_transpose(node.name): 1393 num_transposes += 1 1394 nodes.append(node.name) 1395 1396 # Four transposes were initially added in the Expand phase of 1397 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
1398 expected_num_transposes = 2 1399 self.assertEqual(expected_num_transposes, num_transposes) 1400 self.assertEqual(expected_num_transposes, num_transposes) 1401 self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) 1402 self._assert_trans_nchw_to_nhwc('map/while/MaxPool_1-0-2', nodes) 1403 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1404 1405 @test_util.deprecated_graph_mode_only 1406 def testLoopWithBranch(self): 1407 if test.is_gpu_available(cuda_only=True): 1408 output = _loop_with_branch() 1409 1410 with session.Session(config=_get_config(False)) as sess: 1411 output_val_ref = self.evaluate(output) 1412 1413 with session.Session(config=_get_config()) as sess: 1414 metadata = config_pb2.RunMetadata() 1415 output_val = sess.run(output, run_metadata=metadata) 1416 1417 nodes = [] 1418 num_transposes = 0 1419 for node in metadata.cost_graph.node: 1420 if _is_transpose(node.name): 1421 num_transposes += 1 1422 nodes.append(node.name) 1423 1424 expected_num_transposes = 3 1425 self.assertEqual(expected_num_transposes, num_transposes) 1426 self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) 1427 self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes) 1428 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1429 1430 @test_util.deprecated_graph_mode_only 1431 def testLoopWithVecAnd4D(self): 1432 if test.is_gpu_available(cuda_only=True): 1433 output = _loop_with_vec_and_4d() 1434 1435 with session.Session(config=_get_config(False)) as sess: 1436 output_val_ref = self.evaluate(output) 1437 1438 with session.Session(config=_get_config()) as sess: 1439 metadata = config_pb2.RunMetadata() 1440 output_val = sess.run(output, run_metadata=metadata) 1441 1442 nodes = [] 1443 num_transposes = 0 1444 for node in metadata.cost_graph.node: 1445 if _is_transpose(node.name): 1446 num_transposes += 1 1447 nodes.append(node.name) 1448 1449 expected_num_transposes = 2 1450 self.assertEqual(expected_num_transposes, num_transposes) 1451 
self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) 1452 self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes) 1453 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1454 1455 @test_util.deprecated_graph_mode_only 1456 def testBinaryOpSecondPort(self): 1457 if test.is_gpu_available(cuda_only=True): 1458 output = _model_with_second_port() 1459 1460 with session.Session(config=_get_config(False)) as sess: 1461 output_val_ref = self.evaluate(output) 1462 1463 with session.Session(config=_get_config()) as sess: 1464 metadata = config_pb2.RunMetadata() 1465 output_val = sess.run(output, run_metadata=metadata) 1466 1467 nodes = [] 1468 num_transposes = 0 1469 for node in metadata.cost_graph.node: 1470 if _is_transpose(node.name): 1471 num_transposes += 1 1472 nodes.append(node.name) 1473 1474 expected_num_transposes = 2 1475 self.assertEqual(expected_num_transposes, num_transposes) 1476 self._assert_trans_nhwc_to_nchw('FusedBatchNorm-0', nodes) 1477 self._assert_trans_nchw_to_nhwc('Add-0-0', nodes) 1478 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1479 1480 @test_util.deprecated_graph_mode_only 1481 def testGradient(self): 1482 meta_graph = _simple_metagraph() 1483 config = config_pb2.ConfigProto() 1484 config.graph_options.rewrite_options.CopyFrom( 1485 rewriter_config_pb2.RewriterConfig( 1486 layout_optimizer=rewriter_config_pb2.RewriterConfig.ON, 1487 min_graph_nodes=-1)) 1488 optimized_graph = tf_optimizer.OptimizeGraph( 1489 config, meta_graph, cluster=_get_cluster()) 1490 1491 found = 0 1492 for node in optimized_graph.node: 1493 if node.op in ['Conv2D', 'Conv2DBackpropFilter', 'Conv2DBackpropInput']: 1494 found += 1 1495 self.assertEqual(node.attr['data_format'].s, b'NCHW') 1496 self.assertEqual(found, 5) 1497 1498 @test_util.deprecated_graph_mode_only 1499 def testDepthwise(self): 1500 meta_graph = _simple_metagraph(depthwise=True) 1501 config = config_pb2.ConfigProto() 1502 config.graph_options.rewrite_options.CopyFrom( 
1503 rewriter_config_pb2.RewriterConfig( 1504 layout_optimizer=rewriter_config_pb2.RewriterConfig.ON, 1505 min_graph_nodes=-1)) 1506 optimized_graph = tf_optimizer.OptimizeGraph( 1507 config, meta_graph, cluster=_get_cluster()) 1508 1509 found = 0 1510 for node in optimized_graph.node: 1511 if node.op in [ 1512 'DepthwiseConv2dNative', 'DepthwiseConv2dNativeBackpropFilter', 1513 'DepthwiseConv2dNativeBackpropInput' 1514 ]: 1515 found += 1 1516 self.assertEqual(node.attr['data_format'].s, b'NCHW') 1517 self.assertEqual(found, 6) 1518 1519 def testCheckpointCompatibility(self): 1520 if not test.is_gpu_available(cuda_only=True): 1521 self.skipTest('GPU required') 1522 1523 checkpoint_path = self.get_temp_dir() 1524 self._train(checkpoint_path) 1525 vars_expected = self._train(checkpoint_path, restore=True) 1526 vars_layout_optimized = self._train( 1527 checkpoint_path, restore=True, layout_optimizer=True) 1528 1529 for var_expected, var_layout_optimized in zip(vars_expected, 1530 vars_layout_optimized): 1531 self.assertAllClose(var_expected, var_layout_optimized, atol=1e-6) 1532 1533 1534if __name__ == '__main__': 1535 test.main() 1536