# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functional tests for convolutional operations."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time

import numpy as np

from six.moves import xrange  # pylint: disable=redefined-builtin
from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.client import session as session_lib
from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.layers import convolutional
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_impl
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variables
import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging
from tensorflow.python.util.compat import collections_abc

def GetShrunkInceptionShapes(shrink=10):
  """Iterator for smaller versions of convolution shapes in 2015 Inception.

  Relative to Inception, each depth value is `depth // shrink`.

  Args:
    shrink: Factor to shrink each depth value by relative to Inception.

  Yields:
    Tuple (input_size, filter_size, out_size, stride, padding), the convolution
    parameters of Inception layers.
  """
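  # For example, with the default shrink=10 the first tuple yielded is
  # ([4, 5, 5, 124], [1, 1, 124, 12], [4, 5, 5, 12], 1, "SAME"): the depth
  # values 1248, 1248, and 128 below are each floor-divided by 10 before
  # being yielded.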
  input_sizes = [[4, 5, 5, 1248], [4, 8, 8, 384], [4, 8, 8, 384],
                 [4, 8, 8, 2048], [4, 8, 8, 448], [4, 8, 8, 2048],
                 [4, 8, 8, 2048], [4, 8, 8, 2048], [4, 8, 8, 1760],
                 [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 8, 8, 1760],
                 [4, 17, 17, 192], [4, 17, 17, 192], [4, 17, 17, 1248],
                 [4, 17, 17, 128], [4, 17, 17, 1248], [4, 17, 17, 224],
                 [4, 17, 17, 192], [4, 17, 17, 192], [4, 17, 17, 1216],
                 [4, 17, 17, 1216], [4, 17, 17, 224], [4, 17, 17, 192],
                 [4, 17, 17, 192], [4, 17, 17, 1152], [4, 17, 17, 1152],
                 [4, 17, 17, 192], [4, 17, 17, 160], [4, 17, 17, 1152],
                 [4, 17, 17, 1024], [4, 17, 17, 128], [4, 17, 17, 1024],
                 [4, 17, 17, 128], [4, 17, 17, 1024], [4, 17, 17, 128],
                 [4, 17, 17, 768], [4, 17, 17, 128], [4, 17, 17, 128],
                 [4, 17, 17, 768], [4, 17, 17, 768], [4, 35, 35, 96],
                 [4, 35, 35, 288], [4, 35, 35, 64], [4, 35, 35, 288],
                 [4, 35, 35, 256], [4, 35, 35, 48], [4, 35, 35, 256],
                 [4, 35, 35, 96], [4, 35, 35, 192], [4, 35, 35, 192],
                 [4, 35, 35, 192], [4, 73, 73, 64], [4, 73, 73, 64],
                 [4, 147, 147, 24]]
  filter_sizes = [[1, 1, 1248, 128], [1, 3, 384, 384], [3, 1, 384, 384],
                  [1, 1, 2048, 192], [3, 3, 448, 384], [1, 1, 2048, 320],
                  [1, 1, 2048, 448], [1, 1, 2048, 384], [1, 1, 1760, 384],
                  [1, 1, 1760, 192], [1, 1, 1760, 448], [1, 1, 1760, 320],
                  [3, 3, 192, 192], [3, 3, 192, 192], [1, 1, 1248, 192],
                  [3, 3, 128, 320], [1, 1, 1248, 128], [1, 3, 224, 224],
                  [3, 1, 192, 256], [1, 3, 192, 256], [1, 1, 1216, 192],
                  [1, 1, 1216, 96], [3, 1, 224, 224], [3, 3, 192, 224],
                  [1, 3, 192, 192], [1, 1, 1152, 192], [1, 1, 1152, 128],
                  [3, 1, 192, 192], [3, 3, 160, 192], [1, 1, 1152, 160],
                  [1, 1, 1024, 128], [1, 3, 128, 192], [1, 1, 1024, 160],
                  [3, 1, 128, 192], [1, 1, 1024, 256], [3, 1, 128, 128],
                  [1, 1, 768, 192], [1, 3, 128, 128], [3, 3, 128, 128],
                  [1, 1, 768, 128], [1, 1, 768, 320], [3, 3, 96, 96],
                  [3, 3, 288, 384], [3, 3, 64, 96], [1, 1, 288, 64],
                  [1, 1, 256, 64], [5, 5, 48, 64], [1, 1, 256, 48],
                  [3, 3, 96, 96], [1, 1, 192, 32], [1, 1, 192, 64],
                  [1, 1, 192, 48], [3, 3, 64, 192], [1, 1, 64, 64],
                  [1, 1, 24, 64]]
  out_sizes = [[4, 5, 5, 128], [4, 8, 8, 384], [4, 8, 8, 384],
               [4, 8, 8, 192], [4, 8, 8, 384], [4, 8, 8, 320],
               [4, 8, 8, 448], [4, 8, 8, 384], [4, 8, 8, 384],
               [4, 8, 8, 192], [4, 8, 8, 448], [4, 8, 8, 320],
               [4, 8, 8, 192], [4, 17, 17, 192], [4, 17, 17, 192],
               [4, 8, 8, 320], [4, 17, 17, 128], [4, 17, 17, 224],
               [4, 17, 17, 256], [4, 17, 17, 256], [4, 17, 17, 192],
               [4, 17, 17, 96], [4, 17, 17, 224], [4, 17, 17, 224],
               [4, 17, 17, 192], [4, 17, 17, 192], [4, 17, 17, 128],
               [4, 17, 17, 192], [4, 17, 17, 192], [4, 17, 17, 160],
               [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 160],
               [4, 17, 17, 192], [4, 17, 17, 256], [4, 17, 17, 128],
               [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 128],
               [4, 17, 17, 128], [4, 17, 17, 320], [4, 17, 17, 96],
               [4, 17, 17, 384], [4, 35, 35, 96], [4, 35, 35, 64],
               [4, 35, 35, 64], [4, 35, 35, 64], [4, 35, 35, 48],
               [4, 35, 35, 96], [4, 35, 35, 32], [4, 35, 35, 64],
               [4, 35, 35, 48], [4, 71, 71, 192], [4, 73, 73, 64],
               [4, 147, 147, 64]]
  strides = [
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1
  ]
  # Shrink sizes to make the test faster
  for i in input_sizes:
    i[3] //= shrink
  for f in filter_sizes:
    f[2] //= shrink
    f[3] //= shrink
  for o in out_sizes:
    o[3] //= shrink
  # pylint: disable=invalid-name
  VALID = "VALID"
  SAME = "SAME"
  # pylint: enable=invalid-name
  paddings = [
      SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
      VALID, SAME, SAME, VALID, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
      SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
      SAME, SAME, SAME, SAME, SAME, VALID, VALID, SAME, SAME, SAME, SAME, SAME,
      SAME, SAME, SAME, SAME, VALID, VALID, VALID
  ]
  for i, f, o, s, p in zip(input_sizes, filter_sizes, out_sizes, strides,
                           paddings):
    yield i, f, o, s, p

def GetTestConfigs():
  """Get all the valid test configs to run.

  Returns:
    all the valid test configs as tuples of data_format and use_gpu.
  """
  test_configs = [("NHWC", False), ("NHWC", True)]
  if test.is_gpu_available(cuda_only=True):
    # "NCHW" format is only supported on CUDA.
    test_configs += [("NCHW", True)]
  return test_configs


class Conv2DTest(test.TestCase):

  def _DtypesToTest(self, use_gpu):
    if test_util.IsMklEnabled():
      return [dtypes.float32]
    # double datatype is currently not supported for convolution ops
    # on the ROCm platform
    optional_float64 = [] if test.is_built_with_rocm() else [dtypes.float64]
    if use_gpu and not test_util.GpuSupportsHalfMatMulAndConv():
      return [dtypes.float32] + optional_float64
    else:
      # It is important that float32 comes before float16 here,
      # as we will be using its gradients as reference for fp16 gradients.
      return [dtypes.float32, dtypes.float16] + optional_float64

  def _CreateNumpyTensor(self, shape):
    total_size = 1
    for s in shape:
      total_size *= s
    return np.arange(1, total_size + 1, dtype=np.float32).reshape(shape)

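  # For example, _CreateNumpyTensor([2, 3]) above returns the float32 array
  # [[1., 2., 3.], [4., 5., 6.]]. Deterministic inputs keep the hand-computed
  # expected values in the tests below stable across runs.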
  def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, dilations,
                            strides, padding, data_format, dtype, use_gpu):
    """Builds a convolution with the given parameters on the chosen device.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      dilations: Dilation rate: [row_dilation, col_dilation]
      strides: Stride: [row_stride, col_stride]
      padding: Padding type.
      data_format: Format of the data tensors.
      dtype: Data type for inputs and outputs.
      use_gpu: True if the operations should be run on GPU.

    Returns:
      Symbolic tensor value that can be used to execute the computation.
    """
    x1 = self._CreateNumpyTensor(tensor_in_sizes)
    x2 = self._CreateNumpyTensor(filter_in_sizes)

    with test_util.device(use_gpu):
      t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
      t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
      strides = [1] + strides + [1]
      dilations = [1] + dilations + [1]
      if isinstance(padding, (list, tuple)):
        padding = [(0, 0)] + padding + [(0, 0)]
      if data_format == "NCHW":
        t1 = test_util.NHWCToNCHW(t1)
        strides = test_util.NHWCToNCHW(strides)
        dilations = test_util.NHWCToNCHW(dilations)
        if isinstance(padding, (list, tuple)):
          padding = test_util.NHWCToNCHW(padding)
      conv = nn_ops.conv2d(
          t1,
          t2,
          dilations=dilations,
          strides=strides,
          padding=padding,
          data_format=data_format)
      self.assertEqual(conv.dtype, dtype)
      if data_format == "NCHW":
        conv = test_util.NCHWToNHWC(conv)

      return conv

  def _CompareFwdValues(self, tensor_in_sizes, filter_in_sizes, conv_strides,
                        padding):
    """Verifies that CPU and GPU produce the same values.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      conv_strides: [row_stride, col_stride] for the convolution.
      padding: Padding type.
    """
    x1 = np.random.rand(*tensor_in_sizes).astype(np.float32)
    x2 = np.random.rand(*filter_in_sizes).astype(np.float32)

    def _SetupVal(data_format, use_gpu):
      with test_util.device(use_gpu):
        t1 = constant_op.constant(x1, shape=tensor_in_sizes)
        t2 = constant_op.constant(x2, shape=filter_in_sizes)
        strides = [1] + conv_strides + [1]
        if data_format == "NCHW":
          t1 = test_util.NHWCToNCHW(t1)
          strides = test_util.NHWCToNCHW(strides)
        conv = nn_ops.conv2d(
            t1, t2, strides=strides, padding=padding, data_format=data_format)
        if data_format == "NCHW":
          conv = test_util.NCHWToNHWC(conv)
        return conv

    tensors = []
    for (data_format, use_gpu) in GetTestConfigs():
      tensors.append(_SetupVal(data_format, use_gpu))
    values = self.evaluate(tensors)
    for i in range(1, len(values)):
      self.assertAllClose(values[0], values[i], rtol=1e-3, atol=1e-3)

  def _ComputeReferenceDilatedConv(self, tensor_in_sizes, filter_in_sizes,
                                   stride, dilation, padding, data_format,
                                   use_gpu):
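    # Builds the same dilated convolution twice: nn_ops.convolution serves as
    # the reference result and nn_ops.conv2d (with explicit dilations) as the
    # computed result, so callers can compare the two.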
    x1 = self._CreateNumpyTensor(tensor_in_sizes)
    x2 = self._CreateNumpyTensor(filter_in_sizes)
    with test_util.device(use_gpu):
      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
      t2 = constant_op.constant(x2, shape=filter_in_sizes)
      if isinstance(stride, collections_abc.Iterable):
        strides = list(stride)
      else:
        strides = [stride, stride]
      if data_format == "NCHW":
        t1 = test_util.NHWCToNCHW(t1)
        full_strides = [1, 1] + strides
        full_dilation = [1, 1] + dilation
      else:
        full_strides = [1] + strides + [1]
        full_dilation = [1] + dilation + [1]
      expected = nn_ops.convolution(
          t1,
          t2,
          padding=padding,
          strides=strides,
          dilation_rate=dilation,
          data_format=data_format)
      computed = nn_ops.conv2d(
          t1,
          t2,
          strides=full_strides,
          dilations=full_dilation,
          padding=padding,
          data_format=data_format)
      if data_format == "NCHW":
        expected = test_util.NCHWToNHWC(expected)
        computed = test_util.NCHWToNHWC(computed)
    return expected, computed

  def _VerifyDilatedConvValues(self, tensor_in_sizes, filter_in_sizes, strides,
                               padding, dilations, rtol=1e-4):
    expected_results = []
    computed_results = []
    for data_format, use_gpu in GetTestConfigs():
      expected, computed = self._ComputeReferenceDilatedConv(
          tensor_in_sizes, filter_in_sizes, strides, dilations, padding,
          data_format, use_gpu)
      expected_results.append(expected)
      computed_results.append(computed)
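    # Note: use_gpu here holds the value from the final iteration of the loop
    # above, so a single tolerance is applied to all collected results.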
    tolerance = 1e-2 if use_gpu else 1e-5
    expected_values = self.evaluate(expected_results)
    computed_values = self.evaluate(computed_results)
    for e_value, c_value in zip(expected_values, computed_values):
      tf_logging.debug("expected = %s", e_value)
      tf_logging.debug("actual = %s", c_value)
      self.assertAllClose(
          e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=rtol)

  def _VerifyValues(self,
                    tensor_in_sizes,
                    filter_in_sizes,
                    strides,
                    padding,
                    expected,
                    dilations=(1, 1),
                    gpu_only=False,
                    test_grappler_layout_optimizer=False,
                    tol=1e-5,
                    fp16_tol=1e-3):
    if gpu_only and not test.is_gpu_available(cuda_only=True):
      return
    tensors = []
    dilations = list(dilations)
    for (data_format, use_gpu) in GetTestConfigs():
      if gpu_only and not use_gpu:
        continue
      dtypes_to_test = self._DtypesToTest(use_gpu)
      if not test_grappler_layout_optimizer and data_format == "NHWC":
        dtypes_to_test.append(dtypes.int32)
      for dtype in dtypes_to_test:
        result = self._SetupValuesForDevice(
            tensor_in_sizes,
            filter_in_sizes,
            dilations,
            strides,
            padding,
            data_format,
            dtype,
            use_gpu=use_gpu)
        if test_grappler_layout_optimizer and data_format == "NHWC" and use_gpu:
          # Grappler's layout optimizer will not optimize a fetch node, so
          # this identity allows Grappler to optimize the Conv2D node.
          result = array_ops.identity(result)
        tensors.append(result)
      values = self.evaluate(tensors)
      for i in range(len(tensors)):
        conv = tensors[i]
        value = values[i]
        tf_logging.debug("expected = %s", expected)
        tf_logging.debug("actual = %s", value)
        tol_to_use = fp16_tol if value.dtype == np.float16 else tol
        if np.issubdtype(value.dtype, np.integer):
          self.assertAllEqual(np.rint(expected), np.ravel(value))
        else:
          self.assertAllClose(expected, np.ravel(value), atol=tol_to_use,
                              rtol=tol_to_use)
        self.assertShapeEqual(value, conv)
        self.assertEqual(value.dtype, conv.dtype.as_numpy_dtype)

  def _VerifyExplicitPaddings(self,
                              tensor_in_sizes,
                              filter_in_sizes,
                              strides,
                              padding,
                              dilations=(1, 1),
                              test_grappler_layout_optimizer=False,
                              tol=1e-5,
                              fp16_tol=1e-3):
    """Verifies Conv2D with explicit padding generates correct values.

    It does this by comparing with Conv2D without explicit padding. This
    function assumes Conv2D without explicit padding works correctly.

    Args:
      tensor_in_sizes: Input tensor dimensions in [batch, input_rows,
        input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in [kernel_rows, kernel_cols,
        input_depth, output_depth].
      strides: [row_stride, col_stride] for the convolution.
      padding: Explicit padding amounts.
      dilations: [row_dilation, col_dilation] for the convolution.
      test_grappler_layout_optimizer: If True, allow the Grappler layout
        optimizer to run, which turns NHWC Conv2Ds on the GPU to NCHW Conv2Ds.
      tol: The absolute and relative tolerance for non-fp16 dtypes.
      fp16_tol: The absolute and relative tolerance for fp16.
    """
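    # For example, padding=[[1, 0], [0, 2]] pads the input with one extra row
    # on top and two extra columns on the right; the padded input is then
    # convolved with "VALID" padding to produce the reference values.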
    input_tensor = self._CreateNumpyTensor(tensor_in_sizes)
    filter_tensor = self._CreateNumpyTensor(filter_in_sizes)
    input_tensor = array_ops.pad(input_tensor, [(0, 0)] + padding + [(0, 0)])
    dilations = list(dilations)
    conv2d_result = nn_ops.conv2d(
        input_tensor,
        filter_tensor, [1] + list(strides) + [1],
        "VALID",
        dilations=[1] + dilations + [1])
    expected = list(self.evaluate(array_ops.reshape(conv2d_result, [-1])))
    self._VerifyValues(
        tensor_in_sizes,
        filter_in_sizes,
        strides,
        padding,
        expected,
        dilations,
        test_grappler_layout_optimizer=test_grappler_layout_optimizer,
        tol=tol,
        fp16_tol=fp16_tol)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D1x1Filter(self):
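    # With input values 1..18 and filter values 1..9 from _CreateNumpyTensor,
    # the first pixel's output channel 0 is 1*1 + 2*4 + 3*7 = 30 and channel 1
    # is 1*2 + 2*5 + 3*8 = 36, matching the first entries below.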
    expected_output = [
        30.0, 36.0, 42.0, 66.0, 81.0, 96.0, 102.0, 126.0, 150.0, 138.0, 171.0,
        204.0, 174.0, 216.0, 258.0, 210.0, 261.0, 312.0
    ]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[1, 1, 3, 3],
        strides=[1, 1],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DExpandedBatch(self):
    tensor_in_sizes_batch = [10, 2, 3, 3]
    tensor_in_sizes_expanded_batch = [2, 5, 2, 3, 3]
    filter_in_sizes = [1, 1, 3, 3]
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    x1 = self._CreateNumpyTensor(tensor_in_sizes_batch)
    x2 = x1.reshape(tensor_in_sizes_expanded_batch)
    conv1 = nn_ops.conv2d(
        x1,
        filter_in,
        strides=[1, 1],
        padding="VALID")
    conv2 = nn_ops.conv2d(
        x2,
        filter_in,
        strides=[1, 1],
        padding="VALID")
    self.assertEqual(conv1.shape, tensor_in_sizes_batch)
    self.assertEqual(conv2.shape, tensor_in_sizes_expanded_batch)
    self.assertAllEqual(
        conv1,
        self.evaluate(conv2).reshape(conv1.shape))

  @test_util.run_in_graph_and_eager_modes
  def testConvolutionClass2DExpandedBatch(self):
    tensor_in_sizes_batch = [10, 2, 3, 3]
    tensor_in_sizes_expanded_batch = [2, 5, 2, 3, 3]
    filter_in_sizes = [1, 1, 3, 3]
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    x1 = self._CreateNumpyTensor(tensor_in_sizes_batch)
    x2 = x1.reshape(tensor_in_sizes_expanded_batch)
    convolver1 = nn_ops.Convolution(
        input_shape=x1.shape,
        filter_shape=filter_in.shape,
        strides=[1, 1],
        padding="VALID")
    self.assertEqual(convolver1.num_batch_dims, 1)
    convolver2 = nn_ops.Convolution(
        input_shape=x2.shape,
        filter_shape=filter_in.shape,
        strides=[1, 1],
        padding="VALID")
    self.assertEqual(convolver2.num_batch_dims, 2)
    conv1 = convolver1(x1, filter_in)
    conv2 = convolver2(x2, filter_in)
    self.assertEqual(conv1.shape, tensor_in_sizes_batch)
    self.assertEqual(conv2.shape, tensor_in_sizes_expanded_batch)
    self.assertAllEqual(
        conv1,
        self.evaluate(conv2).reshape(conv1.shape))

  @test_util.run_in_graph_and_eager_modes
  def testConvolutionWith2SpatialDimensionsAndExpandedBatch(self):
    tensor_in_sizes_batch = [10, 2, 3, 3]
    tensor_in_sizes_expanded_batch = [2, 5, 2, 3, 3]
    filter_in_sizes = [1, 1, 3, 3]
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    x1 = self._CreateNumpyTensor(tensor_in_sizes_batch)
    x2 = x1.reshape(tensor_in_sizes_expanded_batch)
    conv1 = nn_ops.convolution(
        x1,
        filter_in,
        strides=[1, 1],
        padding="VALID")
    conv2 = nn_ops.convolution(
        x2,
        filter_in,
        strides=[1, 1],
        padding="VALID")
    self.assertEqual(conv1.shape, tensor_in_sizes_batch)
    self.assertEqual(conv2.shape, tensor_in_sizes_expanded_batch)
    self.assertAllEqual(
        conv1,
        self.evaluate(conv2).reshape(conv1.shape))

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Filter2x1Dilation(self):
    self._VerifyDilatedConvValues(
        tensor_in_sizes=[1, 4, 4, 1],
        filter_in_sizes=[2, 2, 1, 1],
        strides=[1, 1],
        dilations=[2, 1],
        padding="VALID")

  @test_util.run_in_graph_and_eager_modes
  def testConv2DEmpty(self):
    expected_output = []
    self._VerifyValues(
        tensor_in_sizes=[0, 2, 3, 3],
        filter_in_sizes=[1, 1, 3, 3],
        strides=[1, 1],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DEmptyDilation(self):
    self._VerifyDilatedConvValues(
        tensor_in_sizes=[0, 2, 3, 3],
        filter_in_sizes=[1, 1, 3, 3],
        strides=[1, 1],
        dilations=[2, 1],
        padding="VALID")

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Filter(self):
    # The outputs are computed using third_party/py/IPython/notebook.
    expected_output = [2271.0, 2367.0, 2463.0, 2901.0, 3033.0, 3165.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        strides=[1, 1],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2FilterDilation(self):
    self._VerifyDilatedConvValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        strides=[1, 1],
        dilations=[1, 2],
        padding="VALID")

  @test_util.run_in_graph_and_eager_modes
  def testConv2D1x2Filter(self):
    # The outputs are computed using third_party/py/IPython/notebook.
    expected_output = [
        231.0, 252.0, 273.0, 384.0, 423.0, 462.0, 690.0, 765.0, 840.0, 843.0,
        936.0, 1029.0
    ]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[1, 2, 3, 3],
        strides=[1, 1],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D1x2FilterDilation(self):
    self._VerifyDilatedConvValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[1, 2, 3, 3],
        strides=[1, 1],
        dilations=[2, 1],
        padding="VALID")

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2FilterStride2(self):
    expected_output = [2271.0, 2367.0, 2463.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        strides=[2, 2],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2FilterStride2Same(self):
    expected_output = [2271.0, 2367.0, 2463.0, 1230.0, 1305.0, 1380.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        strides=[2, 2],
        padding="SAME",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2FilterStride1x2(self):
    expected_output = [58.0, 78.0, 98.0, 118.0, 138.0, 158.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 3, 6, 1],
        filter_in_sizes=[2, 2, 1, 1],
        strides=[1, 2],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DKernelSmallerThanStrideValid(self):
    expected_output = [65, 95, 275, 305]
    self._VerifyValues(
        tensor_in_sizes=[1, 7, 7, 1],
        filter_in_sizes=[2, 2, 1, 1],
        strides=[3, 3],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DKernelSmallerThanStrideSame(self):
    self._VerifyValues(
        tensor_in_sizes=[1, 3, 3, 1],
        filter_in_sizes=[1, 1, 1, 1],
        strides=[2, 2],
        padding="SAME",
        expected=[1, 3, 7, 9])

    self._VerifyValues(
        tensor_in_sizes=[1, 4, 4, 1],
        filter_in_sizes=[1, 1, 1, 1],
        strides=[2, 2],
        padding="SAME",
        expected=[1, 3, 9, 11])

    self._VerifyValues(
        tensor_in_sizes=[1, 4, 4, 1],
        filter_in_sizes=[2, 2, 1, 1],
        strides=[3, 3],
        padding="SAME",
        expected=[44, 28, 41, 16])

  @test_util.run_in_graph_and_eager_modes
  def testConv2DKernelSizeMatchesInputSize(self):
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 2, 1],
        filter_in_sizes=[2, 2, 1, 2],
        strides=[1, 1],
        padding="VALID",
        expected=[50, 60])

  @test_util.run_in_graph_and_eager_modes
  def testConv2DKernelSizeMatchesInputSizeDilation(self):
    self._VerifyDilatedConvValues(
        tensor_in_sizes=[1, 3, 3, 1],
        filter_in_sizes=[2, 2, 1, 2],
        strides=[1, 1],
        dilations=[2, 2],
        padding="VALID")

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D0x0Padding(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        strides=[1, 1],
        padding=[[0, 0], [0, 0]])

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[3, 4, 3, 2],
        filter_in_sizes=[1, 1, 2, 1],
        strides=[2, 2],
        padding=[[0, 0], [0, 0]])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D1x1Padding(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 3, 2],
        filter_in_sizes=[2, 2, 2, 2],
        strides=[1, 1],
        padding=[[1, 1], [1, 1]])

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 2, 1],
        filter_in_sizes=[1, 1, 1, 2],
        strides=[1, 1],
        padding=[[1, 1], [1, 1]])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D2x2Padding(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 1, 2],
        filter_in_sizes=[2, 1, 2, 1],
        strides=[1, 1],
        padding=[[2, 2], [2, 2]])

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 1, 2],
        filter_in_sizes=[1, 1, 2, 1],
        strides=[2, 1],
        padding=[[2, 2], [2, 2]])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2DOnlyBottomPadding(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 2],
        strides=[1, 1],
        padding=[[0, 3], [0, 0]], tol=2e-5)

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[2, 2, 4, 3],
        filter_in_sizes=[1, 2, 3, 2],
        strides=[2, 2],
        padding=[[0, 3], [0, 0]])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2DOnlyTopRightPadding(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 2],
        strides=[1, 1],
        padding=[[1, 0], [0, 2]],
        tol=5e-5)

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 4, 2],
        filter_in_sizes=[2, 2, 2, 2],
        strides=[1, 3],
        padding=[[1, 0], [0, 2]])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2DLotsPadding(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 1, 1, 3],
        filter_in_sizes=[2, 2, 3, 3],
        strides=[1, 1],
        padding=[[3, 4], [4, 2]])

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 1, 1],
        filter_in_sizes=[2, 2, 1, 3],
        strides=[2, 1],
        padding=[[3, 4], [4, 2]])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2DExplicitPaddingWithDilations(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 3, 2, 1],
        filter_in_sizes=[1, 2, 1, 2],
        strides=[1, 1],
        padding=[[1, 0], [0, 1]],
        dilations=[2, 1])

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 3, 2],
        filter_in_sizes=[3, 2, 2, 1],
        strides=[1, 1],
        padding=[[2, 1], [1, 2]],
        dilations=[2, 3])

  def testConv2DExplicitPaddingWithLayoutOptimizer(self):
    # Test with Grappler's layout optimizer, to ensure the layout optimizer
    # handles explicit padding correctly.
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 3, 2, 1],
        filter_in_sizes=[1, 2, 1, 2],
        strides=[1, 1],
        padding=[[1, 0], [0, 1]],
        dilations=[2, 1],
        test_grappler_layout_optimizer=True)

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 3, 2],
        filter_in_sizes=[3, 2, 2, 1],
        strides=[1, 1],
        padding=[[2, 1], [1, 2]],
        dilations=[2, 3],
        test_grappler_layout_optimizer=True)

  def _VerifyGroupConvFwd(self, tensor_in_sizes, filter_in_sizes, dilations,
                          strides, padding, data_format, dtype):
    """Verifies group convolution matches a per-group for-loop implementation.

    Args:
      tensor_in_sizes: Input tensor dimensions in [batch, input_rows,
        input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in [kernel_rows, kernel_cols,
        input_depth, output_depth].
      dilations: Dilation rate: [row_dilation, col_dilation]
      strides: Stride: [row_stride, col_stride]
      padding: Padding type.
      data_format: Format of the data tensors.
      dtype: Data type for inputs and outputs.
    """
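    # For example, tensor_in_sizes=[10, 32, 32, 16] with
    # filter_in_sizes=[3, 3, 4, 8] gives num_groups = 16 // 4 = 4: the input
    # is split into four depth-4 groups and the filter into four
    # output-depth-2 groups, and the concatenated per-group results must
    # match the single grouped conv2d call.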
    tensor_in = self._CreateNumpyTensor(tensor_in_sizes)
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    num_groups = tensor_in_sizes[3] // filter_in_sizes[2]
    assert num_groups > 1 and \
        filter_in_sizes[2] * num_groups == tensor_in_sizes[3]
    with test_util.device(True):
      t1 = constant_op.constant(tensor_in, dtype=dtype)
      t2 = constant_op.constant(filter_in, dtype=dtype)
      strides = [1] + strides + [1]
      dilations = [1] + dilations + [1]
      if data_format == "NCHW":
        t1 = test_util.NHWCToNCHW(t1)
        strides = test_util.NHWCToNCHW(strides)
        dilations = test_util.NHWCToNCHW(dilations)
        t1_splits = array_ops.split(t1, num_groups, axis=1)
      else:
        t1_splits = array_ops.split(t1, num_groups, axis=3)
      t2_splits = array_ops.split(t2, num_groups, axis=3)

      def MakeConv2d(inputs, filters):
        return nn_ops.conv2d(
            inputs,
            filters,
            strides,
            padding,
            dilations=dilations,
            data_format=data_format)

      group_conv = MakeConv2d(t1, t2)
      group_conv_loop = array_ops.concat(
          [MakeConv2d(t1s, t2s) for t1s, t2s in zip(t1_splits, t2_splits)],
          axis=1 if data_format == "NCHW" else 3)

      results = self.evaluate([group_conv, group_conv_loop])
      tol_to_use = 1e-5
      self.assertAllClose(
          results[0], results[1], atol=tol_to_use, rtol=tol_to_use)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DGroupConvFwd(self):
    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
      data_formats = ["NHWC", "NCHW"]
    else:
      data_formats = ["NHWC"]
    for data_format in data_formats:
      for dilation in [1, 2]:
        for stride in [1, 2]:
          for filter_dims in [[3, 3, 4, 8], [1, 1, 2, 16]]:
            self._VerifyGroupConvFwd([10, 32, 32, 16], filter_dims,
                                     dilations=[dilation, dilation],
                                     strides=[stride, stride],
                                     padding="SAME",
                                     data_format=data_format,
                                     dtype=dtypes.float32)

  @test_util.deprecated_graph_mode_only
  @test_util.run_cuda_only
  def testInputGradientGroupConv(self):
    for data_format in ["NCHW", "NHWC"]:
      for test_input in [True, False]:
        self.ConstructAndTestGradient(
            batch=2,
            input_rows=5,
            input_cols=4,
            filter_rows=3,
            filter_cols=3,
            num_groups=2,
            padding="VALID",
            in_depth=4,
            out_depth=6,
            stride_rows=1,
            stride_cols=1,
            test_input=test_input,
            data_format=data_format,
            use_gpu=True,
            max_err=0.005)

  @test_util.deprecated_graph_mode_only
  @test_util.run_cuda_only
  def testFilterGradientGroupConv(self):
    for data_format in ["NCHW", "NHWC"]:
      for test_input in [True, False]:
        self.ConstructAndTestGradient(
            batch=2,
            input_rows=5,
            input_cols=4,
            filter_rows=3,
            filter_cols=3,
            num_groups=2,
            padding="VALID",
            in_depth=4,
            out_depth=6,
            stride_rows=1,
            stride_cols=1,
            test_input=test_input,
            data_format=data_format,
            use_gpu=True,
            max_err=0.005)
  # TODO(yzhwang): this currently fails.
  # self._VerifyValues(tensor_in_sizes=[1, 8, 8, 1],
  #                   filter_in_sizes=[2, 2, 1, 1],
  #                   strides=[4, 4], padding="SAME",
  #                   expected=[72, 112, 392, 432])

  # Testing for backprops
  def _RunAndVerifyBackpropInput(self,
                                 input_sizes,
                                 filter_sizes,
                                 output_sizes,
                                 strides,
                                 padding,
                                 expected,
                                 data_format,
                                 use_gpu,
                                 err,
                                 dilations=(1, 1)):
    if use_gpu and not test.is_gpu_available(cuda_only=True):
      return
    x1 = self._CreateNumpyTensor(filter_sizes)
    x2 = self._CreateNumpyTensor(output_sizes)
    dilations = list(dilations)
    with test_util.device(use_gpu):
      if len(input_sizes) == 4:
        if data_format == "NCHW":
          input_sizes = test_util.NHWCToNCHW(input_sizes)
      t0 = constant_op.constant(input_sizes, shape=[len(input_sizes)])
      t1 = constant_op.constant(x1, shape=filter_sizes)
      t2 = constant_op.constant(x2, shape=output_sizes)
      strides = [1] + strides + [1]
      dilations = [1] + dilations + [1]
      if isinstance(padding, (list, tuple)):
        padding = [(0, 0)] + padding + [(0, 0)]
      if data_format == "NCHW":
        t2 = test_util.NHWCToNCHW(t2)
        strides = test_util.NHWCToNCHW(strides)
        dilations = test_util.NHWCToNCHW(dilations)
        if isinstance(padding, (list, tuple)):
          padding = test_util.NHWCToNCHW(padding)
      conv = nn_ops.conv2d_backprop_input(
          t0,
          t1,
          t2,
          strides=strides,
          padding=padding,
          data_format=data_format,
          dilations=dilations)
      if data_format == "NCHW":
        conv = test_util.NCHWToNHWC(conv)
      # "value" is the gradient of the convolution with respect to its input.
      value = self.evaluate(conv)
      self.assertShapeEqual(value, conv)
    tf_logging.debug("expected = %s", expected)
    tf_logging.debug("actual = %s", value)
    self.assertAllCloseAccordingToType(expected, value.flatten(), atol=err)

  def _CompareBackpropInput(self, input_sizes, filter_sizes, output_sizes,
                            conv_strides, padding):
    x1 = np.random.rand(*filter_sizes).astype(np.float32)
    x2 = np.random.rand(*output_sizes).astype(np.float32)

    def _GetVal(data_format, use_gpu):
      with test_util.device(use_gpu):
        if data_format == "NCHW":
          new_input_sizes = test_util.NHWCToNCHW(input_sizes)
        else:
          new_input_sizes = input_sizes
        t0 = constant_op.constant(new_input_sizes, shape=[len(new_input_sizes)])
        t1 = constant_op.constant(x1, shape=filter_sizes)
        t2 = constant_op.constant(x2, shape=output_sizes)
        strides = [1] + conv_strides + [1]
        if data_format == "NCHW":
          t2 = test_util.NHWCToNCHW(t2)
          strides = test_util.NHWCToNCHW(strides)
        conv = nn_ops.conv2d_backprop_input(
            t0,
            t1,
            t2,
            strides=strides,
            padding=padding,
            data_format=data_format)
        if data_format == "NCHW":
          conv = test_util.NCHWToNHWC(conv)
        ret = self.evaluate(conv)
        self.assertShapeEqual(ret, conv)
        return ret

    values = []
    for (data_format, use_gpu) in GetTestConfigs():
      values.append(_GetVal(data_format, use_gpu))

    for i in range(1, len(values)):
      self.assertAllClose(values[0], values[i], rtol=1e-2, atol=1e-2)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Depth1ValidBackpropInput(self):
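    # With filter values 1..4 and upstream gradients 1..2 at the two VALID
    # output positions, the input gradient at position (0, 1), for example,
    # is 2*1 + 1*2 = 4, the second entry below.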
    expected_output = [1.0, 4.0, 4.0, 3.0, 10.0, 8.0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInput(
          input_sizes=[1, 2, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 1, 2, 1],
          strides=[1, 1],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-5)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DEmptyBackpropInput(self):
    expected_output = []
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInput(
          input_sizes=[0, 2, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[0, 1, 2, 1],
          strides=[1, 1],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-5)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Depth3ValidBackpropInput(self):
    expected_output = [
        14.0, 32.0, 50.0, 100.0, 163.0, 226.0, 167.0, 212.0, 257.0, 122.0,
        140.0, 158.0, 478.0, 541.0, 604.0, 437.0, 482.0, 527.0
    ]
    for (data_format, use_gpu) in GetTestConfigs():
      # The GPU version of this test is not very stable, so the error
      # threshold is loosened to 1e-4.
      self._RunAndVerifyBackpropInput(
          input_sizes=[1, 2, 3, 3],
          filter_sizes=[2, 2, 3, 3],
          output_sizes=[1, 1, 2, 3],
          strides=[1, 1],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-4)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Depth3ValidBackpropInputStride1x2(self):
    expected_output = [
        1.0, 2.0, 2.0, 4.0, 3.0, 6.0, 7.0, 12.0, 11.0, 18.0, 15.0, 24.0, 12.0,
        16.0, 15.0, 20.0, 18.0, 24.0
    ]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInput(
          input_sizes=[1, 3, 6, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 2, 3, 1],
          strides=[1, 2],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-5)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DStrideTwoFilterOneSameBackpropInput(self):
    expected_output = [
        1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 4.0, 0.0, 0.0, 0.0,
        0.0, 0.0
    ]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInput(
          input_sizes=[1, 4, 4, 1],
          filter_sizes=[1, 1, 1, 1],
          output_sizes=[1, 2, 2, 1],
          strides=[2, 2],
          padding="SAME",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-5)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DKernelSizeMatchesInputSizeBackpropInput(self):
    expected_output = [5.0, 11.0, 17.0, 23.0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInput(
          input_sizes=[1, 2, 2, 1],
          filter_sizes=[2, 2, 1, 2],
          output_sizes=[1, 1, 1, 2],
          strides=[1, 1],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-5)

  @test_util.run_in_graph_and_eager_modes
  @test_util.disable_xla("XLA requires input_sizes to be a 4D shape.")
  def testConv2DInputSizesContainsOnlySpatialDimensionsBackpropInput(self):
    expected_output = [5.0, 11.0, 17.0, 23.0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInput(
          input_sizes=[2, 2],
          filter_sizes=[2, 2, 1, 2],
          output_sizes=[1, 1, 1, 2],
          strides=[1, 1],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-5)

  # Testing for backprops
  def _RunAndVerifyBackpropFilter(self,
                                  input_sizes,
                                  filter_sizes,
                                  output_sizes,
                                  strides,
                                  padding,
                                  expected,
                                  data_format,
                                  use_gpu,
                                  dilations=(1, 1),
                                  err=1e-5):
    x0 = self._CreateNumpyTensor(input_sizes)
    x2 = self._CreateNumpyTensor(output_sizes)
    dilations = list(dilations)
    explicit_strides = [1] + strides + [1]
    new_padding = padding
    new_dilations = [1] + dilations + [1]
    if isinstance(new_padding, (list, tuple)):
      new_padding = [(0, 0)] + new_padding + [(0, 0)]
    if data_format == "NCHW":
      explicit_strides = test_util.NHWCToNCHW(explicit_strides)
      new_dilations = test_util.NHWCToNCHW(new_dilations)
      if isinstance(padding, (list, tuple)):
        new_padding = test_util.NHWCToNCHW(new_padding)
    for dtype in self._DtypesToTest(use_gpu=use_gpu):
      with test_util.device(use_gpu):
        t0 = constant_op.constant(x0, shape=input_sizes, dtype=dtype)
        t1 = constant_op.constant(filter_sizes, shape=[len(filter_sizes)])
        t2 = constant_op.constant(x2, shape=output_sizes, dtype=dtype)
        if data_format == "NCHW":
          t0 = test_util.NHWCToNCHW(t0)
          t2 = test_util.NHWCToNCHW(t2)
        conv = nn_ops.conv2d_backprop_filter(
            t0,
            t1,
            t2,
            strides=explicit_strides,
            padding=new_padding,
            dilations=new_dilations,
            data_format=data_format)
        value = self.evaluate(conv)
        self.assertShapeEqual(value, conv)
      tf_logging.debug("expected = %s", expected)
      tf_logging.debug("actual = %s", value)
      self.assertArrayNear(expected, value.flatten(), err)

  def _CompareBackFilter(self, input_sizes, filter_sizes, output_sizes,
                         conv_strides, padding):
    x0 = np.random.rand(*input_sizes).astype(np.float32)
    x2 = np.random.rand(*output_sizes).astype(np.float32)

    def _GetVal(data_format, use_gpu):
      with test_util.device(use_gpu):
        t0 = constant_op.constant(x0, shape=input_sizes)
        t1 = constant_op.constant(filter_sizes, shape=[len(filter_sizes)])
        t2 = constant_op.constant(x2, shape=output_sizes)
        strides = [1] + conv_strides + [1]
        if data_format == "NCHW":
          t0 = test_util.NHWCToNCHW(t0)
          t2 = test_util.NHWCToNCHW(t2)
          strides = test_util.NHWCToNCHW(strides)
        conv = nn_ops.conv2d_backprop_filter(
            t0,
            t1,
            t2,
            strides=strides,
            padding=padding,
            data_format=data_format)
        ret = self.evaluate(conv)
        self.assertShapeEqual(ret, conv)
        return ret

    values = []
    for (data_format, use_gpu) in GetTestConfigs():
      values.append(_GetVal(data_format, use_gpu))
    for i in range(1, len(values)):
      self.assertAllClose(values[0], values[i], rtol=1e-4, atol=1e-4)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Depth1ValidBackpropFilter(self):
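    # With input values 1..6 and upstream gradients 1..2 at the two VALID
    # output positions, the filter gradient at tap (0, 0), for example, is
    # 1*1 + 2*2 = 5, the first entry below.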
    expected = [5.0, 8.0, 14.0, 17.0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[1, 2, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 1, 2, 1],
          strides=[1, 1],
          padding="VALID",
          expected=expected,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DEmptyBackpropFilter(self):
    expected = []
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[1, 2, 3, 1],
          filter_sizes=[2, 2, 1, 0],
          output_sizes=[1, 1, 2, 0],
          strides=[1, 1],
          padding="VALID",
          expected=expected,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DBackpropFilterWithEmptyInput(self):
    expected = [0, 0, 0, 0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[0, 2, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[0, 1, 2, 1],
          strides=[1, 1],
          padding="VALID",
          expected=expected,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Depth3ValidBackpropFilter(self):
    expected = [
        17.0, 22.0, 27.0, 22.0, 29.0, 36.0, 27.0, 36.0, 45.0, 32.0, 43.0, 54.0,
        37.0, 50.0, 63.0, 42.0, 57.0, 72.0, 62.0, 85.0, 108.0, 67.0, 92.0,
        117.0, 72.0, 99.0, 126.0, 77.0, 106.0, 135.0, 82.0, 113.0, 144.0, 87.0,
        120.0, 153.0
    ]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[1, 2, 3, 3],
          filter_sizes=[2, 2, 3, 3],
          output_sizes=[1, 1, 2, 3],
          strides=[1, 1],
          padding="VALID",
          expected=expected,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Depth3ValidBackpropFilterStride1x2(self):
    expected = [161.0, 182.0, 287.0, 308.0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[1, 3, 6, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 2, 3, 1],
          strides=[1, 2],
          padding="VALID",
          expected=expected,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DStrideTwoFilterOneSameBackpropFilter(self):
    expected_output = [78.]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[1, 4, 4, 1],
          filter_sizes=[1, 1, 1, 1],
          output_sizes=[1, 2, 2, 1],
          strides=[2, 2],
          padding="SAME",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DKernelSizeMatchesInputSizeBackpropFilter(self):
    expected_output = [1.0, 2.0, 2.0, 4.0, 3.0, 6.0, 4.0, 8.0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[1, 2, 2, 1],
          filter_sizes=[2, 2, 1, 2],
          output_sizes=[1, 1, 1, 2],
          strides=[1, 1],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu)

  # Testing for backprops
  def _RunAndVerifyBackpropInputDilation(self, input_sizes, filter_sizes,
                                         output_sizes, strides, dilations,
                                         padding, data_format, use_gpu, err):
    x1 = self._CreateNumpyTensor(input_sizes)
    x2 = self._CreateNumpyTensor(filter_sizes)
    default_dilations = (dilations[0] == 1 and dilations[1] == 1)
    if default_dilations or use_gpu:
      with self.cached_session(use_gpu=use_gpu):
        if data_format == "NCHW":
          input_sizes = test_util.NHWCToNCHW(input_sizes)
        t1 = constant_op.constant(x1, shape=input_sizes)
        t2 = constant_op.constant(x2, shape=filter_sizes)
        full_strides = [1] + strides + [1]
        full_dilations = [1] + dilations + [1]
        if data_format == "NCHW":
          full_strides = test_util.NHWCToNCHW(full_strides)
          full_dilations = test_util.NHWCToNCHW(full_dilations)
        conv_forward = nn_ops.conv2d(
            t1,
            t2,
            strides=full_strides,
            dilations=full_dilations,
            padding=padding,
            data_format=data_format)
        conv_forward_2 = nn_ops.convolution(
            t1,
            t2,
            padding=padding,
            strides=strides,
            dilation_rate=dilations,
            data_format=data_format)
        if data_format == "NCHW":
          conv_forward = test_util.NCHWToNHWC(conv_forward)
          conv_forward_2 = test_util.NCHWToNHWC(conv_forward_2)
        conv = gradients_impl.gradients(conv_forward, t1)[0]
        conv_2 = gradients_impl.gradients(conv_forward_2, t1)[0]
        # "values" consists of two tensors for two backprops
        value = self.evaluate(conv)
        value_2 = self.evaluate(conv_2)
        self.assertShapeEqual(value, conv)
        self.assertShapeEqual(value_2, conv_2)
      tf_logging.debug("expected = %s", value_2)
      tf_logging.debug("actual = %s", value)
      self.assertArrayNear(value_2.flatten(), value.flatten(), err)

  # Testing for backprops
  def _RunAndVerifyBackpropFilterDilation(self, input_sizes, filter_sizes,
                                          output_sizes, strides, dilations,
                                          padding, data_format, use_gpu, err):
    x1 = self._CreateNumpyTensor(input_sizes)
    x2 = self._CreateNumpyTensor(filter_sizes)
    default_dilations = (dilations[0] == 1 and dilations[1] == 1)
    if default_dilations or use_gpu:
      with self.cached_session(use_gpu=use_gpu):
        if data_format == "NCHW":
          input_sizes = test_util.NHWCToNCHW(input_sizes)
        t1 = constant_op.constant(x1, shape=input_sizes)
        t2 = constant_op.constant(x2, shape=filter_sizes)
        full_strides = [1] + strides + [1]
        full_dilations = [1] + dilations + [1]
        if data_format == "NCHW":
          full_strides = test_util.NHWCToNCHW(full_strides)
          full_dilations = test_util.NHWCToNCHW(full_dilations)
        conv_forward = nn_ops.conv2d(
            t1,
            t2,
            strides=full_strides,
            dilations=full_dilations,
            padding=padding,
            data_format=data_format)
        conv_forward_2 = nn_ops.convolution(
            t1,
            t2,
            padding=padding,
            strides=strides,
            dilation_rate=dilations,
            data_format=data_format)
        if data_format == "NCHW":
          conv_forward = test_util.NCHWToNHWC(conv_forward)
          conv_forward_2 = test_util.NCHWToNHWC(conv_forward_2)
        conv = gradients_impl.gradients(conv_forward, t2)[0]
        # Differentiate the reference convolution as well, so the two filter
        # gradients being compared come from different ops.
        conv_2 = gradients_impl.gradients(conv_forward_2, t2)[0]
        value = self.evaluate(conv)
        value_2 = self.evaluate(conv_2)
        self.assertShapeEqual(value, conv)
        self.assertShapeEqual(value_2, conv_2)
      tf_logging.debug("expected = %s", value_2)
      tf_logging.debug("actual = %s", value)
      self.assertArrayNear(value_2.flatten(), value.flatten(), err)

1383  @test_util.deprecated_graph_mode_only
1384  def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self):
1385    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
1386      for (data_format, use_gpu) in GetTestConfigs():
1387        self._RunAndVerifyBackpropFilterDilation(
1388            input_sizes=[1, 3, 6, 1],
1389            filter_sizes=[2, 2, 1, 1],
1390            output_sizes=[1, 1, 5, 1],
1391            strides=[1, 1],
1392            dilations=[2, 1],
1393            padding="VALID",
1394            data_format=data_format,
1395            use_gpu=use_gpu,
1396            err=1e-5)
1397
1398  @test_util.deprecated_graph_mode_only
1399  def testConv2D2x2Depth1ValidBackpropFilterDilation1x2(self):
1400    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
1401      for (data_format, use_gpu) in GetTestConfigs():
1402        self._RunAndVerifyBackpropFilterDilation(
1403            input_sizes=[1, 2, 3, 1],
1404            filter_sizes=[2, 2, 1, 1],
1405            output_sizes=[1, 1, 2, 1],
1406            strides=[1, 1],
1407            dilations=[1, 2],
1408            padding="VALID",
1409            data_format=data_format,
1410            use_gpu=use_gpu,
1411            err=1e-5)
1412
1413  @test_util.deprecated_graph_mode_only
1414  def testConv2DEmptyBackpropFilterDilation1x2(self):
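    # A zero output-channel count in filter_sizes/output_sizes exercises the
    # empty-tensor path of the filter backprop.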
    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
      for (data_format, use_gpu) in GetTestConfigs():
        self._RunAndVerifyBackpropFilterDilation(
            input_sizes=[1, 2, 3, 1],
            filter_sizes=[2, 2, 1, 0],
            output_sizes=[1, 1, 2, 0],
            strides=[1, 1],
            dilations=[1, 2],
            padding="VALID",
            data_format=data_format,
            use_gpu=use_gpu,
            err=1e-5)

  @test_util.deprecated_graph_mode_only
  def testConv2D2x2Depth3ValidBackpropFilterDilation2x2(self):
    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
      for (data_format, use_gpu) in GetTestConfigs():
        self._RunAndVerifyBackpropFilterDilation(
            input_sizes=[1, 3, 4, 3],
            filter_sizes=[2, 2, 3, 3],
            output_sizes=[1, 1, 2, 3],
            strides=[1, 1],
            dilations=[2, 2],
            padding="VALID",
            data_format=data_format,
            use_gpu=use_gpu,
            err=1e-5)

  @test_util.deprecated_graph_mode_only
  def testConv2DKernelSizeMatchesInputSizeBackpropFilterDilation2x2(self):
    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
      for (data_format, use_gpu) in GetTestConfigs():
        self._RunAndVerifyBackpropFilterDilation(
            input_sizes=[1, 3, 3, 1],
            filter_sizes=[2, 2, 1, 2],
            output_sizes=[1, 1, 1, 2],
            strides=[1, 1],
            dilations=[2, 2],
            padding="VALID",
            data_format=data_format,
            use_gpu=use_gpu,
            err=1e-5)

  @test_util.deprecated_graph_mode_only
  def testConv2D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self):
    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
      for (data_format, use_gpu) in GetTestConfigs():
        self._RunAndVerifyBackpropInputDilation(
            input_sizes=[1, 3, 6, 1],
            filter_sizes=[2, 2, 1, 1],
            output_sizes=[1, 1, 5, 1],
            strides=[1, 1],
            dilations=[2, 1],
            padding="VALID",
            data_format=data_format,
            use_gpu=use_gpu,
            err=1e-5)

  @test_util.deprecated_graph_mode_only
  def testConv2D2x2Depth1ValidBackpropInputDilation1x2(self):
    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
      for (data_format, use_gpu) in GetTestConfigs():
        self._RunAndVerifyBackpropInputDilation(
            input_sizes=[1, 2, 3, 1],
            filter_sizes=[2, 2, 1, 1],
            output_sizes=[1, 1, 2, 1],
            strides=[1, 1],
            dilations=[1, 2],
            padding="VALID",
            data_format=data_format,
            use_gpu=use_gpu,
            err=1e-5)

  @test_util.deprecated_graph_mode_only
  def testConv2DEmptyBackpropInputDilation1x2(self):
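    # A zero batch size in input_sizes/output_sizes exercises the
    # empty-tensor path of the input backprop.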
    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
      for (data_format, use_gpu) in GetTestConfigs():
        self._RunAndVerifyBackpropInputDilation(
            input_sizes=[0, 2, 3, 1],
            filter_sizes=[2, 2, 1, 1],
            output_sizes=[0, 1, 2, 1],
            strides=[1, 1],
            dilations=[1, 2],
            padding="VALID",
            data_format=data_format,
            use_gpu=use_gpu,
            err=1e-5)

  @test_util.deprecated_graph_mode_only
  def testConv2D2x2Depth3ValidBackpropInputDilation2x1(self):
    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
      for (data_format, use_gpu) in GetTestConfigs():
        # The GPU version of this test is not very stable, so the error
        # threshold is raised to 1e-4.
        self._RunAndVerifyBackpropInputDilation(
            input_sizes=[1, 3, 2, 3],
            filter_sizes=[2, 2, 3, 3],
            output_sizes=[1, 1, 2, 3],
            strides=[1, 1],
            dilations=[2, 1],
            padding="VALID",
            data_format=data_format,
            use_gpu=use_gpu,
            err=1e-4)

  @test_util.deprecated_graph_mode_only
  def testConv2DKernelSizeMatchesInputSizeBackpropInputDilation2x2(self):
    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
      for (data_format, use_gpu) in GetTestConfigs():
        self._RunAndVerifyBackpropInputDilation(
            input_sizes=[1, 3, 3, 1],
            filter_sizes=[2, 2, 1, 2],
            output_sizes=[1, 1, 1, 2],
            strides=[1, 1],
            dilations=[2, 2],
            padding="VALID",
            data_format=data_format,
            use_gpu=use_gpu,
            err=1e-5)

  def _RunAndVerifyBackpropInputExplicitPadding(self,
                                                input_sizes,
                                                filter_sizes,
                                                output_sizes,
                                                strides,
                                                padding,
                                                data_format,
                                                use_gpu,
                                                dilations=(1, 1),
                                                err=2e-5):
    if use_gpu and not test.is_gpu_available(cuda_only=True):
      return
    if not use_gpu and dilations != (1, 1):
      return  # Non-default dilations are currently not supported on the CPU.

    x1 = self._CreateNumpyTensor(filter_sizes)
    x2 = self._CreateNumpyTensor(output_sizes)
    dilations = list(dilations)
    padded_input_sizes = input_sizes[:]
    padded_input_sizes[1] += padding[0][0] + padding[0][1]
    padded_input_sizes[2] += padding[1][0] + padding[1][1]
    c = nn_ops.conv2d_backprop_input(
        padded_input_sizes,
        x1,
        x2,
        strides=[1] + strides + [1],
        padding="VALID",
        dilations=[1] + dilations + [1])
    c = c[:, padding[0][0]:(c.shape[1] - padding[0][1]), padding[1][0]:(
        c.shape[2] - padding[1][1]), :]
    expected = list(self.evaluate(array_ops.reshape(c, [-1])))
    self._RunAndVerifyBackpropInput(
        input_sizes,
        filter_sizes,
        output_sizes,
        strides,
        padding,
        expected,
        data_format,
        use_gpu=use_gpu,
        err=err,
        dilations=dilations)

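  # The helper above builds its reference values by enlarging the input shape
  # by the explicit padding, running a VALID-padding backprop, and slicing the
  # padded rows/cols back off. E.g. with input_sizes=[1, 2, 3, 1] and
  # padding=[[1, 8], [4, 2]] (testConv2D2x2Depth1Padding_1_8_4_1_BackpropInput
  # below), the padded shape is [1, 2 + 1 + 8, 3 + 4 + 2, 1] = [1, 11, 9, 1].
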
  @test_util.run_in_graph_and_eager_modes()
  def testConv2D2x2Depth1Padding0x0BackpropInput(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInputExplicitPadding(
          input_sizes=[1, 2, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 1, 2, 1],
          strides=[1, 1],
          padding=[[0, 0], [0, 0]],
          data_format=data_format,
          use_gpu=use_gpu)

      self._RunAndVerifyBackpropInputExplicitPadding(
          input_sizes=[1, 3, 4, 2],
          filter_sizes=[2, 2, 2, 3],
          output_sizes=[1, 1, 2, 3],
          strides=[2, 2],
          padding=[[0, 0], [0, 0]],
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D2x2Depth1Padding1x1BackpropInput(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInputExplicitPadding(
          input_sizes=[1, 2, 3, 1],
          filter_sizes=[2, 2, 1, 2],
          output_sizes=[1, 3, 4, 2],
          strides=[1, 1],
          padding=[[1, 1], [1, 1]],
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-4)

      self._RunAndVerifyBackpropInputExplicitPadding(
          input_sizes=[1, 2, 3, 2],
          filter_sizes=[1, 1, 2, 1],
          output_sizes=[1, 4, 3, 1],
          strides=[1, 2],
          padding=[[1, 1], [1, 1]],
          data_format=data_format,
          use_gpu=use_gpu)

      self._RunAndVerifyBackpropInputExplicitPadding(
          input_sizes=[1, 4, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 4, 2, 1],
          strides=[1, 2],
          padding=[[1, 1], [1, 1]],
          data_format=data_format,
          dilations=[2, 2],
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D2x2Depth1Padding2x2BackpropInput(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInputExplicitPadding(
          input_sizes=[2, 3, 1, 1],
          filter_sizes=[2, 1, 1, 1],
          output_sizes=[2, 2, 5, 1],
          strides=[3, 1],
          padding=[[2, 2], [2, 2]],
          data_format=data_format,
          use_gpu=use_gpu)

      self._RunAndVerifyBackpropInputExplicitPadding(
          input_sizes=[1, 3, 6, 1],
          filter_sizes=[3, 2, 1, 1],
          output_sizes=[1, 3, 4, 1],
          strides=[1, 2],
          padding=[[2, 2], [2, 2]],
          data_format=data_format,
          dilations=[2, 3],
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D2x2Depth1Padding_1_8_4_1_BackpropInput(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInputExplicitPadding(
          input_sizes=[1, 2, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 10, 8, 1],
          strides=[1, 1],
          padding=[[1, 8], [4, 2]],
          data_format=data_format,
          use_gpu=use_gpu,
          err=5e-5)

      self._RunAndVerifyBackpropInputExplicitPadding(
          input_sizes=[1, 5, 3, 1],
          filter_sizes=[3, 2, 1, 1],
          output_sizes=[1, 4, 8, 1],
          strides=[3, 1],
          padding=[[1, 8], [4, 2]],
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D2x2Depth1Padding_5_0_2_2_BackpropInput(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInputExplicitPadding(
          input_sizes=[1, 3, 3, 1],
          filter_sizes=[2, 1, 1, 1],
          output_sizes=[1, 7, 7, 1],
          strides=[1, 1],
          padding=[[5, 0], [2, 2]],
          data_format=data_format,
          err=5e-5,
          use_gpu=use_gpu)

      self._RunAndVerifyBackpropInputExplicitPadding(
          input_sizes=[1, 4, 2, 1],
          filter_sizes=[3, 3, 1, 1],
          output_sizes=[1, 5, 2, 1],
          strides=[1, 2],
          padding=[[5, 0], [2, 2]],
          data_format=data_format,
          dilations=[2, 1],
          use_gpu=use_gpu)

  def _RunAndVerifyBackpropFilterExplicitPadding(self,
                                                 input_sizes,
                                                 filter_sizes,
                                                 output_sizes,
                                                 strides,
                                                 padding,
                                                 data_format,
                                                 use_gpu,
                                                 dilations=(1, 1),
                                                 err=1e-5):
    if use_gpu and not test.is_gpu_available(cuda_only=True):
      return
    if not use_gpu and dilations != (1, 1):
      return  # Non-default dilations are currently not supported on the CPU.

    x0 = self._CreateNumpyTensor(input_sizes)
    x2 = self._CreateNumpyTensor(output_sizes)
    dilations = list(dilations)

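    # Pad the input up front so that a VALID-padding filter backprop yields
    # the reference gradient. Unlike the input-backprop case, no slicing is
    # needed afterwards: the filter gradient's shape is filter_sizes,
    # independent of the padding.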
    x0 = np.pad(x0, [(0, 0)] + padding + [(0, 0)], "constant")
    c = nn_ops.conv2d_backprop_filter(
        x0,
        filter_sizes,
        x2,
        strides=[1] + strides + [1],
        padding="VALID",
        dilations=[1] + dilations + [1])
    expected = list(self.evaluate(array_ops.reshape(c, [-1])))
    self._RunAndVerifyBackpropFilter(
        input_sizes,
        filter_sizes,
        output_sizes,
        strides,
        padding,
        expected,
        data_format,
        use_gpu=use_gpu,
        dilations=dilations,
        err=err)

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D2x2Depth1Padding0x0BackpropFilter(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilterExplicitPadding(
          input_sizes=[1, 2, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 1, 2, 1],
          strides=[1, 1],
          padding=[[0, 0], [0, 0]],
          data_format=data_format,
          use_gpu=use_gpu)

      self._RunAndVerifyBackpropFilterExplicitPadding(
          input_sizes=[1, 3, 4, 2],
          filter_sizes=[2, 2, 2, 3],
          output_sizes=[1, 1, 2, 3],
          strides=[2, 2],
          padding=[[0, 0], [0, 0]],
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D2x2Depth1Padding1x1BackpropFilter(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilterExplicitPadding(
          input_sizes=[1, 2, 3, 1],
          filter_sizes=[2, 2, 1, 2],
          output_sizes=[1, 3, 4, 2],
          strides=[1, 1],
          padding=[[1, 1], [1, 1]],
          data_format=data_format,
          use_gpu=use_gpu,
          err=5e-5)

      self._RunAndVerifyBackpropFilterExplicitPadding(
          input_sizes=[1, 2, 3, 2],
          filter_sizes=[1, 1, 2, 1],
          output_sizes=[1, 4, 3, 1],
          strides=[1, 2],
          padding=[[1, 1], [1, 1]],
          use_gpu=use_gpu,
          data_format=data_format)

      self._RunAndVerifyBackpropFilterExplicitPadding(
          input_sizes=[1, 4, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 4, 2, 1],
          strides=[1, 2],
          padding=[[1, 1], [1, 1]],
          data_format=data_format,
          use_gpu=use_gpu,
          dilations=[2, 2])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D2x2Depth1Padding2x2BackpropFilter(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilterExplicitPadding(
          input_sizes=[2, 3, 1, 1],
          filter_sizes=[2, 1, 1, 1],
          output_sizes=[2, 2, 5, 1],
          strides=[3, 1],
          padding=[[2, 2], [2, 2]],
          data_format=data_format,
          use_gpu=use_gpu)

      self._RunAndVerifyBackpropFilterExplicitPadding(
          input_sizes=[1, 3, 6, 1],
          filter_sizes=[3, 2, 1, 1],
          output_sizes=[1, 3, 4, 1],
          strides=[1, 2],
          padding=[[2, 2], [2, 2]],
          data_format=data_format,
          use_gpu=use_gpu,
          dilations=[2, 3])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D2x2Depth1Padding_1_8_4_1_BackpropFilter(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilterExplicitPadding(
          input_sizes=[1, 2, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 10, 8, 1],
          strides=[1, 1],
          padding=[[1, 8], [4, 2]],
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-4)

      self._RunAndVerifyBackpropFilterExplicitPadding(
          input_sizes=[1, 5, 3, 1],
          filter_sizes=[3, 2, 1, 1],
          output_sizes=[1, 4, 8, 1],
          strides=[3, 1],
          padding=[[1, 8], [4, 2]],
          use_gpu=use_gpu,
          data_format=data_format)

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D2x2Depth1Padding_5_0_2_2_BackpropFilter(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilterExplicitPadding(
          input_sizes=[1, 3, 3, 1],
          filter_sizes=[2, 1, 1, 1],
          output_sizes=[1, 7, 7, 1],
          strides=[1, 1],
          padding=[[5, 0], [2, 2]],
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-4)

      self._RunAndVerifyBackpropFilterExplicitPadding(
          input_sizes=[1, 4, 2, 1],
          filter_sizes=[3, 3, 1, 1],
          output_sizes=[1, 5, 2, 1],
          strides=[1, 2],
          padding=[[5, 0], [2, 2]],
          data_format=data_format,
          use_gpu=use_gpu,
          dilations=[2, 1])

  # Gradient checkers
  def ConstructAndTestGradient(self,
                               batch,
                               input_rows,
                               input_cols,
                               filter_rows,
                               filter_cols,
                               in_depth,
                               out_depth,
                               stride_rows,
                               stride_cols,
                               padding,
                               test_input,
                               data_format,
                               use_gpu,
                               num_groups=1,
                               max_err=0.003):
    assert in_depth % num_groups == 0 and out_depth % num_groups == 0
    input_shape = [batch, input_rows, input_cols, in_depth]
    filter_shape = [filter_rows, filter_cols, in_depth // num_groups, out_depth]
    # TODO(yangke): re-factor the computation of output shape.
    if padding == "VALID":
      output_rows = (input_rows - filter_rows + stride_rows) // stride_rows
      output_cols = (input_cols - filter_cols + stride_cols) // stride_cols
    elif padding == "SAME":
      output_rows = (input_rows + stride_rows - 1) // stride_rows
      output_cols = (input_cols + stride_cols - 1) // stride_cols
    else:
      self.assertIsInstance(padding, (list, tuple))
      output_rows = (input_rows + padding[1][0] + padding[1][1] - filter_rows +
                     stride_rows) // stride_rows
      output_cols = (input_cols + padding[2][0] + padding[2][1] - filter_cols +
                     stride_cols) // stride_cols
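      # E.g. in testInputGradient1_2_3_4PaddingStride3x2 below:
      # output_rows = (8 + 1 + 2 - 4 + 3) // 3 = 3 and
      # output_cols = (5 + 3 + 4 - 2 + 2) // 2 = 6.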
    output_shape = [batch, output_rows, output_cols, out_depth]
    input_size = 1
    for x in input_shape:
      input_size *= x
    filter_size = 1
    for x in filter_shape:
      filter_size *= x
    input_data = [x * 1.0 / input_size for x in range(0, input_size)]
    filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
    # Conv2DGrad functions are not compiled for double due to
    # a problem in the way Eigen's Conv2DGrad works for double.
    # So we disable the DOUBLE path.  We should re-enable this
    # when double support returns for CPU and/or GPU.
    for dtype in self._DtypesToTest(use_gpu=use_gpu):
      with self.cached_session(use_gpu=use_gpu):
        input_tensor = constant_op.constant(
            input_data, shape=input_shape, dtype=dtype, name="input")
        filter_tensor = constant_op.constant(
            filter_data, shape=filter_shape, dtype=dtype, name="filter")
        strides = [1, stride_rows, stride_cols, 1]
        new_padding = padding
        if data_format == "NCHW":
          new_input_tensor = test_util.NHWCToNCHW(input_tensor)
          strides = test_util.NHWCToNCHW(strides)
          if isinstance(padding, (list, tuple)):
            new_padding = test_util.NHWCToNCHW(padding)
        else:
          new_input_tensor = input_tensor
        conv = nn_ops.conv2d(
            new_input_tensor,
            filter_tensor,
            strides,
            new_padding,
            data_format=data_format,
            name="conv")
        if data_format == "NCHW":
          conv = test_util.NCHWToNHWC(conv)
        self.assertEqual(output_shape, conv.get_shape())
        if test_input:
          jacob_t, jacob_n = gradient_checker.compute_gradient(input_tensor,
                                                               input_shape,
                                                               conv,
                                                               output_shape)
        else:
          jacob_t, jacob_n = gradient_checker.compute_gradient(filter_tensor,
                                                               filter_shape,
                                                               conv,
                                                               output_shape)
        if dtype == dtypes.float32:
          reference_jacob_t = jacob_t
          err = np.fabs(jacob_t - jacob_n).max()
        else:
          # Compare fp16 theoretical gradients to fp32 theoretical gradients,
          # since fp16 numerical gradients are too imprecise.
          err = np.fabs(jacob_t - reference_jacob_t).max()

        tf_logging.debug("conv_2d gradient error = %s", err)
        self.assertLess(err, max_err)

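  # A minimal sketch (illustrative only, not collected as a test case) of the
  # check ConstructAndTestGradient performs: compare the theoretical and
  # numerical Jacobians of a small float32 convolution. The shapes and the
  # method name here are hypothetical.
  def _ExampleGradientCheckSketch(self):
    with self.cached_session():
      x = constant_op.constant(
          [v / 16.0 for v in range(16)],
          shape=[1, 4, 4, 1],
          dtype=dtypes.float32)
      f = constant_op.constant(
          [v / 4.0 for v in range(4)],
          shape=[2, 2, 1, 1],
          dtype=dtypes.float32)
      # A VALID 2x2 filter over a 4x4 input yields a 3x3 output.
      y = nn_ops.conv2d(x, f, strides=[1, 1, 1, 1], padding="VALID")
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          x, [1, 4, 4, 1], y, [1, 3, 3, 1])
      self.assertLess(np.fabs(jacob_t - jacob_n).max(), 0.003)
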
  @test_util.deprecated_graph_mode_only
  def testInputGradientValidPaddingStrideOne(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=5,
          input_cols=4,
          filter_rows=3,
          filter_cols=3,
          in_depth=2,
          out_depth=3,
          stride_rows=1,
          stride_cols=1,
          padding="VALID",
          test_input=True,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testFilterGradientValidPaddingStrideOne(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=4,
          input_rows=6,
          input_cols=5,
          filter_rows=2,
          filter_cols=2,
          in_depth=2,
          out_depth=3,
          stride_rows=1,
          stride_cols=1,
          padding="VALID",
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testInputGradientValidPaddingStrideTwo(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=4,
          input_cols=5,
          filter_rows=3,
          filter_cols=3,
          in_depth=2,
          out_depth=3,
          stride_rows=2,
          stride_cols=2,
          padding="VALID",
          test_input=True,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testFilterGradientValidPaddingStrideTwo(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=4,
          input_rows=6,
          input_cols=5,
          filter_rows=2,
          filter_cols=2,
          in_depth=2,
          out_depth=3,
          stride_rows=2,
          stride_cols=2,
          padding="VALID",
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testInputGradientValidPaddingStrideThree(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=7,
          input_cols=6,
          filter_rows=3,
          filter_cols=3,
          in_depth=4,
          out_depth=5,
          stride_rows=3,
          stride_cols=3,
          padding="VALID",
          test_input=True,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testFilterGradientValidPaddingStrideThree(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=8,
          input_cols=7,
          filter_rows=4,
          filter_cols=4,
          in_depth=2,
          out_depth=3,
          stride_rows=3,
          stride_cols=3,
          padding="VALID",
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testInputGradientSamePaddingStrideOne(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=7,
          input_cols=6,
          filter_rows=3,
          filter_cols=3,
          in_depth=2,
          out_depth=3,
          stride_rows=1,
          stride_cols=1,
          padding="SAME",
          test_input=True,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testFilterGradientSamePaddingStrideOne(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=4,
          input_rows=6,
          input_cols=5,
          filter_rows=2,
          filter_cols=2,
          in_depth=2,
          out_depth=3,
          stride_rows=1,
          stride_cols=1,
          padding="SAME",
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testInputGradientSamePaddingStrideTwo(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=5,
          input_cols=4,
          filter_rows=3,
          filter_cols=3,
          in_depth=3,
          out_depth=3,
          stride_rows=2,
          stride_cols=2,
          padding="SAME",
          test_input=True,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testFilterGradientSamePaddingStrideTwo(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=4,
          input_rows=6,
          input_cols=5,
          filter_rows=2,
          filter_cols=2,
          in_depth=2,
          out_depth=3,
          stride_rows=2,
          stride_cols=2,
          padding="SAME",
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testInputGradientSamePaddingStrideThree(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=7,
          input_cols=6,
          filter_rows=3,
          filter_cols=3,
          in_depth=4,
          out_depth=5,
          stride_rows=3,
          stride_cols=3,
          padding="SAME",
          test_input=True,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testFilterGradientSamePaddingStrideThree(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=8,
          input_cols=7,
          filter_rows=4,
          filter_cols=4,
          in_depth=2,
          out_depth=3,
          stride_rows=3,
          stride_cols=3,
          padding="SAME",
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testFilterGradientSamePaddingStride2x1(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=8,
          input_cols=7,
          filter_rows=4,
          filter_cols=4,
          in_depth=2,
          out_depth=3,
          stride_rows=2,
          stride_cols=1,
          padding="SAME",
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testInputGradientKernelSizeMatchesInputSize(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=4,
          input_cols=3,
          filter_rows=4,
          filter_cols=3,
          in_depth=2,
          out_depth=3,
          stride_rows=1,
          stride_cols=1,
          padding="VALID",
          test_input=True,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testFilterGradientKernelSizeMatchesInputSize(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=4,
          input_cols=3,
          filter_rows=4,
          filter_cols=3,
          in_depth=2,
          out_depth=3,
          stride_rows=1,
          stride_cols=1,
          padding="VALID",
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testInputGradient1x1PaddingStrideOne(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=5,
          input_cols=4,
          filter_rows=3,
          filter_cols=3,
          in_depth=2,
          out_depth=3,
          stride_rows=1,
          stride_cols=1,
          padding=[[0, 0], [1, 1], [1, 1], [0, 0]],
          test_input=True,
          data_format=data_format,
          use_gpu=use_gpu,
          max_err=0.0025)

  @test_util.deprecated_graph_mode_only
  def testFilterGradient1x1PaddingStrideOne(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=5,
          input_cols=4,
          filter_rows=3,
          filter_cols=3,
          in_depth=2,
          out_depth=3,
          stride_rows=1,
          stride_cols=1,
          padding=[[0, 0], [1, 1], [1, 1], [0, 0]],
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testInputGradient1x1PaddingStrideTwo(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=4,
          input_cols=5,
          filter_rows=3,
          filter_cols=3,
          in_depth=2,
          out_depth=3,
          stride_rows=2,
          stride_cols=2,
          padding=[[0, 0], [1, 1], [1, 1], [0, 0]],
          test_input=True,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testFilterGradient1x1PaddingStrideTwo(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=4,
          input_cols=5,
          filter_rows=3,
          filter_cols=3,
          in_depth=2,
          out_depth=3,
          stride_rows=2,
          stride_cols=2,
          padding=[[0, 0], [1, 1], [1, 1], [0, 0]],
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testInputGradient2x2PaddingStrideOne(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=5,
          input_cols=4,
          filter_rows=3,
          filter_cols=3,
          in_depth=2,
          out_depth=3,
          stride_rows=1,
          stride_cols=1,
          padding=[[0, 0], [2, 2], [2, 2], [0, 0]],
          test_input=True,
          data_format=data_format,
          use_gpu=use_gpu,
          max_err=0.003)

  @test_util.deprecated_graph_mode_only
  def testFilterGradient2x2PaddingStrideOne(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=5,
          input_cols=4,
          filter_rows=3,
          filter_cols=3,
          in_depth=2,
          out_depth=3,
          stride_rows=1,
          stride_cols=1,
          padding=[[0, 0], [2, 2], [2, 2], [0, 0]],
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu,
          max_err=0.003)

  @test_util.deprecated_graph_mode_only
  def testInputGradient1_2_3_4PaddingStride3x2(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=8,
          input_cols=5,
          filter_rows=4,
          filter_cols=2,
          in_depth=3,
          out_depth=2,
          stride_rows=3,
          stride_cols=2,
          padding=[[0, 0], [1, 2], [3, 4], [0, 0]],
          test_input=True,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testFilterGradient1_2_3_4PaddingStride3x2(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=8,
          input_cols=5,
          filter_rows=4,
          filter_cols=2,
          in_depth=3,
          out_depth=2,
          stride_rows=3,
          stride_cols=2,
          padding=[[0, 0], [1, 2], [3, 4], [0, 0]],
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testInputGradient4_3_2_1PaddingStride2x1(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=3,
          input_rows=5,
          input_cols=7,
          filter_rows=3,
          filter_cols=2,
          in_depth=1,
          out_depth=2,
          stride_rows=2,
          stride_cols=1,
          padding=[[0, 0], [4, 3], [2, 1], [0, 0]],
          test_input=True,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testFilterGradient4_3_2_1PaddingStride2x1(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=3,
          input_rows=5,
          input_cols=7,
          filter_rows=3,
          filter_cols=2,
          in_depth=1,
          out_depth=2,
          stride_rows=2,
          stride_cols=1,
          padding=[[0, 0], [4, 3], [2, 1], [0, 0]],
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testInputGradient0_0_0_5PaddingStride1x2(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=6,
          input_cols=7,
          filter_rows=3,
          filter_cols=4,
          in_depth=3,
          out_depth=2,
          stride_rows=1,
          stride_cols=2,
          padding=[[0, 0], [0, 0], [0, 5], [0, 0]],
          test_input=True,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testFilterGradient0_0_0_5PaddingStride1x2(self):
    for (data_format, use_gpu) in GetTestConfigs():
      self.ConstructAndTestGradient(
          batch=2,
          input_rows=6,
          input_cols=7,
          filter_rows=3,
          filter_cols=4,
          in_depth=3,
          out_depth=2,
          stride_rows=1,
          stride_cols=2,
          padding=[[0, 0], [0, 0], [0, 5], [0, 0]],
          test_input=False,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.deprecated_graph_mode_only
  def testShapeFunctionEdgeCases(self):
    # All shapes unknown.
    c1 = nn_ops.conv2d(
        array_ops.placeholder(dtypes.float32),
        array_ops.placeholder(dtypes.float32),
        strides=[1, 1, 1, 1],
        padding="SAME")
    self.assertEqual([None, None, None, None], c1.get_shape().as_list())

    # Incorrect input shape.
    with self.assertRaises(ValueError):
      nn_ops.conv2d(
          array_ops.placeholder(
              dtypes.float32, shape=[1, 3]),
          array_ops.placeholder(dtypes.float32),
          strides=[1, 1, 1, 1],
          padding="SAME")

    # Incorrect filter shape.
    with self.assertRaises(ValueError):
      nn_ops.conv2d(
          array_ops.placeholder(dtypes.float32),
          array_ops.placeholder(
              dtypes.float32, shape=[1, 3]),
          strides=[1, 1, 1, 1],
          padding="SAME")

    # Depth mismatch.
    with self.assertRaises(ValueError):
      nn_ops.conv2d(
          array_ops.placeholder(
              dtypes.float32, shape=[32, 20, 20, 3]),
          array_ops.placeholder(
              dtypes.float32, shape=[4, 4, 2, 2]),
          strides=[1, 1, 1, 1],
          padding="SAME")

    # Input depth divisible by filter depth (group convolution).
    # No exceptions should appear.
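    # (in_depth 8 with filter in_depth 2 implies 8 // 2 = 4 groups; the
    # filter's out_depth of 16 is split evenly across them.)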
    nn_ops.conv2d(
        array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 8]),
        array_ops.placeholder(dtypes.float32, shape=[4, 4, 2, 16]),
        strides=[1, 1, 1, 1],
        padding="SAME")

    # Negative padding.
    with self.assertRaises(ValueError):
      nn_ops.conv2d(
          array_ops.placeholder(dtypes.float32),
          array_ops.placeholder(dtypes.float32),
          strides=[1, 1, 1, 1],
          padding=[[0, 0], [0, -1], [1, 2], [0, 0]])

    # Nonzero padding in nonspatial dimension.
    with self.assertRaises(ValueError):
      nn_ops.conv2d(
          array_ops.placeholder(dtypes.float32),
          array_ops.placeholder(dtypes.float32),
          strides=[1, 1, 1, 1],
          padding=[[1, 0], [0, 0], [0, 0], [0, 0]])

    # Nonzero NCHW padding in nonspatial dimension.
    with self.assertRaises(ValueError):
      nn_ops.conv2d(
          array_ops.placeholder(dtypes.float32),
          array_ops.placeholder(dtypes.float32),
          strides=[1, 1, 1, 1],
          padding=[[0, 0], [0, 1], [0, 0], [0, 0]],
          data_format="NCHW")

    # Wrong amount of padding
    with self.assertRaises(ValueError):
      nn_ops.conv2d(
          array_ops.placeholder(dtypes.float32),
          array_ops.placeholder(dtypes.float32),
          strides=[1, 1, 1, 1],
          padding=[[0, 0], [0, 0], [0, 0]])

    # Only specify one padding amount per dimension
    with self.assertRaises(ValueError):
      nn_ops.conv2d(
          array_ops.placeholder(dtypes.float32),
          array_ops.placeholder(dtypes.float32),
          strides=[1, 1, 1, 1],
          padding=[[0], [0], [0], [0]])

    # Explicit padding elements are not lists
    with self.assertRaises(ValueError):
      nn_ops.conv2d(
          array_ops.placeholder(dtypes.float32),
          array_ops.placeholder(dtypes.float32),
          strides=[1, 1, 1, 1],
          padding=[0, 0, 0, 0])

  def testOpEdgeCases(self):
    # Illegal strides.
    with self.assertRaisesRegex((ValueError, errors_impl.UnimplementedError),
                                "strides in the batch and depth"):
      input_val = np.ones([2, 4, 10, 10])
      filter_val = np.ones([2, 4, 10, 10])
      self.evaluate(
          nn_ops.conv2d(
              input_val, filter_val, strides=[2, 1, 1, 1], padding="SAME"))
    with self.assertRaisesRegex((ValueError, errors_impl.UnimplementedError),
                                "strides in the batch and depth"):
      input_val = np.ones([2, 4, 10, 10])
      filter_val = np.ones([2, 4, 10, 10])
      self.evaluate(
          nn_ops.conv2d(
              input_val, filter_val, strides=[1, 1, 1, 2], padding="SAME"))

    # TODO(b/195689143): Will enable when fixed for V2 behavior
    # # Filter larger than input.
    # with self.assertRaisesRegex(ValueError, "Negative dimension size"):
    #   input_val = np.ones([32, 20, 20, 3])
    #   filter_val = np.ones([20, 21, 3, 2])
    #   self.evaluate(
    #       nn_ops.conv2d(
    #           input_val, filter_val, strides=[1, 1, 1, 1], padding="VALID"))
    # with self.assertRaisesRegex(ValueError, "Negative dimension size"):
    #   input_val = np.ones([32, 20, 20, 3])
    #   filter_val = np.ones([21, 20, 3, 2])
    #   self.evaluate(
    #       nn_ops.conv2d(
    #           input_val, filter_val, strides=[1, 1, 1, 1], padding="VALID"))
    #
    # # Filter larger than input + padding.
    # with self.assertRaisesRegex(ValueError, "Negative dimension size"):
    #   input_val = np.ones([32, 20, 20, 3])
    #   filter_val = np.ones([24, 25, 3, 2])
    #   self.evaluate(
    #       nn_ops.conv2d(
    #           input_val,
    #           filter_val,
    #           strides=[1, 1, 1, 1],
    #           padding=[[0, 0], [2, 2], [2, 2], [0, 0]]))

    # Filter dimensions must be greater than 0.
    with self.assertRaisesRegex(
        errors_impl.InvalidArgumentError, "filter must not have zero elements"
        "|has a non-positive dimension"):
      input_val = np.ones([1, 1, 1, 1])
      filter_val = np.ones([1, 0, 1, 1])
      self.evaluate(
          nn_ops.conv2d(
              input_val, filter_val, strides=[1, 1, 1, 1], padding="SAME"))

    # Negative padding during backprop.
    with self.assertRaisesRegex(
        errors_impl.InvalidArgumentError,
        "All elements of explicit_paddings must be nonnegative"):
      filter_val = np.ones([18, 18, 3, 2])
      out_backprop_val = np.ones([32, 3, 2, 2])
      self.evaluate(
          nn_ops.conv2d_backprop_input(
              [32, 20, 20, 3],
              filter_val,
              out_backprop_val,
              strides=[1, 1, 1, 1],
              padding=[[0, 0], [-1, 0], [0, 0], [0, 0]]))
    with self.assertRaisesRegex(
        errors_impl.InvalidArgumentError,
        "All elements of explicit_paddings must be nonnegative"):
      input_val = np.ones([32, 20, 20, 3])
      out_backprop_val = np.ones([32, 3, 2, 2])
      self.evaluate(
          nn_ops.conv2d_backprop_filter(
              input_val, [18, 18, 3, 2],
              out_backprop_val,
              strides=[1, 1, 1, 1],
              padding=[[0, 0], [-1, 0], [0, 0], [0, 0]]))


class DepthwiseConv2DTest(test.TestCase):

  def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding,
                    expected):
    """Verifies the output values of the convolution function.

    Args:
      tensor_in_sizes: Input tensor dimensions in [batch, input_rows,
        input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols,
        input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      expected: An array containing the expected operation outputs.
    """
    total_size_1 = 1
    total_size_2 = 1
    for s in tensor_in_sizes:
      total_size_1 *= s
    for s in filter_in_sizes:
      total_size_2 *= s
    # Initializes the input tensor with array containing incrementing
    # numbers from 1.
    x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
    x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
    with self.cached_session() as sess:
      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
      t1.set_shape(tensor_in_sizes)
      t2 = constant_op.constant(x2, shape=filter_in_sizes)
      conv = nn_impl.depthwise_conv2d(
          t1, t2, strides=[1, stride, stride, 1], padding=padding)
      value = self.evaluate(conv)
    tf_logging.debug("value = %s", value)
    self.assertArrayNear(expected, np.ravel(value), 1e-5)
    self.assertShapeEqual(value, conv)

  def testConv2D2x2Filter(self):
    # The inputs look like this (it's a 3 x 2 matrix, each of depth 2):
    #
    # [ (1.0, 2.0), (3.0,  4.0), ( 5.0,  6.0) ]
    # [ (7.0, 8.0), (9.0, 10.0), (11.0, 12.0) ]
    #  We can view this as two inputs
    #
    #  input depth 0:
    #
    #  [ 1.0,  3.0,  5.0 ]
    #  [ 7.0,  9.0, 11.0 ]
    #
    #  input depth 1:
    #
    #  [ 2.0,  4.0,  6.0 ]
    #  [ 8.0, 10.0, 12.0 ]
    #
    # The filter looks like this (it has two 2 x 2 patches, each generating 2
    # depths):
    #
    #  filter #0:
    #
    #  [ (1.0,  3.0), ( 5.0,  7.0)]
    #  [ (9.0, 11.0), (13.0, 15.0)]
    #
    #  filter #1:
    #
    #  [ ( 2.0,  4.0), ( 6.0,  8.0)]
    #  [ (10.0, 12.0), (14.0, 16.0)]
    #
    # So the outputs are:
    #
    # (position 0, 0: in_depth 0, output_depth 0 -- using filter #0)
    #  1.0 * 1.0 + 7.0 * 9.0 + 3.0 * 5.0 + 9.0 * 13.0 = 196
    # (position 0, 0: in_depth 0, output_depth 1 -- using filter #1)
    #  1.0 * 2.0 + 7.0 * 10.0 + 3.0 * 6.0 + 9.0 * 14.0 = 216
    # (position 0, 0: in_depth 1, output_depth 2 -- using filter #0)
    #  2.0 * 3.0 + 8.0 * 11.0 + 4.0 * 7.0 + 10.0 * 15.0 = 272
    # (position 0, 0: in_depth 1, output_depth 3 -- using filter #1)
    #  2.0 * 4.0 + 8.0 * 12.0 + 4.0 * 8.0 + 10.0 * 16.0 = 296
    #
    # (position 1, 0: in_depth 0, output_depth 0 -- using filter #0)
    #  3.0 * 1.0 + 9.0 * 9.0 + 5.0 * 5.0 + 11.0 * 13.0 = 252
    # (position 1, 0: in_depth 0, output_depth 1 -- using filter #1)
    #  3.0 * 2.0 + 9.0 * 10.0 + 5.0 * 6.0 + 11.0 * 14.0 = 280
    # (position 1, 0: in_depth 1, output_depth 2 -- using filter #0)
    #  4.0 * 3.0 + 10.0 * 11.0 + 6.0 * 7.0 + 12.0 * 15.0 = 344
    # (position 1, 0: in_depth 1, output_depth 3 -- using filter #1)
    #  4.0 * 4.0 + 10.0 * 12.0 + 6.0 * 8.0 + 12.0 * 16.0 = 376
    expected_output = [196, 216, 272, 296, 252, 280, 344, 376]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 2],
        filter_in_sizes=[2, 2, 2, 2],
        stride=1,
        padding="VALID",
        expected=expected_output)
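
  # A minimal NumPy cross-check (illustrative only, not collected as a test
  # case) of the position (0, 0) values derived in the comment above,
  # assuming the same incrementing-from-1 initialization. The method name is
  # hypothetical.
  def _NumpyDepthwiseReferenceSketch(self):
    x = np.arange(1.0, 13.0).reshape(1, 2, 3, 2)  # [batch, rows, cols, depth]
    f = np.arange(1.0, 17.0).reshape(2, 2, 2, 2)  # [rows, cols, depth, mult]
    # The output at (0, 0) for input depth c and multiplier m is the sum over
    # the 2x2 patch of x[0, i, j, c] * f[i, j, c, m].
    out00 = [
        np.sum(x[0, 0:2, 0:2, c] * f[:, :, c, m])
        for c in range(2)
        for m in range(2)
    ]
    return out00  # [196.0, 216.0, 272.0, 296.0], matching expected_output[:4]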


class SeparableConv2DTest(test.TestCase):

  def _InitValues(self, sizes):
    """Initializes values for input tensors.

    Args:
      sizes: Tensor dimensions.

    Returns:
      Tensor initialized to values.
    """
    total_size = 1
    for s in sizes:
      total_size *= s
    x = [f * 0.5 for f in range(1, total_size + 1)]
    return constant_op.constant(x, shape=sizes)

  def _VerifyValues(self,
                    tensor_in_sizes,
                    depthwise_filter_in_sizes,
                    pointwise_filter_in_sizes,
                    stride,
                    padding,
                    expected,
                    data_format="NHWC"):
    """Verifies the output values of the separable convolution function.

    Args:
      tensor_in_sizes: Input tensor dimensions.
      depthwise_filter_in_sizes: Depthwise filter tensor dimensions.
      pointwise_filter_in_sizes: Pointwise filter tensor dimensions.
      stride: Stride.
      padding: Padding type.
      expected: An array containing the expected operation outputs.
      data_format: string data format for input tensor.
    """
    with self.cached_session():
      t1 = self._InitValues(tensor_in_sizes)
      f1 = self._InitValues(depthwise_filter_in_sizes)
      f1.set_shape(depthwise_filter_in_sizes)
      f2 = self._InitValues(pointwise_filter_in_sizes)

      real_t1 = t1
      strides = [1, stride, stride, 1]
      if data_format == "NCHW":
        real_t1 = array_ops.transpose(t1, [0, 3, 1, 2])
        strides = [1, 1, stride, stride]
        if isinstance(padding, list):
          padding = [padding[0], padding[3], padding[1], padding[2]]

      conv = nn_impl.separable_conv2d(
          real_t1,
          f1,
          f2,
          strides=strides,
          padding=padding,
          data_format=data_format)

      if data_format == "NCHW":
        conv = array_ops.transpose(conv, [0, 2, 3, 1])

      value = self.evaluate(conv)
    tf_logging.debug("value = %s", value)
    self.assertArrayNear(expected, np.ravel(value), 2e-3)
    self.assertShapeEqual(value, conv)

  def _testSeparableConv2D(self, data_format):
    # The output is the result of two convolutions:
    # First with tensor_in[1, 4, 4, 2] * filter1[2, 2, 2, 3].
    # Second with intermediate_out[1, 4, 4, 6] * filter2[1, 1, 6, 7].
    # Complexity is O(2*3*2*2 + 6*7*1*1) as opposed to O(2*7*2*2).
    expected_output = [
        6644.5, 6971.5, 7298.5, 7625.5, 7952.5, 8279.5, 8606.5, 8154.5, 8556.5,
        8958.5, 9360.5, 9762.5, 10164.5, 10566.5, 9664.5, 10141.5, 10618.5,
        11095.5, 11572.5, 12049.5, 12526.5, 4145.5, 4346.5, 4547.5, 4748.5,
        4949.5, 5150.5, 5351.5, 12684.5, 13311.5, 13938.5, 14565.5, 15192.5,
        15819.5, 16446.5, 14194.5, 14896.5, 15598.5, 16300.5, 17002.5, 17704.5,
        18406.5, 15704.5, 16481.5, 17258.5, 18035.5, 18812.5, 19589.5, 20366.5,
        6499.5, 6814.5, 7129.5, 7444.5, 7759.5, 8074.5, 8389.5, 18724.5,
        19651.5, 20578.5, 21505.5, 22432.5, 23359.5, 24286.5, 20234.5, 21236.5,
        22238.5, 23240.5, 24242.5, 25244.5, 26246.5, 21744.5, 22821.5, 23898.5,
        24975.5, 26052.5, 27129.5, 28206.5, 8853.5, 9282.5, 9711.5, 10140.5,
        10569.5, 10998.5, 11427.5, 5746.75, 6010.75, 6274.75, 6538.75, 6802.75,
        7066.75, 7330.75, 6168.75, 6452.25, 6735.75, 7019.25, 7302.75, 7586.25,
        7869.75, 6590.75, 6893.75, 7196.75, 7499.75, 7802.75, 8105.75, 8408.75,
        2036.25, 2119.5, 2202.75, 2286.0, 2369.25, 2452.5, 2535.75
    ]

    self._VerifyValues(
        tensor_in_sizes=[1, 4, 4, 2],
        depthwise_filter_in_sizes=[2, 2, 2, 3],
        pointwise_filter_in_sizes=[1, 1, 6, 7],
        stride=1,
        padding="SAME",
        expected=expected_output,
        data_format=data_format)

  def testSeparableConv2D(self):
    self._testSeparableConv2D("NHWC")

  def disabledtestSeparableConv2DNCHW(self):
    if not test.is_gpu_available():
      return
    self._testSeparableConv2D("NCHW")

  def _testSeparableConv2DEqualInputOutputDepth(self, data_format):
    # The output is the result of two convolutions:
    # First with tensor_in[1, 4, 4, 2] * filter1[2, 2, 2, 3].
2812    # Second with intermediate_out[1, 4, 4, 6] * filter2[1, 1, 6, 6].
2813    # Complexity is O(2*3*2*2 + 6*6*1*1) as opposed to O(2*6*2*2).
    expected_output = [
        5742.0, 6069.0, 6396.0, 6723.0, 7050.0, 7377.0, 7047.0, 7449.0, 7851.0,
        8253.0, 8655.0, 9057.0, 8352.0, 8829.0, 9306.0, 9783.0, 10260.0,
        10737.0, 3582.0, 3783.0, 3984.0, 4185.0, 4386.0, 4587.0, 10962.0,
        11589.0, 12216.0, 12843.0, 13470.0, 14097.0, 12267.0, 12969.0, 13671.0,
        14373.0, 15075.0, 15777.0, 13572.0, 14349.0, 15126.0, 15903.0, 16680.0,
        17457.0, 5616.0, 5931.0, 6246.0, 6561.0, 6876.0, 7191.0, 16182.0,
        17109.0, 18036.0, 18963.0, 19890.0, 20817.0, 17487.0, 18489.0, 19491.0,
        20493.0, 21495.0, 22497.0, 18792.0, 19869.0, 20946.0, 22023.0, 23100.0,
        24177.0, 7650.0, 8079.0, 8508.0, 8937.0, 9366.0, 9795.0, 4963.5, 5227.5,
        5491.5, 5755.5, 6019.5, 6283.5, 5328.0, 5611.5, 5895.0, 6178.5, 6462.0,
        6745.5, 5692.5, 5995.5, 6298.5, 6601.5, 6904.5, 7207.5, 1757.25, 1840.5,
        1923.75, 2007.0, 2090.25, 2173.5
    ]

    self._VerifyValues(
        tensor_in_sizes=[1, 4, 4, 2],
        depthwise_filter_in_sizes=[2, 2, 2, 3],
        pointwise_filter_in_sizes=[1, 1, 6, 6],
        stride=1,
        padding="SAME",
        expected=expected_output,
        data_format=data_format)

  @test_util.deprecated_graph_mode_only
  def testSeparableConv2DEqualInputOutputDepth(self):
    self._testSeparableConv2DEqualInputOutputDepth("NHWC")

  def testSeparableConv2DEqualInputOutputDepthNCHW(self):
    if not test.is_gpu_available():
      return
    self._testSeparableConv2DEqualInputOutputDepth("NCHW")

  def _testSeparableConv2dExplicitPadding(self, data_format):
    tensor_in_sizes = [1, 4, 4, 2]
    depthwise_filter_in_sizes = [2, 2, 2, 3]
    pointwise_filter_in_sizes = [1, 1, 6, 7]
    padding = [[0, 0], [1, 2], [3, 4], [0, 0]]
    with self.cached_session():
      # Compute the 'expected' values by manually padding before calling
      # separable_conv2d.
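      # (A convolution with explicit padding is defined as zero-padding the
      # input and then convolving with no padding, conceptually:
      #   conv2d(x, f, padding=p) == conv2d(pad(x, p), f, padding="VALID")
      # which is exactly what this reference computation exploits.)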
      t1 = self._InitValues(tensor_in_sizes)
      t1 = array_ops.pad(t1, padding)
      f1 = self._InitValues(depthwise_filter_in_sizes)
      f1.set_shape(depthwise_filter_in_sizes)
      f2 = self._InitValues(pointwise_filter_in_sizes)
      conv = nn_impl.separable_conv2d(
          t1,
          f1,
          f2,
          strides=[1, 1, 1, 1],
          padding="VALID",
          data_format="NHWC")
      expected = self.evaluate(conv)
      expected = np.ravel(expected)
    self._VerifyValues(
        tensor_in_sizes=tensor_in_sizes,
        depthwise_filter_in_sizes=depthwise_filter_in_sizes,
        pointwise_filter_in_sizes=pointwise_filter_in_sizes,
        stride=1,
        padding=padding,
        expected=expected,
        data_format=data_format)

  def testSeparableConv2dExplicitPadding(self):
    self._testSeparableConv2dExplicitPadding("NHWC")

  def testSeparableConv2dExplicitPaddingNCHW(self):
    if not test.is_gpu_available():
      return
    self._testSeparableConv2dExplicitPadding("NCHW")


class DeepConv2DTest(test.TestCase):

  def _CompareFwdConv2D(self, tensor_in_sizes, filter_in_sizes, conv_strides,
                        padding):
    """Verifies that DeepConv2D and Conv2D produce the same values.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      conv_strides: [row_stride, col_stride] for the convolution.
      padding: Padding type.
    """
    x1 = np.random.rand(*tensor_in_sizes).astype(np.float32)
    x2 = np.random.rand(*filter_in_sizes).astype(np.float32)

    with self.cached_session(use_gpu=False):
      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
      t2 = constant_op.constant(x2, shape=filter_in_sizes)
      strides = [1] + conv_strides + [1]

      conv = nn_ops.conv2d(t1, t2, strides=strides, padding=padding)

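      # TF_USE_DEEP_CONV2D selects the alternative "deep conv" code path
      # (a Winograd-style fast algorithm) in the CPU Conv2D kernel. The
      # comparison below relies on the kernel consulting this variable at
      # run time, so one graph can be evaluated once with each code path.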
      os.environ["TF_USE_DEEP_CONV2D"] = "0"
      values_expect = self.evaluate([conv])

      os.environ["TF_USE_DEEP_CONV2D"] = "1"
      values_test = self.evaluate([conv])

      self.assertAllClose(values_expect, values_test, rtol=1e-5, atol=1e-5)

  def _RunTestCases(self, conv_strides, padding):
    input_sizes = [[5, 5, 5, 1248], [3, 17, 17, 192], [2, 35, 35, 288],
                   [2, 6, 8, 517], [2, 7, 4, 81], [3, 11, 3, 77]]
    filter_sizes = [[3, 3, 1248, 128], [3, 3, 192, 192], [3, 3, 288, 384],
                    [3, 3, 517, 64], [3, 3, 81, 77], [3, 3, 77, 181]]
    for input_shape, filter_shape in zip(input_sizes, filter_sizes):
      self._CompareFwdConv2D(input_shape, filter_shape, conv_strides, padding)

  def testConv2D3x3FilterStride1x1Valid(self):
    self._RunTestCases([1, 1], "VALID")

  def testConv2D3x3FilterStride1x1Same(self):
    self._RunTestCases([1, 1], "SAME")

class Conv2DBenchmark(test.Benchmark):

  def benchmarkGPUConvStackFirst(self):
    # Benchmark the first iteration of a conv-net with many identical conv
    # operations.
    if not test.is_gpu_available():
      return

    with ops.Graph().as_default(), session_lib.Session() as session:
      batch_size = 1
      timesteps = 600
      features = 1

      inputs = random_ops.random_uniform(
          [batch_size, 1, timesteps, features], seed=1234)
      num_outputs_list = [512] * 40 + [1]
      kernel_w = 3
      x = inputs
      for num_outputs in num_outputs_list:
        x = convolutional.conv2d(x, num_outputs, [1, kernel_w])
      outputs = x

      self.evaluate(variables.global_variables_initializer())
      num_iterations = 4
      for iter_index in xrange(num_iterations):
        start = time.time()
        session.run(outputs)
        wall_time = time.time() - start
        self.report_benchmark(
            name="conv_stack_iter_%d" % iter_index, wall_time=wall_time)
        tf_logging.info("conv_stack_iter_%d: %.4f" % (iter_index, wall_time))

  def _bench_op(self, name, op, burn_iters, num_iters):
    config = config_pb2.ConfigProto()
    # Prevent Grappler from optimizing away the entire graph.
    config.graph_options.rewrite_options.dependency_optimization = (
        rewriter_config_pb2.RewriterConfig.OFF)
    with session_lib.Session(config=config) as session:
      self.evaluate(variables.global_variables_initializer())
      self.run_op_benchmark(
          session, op, burn_iters=burn_iters, min_iters=num_iters, name=name)

  def benchmarkExplicitVsManualPadding(self):
    """Compare performance of EXPLICIT padding and calling tf.pad.

    A Conv2D op with EXPLICIT padding is benchmarked against a tf.pad with the
    same padding followed by an equivalent Conv2D op with VALID padding.
    """
    if not test.is_gpu_available():
      return

    with ops.Graph().as_default():
      burn_iters = 15
      num_iters = 300
      batch_size = 64
      # The input and filter correspond to the first layer of Resnet50.
      input = variables.Variable(  # pylint: disable=redefined-builtin
          random_ops.random_uniform([
              batch_size,
              3,
              224,
              224
          ]))
      filter = variables.Variable(random_ops.random_uniform([7, 7, 3, 64]))  # pylint: disable=redefined-builtin
      strides = [1, 1, 2, 2]
      padding = [(0, 0), (0, 0), (3, 3), (3, 3)]
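      # In NCHW layout this pads only the two spatial dimensions: three rows
      # and columns of zeros on each side, matching the padding of ResNet50's
      # 7x7 stride-2 stem convolution.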
      output_explicit_pad = nn_ops.conv2d(
          input, filter, strides, padding=padding, data_format="NCHW")
      input_padded = array_ops.pad(input, padding)
      output_manual_pad = nn_ops.conv2d(
          input_padded, filter, strides, padding="VALID", data_format="NCHW")
      # Benchmark just the forward pass.
      self._bench_op("explicit_pad_forward", output_explicit_pad.op, burn_iters,
                     num_iters)
      self._bench_op("manual_pad_forward", output_manual_pad.op, burn_iters,
                     num_iters)

      # Benchmark both the forward and backward passes.
      input_grad_explicit_pad, filter_grad_explicit_pad = (
          gradients_impl.gradients(output_explicit_pad, [input, filter]))
      self._bench_op(
          "explicit_pad_backward",
          control_flow_ops.group(input_grad_explicit_pad,
                                 filter_grad_explicit_pad), burn_iters,
          num_iters)
      input_grad_manual_pad, filter_grad_manual_pad = gradients_impl.gradients(
          output_manual_pad, [input, filter])
      self._bench_op(
          "manual_pad_backward",
          control_flow_ops.group(input_grad_manual_pad, filter_grad_manual_pad),
          burn_iters, num_iters)

  def benchmarkExplicitVsSamePaddingGraph(self):
    """Compare performance of EXPLICIT and SAME padding in graph mode.

    A Conv2D op with SAME padding is benchmarked, and an equivalent Conv2D op
    with explicit padding is benchmarked, where the padding is the same as in
    the SAME case. The purpose is to ensure EXPLICIT padding is just as
    efficient as the SAME case.
    """
    if not test.is_gpu_available():
      return

    with ops.Graph().as_default():
      burn_iters = 15
      num_convs = 20
      num_iters = 50
      batch_size = 64
      # The input and filter correspond to a middle layer of Resnet50.
      input = variables.Variable(  # pylint: disable=redefined-builtin
          random_ops.random_uniform([
              batch_size,
              256,
              14,
              14
          ]))
      filter = variables.Variable(random_ops.random_uniform([3, 3, 256, 256]))  # pylint: disable=redefined-builtin
      strides = [1, 1, 1, 1]
      padding = [(0, 0), (0, 0), (1, 1), (1, 1)]
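      # For a 3x3 filter at stride 1, SAME padding resolves to exactly one
      # zero on each spatial side, so this explicit list is equivalent,
      # element for element, to the padding="SAME" branch below.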
      output_explicit_pad = input
      output_same_pad = input

      for _ in range(num_convs):
        output_explicit_pad = nn_ops.conv2d(
            output_explicit_pad,
            filter,
            strides,
            padding=padding,
            data_format="NCHW")
        output_same_pad = nn_ops.conv2d(
            output_same_pad,
            filter,
            strides,
            padding="SAME",
            data_format="NCHW")
      grad_explicit_pad, = gradients_impl.gradients(output_explicit_pad, filter)
      grad_same_pad, = gradients_impl.gradients(output_same_pad, filter)
      self._bench_op("graph_explicit_pad", grad_explicit_pad.op, burn_iters,
                     num_iters)
      self._bench_op("graph_same_pad", grad_same_pad.op, burn_iters, num_iters)

  def benchmarkExplicitVsSamePaddingEager(self):
    """Compare performance of EXPLICIT and SAME padding in eager mode.

    A Conv2D op with SAME padding is benchmarked, and an equivalent Conv2D op
    with explicit padding is benchmarked, where the padding is the same as in
    the SAME case. Currently, EXPLICIT padding is slightly slower because the
    Python padding list must be checked and processed before the Conv2D op
    can run.
    """
    # TODO(reedwm): Make EXPLICIT padding as fast as SAME padding.
    if not test.is_gpu_available():
      return

    with context.eager_mode():
      burn_iters = 15
      num_convs = 20
      num_iters = 50
      batch_size = 64
      # The input and filter correspond to a middle layer of Resnet50.
      input = variables.Variable(  # pylint: disable=redefined-builtin
          random_ops.random_uniform([
              batch_size,
              256,
              14,
              14
          ]))
      filter = variables.Variable(random_ops.random_uniform([3, 3, 256, 256]))  # pylint: disable=redefined-builtin
      strides = [1, 1, 1, 1]
      padding = [(0, 0), (0, 0), (1, 1), (1, 1)]
      output_explicit_pad = input
      output_same_pad = input
      for _ in range(burn_iters):
        output_explicit_pad = nn_ops.conv2d(
            output_explicit_pad,
            filter,
            strides,
            padding=padding,
            data_format="NCHW")
        output_same_pad = nn_ops.conv2d(
            output_same_pad,
            filter,
            strides,
            padding="SAME",
            data_format="NCHW")

      start = time.time()
      for _ in range(num_iters):
        with backprop.GradientTape() as tape:
          for _ in range(num_convs):
            output_explicit_pad = nn_ops.conv2d(
                output_explicit_pad,
                filter,
                strides,
                padding=padding,
                data_format="NCHW")
          tape.gradient(output_explicit_pad, filter)
      end = time.time()
      self.report_benchmark(
          name="eager_explicit_pad",
          wall_time=(end - start) / num_iters,
          iters=num_iters)

      start = time.time()
      for _ in range(num_iters):
        with backprop.GradientTape() as tape:
          for _ in range(num_convs):
            output_same_pad = nn_ops.conv2d(
                output_same_pad,
                filter,
                strides,
                padding="SAME",
                data_format="NCHW")
          tape.gradient(output_same_pad, filter)
      end = time.time()
      self.report_benchmark(
          name="eager_same_pad",
          wall_time=(end - start) / num_iters,
          iters=num_iters)


def GetInceptionFwdTest(input_size, filter_size, stride, padding,
                        gpu_only=False):

  def Test(self):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping InceptionFwd %s", (input_size, filter_size,
                                                   stride, padding))
      return
    tf_logging.info("Testing InceptionFwd %s", (input_size, filter_size, stride,
                                                padding))
    self._CompareFwdValues(input_size, filter_size, [stride, stride], padding)

  return Test


def GetInceptionFwdDilatedConvTest(input_size, filter_size, stride, padding):

  def Test(self):
    if stride == 1:
      tf_logging.info("Testing InceptionFwd with dilations %s",
                      (input_size, filter_size, stride, padding))
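      # With dilations=[2, 2], a k x k filter taps inputs spanning
      # (k - 1) * 2 + 1 rows and columns; only the stride-1 Inception shapes
      # are exercised here, presumably because strided dilated convolutions
      # are not universally supported.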
      self._VerifyDilatedConvValues(
          tensor_in_sizes=input_size,
          filter_in_sizes=filter_size,
          strides=[stride, stride],
          dilations=[2, 2],
          padding=padding,
          rtol=5e-4)

  return Test


def GetInceptionBackInputTest(input_size, filter_size, output_size, stride,
                              padding,
                              gpu_only=False):

  def Test(self):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping InceptionBackInput %s",
                      (input_size, filter_size, output_size, stride, padding))
      return
    tf_logging.info("Testing InceptionBackInput %s",
                    (input_size, filter_size, output_size, stride, padding))
    self._CompareBackpropInput(input_size, filter_size, output_size,
                               [stride, stride], padding)

  return Test


def GetInceptionBackFilterTest(input_size, filter_size, output_size, strides,
                               padding, gpu_only=False):

  def Test(self):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping InceptionBackFilter %s",
                      (input_size, filter_size, output_size, strides, padding))
      return
    tf_logging.info("Testing InceptionBackFilter %s",
                    (input_size, filter_size, output_size, strides, padding))
    self._CompareBackFilter(input_size, filter_size, output_size, strides,
                            padding)

  return Test


class FusedConv2DTest(test.TestCase):

  def _CreateNumpyTensor(self, shape):
    total_size = np.prod(shape)
    return np.arange(1, total_size + 1, dtype=np.float32).reshape(shape)

  def _CreateConv2D(self,
                    input_values,
                    filters,
                    strides=[1, 1],
                    padding="SAME"):
    return nn_ops.convolution(
        input_values, filters, strides=strides, padding=padding)

  # Tests tensor forwarding of a fused Conv2D+BiasAdd+Add op when the input to
  # Add has refcount 1.
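  # ("Refcount" here is the number of consumers of the tensor fed to Add:
  # when the fused kernel is the last or only consumer, it may forward, i.e.
  # reuse, that input's buffer for its output instead of allocating a new
  # one. These tests check that buffer forwarding does not change results.)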
  @test_util.run_in_graph_and_eager_modes(use_gpu=False)
  def testAddWithRefCountOne(self):
    expected_output = [
        113377, 125570, 77305, 86738, 19433, 22226, 60681, 70722, 36291, 43718,
        7143, 9206, 9785, 12098, 4783, 6366, 779, 1134
    ]
    tensor_in_sizes = [1, 3, 3, 2]
    filter_in_sizes = [2, 2, 2, 2]
    bias_in_sizes = [2]

    x = self._CreateNumpyTensor(tensor_in_sizes)
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    bias_in = self._CreateNumpyTensor(bias_in_sizes)
    # To get different weights for filter
    offset = 1

    conv1 = self._CreateConv2D(x, filter_in)
    conv2 = self._CreateConv2D(conv1, filter_in + offset)

    conv = self._CreateConv2D(conv1, filter_in - offset)
    bias_add = nn_ops.bias_add(conv, bias_in)
    add = math_ops.add_n([bias_add, conv2])

    self.assertAllEqual(
        np.rint(expected_output),
        self.evaluate(add).reshape(-1))

  # Tests tensor forwarding of a fused Conv2D+BiasAdd+Add op when the input to
  # Add has a total refcount of 2, and Add is its last consumer.
  @test_util.run_in_graph_and_eager_modes(use_gpu=False)
  def testAddWithRefCountTwoAndRunAddLast(self):
    expected_output = [
        1.907175e+06, 2.253505e+06, 7.809210e+05, 9.537180e+05, 1.184170e+05,
        1.523070e+05, 5.367010e+05, 6.803700e+05, 1.867090e+05, 2.529460e+05,
        2.362300e+04, 3.522600e+04, 5.121700e+04, 7.168300e+04, 1.494300e+04,
        2.347400e+04, 1.558000e+03, 2.903000e+03
    ]
    tensor_in_sizes = [1, 3, 3, 2]
    filter_in_sizes = [2, 2, 2, 2]
    bias_in_sizes = [2]

    x = self._CreateNumpyTensor(tensor_in_sizes)
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    bias_in = self._CreateNumpyTensor(bias_in_sizes)
    # To get different weights for filter
    offset = 1

    conv1 = self._CreateConv2D(x, filter_in)
    conv2 = self._CreateConv2D(conv1, filter_in + offset)

    conv = self._CreateConv2D(conv2, filter_in - offset)
    bias_add = nn_ops.bias_add(conv, bias_in)
    add = math_ops.add_n([bias_add, conv1])

    self.assertAllEqual(
        np.rint(expected_output),
        self.evaluate(add).reshape(-1))

  # Tests tensor forwarding of a fused Conv2D+BiasAdd+Add op when the input to
  # Add has refcount 2 and Add (in the fused Conv2D op) is its first consumer.
  @test_util.run_in_graph_and_eager_modes(use_gpu=False)
  def testAddWithRefCountTwoAndRunAddFirst(self):
    expected_output = [
        176161, 194450, 120673, 134822, 30545, 34734, 96041, 111102, 58149,
        69289, 11745, 14839, 15833, 19302, 7965, 10339, 1345, 1877
    ]
    tensor_in_sizes = [1, 3, 3, 2]
    filter_in_sizes = [2, 2, 2, 2]
    bias_in_sizes = [2]

    x = self._CreateNumpyTensor(tensor_in_sizes)
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    bias_in = self._CreateNumpyTensor(bias_in_sizes)
    # To get different weights for filter
    offset = 1

    conv1 = self._CreateConv2D(x, filter_in)
    conv2 = self._CreateConv2D(conv1, filter_in + offset)

    conv = self._CreateConv2D(conv1, filter_in - offset)
    bias_add = nn_ops.bias_add(conv, bias_in)
    add = math_ops.add_n([bias_add, conv2])

    relu = nn_ops.relu(add)
    output = math_ops.add_n([relu, conv2])

    self.assertAllEqual(
        np.rint(expected_output),
        self.evaluate(output).reshape(-1))

  # Tests tensor forwarding of a fused Conv2D+BiasAdd+Add op when the input to
  # Add has refcount 2, and there is no dependency between its two consumers.
  @test_util.run_in_graph_and_eager_modes(use_gpu=False)
  def testAddWithRefCountTwoAndNoDependence(self):
    expected_output = [
        176161, 194450, 120673, 134822, 30545, 34734, 96041, 111102, 58149,
        69289, 11745, 14839, 15833, 19302, 7965, 10339, 1345, 1877
    ]
    tensor_in_sizes = [1, 3, 3, 2]
    filter_in_sizes = [2, 2, 2, 2]
    bias_in_sizes = [2]

    x = self._CreateNumpyTensor(tensor_in_sizes)
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    bias_in = self._CreateNumpyTensor(bias_in_sizes)
    # To get different weights for filter
    offset = 1

    conv1 = self._CreateConv2D(x, filter_in)
    conv2 = self._CreateConv2D(conv1, filter_in + offset)

    conv = self._CreateConv2D(conv1, filter_in - offset)
    bias_add = nn_ops.bias_add(conv, bias_in)
    add = math_ops.add_n([bias_add, conv2])

    relu1 = nn_ops.relu(add)
    relu2 = nn_ops.relu(conv2)
    output = math_ops.add_n([relu1, relu2])

    self.assertAllEqual(
        np.rint(expected_output),
        self.evaluate(output).reshape(-1))

  # Tests tensor forwarding of a fused Conv2D+BiasAdd+Add op when the input to
  # Add is the same as the input to the fused Conv2D op and needs a tensor
  # buffer.
  @test_util.run_in_graph_and_eager_modes(use_gpu=False)
  def testAddWithSameSrcAndAddTensorBuffer(self):
    expected_output = [
        57157, 63298, 39249, 44026, 9971, 11402, 31193, 36306, 19126, 22948,
        3970, 5060, 5135, 6350, 2666, 3524, 461, 674
    ]
    tensor_in_sizes = [1, 3, 3, 2]
    filter_in_sizes = [2, 2, 2, 2]
    bias_in_sizes = [2]

    x = self._CreateNumpyTensor(tensor_in_sizes)
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    bias_in = self._CreateNumpyTensor(bias_in_sizes)

    conv1 = self._CreateConv2D(x, filter_in)

    conv = self._CreateConv2D(conv1, filter_in)
    bias_add = nn_ops.bias_add(conv, bias_in)
    add = math_ops.add_n([bias_add, conv1])

    self.assertAllEqual(
        np.rint(expected_output),
        self.evaluate(add).reshape(-1))


if __name__ == "__main__":
  for index, (input_size_, filter_size_, output_size_, stride_,
              padding_) in enumerate(GetShrunkInceptionShapes()):
    setattr(Conv2DTest, "testInceptionFwd_" + str(index),
            test_util.run_in_graph_and_eager_modes(
                GetInceptionFwdTest(input_size_, filter_size_, stride_,
                                    padding_)))
    setattr(
        Conv2DTest, "testInceptionFwdDilatedConv_" + str(index),
        test_util.run_in_graph_and_eager_modes(GetInceptionFwdDilatedConvTest(
            input_size_, filter_size_, stride_, padding_)))
    setattr(Conv2DTest, "testInceptionBackInput_" + str(index),
            test_util.run_in_graph_and_eager_modes(
                GetInceptionBackInputTest(input_size_, filter_size_,
                                          output_size_, stride_, padding_)))
    setattr(Conv2DTest, "testInceptionBackFilter_" + str(index),
            test_util.run_in_graph_and_eager_modes(
                GetInceptionBackFilterTest(input_size_, filter_size_,
                                           output_size_, [stride_, stride_],
                                           padding_)))

  # TODO(b/35359731)
  # The Fwd, BackInput, and BackFilter tests below check that, for this
  # particular set of input parameters, the winograd-nonfused algorithm is
  # excluded from convolution autotuning. If the winograd-nonfused algorithm
  # were offered as an autotune option while the cuDNN version is smaller
  # than 7, the following tests would fail.
  ishape = [1, 400, 400, 1]
  fshape = [1, 1, 1, 256]
  oshape = [1, 400, 400, 256]
  setattr(Conv2DTest, "testInceptionFwd_No_Winograd_Nonfused",
          test_util.run_in_graph_and_eager_modes(
              GetInceptionFwdTest(ishape, fshape, 1, "SAME", gpu_only=True)))
  setattr(Conv2DTest, "testInceptionFwdDilatedConv_No_Winograd_Nonfused",
          test_util.run_in_graph_and_eager_modes(
              GetInceptionFwdDilatedConvTest(ishape, fshape, 1, "SAME")))
  setattr(Conv2DTest, "testInceptionBackInput_No_Winograd_Nonfused",
          test_util.run_in_graph_and_eager_modes(
              GetInceptionBackInputTest(ishape, fshape, oshape, 1, "SAME",
                                        gpu_only=True)))
  setattr(Conv2DTest, "testInceptionBackFilter_No_Winograd_Nonfused",
          test_util.run_in_graph_and_eager_modes(
              GetInceptionBackFilterTest(ishape, fshape, oshape, [1, 1], "SAME",
                                         gpu_only=True)))
  test.main()