# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functional tests for convolutional operations."""

import os
import time

import numpy as np

from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.client import session as session_lib
from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.layers import convolutional
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_impl
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variables
import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging
from tensorflow.python.util.compat import collections_abc


def GetShrunkInceptionShapes(shrink=10):
  """Iterator for smaller versions of convolution shapes in 2015 Inception.

  Relative to Inception, each depth value is `depth // shrink`.

  Args:
    shrink: Factor to shrink each depth value by relative to Inception.

  Yields:
    Tuple (input_size, filter_size, out_size, stride, padding), the convolution
    parameters of Inception layers.
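
  Example (illustrative; `build_conv` is a hypothetical consumer):
    for input_size, filter_size, _, stride, padding in (
        GetShrunkInceptionShapes()):
      build_conv(input_size, filter_size, stride, padding)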
  """
  input_sizes = [[4, 5, 5, 1248], [4, 8, 8, 384], [4, 8, 8, 384],
                 [4, 8, 8, 2048], [4, 8, 8, 448], [4, 8, 8, 2048],
                 [4, 8, 8, 2048], [4, 8, 8, 2048], [4, 8, 8, 1760],
                 [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 8, 8, 1760],
                 [4, 17, 17, 192], [4, 17, 17, 192], [4, 17, 17, 1248],
                 [4, 17, 17, 128], [4, 17, 17, 1248], [4, 17, 17, 224],
                 [4, 17, 17, 192], [4, 17, 17, 192], [4, 17, 17, 1216],
                 [4, 17, 17, 1216], [4, 17, 17, 224], [4, 17, 17, 192],
                 [4, 17, 17, 192], [4, 17, 17, 1152], [4, 17, 17, 1152],
                 [4, 17, 17, 192], [4, 17, 17, 160], [4, 17, 17, 1152],
                 [4, 17, 17, 1024], [4, 17, 17, 128], [4, 17, 17, 1024],
                 [4, 17, 17, 128], [4, 17, 17, 1024], [4, 17, 17, 128],
                 [4, 17, 17, 768], [4, 17, 17, 128], [4, 17, 17, 128],
                 [4, 17, 17, 768], [4, 17, 17, 768], [4, 35, 35, 96],
                 [4, 35, 35, 288], [4, 35, 35, 64], [4, 35, 35, 288],
                 [4, 35, 35, 256], [4, 35, 35, 48], [4, 35, 35, 256],
                 [4, 35, 35, 96], [4, 35, 35, 192], [4, 35, 35, 192],
                 [4, 35, 35, 192], [4, 73, 73, 64], [4, 73, 73, 64],
                 [4, 147, 147, 24]]
  filter_sizes = [[1, 1, 1248, 128], [1, 3, 384, 384], [3, 1, 384, 384],
                  [1, 1, 2048, 192], [3, 3, 448, 384], [1, 1, 2048, 320],
                  [1, 1, 2048, 448], [1, 1, 2048, 384], [1, 1, 1760, 384],
                  [1, 1, 1760, 192], [1, 1, 1760, 448], [1, 1, 1760, 320],
                  [3, 3, 192, 192], [3, 3, 192, 192], [1, 1, 1248, 192],
                  [3, 3, 128, 320], [1, 1, 1248, 128], [1, 3, 224, 224],
                  [3, 1, 192, 256], [1, 3, 192, 256], [1, 1, 1216, 192],
                  [1, 1, 1216, 96], [3, 1, 224, 224], [3, 3, 192, 224],
                  [1, 3, 192, 192], [1, 1, 1152, 192], [1, 1, 1152, 128],
                  [3, 1, 192, 192], [3, 3, 160, 192], [1, 1, 1152, 160],
                  [1, 1, 1024, 128], [1, 3, 128, 192], [1, 1, 1024, 160],
                  [3, 1, 128, 192], [1, 1, 1024, 256], [3, 1, 128, 128],
                  [1, 1, 768, 192], [1, 3, 128, 128], [3, 3, 128, 128],
                  [1, 1, 768, 128], [1, 1, 768, 320], [3, 3, 96, 96],
                  [3, 3, 288, 384], [3, 3, 64, 96], [1, 1, 288, 64],
                  [1, 1, 256, 64], [5, 5, 48, 64], [1, 1, 256, 48],
                  [3, 3, 96, 96], [1, 1, 192, 32], [1, 1, 192, 64],
                  [1, 1, 192, 48], [3, 3, 64, 192], [1, 1, 64, 64],
                  [1, 1, 24, 64]]
  out_sizes = [[4, 5, 5, 128], [4, 8, 8, 384], [4, 8, 8, 384],
               [4, 8, 8, 192], [4, 8, 8, 384], [4, 8, 8, 320],
               [4, 8, 8, 448], [4, 8, 8, 384], [4, 8, 8, 384],
               [4, 8, 8, 192], [4, 8, 8, 448], [4, 8, 8, 320],
               [4, 8, 8, 192], [4, 17, 17, 192], [4, 17, 17, 192],
               [4, 8, 8, 320], [4, 17, 17, 128], [4, 17, 17, 224],
               [4, 17, 17, 256], [4, 17, 17, 256], [4, 17, 17, 192],
               [4, 17, 17, 96], [4, 17, 17, 224], [4, 17, 17, 224],
               [4, 17, 17, 192], [4, 17, 17, 192], [4, 17, 17, 128],
               [4, 17, 17, 192], [4, 17, 17, 192], [4, 17, 17, 160],
               [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 160],
               [4, 17, 17, 192], [4, 17, 17, 256], [4, 17, 17, 128],
               [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 128],
               [4, 17, 17, 128], [4, 17, 17, 320], [4, 17, 17, 96],
               [4, 17, 17, 384], [4, 35, 35, 96], [4, 35, 35, 64],
               [4, 35, 35, 64], [4, 35, 35, 64], [4, 35, 35, 48],
               [4, 35, 35, 96], [4, 35, 35, 32], [4, 35, 35, 64],
               [4, 35, 35, 48], [4, 71, 71, 192], [4, 73, 73, 64],
               [4, 147, 147, 64]]
  strides = [
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1
  ]
  # Shrink sizes to make the test faster
  for i in input_sizes:
    i[3] //= shrink
  for f in filter_sizes:
    f[2] //= shrink
    f[3] //= shrink
  for o in out_sizes:
    o[3] //= shrink
  # pylint: disable=invalid-name
  VALID = "VALID"
  SAME = "SAME"
  # pylint: enable=invalid-name
  paddings = [
      SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
      VALID, SAME, SAME, VALID, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
      SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
      SAME, SAME, SAME, SAME, SAME, VALID, VALID, SAME, SAME, SAME, SAME, SAME,
      SAME, SAME, SAME, SAME, VALID, VALID, VALID
  ]
  for i, f, o, s, p in zip(input_sizes, filter_sizes, out_sizes, strides,
                           paddings):
    yield i, f, o, s, p


def GetTestConfigs():
  """Get all the valid test configs to run.

  Returns:
    all the valid test configs as tuples of data_format and use_gpu.
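
    For example, with a CUDA GPU available this is
    [("NHWC", False), ("NHWC", True), ("NCHW", True)].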
  """
  test_configs = [("NHWC", False), ("NHWC", True)]
  if test.is_gpu_available(cuda_only=True):
    # "NCHW" format is only supported on CUDA.
    test_configs += [("NCHW", True)]
  return test_configs

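# A minimal NumPy sketch (illustrative only; not used by the tests below) of
# the VALID-padding, NHWC convolution these tests exercise: `x` has shape
# [batch, rows, cols, in_depth] and `w` has shape
# [kernel_rows, kernel_cols, in_depth, out_depth].
def _ReferenceConv2dValid(x, w, row_stride=1, col_stride=1):
  batch, in_rows, in_cols, _ = x.shape
  k_rows, k_cols, _, out_depth = w.shape
  out_rows = (in_rows - k_rows) // row_stride + 1
  out_cols = (in_cols - k_cols) // col_stride + 1
  out = np.zeros((batch, out_rows, out_cols, out_depth), dtype=x.dtype)
  for r in range(out_rows):
    for c in range(out_cols):
      patch = x[:, r * row_stride:r * row_stride + k_rows,
                c * col_stride:c * col_stride + k_cols, :]
      # Contract each patch with the filter over rows, cols and in_depth.
      out[:, r, c, :] = np.tensordot(patch, w, axes=([1, 2, 3], [0, 1, 2]))
  return out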

@test_util.run_all_without_tensor_float_32("Avoid TF32 conv on GPU")
class Conv2DTest(test.TestCase):

  def _DtypesToTest(self, use_gpu):
    if test_util.IsMklEnabled():
      return [dtypes.float32]
    # double datatype is currently not supported for convolution ops
    # on the ROCm platform
    optional_float64 = [] if test.is_built_with_rocm() else [dtypes.float64]
    if use_gpu and not test_util.GpuSupportsHalfMatMulAndConv():
      return [dtypes.float32] + optional_float64
    else:
      # It is important that float32 comes before float16 here,
      # as we will be using its gradients as reference for fp16 gradients.
      return [dtypes.float32, dtypes.float16] + optional_float64

  def _CreateNumpyTensor(self, shape):
    total_size = 1
    for s in shape:
      total_size *= s
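    # E.g. (illustrative): shape [2, 2] gives [[1., 2.], [3., 4.]] as float32;
    # deterministic values keep the expected outputs in these tests
    # reproducible.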
    return np.arange(1, total_size + 1, dtype=np.float32).reshape(shape)

  def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, dilations,
                            strides, padding, data_format, dtype, use_gpu):
    """Verifies the output values of the convolution function.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      dilations: Dilation rate: [row_dilation, col_dilation]
      strides: Stride: [row_stride, col_stride]
      padding: Padding type.
      data_format: Format of the data tensors.
      dtype: Data type for inputs and outputs.
      use_gpu: True if the operations should be run on GPU.
    Returns:
      Symbolic tensor value that can be used to execute the computation.
    """
    x1 = self._CreateNumpyTensor(tensor_in_sizes)
    x2 = self._CreateNumpyTensor(filter_in_sizes)

    with test_util.device(use_gpu):
      t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
      t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
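      # conv2d expects one stride/dilation per dimension, so extend the
      # 2-element [row, col] lists with the batch and depth dimensions
      # (both 1).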
      strides = [1] + strides + [1]
      dilations = [1] + dilations + [1]
      if isinstance(padding, (list, tuple)):
        padding = [(0, 0)] + padding + [(0, 0)]
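      # For NCHW, transpose the input and permute the per-dimension
      # attributes so they keep referring to the same logical axes.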
      if data_format == "NCHW":
        t1 = test_util.NHWCToNCHW(t1)
        strides = test_util.NHWCToNCHW(strides)
        dilations = test_util.NHWCToNCHW(dilations)
        if isinstance(padding, (list, tuple)):
          padding = test_util.NHWCToNCHW(padding)
      conv = nn_ops.conv2d(
          t1,
          t2,
          dilations=dilations,
          strides=strides,
          padding=padding,
          data_format=data_format)
      self.assertEqual(conv.dtype, dtype)
      if data_format == "NCHW":
        conv = test_util.NCHWToNHWC(conv)

      return conv

  def _CompareFwdValues(self, tensor_in_sizes, filter_in_sizes, conv_strides,
                        padding):
    """Verifies that CPU and GPU produce the same values.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      conv_strides: [row_stride, col_stride] for the convolution.
      padding: Padding type.
    """
    x1 = np.random.rand(*tensor_in_sizes).astype(np.float32)
    x2 = np.random.rand(*filter_in_sizes).astype(np.float32)

    def _SetupVal(data_format, use_gpu):
      with test_util.device(use_gpu):
        t1 = constant_op.constant(x1, shape=tensor_in_sizes)
        t2 = constant_op.constant(x2, shape=filter_in_sizes)
        strides = [1] + conv_strides + [1]
        if data_format == "NCHW":
          t1 = test_util.NHWCToNCHW(t1)
          strides = test_util.NHWCToNCHW(strides)
        conv = nn_ops.conv2d(
            t1, t2, strides=strides, padding=padding, data_format=data_format)
        if data_format == "NCHW":
          conv = test_util.NCHWToNHWC(conv)
        return conv

    tensors = []
    for (data_format, use_gpu) in GetTestConfigs():
      tensors.append(_SetupVal(data_format, use_gpu))
    values = self.evaluate(tensors)
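    # Every (data_format, use_gpu) config must agree with the first one.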
    for i in range(1, len(values)):
      self.assertAllClose(values[0], values[i], rtol=1e-3, atol=1e-3)

  def _ComputeReferenceDilatedConv(self, tensor_in_sizes, filter_in_sizes,
                                   stride, dilation, padding, data_format,
                                   use_gpu):
    x1 = self._CreateNumpyTensor(tensor_in_sizes)
    x2 = self._CreateNumpyTensor(filter_in_sizes)
    with test_util.device(use_gpu):
      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
      t2 = constant_op.constant(x2, shape=filter_in_sizes)
      if isinstance(stride, collections_abc.Iterable):
        strides = list(stride)
      else:
        strides = [stride, stride]
      if data_format == "NCHW":
        t1 = test_util.NHWCToNCHW(t1)
        full_strides = [1, 1] + strides
        full_dilation = [1, 1] + dilation
      else:
        full_strides = [1] + strides + [1]
        full_dilation = [1] + dilation + [1]
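      # nn_ops.convolution serves as the reference implementation here;
      # nn_ops.conv2d with explicit dilations is the op under test.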
      expected = nn_ops.convolution(
          t1,
          t2,
          padding=padding,
          strides=strides,
          dilation_rate=dilation,
          data_format=data_format)
      computed = nn_ops.conv2d(
          t1,
          t2,
          strides=full_strides,
          dilations=full_dilation,
          padding=padding,
          data_format=data_format)
      if data_format == "NCHW":
        expected = test_util.NCHWToNHWC(expected)
        computed = test_util.NCHWToNHWC(computed)
    return expected, computed

  def _VerifyDilatedConvValues(self, tensor_in_sizes, filter_in_sizes, strides,
                               padding, dilations, rtol=1e-4):
    expected_results = []
    computed_results = []
    for data_format, use_gpu in GetTestConfigs():
      expected, computed = self._ComputeReferenceDilatedConv(
          tensor_in_sizes, filter_in_sizes, strides, dilations, padding,
          data_format, use_gpu)
      expected_results.append(expected)
      computed_results.append(computed)
    # Note: `use_gpu` is the value from the last config in GetTestConfigs(),
    # which is a GPU config whenever a GPU is available, so the looser GPU
    # tolerance is applied in that case.
    tolerance = 1e-2 if use_gpu else 1e-5
    expected_values = self.evaluate(expected_results)
    computed_values = self.evaluate(computed_results)
    for e_value, c_value in zip(expected_values, computed_values):
      tf_logging.debug("expected = %s", e_value)
      tf_logging.debug("actual = %s", c_value)
      self.assertAllClose(
          e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=rtol)

  def _VerifyValues(self,
                    tensor_in_sizes,
                    filter_in_sizes,
                    strides,
                    padding,
                    expected,
                    dilations=(1, 1),
                    gpu_only=False,
                    test_grappler_layout_optimizer=False,
                    tol=1e-5,
                    fp16_tol=1e-3):
    if gpu_only and not test.is_gpu_available(cuda_only=True):
      return
    tensors = []
    dilations = list(dilations)
    for (data_format, use_gpu) in GetTestConfigs():
      if gpu_only and not use_gpu:
        continue
      dtypes_to_test = self._DtypesToTest(use_gpu)
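      # Integer convolutions are exercised only on the plain NHWC path; the
      # Grappler layout-optimizer variant of this test skips them.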
      if not test_grappler_layout_optimizer and data_format == "NHWC":
        dtypes_to_test.append(dtypes.int32)
      for dtype in dtypes_to_test:
        result = self._SetupValuesForDevice(
            tensor_in_sizes,
            filter_in_sizes,
            dilations,
            strides,
            padding,
            data_format,
            dtype,
            use_gpu=use_gpu)
        if test_grappler_layout_optimizer and data_format == "NHWC" and use_gpu:
          # Grappler's layout optimizer will not optimize a fetch node, so
          # this identity allows Grappler to optimize the Conv2D node.
          result = array_ops.identity(result)
        tensors.append(result)
      values = self.evaluate(tensors)
      for i in range(len(tensors)):
        conv = tensors[i]
        value = values[i]
        tf_logging.debug("expected = %s", expected)
        tf_logging.debug("actual = %s", value)
        tol_to_use = fp16_tol if value.dtype == np.float16 else tol
        if np.issubdtype(value.dtype, np.integer):
          self.assertAllEqual(np.rint(expected), np.ravel(value))
        else:
          self.assertAllClose(expected, np.ravel(value), atol=tol_to_use,
                              rtol=tol_to_use)
        self.assertShapeEqual(value, conv)
        self.assertEqual(value.dtype, conv.dtype.as_numpy_dtype)

  def _VerifyExplicitPaddings(self,
                              tensor_in_sizes,
                              filter_in_sizes,
                              strides,
                              padding,
                              dilations=(1, 1),
                              test_grappler_layout_optimizer=False,
                              tol=1e-5,
                              fp16_tol=1e-3):
    """Verifies Conv2D with explicit padding generates correct values.

    It does this by comparing with Conv2D without explicit padding. This
    function assumes Conv2D without explicit padding works correctly.

    Args:
      tensor_in_sizes: Input tensor dimensions in [batch, input_rows,
        input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in [kernel_rows, kernel_cols,
        input_depth, output_depth].
      strides: [row_stride, col_stride] for the convolution.
      padding: Explicit padding amounts.
      dilations: Dilation values.
      test_grappler_layout_optimizer: If True, allow the Grappler layout
        optimizer to run, which turns NHWC Conv2Ds on the GPU to NCHW Conv2Ds.
      tol: The absolute and relative tolerance for non-fp16 dtypes.
      fp16_tol: The absolute and relative tolerance for fp16.
    """
    input_tensor = self._CreateNumpyTensor(tensor_in_sizes)
    filter_tensor = self._CreateNumpyTensor(filter_in_sizes)
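    # Reference: zero-pad the input explicitly, then run a VALID convolution.
    # Conv2D with explicit `padding` must produce the same values.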
    input_tensor = array_ops.pad(input_tensor, [(0, 0)] + padding + [(0, 0)])
    dilations = list(dilations)
    conv2d_result = nn_ops.conv2d(
        input_tensor,
        filter_tensor, [1] + list(strides) + [1],
        "VALID",
        dilations=[1] + dilations + [1])
    expected = list(self.evaluate(array_ops.reshape(conv2d_result, [-1])))
    self._VerifyValues(
        tensor_in_sizes,
        filter_in_sizes,
        strides,
        padding,
        expected,
        dilations,
        test_grappler_layout_optimizer=test_grappler_layout_optimizer,
        tol=tol,
        fp16_tol=fp16_tol)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D1x1Filter(self):
    expected_output = [
        30.0, 36.0, 42.0, 66.0, 81.0, 96.0, 102.0, 126.0, 150.0, 138.0, 171.0,
        204.0, 174.0, 216.0, 258.0, 210.0, 261.0, 312.0
    ]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[1, 1, 3, 3],
        strides=[1, 1],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DExpandedBatch(self):
    tensor_in_sizes_batch = [10, 2, 3, 3]
    tensor_in_sizes_expanded_batch = [2, 5, 2, 3, 3]
    filter_in_sizes = [1, 1, 3, 3]
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    x1 = self._CreateNumpyTensor(tensor_in_sizes_batch)
    x2 = x1.reshape(tensor_in_sizes_expanded_batch)
    conv1 = nn_ops.conv2d(
        x1,
        filter_in,
        strides=[1, 1],
        padding="VALID")
    conv2 = nn_ops.conv2d(
        x2,
        filter_in,
        strides=[1, 1],
        padding="VALID")
    self.assertEqual(conv1.shape, tensor_in_sizes_batch)
    self.assertEqual(conv2.shape, tensor_in_sizes_expanded_batch)
    self.assertAllEqual(
        conv1,
        self.evaluate(conv2).reshape(conv1.shape))

  @test_util.run_in_graph_and_eager_modes
  def testConvolutionClass2DExpandedBatch(self):
    tensor_in_sizes_batch = [10, 2, 3, 3]
    tensor_in_sizes_expanded_batch = [2, 5, 2, 3, 3]
    filter_in_sizes = [1, 1, 3, 3]
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    x1 = self._CreateNumpyTensor(tensor_in_sizes_batch)
    x2 = x1.reshape(tensor_in_sizes_expanded_batch)
    convolver1 = nn_ops.Convolution(
        input_shape=x1.shape,
        filter_shape=filter_in.shape,
        strides=[1, 1],
        padding="VALID")
    self.assertEqual(convolver1.num_batch_dims, 1)
    convolver2 = nn_ops.Convolution(
        input_shape=x2.shape,
        filter_shape=filter_in.shape,
        strides=[1, 1],
        padding="VALID")
    self.assertEqual(convolver2.num_batch_dims, 2)
    conv1 = convolver1(x1, filter_in)
    conv2 = convolver2(x2, filter_in)
    self.assertEqual(conv1.shape, tensor_in_sizes_batch)
    self.assertEqual(conv2.shape, tensor_in_sizes_expanded_batch)
    self.assertAllEqual(
        conv1,
        self.evaluate(conv2).reshape(conv1.shape))

  @test_util.run_in_graph_and_eager_modes
  def testConvolutionWith2SpatialDimensionsAndExpandedBatch(self):
    tensor_in_sizes_batch = [10, 2, 3, 3]
    tensor_in_sizes_expanded_batch = [2, 5, 2, 3, 3]
    filter_in_sizes = [1, 1, 3, 3]
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    x1 = self._CreateNumpyTensor(tensor_in_sizes_batch)
    x2 = x1.reshape(tensor_in_sizes_expanded_batch)
    conv1 = nn_ops.convolution(
        x1,
        filter_in,
        strides=[1, 1],
        padding="VALID")
    conv2 = nn_ops.convolution(
        x2,
        filter_in,
        strides=[1, 1],
        padding="VALID")
    self.assertEqual(conv1.shape, tensor_in_sizes_batch)
    self.assertEqual(conv2.shape, tensor_in_sizes_expanded_batch)
    self.assertAllEqual(
        conv1,
        self.evaluate(conv2).reshape(conv1.shape))

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Filter2x1Dilation(self):
    self._VerifyDilatedConvValues(
        tensor_in_sizes=[1, 4, 4, 1],
        filter_in_sizes=[2, 2, 1, 1],
        strides=[1, 1],
        dilations=[2, 1],
        padding="VALID")

  @test_util.run_in_graph_and_eager_modes
  def testConv2DEmpty(self):
    expected_output = []
    self._VerifyValues(
        tensor_in_sizes=[0, 2, 3, 3],
        filter_in_sizes=[1, 1, 3, 3],
        strides=[1, 1],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DEmptyDilation(self):
    self._VerifyDilatedConvValues(
        tensor_in_sizes=[0, 2, 3, 3],
        filter_in_sizes=[1, 1, 3, 3],
        strides=[1, 1],
        dilations=[2, 1],
        padding="VALID")

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Filter(self):
    # The outputs are computed using third_party/py/IPython/notebook.
    expected_output = [2271.0, 2367.0, 2463.0, 2901.0, 3033.0, 3165.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        strides=[1, 1],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2FilterDilation(self):
    self._VerifyDilatedConvValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        strides=[1, 1],
        dilations=[1, 2],
        padding="VALID")

  @test_util.run_in_graph_and_eager_modes
  def testConv2D1x2Filter(self):
    # The outputs are computed using third_party/py/IPython/notebook.
    expected_output = [
        231.0, 252.0, 273.0, 384.0, 423.0, 462.0, 690.0, 765.0, 840.0, 843.0,
        936.0, 1029.0
    ]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[1, 2, 3, 3],
        strides=[1, 1],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D1x2FilterDilation(self):
    self._VerifyDilatedConvValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[1, 2, 3, 3],
        strides=[1, 1],
        dilations=[2, 1],
        padding="VALID")

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2FilterStride2(self):
    expected_output = [2271.0, 2367.0, 2463.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        strides=[2, 2],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2FilterStride2Same(self):
    expected_output = [2271.0, 2367.0, 2463.0, 1230.0, 1305.0, 1380.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        strides=[2, 2],
        padding="SAME",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2FilterStride1x2(self):
    expected_output = [58.0, 78.0, 98.0, 118.0, 138.0, 158.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 3, 6, 1],
        filter_in_sizes=[2, 2, 1, 1],
        strides=[1, 2],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DKernelSmallerThanStrideValid(self):
    expected_output = [65, 95, 275, 305]
    self._VerifyValues(
        tensor_in_sizes=[1, 7, 7, 1],
        filter_in_sizes=[2, 2, 1, 1],
        strides=[3, 3],
        padding="VALID",
        expected=expected_output)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DKernelSmallerThanStrideSame(self):
    self._VerifyValues(
        tensor_in_sizes=[1, 3, 3, 1],
        filter_in_sizes=[1, 1, 1, 1],
        strides=[2, 2],
        padding="SAME",
        expected=[1, 3, 7, 9])

    self._VerifyValues(
        tensor_in_sizes=[1, 4, 4, 1],
        filter_in_sizes=[1, 1, 1, 1],
        strides=[2, 2],
        padding="SAME",
        expected=[1, 3, 9, 11])

    self._VerifyValues(
        tensor_in_sizes=[1, 4, 4, 1],
        filter_in_sizes=[2, 2, 1, 1],
        strides=[3, 3],
        padding="SAME",
        expected=[44, 28, 41, 16])

  @test_util.run_in_graph_and_eager_modes
  def testConv2DKernelSizeMatchesInputSize(self):
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 2, 1],
        filter_in_sizes=[2, 2, 1, 2],
        strides=[1, 1],
        padding="VALID",
        expected=[50, 60])

  @test_util.run_in_graph_and_eager_modes
  def testConv2DKernelSizeMatchesInputSizeDilation(self):
    self._VerifyDilatedConvValues(
        tensor_in_sizes=[1, 3, 3, 1],
        filter_in_sizes=[2, 2, 1, 2],
        strides=[1, 1],
        dilations=[2, 2],
        padding="VALID")

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D0x0Padding(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        strides=[1, 1],
        padding=[[0, 0], [0, 0]])

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[3, 4, 3, 2],
        filter_in_sizes=[1, 1, 2, 1],
        strides=[2, 2],
        padding=[[0, 0], [0, 0]])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D1x1Padding(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 3, 2],
        filter_in_sizes=[2, 2, 2, 2],
        strides=[1, 1],
        padding=[[1, 1], [1, 1]])

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 2, 1],
        filter_in_sizes=[1, 1, 1, 2],
        strides=[1, 1],
        padding=[[1, 1], [1, 1]])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2D2x2Padding(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 1, 2],
        filter_in_sizes=[2, 1, 2, 1],
        strides=[1, 1],
        padding=[[2, 2], [2, 2]])

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 1, 2],
        filter_in_sizes=[1, 1, 2, 1],
        strides=[2, 1],
        padding=[[2, 2], [2, 2]])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2DOnlyBottomPadding(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 2],
        strides=[1, 1],
        padding=[[0, 3], [0, 0]], tol=2e-5)

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[2, 2, 4, 3],
        filter_in_sizes=[1, 2, 3, 2],
        strides=[2, 2],
        padding=[[0, 3], [0, 0]])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2DOnlyTopRightPadding(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 2],
        strides=[1, 1],
        padding=[[1, 0], [0, 2]],
        tol=5e-5)

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 4, 2],
        filter_in_sizes=[2, 2, 2, 2],
        strides=[1, 3],
        padding=[[1, 0], [0, 2]])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2DLotsPadding(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 1, 1, 3],
        filter_in_sizes=[2, 2, 3, 3],
        strides=[1, 1],
        padding=[[3, 4], [4, 2]])

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 1, 1],
        filter_in_sizes=[2, 2, 1, 3],
        strides=[2, 1],
        padding=[[3, 4], [4, 2]])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2DExplicitPaddingWithDilations(self):
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 3, 2, 1],
        filter_in_sizes=[1, 2, 1, 2],
        strides=[1, 1],
        padding=[[1, 0], [0, 1]],
        dilations=[2, 1])

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 3, 2],
        filter_in_sizes=[3, 2, 2, 1],
        strides=[1, 1],
        padding=[[2, 1], [1, 2]],
        dilations=[2, 3])

  @test_util.run_in_graph_and_eager_modes()
  def testConv2dOnlyPaddingReturnsZeros(self):
    self._VerifyValues(
        tensor_in_sizes=[1, 0, 2, 1],
        filter_in_sizes=[1, 1, 1, 1],
        strides=[1, 1],
        padding=[[1, 1], [1, 1]],
        expected=[0, 0, 0, 0, 0, 0, 0, 0])

  def testConv2DExplicitPaddingWithLayoutOptimizer(self):
    # Test with Grappler's layout optimizer, to ensure the layout optimizer
    # handles explicit padding correctly.
    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 3, 2, 1],
        filter_in_sizes=[1, 2, 1, 2],
        strides=[1, 1],
        padding=[[1, 0], [0, 1]],
        dilations=[2, 1],
        test_grappler_layout_optimizer=True)

    self._VerifyExplicitPaddings(
        tensor_in_sizes=[1, 2, 3, 2],
        filter_in_sizes=[3, 2, 2, 1],
        strides=[1, 1],
        padding=[[2, 1], [1, 2]],
        dilations=[2, 3],
        test_grappler_layout_optimizer=True)

  def _VerifyGroupConvFwd(self, tensor_in_sizes, filter_in_sizes, dilations,
                          strides, padding, data_format, dtype):
    """Verifies grouped convolution matches a per-group for-loop reference.

    Args:
      tensor_in_sizes: Input tensor dimensions in [batch, input_rows,
        input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in [kernel_rows, kernel_cols,
        input_depth, output_depth].
      dilations: Dilation rate: [row_dilation, col_dilation]
      strides: Stride: [row_stride, col_stride]
      padding: Padding type.
      data_format: Format of the data tensors.
      dtype: Data type for inputs and outputs.
    """
    tensor_in = self._CreateNumpyTensor(tensor_in_sizes)
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    num_groups = tensor_in_sizes[3] // filter_in_sizes[2]
    assert (num_groups > 1 and
            filter_in_sizes[2] * num_groups == tensor_in_sizes[3])
    with test_util.device(True):
      t1 = constant_op.constant(tensor_in, dtype=dtype)
      t2 = constant_op.constant(filter_in, dtype=dtype)
      strides = [1] + strides + [1]
      dilations = [1] + dilations + [1]
      if data_format == "NCHW":
        t1 = test_util.NHWCToNCHW(t1)
        strides = test_util.NHWCToNCHW(strides)
        dilations = test_util.NHWCToNCHW(dilations)
        t1_splits = array_ops.split(t1, num_groups, axis=1)
      else:
        t1_splits = array_ops.split(t1, num_groups, axis=3)
      t2_splits = array_ops.split(t2, num_groups, axis=3)

      def MakeConv2d(inputs, filters):
        return nn_ops.conv2d(
            inputs,
            filters,
            strides,
            padding,
            dilations=dilations,
            data_format=data_format)

      group_conv = MakeConv2d(t1, t2)
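      # Reference: convolve each input/filter group pair separately and
      # concatenate along the channel axis; the grouped call above must
      # match this loop.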
      group_conv_loop = array_ops.concat(
          [MakeConv2d(t1s, t2s) for t1s, t2s in zip(t1_splits, t2_splits)],
          axis=1 if data_format == "NCHW" else 3)

      results = self.evaluate([group_conv, group_conv_loop])
      tol_to_use = 1e-5
      self.assertAllClose(
          results[0], results[1], atol=tol_to_use, rtol=tol_to_use)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DGroupConvFwd(self):
    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
      data_formats = ["NHWC", "NCHW"]
    else:
      data_formats = ["NHWC"]
    for data_format in data_formats:
      for dilation in [1, 2]:
        for stride in [1, 2]:
          for filter_dims in [[3, 3, 4, 8], [1, 1, 2, 16]]:
            self._VerifyGroupConvFwd([10, 32, 32, 16], filter_dims,
                                     dilations=[dilation, dilation],
                                     strides=[stride, stride],
                                     padding="SAME",
                                     data_format=data_format,
                                     dtype=dtypes.float32)

  @test_util.deprecated_graph_mode_only
  @test_util.run_cuda_only
  def testInputGradientGroupConv(self):
    for data_format in ["NCHW", "NHWC"]:
      for test_input in [True, False]:
        self.ConstructAndTestGradient(
            batch=2,
            input_rows=5,
            input_cols=4,
            filter_rows=3,
            filter_cols=3,
            num_groups=2,
            padding="VALID",
            in_depth=4,
            out_depth=6,
            stride_rows=1,
            stride_cols=1,
            test_input=test_input,
            data_format=data_format,
            use_gpu=True,
            max_err=0.005)

  @test_util.deprecated_graph_mode_only
  @test_util.run_cuda_only
  def testFilterGradientGroupConv(self):
    for data_format in ["NCHW", "NHWC"]:
      for test_input in [True, False]:
        self.ConstructAndTestGradient(
            batch=2,
            input_rows=5,
            input_cols=4,
            filter_rows=3,
            filter_cols=3,
            num_groups=2,
            padding="VALID",
            in_depth=4,
            out_depth=6,
            stride_rows=1,
            stride_cols=1,
            test_input=test_input,
            data_format=data_format,
            use_gpu=True,
            max_err=0.005)

  # TODO(yzhwang): this currently fails.
  # self._VerifyValues(tensor_in_sizes=[1, 8, 8, 1],
  #                   filter_in_sizes=[2, 2, 1, 1],
  #                   strides=[4, 4], padding="SAME",
  #                   expected=[72, 112, 392, 432])

  # Testing for backprops
  def _RunAndVerifyBackpropInput(self,
                                 input_sizes,
                                 filter_sizes,
                                 output_sizes,
                                 strides,
                                 padding,
                                 expected,
                                 data_format,
                                 use_gpu,
                                 err,
                                 dilations=(1, 1)):
    if use_gpu and not test.is_gpu_available(cuda_only=True):
      return
    x1 = self._CreateNumpyTensor(filter_sizes)
    x2 = self._CreateNumpyTensor(output_sizes)
    dilations = list(dilations)
    with test_util.device(use_gpu):
      if len(input_sizes) == 4:
        if data_format == "NCHW":
          input_sizes = test_util.NHWCToNCHW(input_sizes)
      t0 = constant_op.constant(input_sizes, shape=[len(input_sizes)])
      t1 = constant_op.constant(x1, shape=filter_sizes)
      t2 = constant_op.constant(x2, shape=output_sizes)
      strides = [1] + strides + [1]
      dilations = [1] + dilations + [1]
      if isinstance(padding, (list, tuple)):
        padding = [(0, 0)] + padding + [(0, 0)]
      if data_format == "NCHW":
        t2 = test_util.NHWCToNCHW(t2)
        strides = test_util.NHWCToNCHW(strides)
        dilations = test_util.NHWCToNCHW(dilations)
        if isinstance(padding, (list, tuple)):
          padding = test_util.NHWCToNCHW(padding)
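      # conv2d_backprop_input computes the gradient of conv2d with respect
      # to its input, given the input shape (t0), the filter (t1) and the
      # upstream gradient (t2).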
      conv = nn_ops.conv2d_backprop_input(
          t0,
          t1,
          t2,
          strides=strides,
          padding=padding,
          data_format=data_format,
          dilations=dilations)
      if data_format == "NCHW":
        conv = test_util.NCHWToNHWC(conv)
      # Evaluate the backprop-to-input result.
      value = self.evaluate(conv)
      self.assertShapeEqual(value, conv)
    tf_logging.debug("expected = %s", expected)
    tf_logging.debug("actual = %s", value)
    self.assertAllCloseAccordingToType(expected, value.flatten(), atol=1e-5)

  def _CompareBackpropInput(self, input_sizes, filter_sizes, output_sizes,
                            conv_strides, padding):
    x1 = np.random.rand(*filter_sizes).astype(np.float32)
    x2 = np.random.rand(*output_sizes).astype(np.float32)

    def _GetVal(data_format, use_gpu):
      with test_util.device(use_gpu):
        if data_format == "NCHW":
          new_input_sizes = test_util.NHWCToNCHW(input_sizes)
        else:
          new_input_sizes = input_sizes
        t0 = constant_op.constant(new_input_sizes, shape=[len(new_input_sizes)])
        t1 = constant_op.constant(x1, shape=filter_sizes)
        t2 = constant_op.constant(x2, shape=output_sizes)
        strides = [1] + conv_strides + [1]
        if data_format == "NCHW":
          t2 = test_util.NHWCToNCHW(t2)
          strides = test_util.NHWCToNCHW(strides)
        conv = nn_ops.conv2d_backprop_input(
            t0,
            t1,
            t2,
            strides=strides,
            padding=padding,
            data_format=data_format)
        if data_format == "NCHW":
          conv = test_util.NCHWToNHWC(conv)
        ret = self.evaluate(conv)
        self.assertShapeEqual(ret, conv)
        return ret

    values = []
    for (data_format, use_gpu) in GetTestConfigs():
      values.append(_GetVal(data_format, use_gpu))

    for i in range(1, len(values)):
      self.assertAllClose(values[0], values[i], rtol=1e-2, atol=1e-2)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Depth1ValidBackpropInput(self):
    expected_output = [1.0, 4.0, 4.0, 3.0, 10.0, 8.0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInput(
          input_sizes=[1, 2, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 1, 2, 1],
          strides=[1, 1],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-5)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DEmptyBackpropInput(self):
    expected_output = []
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInput(
          input_sizes=[0, 2, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[0, 1, 2, 1],
          strides=[1, 1],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-5)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Depth3ValidBackpropInput(self):
    expected_output = [
        14.0, 32.0, 50.0, 100.0, 163.0, 226.0, 167.0, 212.0, 257.0, 122.0,
        140.0, 158.0, 478.0, 541.0, 604.0, 437.0, 482.0, 527.0
    ]
    for (data_format, use_gpu) in GetTestConfigs():
      # The GPU version of this test is not very stable. So adjusting the
      # error threshold to 1e-4.
      self._RunAndVerifyBackpropInput(
          input_sizes=[1, 2, 3, 3],
          filter_sizes=[2, 2, 3, 3],
          output_sizes=[1, 1, 2, 3],
          strides=[1, 1],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-4)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Depth3ValidBackpropInputStride1x2(self):
    expected_output = [
        1.0, 2.0, 2.0, 4.0, 3.0, 6.0, 7.0, 12.0, 11.0, 18.0, 15.0, 24.0, 12.0,
        16.0, 15.0, 20.0, 18.0, 24.0
    ]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInput(
          input_sizes=[1, 3, 6, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 2, 3, 1],
          strides=[1, 2],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-5)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DStrideTwoFilterOneSameBackpropInput(self):
    expected_output = [
        1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 4.0, 0.0, 0.0, 0.0,
        0.0, 0.0
    ]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInput(
          input_sizes=[1, 4, 4, 1],
          filter_sizes=[1, 1, 1, 1],
          output_sizes=[1, 2, 2, 1],
          strides=[2, 2],
          padding="SAME",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-5)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DKernelSizeMatchesInputSizeBackpropInput(self):
    expected_output = [5.0, 11.0, 17.0, 23.0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInput(
          input_sizes=[1, 2, 2, 1],
          filter_sizes=[2, 2, 1, 2],
          output_sizes=[1, 1, 1, 2],
          strides=[1, 1],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-5)

  @test_util.run_in_graph_and_eager_modes
  @test_util.disable_xla("XLA requires input_sizes to be a 4D shape.")
  def testConv2DInputSizesContainsOnlySpatialDimensionsBackpropInput(self):
    expected_output = [5.0, 11.0, 17.0, 23.0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInput(
          input_sizes=[2, 2],
          filter_sizes=[2, 2, 1, 2],
          output_sizes=[1, 1, 1, 2],
          strides=[1, 1],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-5)

  @test_util.run_in_graph_and_eager_modes
  @test_util.disable_xla("b/239598470")
  def testConv2DBackpropInputDegenerateBackpropInput(self):
    input_sizes = [3, 1, 1, 2]
    expected_output = np.zeros(input_sizes).flatten()
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropInput(
          input_sizes=input_sizes,
          filter_sizes=[1, 3, 2, 3],
          output_sizes=[3, 1, 0, 3],
          strides=[1, 2],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu,
          err=1e-5)

  # Testing for backprops
  def _RunAndVerifyBackpropFilter(self,
                                  input_sizes,
                                  filter_sizes,
                                  output_sizes,
                                  strides,
                                  padding,
                                  expected,
                                  data_format,
                                  use_gpu,
                                  dilations=(1, 1),
                                  err=1e-5):
    x0 = self._CreateNumpyTensor(input_sizes)
    x2 = self._CreateNumpyTensor(output_sizes)
    dilations = list(dilations)
    explicit_strides = [1] + strides + [1]
    new_padding = padding
    new_dilations = [1] + dilations + [1]
    if isinstance(new_padding, (list, tuple)):
      new_padding = [(0, 0)] + new_padding + [(0, 0)]
    if data_format == "NCHW":
      explicit_strides = test_util.NHWCToNCHW(explicit_strides)
      new_dilations = test_util.NHWCToNCHW(new_dilations)
      if isinstance(padding, (list, tuple)):
        new_padding = test_util.NHWCToNCHW(new_padding)
    for dtype in self._DtypesToTest(use_gpu=use_gpu):
      with test_util.device(use_gpu):
        t0 = constant_op.constant(x0, shape=input_sizes, dtype=dtype)
        t1 = constant_op.constant(filter_sizes, shape=[len(filter_sizes)])
        t2 = constant_op.constant(x2, shape=output_sizes, dtype=dtype)
        if data_format == "NCHW":
          t0 = test_util.NHWCToNCHW(t0)
          t2 = test_util.NHWCToNCHW(t2)
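        # conv2d_backprop_filter takes the input (t0), the filter shape (t1)
        # and the upstream gradient (t2), and returns the filter gradient.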
        conv = nn_ops.conv2d_backprop_filter(
            t0,
            t1,
            t2,
            strides=explicit_strides,
            padding=new_padding,
            dilations=new_dilations,
            data_format=data_format)
        value = self.evaluate(conv)
        self.assertShapeEqual(value, conv)
      tf_logging.debug("expected = %s", expected)
      tf_logging.debug("actual = %s", value)
      self.assertArrayNear(expected, value.flatten(), err)

  def _CompareBackFilter(self, input_sizes, filter_sizes, output_sizes,
                         conv_strides, padding):
    x0 = np.random.rand(*input_sizes).astype(np.float32)
    x2 = np.random.rand(*output_sizes).astype(np.float32)

    def _GetVal(data_format, use_gpu):
      with test_util.device(use_gpu):
        t0 = constant_op.constant(x0, shape=input_sizes)
        t1 = constant_op.constant(filter_sizes, shape=[len(filter_sizes)])
        t2 = constant_op.constant(x2, shape=output_sizes)
        strides = [1] + conv_strides + [1]
        if data_format == "NCHW":
          t0 = test_util.NHWCToNCHW(t0)
          t2 = test_util.NHWCToNCHW(t2)
          strides = test_util.NHWCToNCHW(strides)
        conv = nn_ops.conv2d_backprop_filter(
            t0,
            t1,
            t2,
            strides=strides,
            padding=padding,
            data_format=data_format)
        ret = self.evaluate(conv)
        self.assertShapeEqual(ret, conv)
        return ret

    values = []
    for (data_format, use_gpu) in GetTestConfigs():
      values.append(_GetVal(data_format, use_gpu))
    for i in range(1, len(values)):
      self.assertAllClose(values[0], values[i], rtol=1e-4, atol=1e-4)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Depth1ValidBackpropFilter(self):
    expected = [5.0, 8.0, 14.0, 17.0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[1, 2, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 1, 2, 1],
          strides=[1, 1],
          padding="VALID",
          expected=expected,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DEmptyBackpropFilter(self):
    expected = []
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[1, 2, 3, 1],
          filter_sizes=[2, 2, 1, 0],
          output_sizes=[1, 1, 2, 0],
          strides=[1, 1],
          padding="VALID",
          expected=expected,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DBackpropFilterWithEmptyInput(self):
    expected = [0, 0, 0, 0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[0, 2, 3, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[0, 1, 2, 1],
          strides=[1, 1],
          padding="VALID",
          expected=expected,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Depth3ValidBackpropFilter(self):
    expected = [
        17.0, 22.0, 27.0, 22.0, 29.0, 36.0, 27.0, 36.0, 45.0, 32.0, 43.0, 54.0,
        37.0, 50.0, 63.0, 42.0, 57.0, 72.0, 62.0, 85.0, 108.0, 67.0, 92.0,
        117.0, 72.0, 99.0, 126.0, 77.0, 106.0, 135.0, 82.0, 113.0, 144.0, 87.0,
        120.0, 153.0
    ]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[1, 2, 3, 3],
          filter_sizes=[2, 2, 3, 3],
          output_sizes=[1, 1, 2, 3],
          strides=[1, 1],
          padding="VALID",
          expected=expected,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes
  def testConv2D2x2Depth3ValidBackpropFilterStride1x2(self):
    expected = [161.0, 182.0, 287.0, 308.0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[1, 3, 6, 1],
          filter_sizes=[2, 2, 1, 1],
          output_sizes=[1, 2, 3, 1],
          strides=[1, 2],
          padding="VALID",
          expected=expected,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DStrideTwoFilterOneSameBackpropFilter(self):
    expected_output = [78.]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[1, 4, 4, 1],
          filter_sizes=[1, 1, 1, 1],
          output_sizes=[1, 2, 2, 1],
          strides=[2, 2],
          padding="SAME",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu)

  @test_util.run_in_graph_and_eager_modes
  def testConv2DKernelSizeMatchesInputSizeBackpropFilter(self):
    expected_output = [1.0, 2.0, 2.0, 4.0, 3.0, 6.0, 4.0, 8.0]
    for (data_format, use_gpu) in GetTestConfigs():
      self._RunAndVerifyBackpropFilter(
          input_sizes=[1, 2, 2, 1],
          filter_sizes=[2, 2, 1, 2],
          output_sizes=[1, 1, 1, 2],
          strides=[1, 1],
          padding="VALID",
          expected=expected_output,
          data_format=data_format,
          use_gpu=use_gpu)

  # Testing for backprops
  def _RunAndVerifyBackpropInputDilation(self, input_sizes, filter_sizes,
                                         output_sizes, strides, dilations,
                                         padding, data_format, use_gpu, err):
    x1 = self._CreateNumpyTensor(input_sizes)
    x2 = self._CreateNumpyTensor(filter_sizes)
    default_dilations = (dilations[0] == 1 and dilations[1] == 1)
    if default_dilations or use_gpu:
      with self.cached_session(use_gpu=use_gpu):
        if data_format == "NCHW":
          input_sizes = test_util.NHWCToNCHW(input_sizes)
        t1 = constant_op.constant(x1, shape=input_sizes)
        t2 = constant_op.constant(x2, shape=filter_sizes)
        full_strides = [1] + strides + [1]
        full_dilations = [1] + dilations + [1]
        if data_format == "NCHW":
          full_strides = test_util.NHWCToNCHW(full_strides)
          full_dilations = test_util.NHWCToNCHW(full_dilations)
        conv_forward = nn_ops.conv2d(
            t1,
            t2,
            strides=full_strides,
            dilations=full_dilations,
            padding=padding,
            data_format=data_format)
        conv_forward_2 = nn_ops.convolution(
            t1,
            t2,
            padding=padding,
            strides=strides,
            dilation_rate=dilations,
            data_format=data_format)
        if data_format == "NCHW":
          conv_forward = test_util.NCHWToNHWC(conv_forward)
          conv_forward_2 = test_util.NCHWToNHWC(conv_forward_2)
        conv = gradients_impl.gradients(conv_forward, t1)[0]
        conv_2 = gradients_impl.gradients(conv_forward_2, t1)[0]
        # "values" consists of two tensors for two backprops
        value = self.evaluate(conv)
        value_2 = self.evaluate(conv_2)
        self.assertShapeEqual(value, conv)
        self.assertShapeEqual(value_2, conv_2)
      tf_logging.debug("expected = %s", value_2)
      tf_logging.debug("actual = %s", value)
      self.assertArrayNear(value_2.flatten(), value.flatten(), err)

  # Testing for backprops
  def _RunAndVerifyBackpropFilterDilation(self, input_sizes, filter_sizes,
                                          output_sizes, strides, dilations,
                                          padding, data_format, use_gpu, err):
    x1 = self._CreateNumpyTensor(input_sizes)
    x2 = self._CreateNumpyTensor(filter_sizes)
    default_dilations = (dilations[0] == 1 and dilations[1] == 1)
    if default_dilations or use_gpu:
      with self.cached_session(use_gpu=use_gpu):
        if data_format == "NCHW":
          input_sizes = test_util.NHWCToNCHW(input_sizes)
        t1 = constant_op.constant(x1, shape=input_sizes)
        t2 = constant_op.constant(x2, shape=filter_sizes)
        full_strides = [1] + strides + [1]
        full_dilations = [1] + dilations + [1]
        if data_format == "NCHW":
          full_strides = test_util.NHWCToNCHW(full_strides)
          full_dilations = test_util.NHWCToNCHW(full_dilations)
        conv_forward = nn_ops.conv2d(
            t1,
            t2,
            strides=full_strides,
            dilations=full_dilations,
            padding=padding,
            data_format=data_format)
        conv_forward_2 = nn_ops.convolution(
            t1,
            t2,
            padding=padding,
            strides=strides,
            dilation_rate=dilations,
            data_format=data_format)
        if data_format == "NCHW":
          conv_forward = test_util.NCHWToNHWC(conv_forward)
          conv_forward_2 = test_util.NCHWToNHWC(conv_forward_2)
1396        conv = gradients_impl.gradients(conv_forward, t2)[0]
1397        conv_2 = gradients_impl.gradients(conv_forward_2, t2)[0]
1398        value = self.evaluate(conv)
1399        value_2 = self.evaluate(conv_2)
1400        self.assertShapeEqual(value, conv)
1401        self.assertShapeEqual(value_2, conv_2)
1402      tf_logging.debug("expected = %s", value_2)
1403      tf_logging.debug("actual = %s", value)
1404      self.assertArrayNear(value_2.flatten(), value.flatten(), err)
1405
1406  @test_util.deprecated_graph_mode_only
1407  def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self):
1408    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
1409      for (data_format, use_gpu) in GetTestConfigs():
1410        self._RunAndVerifyBackpropFilterDilation(
1411            input_sizes=[1, 3, 6, 1],
1412            filter_sizes=[2, 2, 1, 1],
1413            output_sizes=[1, 1, 5, 1],
1414            strides=[1, 1],
1415            dilations=[2, 1],
1416            padding="VALID",
1417            data_format=data_format,
1418            use_gpu=use_gpu,
1419            err=1e-5)
1420
1421  @test_util.deprecated_graph_mode_only
1422  def testConv2D2x2Depth1ValidBackpropFilterDilation1x2(self):
1423    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
1424      for (data_format, use_gpu) in GetTestConfigs():
1425        self._RunAndVerifyBackpropFilterDilation(
1426            input_sizes=[1, 2, 3, 1],
1427            filter_sizes=[2, 2, 1, 1],
1428            output_sizes=[1, 1, 2, 1],
1429            strides=[1, 1],
1430            dilations=[1, 2],
1431            padding="VALID",
1432            data_format=data_format,
1433            use_gpu=use_gpu,
1434            err=1e-5)
1435
1436  @test_util.deprecated_graph_mode_only
1437  def testConv2DEmptyBackpropFilterDilation1x2(self):
1438    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
1439      for (data_format, use_gpu) in GetTestConfigs():
1440        self._RunAndVerifyBackpropFilterDilation(
1441            input_sizes=[1, 2, 3, 1],
1442            filter_sizes=[2, 2, 1, 0],
1443            output_sizes=[1, 1, 2, 0],
1444            strides=[1, 1],
1445            dilations=[1, 2],
1446            padding="VALID",
1447            data_format=data_format,
1448            use_gpu=use_gpu,
1449            err=1e-5)
1450
1451  @test_util.deprecated_graph_mode_only
1452  def testConv2D2x2Depth3ValidBackpropFilterDilation2x2(self):
1453    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
1454      for (data_format, use_gpu) in GetTestConfigs():
1455        self._RunAndVerifyBackpropFilterDilation(
1456            input_sizes=[1, 3, 4, 3],
1457            filter_sizes=[2, 2, 3, 3],
1458            output_sizes=[1, 1, 2, 3],
1459            strides=[1, 1],
1460            dilations=[2, 2],
1461            padding="VALID",
1462            data_format=data_format,
1463            use_gpu=use_gpu,
1464            err=1e-5)
1465
1466  @test_util.deprecated_graph_mode_only
1467  def testConv2DKernelSizeMatchesInputSizeBackpropFilterDilation2x2(self):
1468    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
1469      for (data_format, use_gpu) in GetTestConfigs():
1470        self._RunAndVerifyBackpropFilterDilation(
1471            input_sizes=[1, 3, 3, 1],
1472            filter_sizes=[2, 2, 1, 2],
1473            output_sizes=[1, 1, 1, 2],
1474            strides=[1, 1],
1475            dilations=[2, 2],
1476            padding="VALID",
1477            data_format=data_format,
1478            use_gpu=use_gpu,
1479            err=1e-5)
1480
1481  @test_util.deprecated_graph_mode_only
1482  def testConv2D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self):
1483    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
1484      for (data_format, use_gpu) in GetTestConfigs():
1485        self._RunAndVerifyBackpropInputDilation(
1486            input_sizes=[1, 3, 6, 1],
1487            filter_sizes=[2, 2, 1, 1],
1488            output_sizes=[1, 1, 5, 1],
1489            strides=[1, 1],
1490            dilations=[2, 1],
1491            padding="VALID",
1492            data_format=data_format,
1493            use_gpu=use_gpu,
1494            err=1e-5)
1495
1496  @test_util.deprecated_graph_mode_only
1497  def testConv2D2x2Depth1ValidBackpropInputDilation1x2(self):
1498    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
1499      for (data_format, use_gpu) in GetTestConfigs():
1500        self._RunAndVerifyBackpropInputDilation(
1501            input_sizes=[1, 2, 3, 1],
1502            filter_sizes=[2, 2, 1, 1],
1503            output_sizes=[1, 1, 2, 1],
1504            strides=[1, 1],
1505            dilations=[1, 2],
1506            padding="VALID",
1507            data_format=data_format,
1508            use_gpu=use_gpu,
1509            err=1e-5)
1510
1511  @test_util.deprecated_graph_mode_only
1512  def testConv2DEmptyBackpropInputDilation1x2(self):
1513    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
1514      for (data_format, use_gpu) in GetTestConfigs():
1515        self._RunAndVerifyBackpropInputDilation(
1516            input_sizes=[0, 2, 3, 1],
1517            filter_sizes=[2, 2, 1, 1],
1518            output_sizes=[0, 1, 2, 1],
1519            strides=[1, 1],
1520            dilations=[1, 2],
1521            padding="VALID",
1522            data_format=data_format,
1523            use_gpu=use_gpu,
1524            err=1e-5)
1525
1526  @test_util.deprecated_graph_mode_only
1527  def testConv2D2x2Depth3ValidBackpropInputDilation2x1(self):
1528    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
1529      for (data_format, use_gpu) in GetTestConfigs():
1530        # The GPU version of this test is not very stable, so the error
1531        # threshold is raised to 1e-4.
1532        self._RunAndVerifyBackpropInputDilation(
1533            input_sizes=[1, 3, 2, 3],
1534            filter_sizes=[2, 2, 3, 3],
1535            output_sizes=[1, 1, 2, 3],
1536            strides=[1, 1],
1537            dilations=[2, 1],
1538            padding="VALID",
1539            data_format=data_format,
1540            use_gpu=use_gpu,
1541            err=1e-4)
1542
1543  @test_util.deprecated_graph_mode_only
1544  def testConv2DKernelSizeMatchesInputSizeBackpropInputDilation2x2(self):
1545    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
1546      for (data_format, use_gpu) in GetTestConfigs():
1547        self._RunAndVerifyBackpropInputDilation(
1548            input_sizes=[1, 3, 3, 1],
1549            filter_sizes=[2, 2, 1, 2],
1550            output_sizes=[1, 1, 1, 2],
1551            strides=[1, 1],
1552            dilations=[2, 2],
1553            padding="VALID",
1554            data_format=data_format,
1555            use_gpu=use_gpu,
1556            err=1e-5)
1557
1558  def _RunAndVerifyBackpropInputExplicitPadding(self,
1559                                                input_sizes,
1560                                                filter_sizes,
1561                                                output_sizes,
1562                                                strides,
1563                                                padding,
1564                                                data_format,
1565                                                use_gpu,
1566                                                dilations=(1, 1),
1567                                                err=2e-5):
1568    if use_gpu and not test.is_gpu_available(cuda_only=True):
1569      return
1570    if not use_gpu and dilations != (1, 1):
1571      return  # Non-default dilations are currently not supported on the CPU.
1572
1573    x1 = self._CreateNumpyTensor(filter_sizes)
1574    x2 = self._CreateNumpyTensor(output_sizes)
1575    dilations = list(dilations)
1576    padded_input_sizes = input_sizes[:]
1577    padded_input_sizes[1] += padding[0][0] + padding[0][1]
1578    padded_input_sizes[2] += padding[1][0] + padding[1][1]
1579    c = nn_ops.conv2d_backprop_input(
1580        padded_input_sizes,
1581        x1,
1582        x2,
1583        strides=[1] + strides + [1],
1584        padding="VALID",
1585        dilations=[1] + dilations + [1])
1586    c = c[:, padding[0][0]:(c.shape[1] - padding[0][1]), padding[1][0]:(
1587        c.shape[2] - padding[1][1]), :]
1588    expected = list(self.evaluate(array_ops.reshape(c, [-1])))
1589    self._RunAndVerifyBackpropInput(
1590        input_sizes,
1591        filter_sizes,
1592        output_sizes,
1593        strides,
1594        padding,
1595        expected,
1596        data_format,
1597        use_gpu=use_gpu,
1598        err=err,
1599        dilations=dilations)
1600
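  # The helper above uses a slicing identity (sketch; pt/pb/pl/pr are
  # shorthand for the top/bottom/left/right pad amounts, not names from this
  # file): the input gradient of a conv2d with explicit padding equals the
  # "VALID" input gradient computed for the padded input shape, with the pad
  # border sliced off afterwards:
  #
  #   padded = [n, h + pt + pb, w + pl + pr, c]
  #   g = nn_ops.conv2d_backprop_input(padded, filt, out_grad,
  #                                    strides, padding="VALID")
  #   g = g[:, pt:h + pt, pl:w + pl, :]  # drop the pad region
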
1601  @test_util.run_in_graph_and_eager_modes()
1602  def testConv2D2x2Depth1Padding0x0BackpropInput(self):
1603    for (data_format, use_gpu) in GetTestConfigs():
1604      self._RunAndVerifyBackpropInputExplicitPadding(
1605          input_sizes=[1, 2, 3, 1],
1606          filter_sizes=[2, 2, 1, 1],
1607          output_sizes=[1, 1, 2, 1],
1608          strides=[1, 1],
1609          padding=[[0, 0], [0, 0]],
1610          data_format=data_format,
1611          use_gpu=use_gpu)
1612
1613      self._RunAndVerifyBackpropInputExplicitPadding(
1614          input_sizes=[1, 3, 4, 2],
1615          filter_sizes=[2, 2, 2, 3],
1616          output_sizes=[1, 1, 2, 3],
1617          strides=[2, 2],
1618          padding=[[0, 0], [0, 0]],
1619          data_format=data_format,
1620          use_gpu=use_gpu)
1621
1622  @test_util.run_in_graph_and_eager_modes()
1623  def testConv2D2x2Depth1Padding1x1BackpropInput(self):
1624    for (data_format, use_gpu) in GetTestConfigs():
1625      self._RunAndVerifyBackpropInputExplicitPadding(
1626          input_sizes=[1, 2, 3, 1],
1627          filter_sizes=[2, 2, 1, 2],
1628          output_sizes=[1, 3, 4, 2],
1629          strides=[1, 1],
1630          padding=[[1, 1], [1, 1]],
1631          data_format=data_format,
1632          use_gpu=use_gpu,
1633          err=1e-4)
1634
1635      self._RunAndVerifyBackpropInputExplicitPadding(
1636          input_sizes=[1, 2, 3, 2],
1637          filter_sizes=[1, 1, 2, 1],
1638          output_sizes=[1, 4, 3, 1],
1639          strides=[1, 2],
1640          padding=[[1, 1], [1, 1]],
1641          data_format=data_format,
1642          use_gpu=use_gpu)
1643
1644      self._RunAndVerifyBackpropInputExplicitPadding(
1645          input_sizes=[1, 4, 3, 1],
1646          filter_sizes=[2, 2, 1, 1],
1647          output_sizes=[1, 4, 2, 1],
1648          strides=[1, 2],
1649          padding=[[1, 1], [1, 1]],
1650          data_format=data_format,
1651          dilations=[2, 2], use_gpu=use_gpu)
1652
1653  @test_util.run_in_graph_and_eager_modes()
1654  def testConv2D2x2Depth1Padding2x2BackpropInput(self):
1655    for (data_format, use_gpu) in GetTestConfigs():
1656      self._RunAndVerifyBackpropInputExplicitPadding(
1657          input_sizes=[2, 3, 1, 1],
1658          filter_sizes=[2, 1, 1, 1],
1659          output_sizes=[2, 2, 5, 1],
1660          strides=[3, 1],
1661          padding=[[2, 2], [2, 2]],
1662          data_format=data_format,
1663          use_gpu=use_gpu)
1664
1665      self._RunAndVerifyBackpropInputExplicitPadding(
1666          input_sizes=[1, 3, 6, 1],
1667          filter_sizes=[3, 2, 1, 1],
1668          output_sizes=[1, 3, 4, 1],
1669          strides=[1, 2],
1670          padding=[[2, 2], [2, 2]],
1671          data_format=data_format,
1672          dilations=[2, 3],
1673          use_gpu=use_gpu)
1674
1675  @test_util.run_in_graph_and_eager_modes()
1676  def testConv2D2x2Depth1Padding_1_8_4_1_BackpropInput(self):
1677    for (data_format, use_gpu) in GetTestConfigs():
1678      self._RunAndVerifyBackpropInputExplicitPadding(
1679          input_sizes=[1, 2, 3, 1],
1680          filter_sizes=[2, 2, 1, 1],
1681          output_sizes=[1, 10, 8, 1],
1682          strides=[1, 1],
1683          padding=[[1, 8], [4, 2]],
1684          data_format=data_format,
1685          use_gpu=use_gpu,
1686          err=5e-5)
1687
1688      self._RunAndVerifyBackpropInputExplicitPadding(
1689          input_sizes=[1, 5, 3, 1],
1690          filter_sizes=[3, 2, 1, 1],
1691          output_sizes=[1, 4, 8, 1],
1692          strides=[3, 1],
1693          padding=[[1, 8], [4, 2]],
1694          data_format=data_format,
1695          use_gpu=use_gpu)
1696
1697  @test_util.run_in_graph_and_eager_modes()
1698  def testConv2D2x2Depth1Padding_5_0_2_2_BackpropInput(self):
1699    for (data_format, use_gpu) in GetTestConfigs():
1700      self._RunAndVerifyBackpropInputExplicitPadding(
1701          input_sizes=[1, 3, 3, 1],
1702          filter_sizes=[2, 1, 1, 1],
1703          output_sizes=[1, 7, 7, 1],
1704          strides=[1, 1],
1705          padding=[[5, 0], [2, 2]],
1706          data_format=data_format,
1707          err=5e-5,
1708          use_gpu=use_gpu)
1709
1710      self._RunAndVerifyBackpropInputExplicitPadding(
1711          input_sizes=[1, 4, 2, 1],
1712          filter_sizes=[3, 3, 1, 1],
1713          output_sizes=[1, 5, 2, 1],
1714          strides=[1, 2],
1715          padding=[[5, 0], [2, 2]],
1716          data_format=data_format,
1717          dilations=[2, 1],
1718          use_gpu=use_gpu)
1719
1720  def _RunAndVerifyBackpropFilterExplicitPadding(self,
1721                                                 input_sizes,
1722                                                 filter_sizes,
1723                                                 output_sizes,
1724                                                 strides,
1725                                                 padding,
1726                                                 data_format,
1727                                                 use_gpu,
1728                                                 dilations=(1, 1),
1729                                                 err=1e-5):
1730    if use_gpu and not test.is_gpu_available(cuda_only=True):
1731      return
1732    if not use_gpu and dilations != (1, 1):
1733      return  # Non-default dilations are currently not supported on the CPU.
1734
1735    x0 = self._CreateNumpyTensor(input_sizes)
1736    x2 = self._CreateNumpyTensor(output_sizes)
1737    dilations = list(dilations)
1738
1739    x0 = np.pad(x0, [(0, 0)] + padding + [(0, 0)], "constant")
1740    c = nn_ops.conv2d_backprop_filter(
1741        x0,
1742        filter_sizes,
1743        x2,
1744        strides=[1] + strides + [1],
1745        padding="VALID",
1746        dilations=[1] + dilations + [1])
1747    expected = list(self.evaluate(array_ops.reshape(c, [-1])))
1748    self._RunAndVerifyBackpropFilter(
1749        input_sizes,
1750        filter_sizes,
1751        output_sizes,
1752        strides,
1753        padding,
1754        expected,
1755        data_format,
1756        use_gpu=use_gpu,
1757        dilations=dilations,
1758        err=err)
1759
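  # The filter-gradient analogue (sketch): zero-pad the input itself with
  # np.pad, then a "VALID" conv2d_backprop_filter over the padded input gives
  # the expected gradient directly; no slicing is needed because input
  # padding does not change the filter shape.
  #
  #   x0 = np.pad(x0, [(0, 0)] + padding + [(0, 0)], "constant")
  #   c = nn_ops.conv2d_backprop_filter(x0, filter_sizes, out_grad,
  #                                     strides, padding="VALID")
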
1760  @test_util.run_in_graph_and_eager_modes()
1761  def testConv2D2x2Depth1Padding0x0BackpropFilter(self):
1762    for (data_format, use_gpu) in GetTestConfigs():
1763      self._RunAndVerifyBackpropFilterExplicitPadding(
1764          input_sizes=[1, 2, 3, 1],
1765          filter_sizes=[2, 2, 1, 1],
1766          output_sizes=[1, 1, 2, 1],
1767          strides=[1, 1],
1768          padding=[[0, 0], [0, 0]],
1769          data_format=data_format, use_gpu=use_gpu)
1770
1771      self._RunAndVerifyBackpropFilterExplicitPadding(
1772          input_sizes=[1, 3, 4, 2],
1773          filter_sizes=[2, 2, 2, 3],
1774          output_sizes=[1, 1, 2, 3],
1775          strides=[2, 2],
1776          padding=[[0, 0], [0, 0]],
1777          data_format=data_format, use_gpu=use_gpu)
1778
1779  @test_util.run_in_graph_and_eager_modes()
1780  def testConv2D2x2Depth1Padding1x1BackpropFilter(self):
1781    for (data_format, use_gpu) in GetTestConfigs():
1782      self._RunAndVerifyBackpropFilterExplicitPadding(
1783          input_sizes=[1, 2, 3, 1],
1784          filter_sizes=[2, 2, 1, 2],
1785          output_sizes=[1, 3, 4, 2],
1786          strides=[1, 1],
1787          padding=[[1, 1], [1, 1]],
1788          data_format=data_format,
1789          use_gpu=use_gpu,
1790          err=5e-5)
1791
1792      self._RunAndVerifyBackpropFilterExplicitPadding(
1793          input_sizes=[1, 2, 3, 2],
1794          filter_sizes=[1, 1, 2, 1],
1795          output_sizes=[1, 4, 3, 1],
1796          strides=[1, 2],
1797          padding=[[1, 1], [1, 1]],
1798          use_gpu=use_gpu,
1799          data_format=data_format)
1800
1801      self._RunAndVerifyBackpropFilterExplicitPadding(
1802          input_sizes=[1, 4, 3, 1],
1803          filter_sizes=[2, 2, 1, 1],
1804          output_sizes=[1, 4, 2, 1],
1805          strides=[1, 2],
1806          padding=[[1, 1], [1, 1]],
1807          data_format=data_format,
1808          use_gpu=use_gpu,
1809          dilations=[2, 2])
1810
1811  @test_util.run_in_graph_and_eager_modes()
1812  def testConv2D2x2Depth1Padding2x2BackpropFilter(self):
1813    for (data_format, use_gpu) in GetTestConfigs():
1814      self._RunAndVerifyBackpropFilterExplicitPadding(
1815          input_sizes=[2, 3, 1, 1],
1816          filter_sizes=[2, 1, 1, 1],
1817          output_sizes=[2, 2, 5, 1],
1818          strides=[3, 1],
1819          padding=[[2, 2], [2, 2]],
1820          data_format=data_format,
1821          use_gpu=use_gpu)
1822
1823      self._RunAndVerifyBackpropFilterExplicitPadding(
1824          input_sizes=[1, 3, 6, 1],
1825          filter_sizes=[3, 2, 1, 1],
1826          output_sizes=[1, 3, 4, 1],
1827          strides=[1, 2],
1828          padding=[[2, 2], [2, 2]],
1829          data_format=data_format,
1830          use_gpu=use_gpu,
1831          dilations=[2, 3])
1832
1833  @test_util.run_in_graph_and_eager_modes()
1834  def testConv2D2x2Depth1Padding_1_8_4_1_BackpropFilter(self):
1835    for (data_format, use_gpu) in GetTestConfigs():
1836      self._RunAndVerifyBackpropFilterExplicitPadding(
1837          input_sizes=[1, 2, 3, 1],
1838          filter_sizes=[2, 2, 1, 1],
1839          output_sizes=[1, 10, 8, 1],
1840          strides=[1, 1],
1841          padding=[[1, 8], [4, 2]],
1842          data_format=data_format,
1843          use_gpu=use_gpu,
1844          err=1e-4)
1845
1846      self._RunAndVerifyBackpropFilterExplicitPadding(
1847          input_sizes=[1, 5, 3, 1],
1848          filter_sizes=[3, 2, 1, 1],
1849          output_sizes=[1, 4, 8, 1],
1850          strides=[3, 1],
1851          padding=[[1, 8], [4, 2]],
1852          use_gpu=use_gpu,
1853          data_format=data_format)
1854
1855  @test_util.run_in_graph_and_eager_modes()
1856  def testConv2D2x2Depth1Padding_5_0_2_2_BackpropFilter(self):
1857    for (data_format, use_gpu) in GetTestConfigs():
1858      self._RunAndVerifyBackpropFilterExplicitPadding(
1859          input_sizes=[1, 3, 3, 1],
1860          filter_sizes=[2, 1, 1, 1],
1861          output_sizes=[1, 7, 7, 1],
1862          strides=[1, 1],
1863          padding=[[5, 0], [2, 2]],
1864          data_format=data_format,
1865          use_gpu=use_gpu,
1866          err=1e-4)
1867
1868      self._RunAndVerifyBackpropFilterExplicitPadding(
1869          input_sizes=[1, 4, 2, 1],
1870          filter_sizes=[3, 3, 1, 1],
1871          output_sizes=[1, 5, 2, 1],
1872          strides=[1, 2],
1873          padding=[[5, 0], [2, 2]],
1874          data_format=data_format,
1875          use_gpu=use_gpu,
1876          dilations=[2, 1])
1877
1878  # Gradient checkers
1879  def ConstructAndTestGradient(self,
1880                               batch,
1881                               input_rows,
1882                               input_cols,
1883                               filter_rows,
1884                               filter_cols,
1885                               in_depth,
1886                               out_depth,
1887                               stride_rows,
1888                               stride_cols,
1889                               padding,
1890                               test_input,
1891                               data_format,
1892                               use_gpu,
1893                               num_groups=1,
1894                               max_err=0.003):
1895    assert in_depth % num_groups == 0 and out_depth % num_groups == 0
1896    input_shape = [batch, input_rows, input_cols, in_depth]
1897    filter_shape = [filter_rows, filter_cols, in_depth // num_groups, out_depth]
1898    # TODO(yangke): refactor the computation of the output shape.
1899    if padding == "VALID":
1900      output_rows = (input_rows - filter_rows + stride_rows) // stride_rows
1901      output_cols = (input_cols - filter_cols + stride_cols) // stride_cols
1902    elif padding == "SAME":
1903      output_rows = (input_rows + stride_rows - 1) // stride_rows
1904      output_cols = (input_cols + stride_cols - 1) // stride_cols
1905    else:
1906      self.assertIsInstance(padding, (list, tuple))
1907      output_rows = (input_rows + padding[1][0] + padding[1][1] - filter_rows +
1908                     stride_rows) // stride_rows
1909      output_cols = (input_cols + padding[2][0] + padding[2][1] - filter_cols +
1910                     stride_cols) // stride_cols
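    # Worked example of the shape formulas above (illustrative arithmetic
    # only): input_rows=5, filter_rows=3, stride_rows=1 gives
    # (5 - 3 + 1) // 1 = 3 rows for "VALID", (5 + 1 - 1) // 1 = 5 for "SAME",
    # and (5 + 1 + 1 - 3 + 1) // 1 = 5 for explicit padding [1, 1].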
1911    output_shape = [batch, output_rows, output_cols, out_depth]
1912    input_size = 1
1913    for x in input_shape:
1914      input_size *= x
1915    filter_size = 1
1916    for x in filter_shape:
1917      filter_size *= x
1918    input_data = [x * 1.0 / input_size for x in range(0, input_size)]
1919    filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
1920    # Conv2DGrad functions are not compiled for double due to a problem in
1921    # the way Eigen's Conv2DGrad works for double, so the double path is
1922    # disabled here. We should re-enable it when double support returns for
1923    # CPU and/or GPU.
1924    for dtype in self._DtypesToTest(use_gpu=use_gpu):
1925      with self.cached_session(use_gpu=use_gpu):
1926        input_tensor = constant_op.constant(
1927            input_data, shape=input_shape, dtype=dtype, name="input")
1928        filter_tensor = constant_op.constant(
1929            filter_data, shape=filter_shape, dtype=dtype, name="filter")
1930        strides = [1, stride_rows, stride_cols, 1]
1931        new_padding = padding
1932        if data_format == "NCHW":
1933          new_input_tensor = test_util.NHWCToNCHW(input_tensor)
1934          strides = test_util.NHWCToNCHW(strides)
1935          if isinstance(padding, (list, tuple)):
1936            new_padding = test_util.NHWCToNCHW(padding)
1937        else:
1938          new_input_tensor = input_tensor
1939        conv = nn_ops.conv2d(
1940            new_input_tensor,
1941            filter_tensor,
1942            strides,
1943            new_padding,
1944            data_format=data_format,
1945            name="conv")
1946        if data_format == "NCHW":
1947          conv = test_util.NCHWToNHWC(conv)
1948        self.assertEqual(output_shape, conv.get_shape())
1949        if test_input:
1950          jacob_t, jacob_n = gradient_checker.compute_gradient(input_tensor,
1951                                                               input_shape,
1952                                                               conv,
1953                                                               output_shape)
1954        else:
1955          jacob_t, jacob_n = gradient_checker.compute_gradient(filter_tensor,
1956                                                               filter_shape,
1957                                                               conv,
1958                                                               output_shape)
1959        if dtype == dtypes.float32:
1960          reference_jacob_t = jacob_t
1961          err = np.fabs(jacob_t - jacob_n).max()
1962        else:
1963          # Compare fp16 theoretical gradients to fp32 theoretical gradients,
1964          # since fp16 numerical gradients are too imprecise.
1965          err = np.fabs(jacob_t - reference_jacob_t).max()
1966
1967        tf_logging.debug("conv_2d gradient error = %s", err)
1968        self.assertLess(err, max_err)
1969
1970  @test_util.deprecated_graph_mode_only
1971  def testInputGradientValidPaddingStrideOne(self):
1972    for (data_format, use_gpu) in GetTestConfigs():
1973      self.ConstructAndTestGradient(
1974          batch=2,
1975          input_rows=5,
1976          input_cols=4,
1977          filter_rows=3,
1978          filter_cols=3,
1979          in_depth=2,
1980          out_depth=3,
1981          stride_rows=1,
1982          stride_cols=1,
1983          padding="VALID",
1984          test_input=True,
1985          data_format=data_format,
1986          use_gpu=use_gpu)
1987
1988  @test_util.deprecated_graph_mode_only
1989  def testFilterGradientValidPaddingStrideOne(self):
1990    for (data_format, use_gpu) in GetTestConfigs():
1991      self.ConstructAndTestGradient(
1992          batch=4,
1993          input_rows=6,
1994          input_cols=5,
1995          filter_rows=2,
1996          filter_cols=2,
1997          in_depth=2,
1998          out_depth=3,
1999          stride_rows=1,
2000          stride_cols=1,
2001          padding="VALID",
2002          test_input=False,
2003          data_format=data_format,
2004          use_gpu=use_gpu)
2005
2006  @test_util.deprecated_graph_mode_only
2007  def testInputGradientValidPaddingStrideTwo(self):
2008    for (data_format, use_gpu) in GetTestConfigs():
2009      self.ConstructAndTestGradient(
2010          batch=2,
2011          input_rows=4,
2012          input_cols=5,
2013          filter_rows=3,
2014          filter_cols=3,
2015          in_depth=2,
2016          out_depth=3,
2017          stride_rows=2,
2018          stride_cols=2,
2019          padding="VALID",
2020          test_input=True,
2021          data_format=data_format,
2022          use_gpu=use_gpu)
2023
2024  @test_util.deprecated_graph_mode_only
2025  def testFilterGradientValidPaddingStrideTwo(self):
2026    for (data_format, use_gpu) in GetTestConfigs():
2027      self.ConstructAndTestGradient(
2028          batch=4,
2029          input_rows=6,
2030          input_cols=5,
2031          filter_rows=2,
2032          filter_cols=2,
2033          in_depth=2,
2034          out_depth=3,
2035          stride_rows=2,
2036          stride_cols=2,
2037          padding="VALID",
2038          test_input=False,
2039          data_format=data_format,
2040          use_gpu=use_gpu)
2041
2042  @test_util.deprecated_graph_mode_only
2043  def testInputGradientValidPaddingStrideThree(self):
2044    for (data_format, use_gpu) in GetTestConfigs():
2045      self.ConstructAndTestGradient(
2046          batch=2,
2047          input_rows=7,
2048          input_cols=6,
2049          filter_rows=3,
2050          filter_cols=3,
2051          in_depth=4,
2052          out_depth=5,
2053          stride_rows=3,
2054          stride_cols=3,
2055          padding="VALID",
2056          test_input=True,
2057          data_format=data_format,
2058          use_gpu=use_gpu)
2059
2060  @test_util.deprecated_graph_mode_only
2061  def testFilterGradientValidPaddingStrideThree(self):
2062    for (data_format, use_gpu) in GetTestConfigs():
2063      self.ConstructAndTestGradient(
2064          batch=2,
2065          input_rows=8,
2066          input_cols=7,
2067          filter_rows=4,
2068          filter_cols=4,
2069          in_depth=2,
2070          out_depth=3,
2071          stride_rows=3,
2072          stride_cols=3,
2073          padding="VALID",
2074          test_input=False,
2075          data_format=data_format,
2076          use_gpu=use_gpu)
2077
2078  @test_util.deprecated_graph_mode_only
2079  def testInputGradientSamePaddingStrideOne(self):
2080    for (data_format, use_gpu) in GetTestConfigs():
2081      self.ConstructAndTestGradient(
2082          batch=2,
2083          input_rows=7,
2084          input_cols=6,
2085          filter_rows=3,
2086          filter_cols=3,
2087          in_depth=2,
2088          out_depth=3,
2089          stride_rows=1,
2090          stride_cols=1,
2091          padding="SAME",
2092          test_input=True,
2093          data_format=data_format,
2094          use_gpu=use_gpu)
2095
2096  @test_util.deprecated_graph_mode_only
2097  def testFilterGradientSamePaddingStrideOne(self):
2098    for (data_format, use_gpu) in GetTestConfigs():
2099      self.ConstructAndTestGradient(
2100          batch=4,
2101          input_rows=6,
2102          input_cols=5,
2103          filter_rows=2,
2104          filter_cols=2,
2105          in_depth=2,
2106          out_depth=3,
2107          stride_rows=1,
2108          stride_cols=1,
2109          padding="SAME",
2110          test_input=False,
2111          data_format=data_format,
2112          use_gpu=use_gpu)
2113
2114  @test_util.deprecated_graph_mode_only
2115  def testInputGradientSamePaddingStrideTwo(self):
2116    for (data_format, use_gpu) in GetTestConfigs():
2117      self.ConstructAndTestGradient(
2118          batch=2,
2119          input_rows=5,
2120          input_cols=4,
2121          filter_rows=3,
2122          filter_cols=3,
2123          in_depth=3,
2124          out_depth=3,
2125          stride_rows=2,
2126          stride_cols=2,
2127          padding="SAME",
2128          test_input=True,
2129          data_format=data_format,
2130          use_gpu=use_gpu)
2131
2132  @test_util.deprecated_graph_mode_only
2133  def testFilterGradientSamePaddingStrideTwo(self):
2134    for (data_format, use_gpu) in GetTestConfigs():
2135      self.ConstructAndTestGradient(
2136          batch=4,
2137          input_rows=6,
2138          input_cols=5,
2139          filter_rows=2,
2140          filter_cols=2,
2141          in_depth=2,
2142          out_depth=3,
2143          stride_rows=2,
2144          stride_cols=2,
2145          padding="SAME",
2146          test_input=False,
2147          data_format=data_format,
2148          use_gpu=use_gpu)
2149
2150  @test_util.deprecated_graph_mode_only
2151  def testInputGradientSamePaddingStrideThree(self):
2152    for (data_format, use_gpu) in GetTestConfigs():
2153      self.ConstructAndTestGradient(
2154          batch=2,
2155          input_rows=7,
2156          input_cols=6,
2157          filter_rows=3,
2158          filter_cols=3,
2159          in_depth=4,
2160          out_depth=5,
2161          stride_rows=3,
2162          stride_cols=3,
2163          padding="SAME",
2164          test_input=True,
2165          data_format=data_format,
2166          use_gpu=use_gpu)
2167
2168  @test_util.deprecated_graph_mode_only
2169  def testFilterGradientSamePaddingStrideThree(self):
2170    for (data_format, use_gpu) in GetTestConfigs():
2171      self.ConstructAndTestGradient(
2172          batch=2,
2173          input_rows=8,
2174          input_cols=7,
2175          filter_rows=4,
2176          filter_cols=4,
2177          in_depth=2,
2178          out_depth=3,
2179          stride_rows=3,
2180          stride_cols=3,
2181          padding="SAME",
2182          test_input=False,
2183          data_format=data_format,
2184          use_gpu=use_gpu)
2185
2186  @test_util.deprecated_graph_mode_only
2187  def testFilterGradientSamePaddingStride2x1(self):
2188    for (data_format, use_gpu) in GetTestConfigs():
2189      self.ConstructAndTestGradient(
2190          batch=2,
2191          input_rows=8,
2192          input_cols=7,
2193          filter_rows=4,
2194          filter_cols=4,
2195          in_depth=2,
2196          out_depth=3,
2197          stride_rows=2,
2198          stride_cols=1,
2199          padding="SAME",
2200          test_input=False,
2201          data_format=data_format,
2202          use_gpu=use_gpu)
2203
2204  @test_util.deprecated_graph_mode_only
2205  def testInputGradientKernelSizeMatchesInputSize(self):
2206    for (data_format, use_gpu) in GetTestConfigs():
2207      self.ConstructAndTestGradient(
2208          batch=2,
2209          input_rows=4,
2210          input_cols=3,
2211          filter_rows=4,
2212          filter_cols=3,
2213          in_depth=2,
2214          out_depth=3,
2215          stride_rows=1,
2216          stride_cols=1,
2217          padding="VALID",
2218          test_input=True,
2219          data_format=data_format,
2220          use_gpu=use_gpu)
2221
2222  @test_util.deprecated_graph_mode_only
2223  def testFilterGradientKernelSizeMatchesInputSize(self):
2224    for (data_format, use_gpu) in GetTestConfigs():
2225      self.ConstructAndTestGradient(
2226          batch=2,
2227          input_rows=4,
2228          input_cols=3,
2229          filter_rows=4,
2230          filter_cols=3,
2231          in_depth=2,
2232          out_depth=3,
2233          stride_rows=1,
2234          stride_cols=1,
2235          padding="VALID",
2236          test_input=False,
2237          data_format=data_format,
2238          use_gpu=use_gpu)
2239
2240  @test_util.deprecated_graph_mode_only
2241  def testInputGradient1x1PaddingStrideOne(self):
2242    for (data_format, use_gpu) in GetTestConfigs():
2243      self.ConstructAndTestGradient(
2244          batch=2,
2245          input_rows=5,
2246          input_cols=4,
2247          filter_rows=3,
2248          filter_cols=3,
2249          in_depth=2,
2250          out_depth=3,
2251          stride_rows=1,
2252          stride_cols=1,
2253          padding=[[0, 0], [1, 1], [1, 1], [0, 0]],
2254          test_input=True,
2255          data_format=data_format,
2256          use_gpu=use_gpu,
2257          max_err=0.0025)
2258
2259  @test_util.deprecated_graph_mode_only
2260  def testFilterGradient1x1PaddingStrideOne(self):
2261    for (data_format, use_gpu) in GetTestConfigs():
2262      self.ConstructAndTestGradient(
2263          batch=2,
2264          input_rows=5,
2265          input_cols=4,
2266          filter_rows=3,
2267          filter_cols=3,
2268          in_depth=2,
2269          out_depth=3,
2270          stride_rows=1,
2271          stride_cols=1,
2272          padding=[[0, 0], [1, 1], [1, 1], [0, 0]],
2273          test_input=False,
2274          data_format=data_format,
2275          use_gpu=use_gpu)
2276
2277  @test_util.deprecated_graph_mode_only
2278  def testInputGradient1x1PaddingStrideTwo(self):
2279    for (data_format, use_gpu) in GetTestConfigs():
2280      self.ConstructAndTestGradient(
2281          batch=2,
2282          input_rows=4,
2283          input_cols=5,
2284          filter_rows=3,
2285          filter_cols=3,
2286          in_depth=2,
2287          out_depth=3,
2288          stride_rows=2,
2289          stride_cols=2,
2290          padding=[[0, 0], [1, 1], [1, 1], [0, 0]],
2291          test_input=True,
2292          data_format=data_format,
2293          use_gpu=use_gpu)
2294
2295  @test_util.deprecated_graph_mode_only
2296  def testFilterGradient1x1PaddingStrideTwo(self):
2297    for (data_format, use_gpu) in GetTestConfigs():
2298      self.ConstructAndTestGradient(
2299          batch=2,
2300          input_rows=4,
2301          input_cols=5,
2302          filter_rows=3,
2303          filter_cols=3,
2304          in_depth=2,
2305          out_depth=3,
2306          stride_rows=2,
2307          stride_cols=2,
2308          padding=[[0, 0], [1, 1], [1, 1], [0, 0]],
2309          test_input=False,
2310          data_format=data_format,
2311          use_gpu=use_gpu)
2312
2313  @test_util.deprecated_graph_mode_only
2314  def testInputGradient2x2PaddingStrideOne(self):
2315    for (data_format, use_gpu) in GetTestConfigs():
2316      self.ConstructAndTestGradient(
2317          batch=2,
2318          input_rows=5,
2319          input_cols=4,
2320          filter_rows=3,
2321          filter_cols=3,
2322          in_depth=2,
2323          out_depth=3,
2324          stride_rows=1,
2325          stride_cols=1,
2326          padding=[[0, 0], [2, 2], [2, 2], [0, 0]],
2327          test_input=True,
2328          data_format=data_format,
2329          use_gpu=use_gpu,
2330          max_err=0.003)
2331
2332  @test_util.deprecated_graph_mode_only
2333  def testFilterGradient2x2PaddingStrideOne(self):
2334    for (data_format, use_gpu) in GetTestConfigs():
2335      self.ConstructAndTestGradient(
2336          batch=2,
2337          input_rows=5,
2338          input_cols=4,
2339          filter_rows=3,
2340          filter_cols=3,
2341          in_depth=2,
2342          out_depth=3,
2343          stride_rows=1,
2344          stride_cols=1,
2345          padding=[[0, 0], [2, 2], [2, 2], [0, 0]],
2346          test_input=False,
2347          data_format=data_format,
2348          use_gpu=use_gpu,
2349          max_err=0.005)
2350
2351  @test_util.deprecated_graph_mode_only
2352  def testInputGradient1_2_3_4PaddingStride3x2(self):
2353    for (data_format, use_gpu) in GetTestConfigs():
2354      self.ConstructAndTestGradient(
2355          batch=2,
2356          input_rows=8,
2357          input_cols=5,
2358          filter_rows=4,
2359          filter_cols=2,
2360          in_depth=3,
2361          out_depth=2,
2362          stride_rows=3,
2363          stride_cols=2,
2364          padding=[[0, 0], [1, 2], [3, 4], [0, 0]],
2365          test_input=True,
2366          data_format=data_format,
2367          use_gpu=use_gpu)
2368
2369  @test_util.deprecated_graph_mode_only
2370  def testFilterGradient1_2_3_4PaddingStride3x2(self):
2371    for (data_format, use_gpu) in GetTestConfigs():
2372      self.ConstructAndTestGradient(
2373          batch=2,
2374          input_rows=8,
2375          input_cols=5,
2376          filter_rows=4,
2377          filter_cols=2,
2378          in_depth=3,
2379          out_depth=2,
2380          stride_rows=3,
2381          stride_cols=2,
2382          padding=[[0, 0], [1, 2], [3, 4], [0, 0]],
2383          test_input=False,
2384          data_format=data_format,
2385          use_gpu=use_gpu)
2386
2387  @test_util.deprecated_graph_mode_only
2388  def testInputGradient4_3_2_1PaddingStride2x1(self):
2389    for (data_format, use_gpu) in GetTestConfigs():
2390      self.ConstructAndTestGradient(
2391          batch=3,
2392          input_rows=5,
2393          input_cols=7,
2394          filter_rows=3,
2395          filter_cols=2,
2396          in_depth=1,
2397          out_depth=2,
2398          stride_rows=2,
2399          stride_cols=1,
2400          padding=[[0, 0], [4, 3], [2, 1], [0, 0]],
2401          test_input=True,
2402          data_format=data_format,
2403          use_gpu=use_gpu)
2404
2405  @test_util.deprecated_graph_mode_only
2406  def testFilterGradient4_3_2_1PaddingStride2x1(self):
2407    for (data_format, use_gpu) in GetTestConfigs():
2408      self.ConstructAndTestGradient(
2409          batch=3,
2410          input_rows=5,
2411          input_cols=7,
2412          filter_rows=3,
2413          filter_cols=2,
2414          in_depth=1,
2415          out_depth=2,
2416          stride_rows=2,
2417          stride_cols=1,
2418          padding=[[0, 0], [4, 3], [2, 1], [0, 0]],
2419          test_input=False,
2420          data_format=data_format,
2421          use_gpu=use_gpu)
2422
2423  @test_util.deprecated_graph_mode_only
2424  def testInputGradient0_0_0_5PaddingStride1x2(self):
2425    for (data_format, use_gpu) in GetTestConfigs():
2426      self.ConstructAndTestGradient(
2427          batch=2,
2428          input_rows=6,
2429          input_cols=7,
2430          filter_rows=3,
2431          filter_cols=4,
2432          in_depth=3,
2433          out_depth=2,
2434          stride_rows=1,
2435          stride_cols=2,
2436          padding=[[0, 0], [0, 0], [0, 5], [0, 0]],
2437          test_input=True,
2438          data_format=data_format,
2439          use_gpu=use_gpu)
2440
2441  @test_util.deprecated_graph_mode_only
2442  def testFilterGradient0_0_0_5PaddingStride1x2(self):
2443    for (data_format, use_gpu) in GetTestConfigs():
2444      self.ConstructAndTestGradient(
2445          batch=2,
2446          input_rows=6,
2447          input_cols=7,
2448          filter_rows=3,
2449          filter_cols=4,
2450          in_depth=3,
2451          out_depth=2,
2452          stride_rows=1,
2453          stride_cols=2,
2454          padding=[[0, 0], [0, 0], [0, 5], [0, 0]],
2455          test_input=False,
2456          data_format=data_format,
2457          use_gpu=use_gpu)
2458
2459  @test_util.deprecated_graph_mode_only
2460  def testShapeFunctionEdgeCases(self):
2461    # All shapes unknown.
2462    c1 = nn_ops.conv2d(
2463        array_ops.placeholder(dtypes.float32),
2464        array_ops.placeholder(dtypes.float32),
2465        strides=[1, 1, 1, 1],
2466        padding="SAME")
2467    self.assertEqual([None, None, None, None], c1.get_shape().as_list())
2468
2469    # Incorrect input shape.
2470    with self.assertRaises(ValueError):
2471      nn_ops.conv2d(
2472          array_ops.placeholder(
2473              dtypes.float32, shape=[1, 3]),
2474          array_ops.placeholder(dtypes.float32),
2475          strides=[1, 1, 1, 1],
2476          padding="SAME")
2477
2478    # Incorrect filter shape.
2479    with self.assertRaises(ValueError):
2480      nn_ops.conv2d(
2481          array_ops.placeholder(dtypes.float32),
2482          array_ops.placeholder(
2483              dtypes.float32, shape=[1, 3]),
2484          strides=[1, 1, 1, 1],
2485          padding="SAME")
2486
2487    # Depth mismatch.
2488    with self.assertRaises(ValueError):
2489      nn_ops.conv2d(
2490          array_ops.placeholder(
2491              dtypes.float32, shape=[32, 20, 20, 3]),
2492          array_ops.placeholder(
2493              dtypes.float32, shape=[4, 4, 2, 2]),
2494          strides=[1, 1, 1, 1],
2495          padding="SAME")
2496
2497    # Input depth divisible by filter depth (group convolution).
2498    # No exceptions should appear.
2499    nn_ops.conv2d(
2500        array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 8]),
2501        array_ops.placeholder(dtypes.float32, shape=[4, 4, 2, 16]),
2502        strides=[1, 1, 1, 1],
2503        padding="SAME")
2504
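    # Depth arithmetic for the group convolution above (illustrative only):
    # input depth 8 and filter depth 2 give 8 // 2 = 4 groups, with the 16
    # output channels split 16 // 4 = 4 per group.
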
2505    # Negative padding.
2506    with self.assertRaises(ValueError):
2507      nn_ops.conv2d(
2508          array_ops.placeholder(dtypes.float32),
2509          array_ops.placeholder(dtypes.float32),
2510          strides=[1, 1, 1, 1],
2511          padding=[[0, 0], [0, -1], [1, 2], [0, 0]])
2512
2513    # Nonzero padding in nonspatial dimension.
2514    with self.assertRaises(ValueError):
2515      nn_ops.conv2d(
2516          array_ops.placeholder(dtypes.float32),
2517          array_ops.placeholder(dtypes.float32),
2518          strides=[1, 1, 1, 1],
2519          padding=[[1, 0], [0, 0], [0, 0], [0, 0]])
2520
2521    # Nonzero NCHW padding in nonspatial dimension.
2522    with self.assertRaises(ValueError):
2523      nn_ops.conv2d(
2524          array_ops.placeholder(dtypes.float32),
2525          array_ops.placeholder(dtypes.float32),
2526          strides=[1, 1, 1, 1],
2527          padding=[[0, 0], [0, 1], [0, 0], [0, 0]],
2528          data_format="NCHW")
2529
2530    # Wrong number of padding dimensions.
2531    with self.assertRaises(ValueError):
2532      nn_ops.conv2d(
2533          array_ops.placeholder(dtypes.float32),
2534          array_ops.placeholder(dtypes.float32),
2535          strides=[1, 1, 1, 1],
2536          padding=[[0, 0], [0, 0], [0, 0]])
2537
2538    # Only one padding amount specified per dimension (two are required).
2539    with self.assertRaises(ValueError):
2540      nn_ops.conv2d(
2541          array_ops.placeholder(dtypes.float32),
2542          array_ops.placeholder(dtypes.float32),
2543          strides=[1, 1, 1, 1],
2544          padding=[[0], [0], [0], [0]])
2545
2546    # Explicit padding elements are not lists.
2547    with self.assertRaises(ValueError):
2548      nn_ops.conv2d(
2549          array_ops.placeholder(dtypes.float32),
2550          array_ops.placeholder(dtypes.float32),
2551          strides=[1, 1, 1, 1],
2552          padding=[0, 0, 0, 0])
2553
2554  def testOpEdgeCases(self):
2555    # Illegal strides.
2556    with self.assertRaisesRegex((ValueError, errors_impl.UnimplementedError),
2557                                "strides in the batch and depth"):
2558      input_val = np.ones([2, 4, 10, 10])
2559      filter_val = np.ones([2, 4, 10, 10])
2560      self.evaluate(
2561          nn_ops.conv2d(
2562              input_val, filter_val, strides=[2, 1, 1, 1], padding="SAME"))
2563    with self.assertRaisesRegex((ValueError, errors_impl.UnimplementedError),
2564                                "strides in the batch and depth"):
2565      input_val = np.ones([2, 4, 10, 10])
2566      filter_val = np.ones([2, 4, 10, 10])
2567      self.evaluate(
2568          nn_ops.conv2d(
2569              input_val, filter_val, strides=[1, 1, 1, 2], padding="SAME"))
2570
2571    # TODO(b/195689143): Will enable when fixed for V2 behavior
2572    # # Filter larger than input.
2573    # with self.assertRaisesRegex(ValueError, "Negative dimension size"):
2574    #   input_val = np.ones([32, 20, 20, 3])
2575    #   filter_val = np.ones([20, 21, 3, 2])
2576    #   self.evaluate(
2577    #       nn_ops.conv2d(
2578    #           input_val, filter_val, strides=[1, 1, 1, 1], padding="VALID"))
2579    # with self.assertRaisesRegex(ValueError, "Negative dimension size"):
2580    #   input_val = np.ones([32, 20, 20, 3])
2581    #   filter_val = np.ones([21, 20, 3, 2])
2582    #   self.evaluate(
2583    #       nn_ops.conv2d(
2584    #           input_val, filter_val, strides=[1, 1, 1, 1], padding="VALID"))
2585    #
2586    # # Filter larger than input + padding.
2587    # with self.assertRaisesRegex(ValueError, "Negative dimension size"):
2588    #   input_val = np.ones([32, 20, 20, 3])
2589    #   filter_val = np.ones([24, 25, 3, 2])
2590    #   self.evaluate(
2591    #       nn_ops.conv2d(
2592    #           input_val,
2593    #           filter_val,
2594    #           strides=[1, 1, 1, 1],
2595    #           padding=[[0, 0], [2, 2], [2, 2], [0, 0]]))
2596
2597    # Filter dimensions must be greater than 0.
2598    with self.assertRaisesRegex(
2599        errors_impl.InvalidArgumentError, "filter must not have zero elements"
2600        "|has a non-positive dimension"):
2601      input_val = np.ones([1, 1, 1, 1])
2602      filter_val = np.ones([1, 0, 1, 1])
2603      self.evaluate(
2604          nn_ops.conv2d(
2605              input_val, filter_val, strides=[1, 1, 1, 1], padding="SAME"))
2606
2607    # Negative padding during backprop.
2608    with self.assertRaisesRegex(
2609        errors_impl.InvalidArgumentError,
2610        "All elements of explicit_paddings must be nonnegative"):
2611      filter_val = np.ones([18, 18, 3, 2])
2612      out_backprop_val = np.ones([32, 3, 2, 2])
2613      self.evaluate(
2614          nn_ops.conv2d_backprop_input([32, 20, 20, 3],
2615                                       filter_val,
2616                                       out_backprop_val,
2617                                       strides=[1, 1, 1, 1],
2618                                       padding=[[0, 0], [-1, 0], [0, 0], [0,
2619                                                                          0]]))
2620    with self.assertRaisesRegex(
2621        errors_impl.InvalidArgumentError,
2622        "All elements of explicit_paddings must be nonnegative"):
2623      input_val = np.ones([32, 20, 20, 3])
2624      out_backprop_val = np.ones([32, 3, 2, 2])
2625      self.evaluate(
2626          nn_ops.conv2d_backprop_filter(
2627              input_val, [18, 18, 3, 2],
2628              out_backprop_val,
2629              strides=[1, 1, 1, 1],
2630              padding=[[0, 0], [-1, 0], [0, 0], [0, 0]]))
2631
2632  def testConv2DBackpropInputInvalidOutBackpropRaiseError(self):
2633    with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)):
2634      with self.cached_session():
2635        input_sizes = constant_op.constant([65534, 65534],
2636                                           shape=[2],
2637                                           dtype=dtypes.int32)
2638        filters = constant_op.constant(
2639            0.159749106, shape=[3, 3, 2, 2], dtype=dtypes.float32)
2640        out_backprop = constant_op.constant(0, shape=[], dtype=dtypes.float32)
2641        t = gen_nn_ops.conv2d_backprop_input(
2642            input_sizes=input_sizes,
2643            filter=filters,
2644            out_backprop=out_backprop,
2645            strides=[1, 1, 1, 1],
2646            padding="SAME",
2647            use_cudnn_on_gpu=True,
2648            explicit_paddings=[],
2649            data_format="NHWC",
2650            dilations=[1, 1, 1, 1])
2651        self.evaluate(t)
2652
2653  def testConv2DBfloat16Error(self):
2654    x1 = self._CreateNumpyTensor((2, 2, 2, 2)).astype(
2655        dtypes.bfloat16.as_numpy_dtype)
2656    x2 = self._CreateNumpyTensor((2, 2, 2, 2)).astype(
2657        dtypes.bfloat16.as_numpy_dtype)
2658    with context.eager_mode():
2659      # Conv2D used to return an empty output of shape [0] when given bfloat16
2660      # inputs. Test that either a proper error message is now raised or that
2661      # the output is the correct shape.
2662      try:
2663        y = nn_ops.conv2d(x1, x2, strides=[1, 1], padding="SAME")
2664        self.assertEqual(y.shape, (2, 2, 2, 2))
2665      except errors_impl.InvalidArgumentError as e:
2666        self.assertIn("Op does not support bfloat16 inputs", e.message)
2667      except errors_impl.NotFoundError as e:
2668        self.assertIn("Could not find device for node", e.message)
2669
2670
2671@test_util.run_all_without_tensor_float_32("Avoid TF32 conv on GPU")
2672class DepthwiseConv2DTest(test.TestCase):
2673
2674  def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding,
2675                    expected):
2676    """Verifies the output values of the convolution function.
2677
2678    Args:
2679      tensor_in_sizes: Input tensor dimensions in [batch, input_rows,
2680        input_cols, input_depth].
2681      filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols,
2682        input_depth, depth_multiplier].
2683      stride: Stride.
2684      padding: Padding type.
2685      expected: An array containing the expected operation outputs.
2686    """
2687    total_size_1 = 1
2688    total_size_2 = 1
2689    for s in tensor_in_sizes:
2690      total_size_1 *= s
2691    for s in filter_in_sizes:
2692      total_size_2 *= s
2693    # Initializes the input tensor with an array of incrementing numbers
2694    # starting from 1.
2695    x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
2696    x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
2697    with self.cached_session():
2698      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
2699      t1.set_shape(tensor_in_sizes)
2700      t2 = constant_op.constant(x2, shape=filter_in_sizes)
2701      conv = nn_impl.depthwise_conv2d(
2702          t1, t2, strides=[1, stride, stride, 1], padding=padding)
2703      value = self.evaluate(conv)
2704    tf_logging.debug("value = %s", value)
2705    self.assertArrayNear(expected, np.ravel(value), 1e-5)
2706    self.assertShapeEqual(value, conv)
2707
2708  def testConv2D2x2Filter(self):
2709    # The inputs look like this (it's a 3 x 2 matrix, each of depth 2):
2710    #
2711    # [ (1.0, 2.0), (3.0,  4.0), ( 5.0,  6.0) ]
2712    # [ (7.0, 8.0), (9.0, 10.0), (11.0, 12.0) ]
2713    # We can view this as two inputs:
2714    #
2715    #  input depth 0:
2716    #
2717    #  [ 1.0,  3.0,  5.0 ]
2718    #  [ 7.0,  9.0, 11.0 ]
2719    #
2720    #  input depth 1:
2721    #
2722    #  [ 2.0,  4.0,  6.0 ]
2723    #  [ 8.0, 10.0, 12.0 ]
2724    #
2725    # The filter looks like this (it has two 2 x 2 patches, each generating 2
2726    # depths):
2727    #
2728    #  filter #0:
2729    #
2730    #  [ (1.0,  3.0), ( 5.0,  7.0)]
2731    #  [ (9.0, 11.0), (13.0, 15.0)]
2732    #
2733    #  filter #1:
2734    #
2735    #  [ ( 2.0,  4.0), ( 6.0,  8.0)]
2736    #  [ (10.0, 12.0), (14.0, 16.0)]
2737    #
2738    # So the outputs are:
2739    #
2740    # (position 0, 0: in_depth 0, output_depth 0 -- using filter #0)
2741    #  1.0 * 1.0 + 7.0 * 9.0 + 3.0 * 5.0 + 9.0 * 13.0 = 196
2742    # (position 0, 0: in_depth 0, output_depth 1 -- using filter #1)
2743    #  1.0 * 2.0 + 7.0 * 10.0 + 3.0 * 6.0 + 9.0 * 14.0 = 216
2744    # (position 0, 0: in_depth 1, output_depth 2 -- using filter #0)
2745    #  2.0 * 3.0 + 8.0 * 11.0 + 4.0 * 7.0 + 10.0 * 15.0 = 272
2746    # (position 0, 0: in_depth 1, output_depth 3 -- using filter #1)
2747    #  2.0 * 4.0 + 8.0 * 12.0 + 4.0 * 8.0 + 10.0 * 16.0 = 296
2748    #
2749    # (position 1, 0: in_depth 0, output_depth 0 -- using filter #0)
2750    #  3.0 * 1.0 + 9.0 * 9.0 + 5.0 * 5.0 + 11.0 * 13.0 = 252
2751    # (position 1, 0: in_depth 0, output_depth 1 -- using filter #1)
2752    #  3.0 * 2.0 + 9.0 * 10.0 + 5.0 * 6.0 + 11.0 * 14.0 = 280
2753    # (position 1, 0: in_depth 1, output_depth 2 -- using filter #0)
2754    #  4.0 * 3.0 + 10.0 * 11.0 + 6.0 * 7.0 + 12.0 * 15.0 = 344
2755    # (position 1, 0: in_depth 1, output_depth 3 -- using filter #1)
2756    #  4.0 * 4.0 + 10.0 * 12.0 + 6.0 * 8.0 + 12.0 * 16.0 = 376
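    #
    # (Depth layout, as an aside: depthwise output depth is
    # in_depth * depth_multiplier = 2 * 2 = 4, and output channel
    # d * depth_multiplier + q comes from input depth d and filter #q, which
    # is the ordering of expected_output below.)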
2757    expected_output = [196, 216, 272, 296, 252, 280, 344, 376]
2758    self._VerifyValues(
2759        tensor_in_sizes=[1, 2, 3, 2],
2760        filter_in_sizes=[2, 2, 2, 2],
2761        stride=1,
2762        padding="VALID",
2763        expected=expected_output)
2764
2765
2766@test_util.run_all_without_tensor_float_32("Avoid TF32 conv on GPU")
2767class SeparableConv2DTest(test.TestCase):
2768
2769  def _InitValues(self, sizes):
2770    """Initializes values for input tensors.
2771
2772    Args:
2773      sizes: Tensor dimensions.
2774
2775    Returns:
2776      Tensor initialized to values.
2777    """
2778    total_size = 1
2779    for s in sizes:
2780      total_size *= s
2781    x = [f * 0.5 for f in range(1, total_size + 1)]
2782    return constant_op.constant(x, shape=sizes)
2783
2784  def _VerifyValues(self,
2785                    tensor_in_sizes,
2786                    depthwise_filter_in_sizes,
2787                    pointwise_filter_in_sizes,
2788                    stride,
2789                    padding,
2790                    expected,
2791                    data_format="NHWC"):
2792    """Verifies the output values of the separable convolution function.
2793
2794    Args:
2795      tensor_in_sizes: Input tensor dimensions.
2796      depthwise_filter_in_sizes: Depthwise filter tensor dimensions.
2797      pointwise_filter_in_sizes: Pointwise filter tensor dimensions.
2798      stride: Stride.
2799      padding: Padding type.
2800      expected: An array containing the expected operation outputs.
2801      data_format: string data format for input tensor.
2802    """
2803    with self.cached_session():
2804      t1 = self._InitValues(tensor_in_sizes)
2805      f1 = self._InitValues(depthwise_filter_in_sizes)
2806      f1.set_shape(depthwise_filter_in_sizes)
2807      f2 = self._InitValues(pointwise_filter_in_sizes)
2808
2809      real_t1 = t1
2810      strides = [1, stride, stride, 1]
2811      if data_format == "NCHW":
2812        real_t1 = array_ops.transpose(t1, [0, 3, 1, 2])
2813        strides = [1, 1, stride, stride]
        if isinstance(padding, list):
          padding = [padding[0], padding[3], padding[1], padding[2]]

      conv = nn_impl.separable_conv2d(
          real_t1,
          f1,
          f2,
          strides=strides,
          padding=padding,
          data_format=data_format)

      if data_format == "NCHW":
        conv = array_ops.transpose(conv, [0, 2, 3, 1])

      value = self.evaluate(conv)
    tf_logging.debug("value = %s", value)
    self.assertArrayNear(expected, np.ravel(value), 2e-3)
    self.assertShapeEqual(value, conv)

  def _testSeparableConv2D(self, data_format):
    # The output is the result of two convolutions:
    # First with tensor_in[1, 4, 4, 2] * filter1[2, 2, 2, 3].
    # Second with intermediate_out[1, 4, 4, 6] * filter2[1, 1, 6, 7].
    # Complexity is O(2*3*2*2 + 6*7*1*1) as opposed to O(2*7*2*2).
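    # The depthwise stage emits in_depth * channel_multiplier = 2 * 3 = 6
    # channels, which is why the pointwise filter has 6 input channels.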
    expected_output = [
        6644.5, 6971.5, 7298.5, 7625.5, 7952.5, 8279.5, 8606.5, 8154.5, 8556.5,
        8958.5, 9360.5, 9762.5, 10164.5, 10566.5, 9664.5, 10141.5, 10618.5,
        11095.5, 11572.5, 12049.5, 12526.5, 4145.5, 4346.5, 4547.5, 4748.5,
        4949.5, 5150.5, 5351.5, 12684.5, 13311.5, 13938.5, 14565.5, 15192.5,
        15819.5, 16446.5, 14194.5, 14896.5, 15598.5, 16300.5, 17002.5, 17704.5,
        18406.5, 15704.5, 16481.5, 17258.5, 18035.5, 18812.5, 19589.5, 20366.5,
        6499.5, 6814.5, 7129.5, 7444.5, 7759.5, 8074.5, 8389.5, 18724.5,
        19651.5, 20578.5, 21505.5, 22432.5, 23359.5, 24286.5, 20234.5, 21236.5,
        22238.5, 23240.5, 24242.5, 25244.5, 26246.5, 21744.5, 22821.5, 23898.5,
        24975.5, 26052.5, 27129.5, 28206.5, 8853.5, 9282.5, 9711.5, 10140.5,
        10569.5, 10998.5, 11427.5, 5746.75, 6010.75, 6274.75, 6538.75, 6802.75,
        7066.75, 7330.75, 6168.75, 6452.25, 6735.75, 7019.25, 7302.75, 7586.25,
        7869.75, 6590.75, 6893.75, 7196.75, 7499.75, 7802.75, 8105.75, 8408.75,
        2036.25, 2119.5, 2202.75, 2286.0, 2369.25, 2452.5, 2535.75
    ]

    self._VerifyValues(
        tensor_in_sizes=[1, 4, 4, 2],
        depthwise_filter_in_sizes=[2, 2, 2, 3],
        pointwise_filter_in_sizes=[1, 1, 6, 7],
        stride=1,
        padding="SAME",
        expected=expected_output,
        data_format=data_format)

  def testSeparableConv2D(self):
    self._testSeparableConv2D("NHWC")

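  # The "disabled" prefix keeps the test runner from discovering this method,
  # so the NCHW separable_conv2d case is intentionally not run by default.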
  def disabledtestSeparableConv2DNCHW(self):
    if not test.is_gpu_available():
      return
    self._testSeparableConv2D("NCHW")

  def _testSeparableConv2DEqualInputOutputDepth(self, data_format):
    # The output is the result of two convolutions:
    # First with tensor_in[1, 4, 4, 2] * filter1[2, 2, 2, 3].
    # Second with intermediate_out[1, 4, 4, 6] * filter2[1, 1, 6, 6].
    # Complexity is O(2*3*2*2 + 6*6*1*1) as opposed to O(2*6*2*2).
    expected_output = [
        5742.0, 6069.0, 6396.0, 6723.0, 7050.0, 7377.0, 7047.0, 7449.0, 7851.0,
        8253.0, 8655.0, 9057.0, 8352.0, 8829.0, 9306.0, 9783.0, 10260.0,
        10737.0, 3582.0, 3783.0, 3984.0, 4185.0, 4386.0, 4587.0, 10962.0,
        11589.0, 12216.0, 12843.0, 13470.0, 14097.0, 12267.0, 12969.0, 13671.0,
        14373.0, 15075.0, 15777.0, 13572.0, 14349.0, 15126.0, 15903.0, 16680.0,
        17457.0, 5616.0, 5931.0, 6246.0, 6561.0, 6876.0, 7191.0, 16182.0,
        17109.0, 18036.0, 18963.0, 19890.0, 20817.0, 17487.0, 18489.0, 19491.0,
        20493.0, 21495.0, 22497.0, 18792.0, 19869.0, 20946.0, 22023.0, 23100.0,
        24177.0, 7650.0, 8079.0, 8508.0, 8937.0, 9366.0, 9795.0, 4963.5, 5227.5,
        5491.5, 5755.5, 6019.5, 6283.5, 5328.0, 5611.5, 5895.0, 6178.5, 6462.0,
        6745.5, 5692.5, 5995.5, 6298.5, 6601.5, 6904.5, 7207.5, 1757.25, 1840.5,
        1923.75, 2007.0, 2090.25, 2173.5
    ]

    self._VerifyValues(
        tensor_in_sizes=[1, 4, 4, 2],
        depthwise_filter_in_sizes=[2, 2, 2, 3],
        pointwise_filter_in_sizes=[1, 1, 6, 6],
        stride=1,
        padding="SAME",
        expected=expected_output,
        data_format=data_format)

  @test_util.deprecated_graph_mode_only
  def testSeparableConv2DEqualInputOutputDepth(self):
    self._testSeparableConv2DEqualInputOutputDepth("NHWC")

  def testSeparableConv2DEqualInputOutputDepthNCHW(self):
    if not test.is_gpu_available():
      return
    self._testSeparableConv2DEqualInputOutputDepth("NCHW")

  def _testSeparableConv2dExplicitPadding(self, data_format):
    tensor_in_sizes = [1, 4, 4, 2]
    depthwise_filter_in_sizes = [2, 2, 2, 3]
    pointwise_filter_in_sizes = [1, 1, 6, 7]
    padding = [[0, 0], [1, 2], [3, 4], [0, 0]]
    with self.cached_session():
      # Compute the 'expected' values by manually padding before calling
      # separable_conv2d
      t1 = self._InitValues(tensor_in_sizes)
      t1 = array_ops.pad(t1, padding)
      f1 = self._InitValues(depthwise_filter_in_sizes)
      f1.set_shape(depthwise_filter_in_sizes)
      f2 = self._InitValues(pointwise_filter_in_sizes)
      conv = nn_impl.separable_conv2d(
          t1,
          f1,
          f2,
          strides=[1, 1, 1, 1],
          padding="VALID",
          data_format="NHWC")
      expected = self.evaluate(conv)
      expected = np.ravel(expected)
    self._VerifyValues(
        tensor_in_sizes=tensor_in_sizes,
        depthwise_filter_in_sizes=depthwise_filter_in_sizes,
        pointwise_filter_in_sizes=pointwise_filter_in_sizes,
        stride=1,
        padding=padding,
        expected=expected,
        data_format=data_format)

  def testSeparableConv2dExplicitPadding(self):
    self._testSeparableConv2dExplicitPadding("NHWC")

  def testSeparableConv2dExplicitPaddingNCHW(self):
    if not test.is_gpu_available():
      return
    self._testSeparableConv2dExplicitPadding("NCHW")


@test_util.run_all_without_tensor_float_32("Avoid TF32 conv on GPU")
class DeepConv2DTest(test.TestCase):

  def _CompareFwdConv2D(self, tensor_in_sizes, filter_in_sizes, conv_strides,
                        padding):
    """Verifies that DeepConv2D and Conv2D produce the same values.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      conv_strides: [row_stride, col_stride] for the convolution.
      padding: Padding type.
    """
    x1 = np.random.rand(*tensor_in_sizes).astype(np.float32)
    x2 = np.random.rand(*filter_in_sizes).astype(np.float32)

    with self.cached_session(use_gpu=False):
      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
      t2 = constant_op.constant(x2, shape=filter_in_sizes)
      strides = [1] + conv_strides + [1]

      conv = nn_ops.conv2d(t1, t2, strides=strides, padding=padding)

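      # TF_USE_DEEP_CONV2D toggles the specialized DeepConv2D CPU kernel (a
      # Winograd-style fast-convolution path); evaluate the same graph with
      # the kernel disabled, then enabled, and require matching results.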
      os.environ["TF_USE_DEEP_CONV2D"] = "0"
      values_expect = self.evaluate([conv])

      os.environ["TF_USE_DEEP_CONV2D"] = "1"
      values_test = self.evaluate([conv])

      self.assertAllClose(values_expect, values_test, rtol=1e-5, atol=1e-5)

  def _RunTestCases(self, conv_strides, padding):
    input_sizes = [[5, 5, 5, 1248], [3, 17, 17, 192], [2, 35, 35, 288],
                   [2, 6, 8, 517], [2, 7, 4, 81], [3, 11, 3, 77]]
    filter_sizes = [[3, 3, 1248, 128], [3, 3, 192, 192], [3, 3, 288, 384],
                    [3, 3, 517, 64], [3, 3, 81, 77], [3, 3, 77, 181]]
    for input_shape, filter_shape in zip(input_sizes, filter_sizes):
      self._CompareFwdConv2D(input_shape, filter_shape, conv_strides, padding)

  def testConv2D3x3FilterStride1x1Valid(self):
    self._RunTestCases([1, 1], "VALID")

  def testConv2D3x3FilterStride1x1Same(self):
    self._RunTestCases([1, 1], "SAME")


class Conv2DBenchmark(test.Benchmark):

  def benchmarkGPUConvStackFirst(self):
    # Benchmark the first iteration of a conv-net with many identical conv
    # operations.
    if not test.is_gpu_available():
      return

    with ops.Graph().as_default(), session_lib.Session() as session:
      batch_size = 1
      timesteps = 600
      features = 1

      inputs = random_ops.random_uniform(
          [batch_size, 1, timesteps, features], seed=1234)
      num_outputs_list = [512] * 40 + [1]
      kernel_w = 3
      x = inputs
      for num_outputs in num_outputs_list:
        x = convolutional.conv2d(x, num_outputs, [1, kernel_w])
      outputs = x

      self.evaluate(variables.global_variables_initializer())
      num_iterations = 4
      for iter_index in range(num_iterations):
        start = time.time()
        session.run(outputs)
        wall_time = time.time() - start
        self.report_benchmark(
            name="conv_stack_iter_%d" % iter_index, wall_time=wall_time)
        tf_logging.info("conv_stack_iter_%d: %.4f" % (iter_index, wall_time))

  def _bench_op(self, name, op, burn_iters, num_iters):
    config = config_pb2.ConfigProto()
    # Prevent Grappler from optimizing away the entire graph.
    config.graph_options.rewrite_options.dependency_optimization = (
        rewriter_config_pb2.RewriterConfig.OFF)
    with session_lib.Session(config=config) as session:
      self.evaluate(variables.global_variables_initializer())
      self.run_op_benchmark(
          session, op, burn_iters=burn_iters, min_iters=num_iters, name=name)

  def benchmarkExplicitVsManualPadding(self):
    """Compare performance of EXPLICIT padding and calling tf.pad.

    A Conv2D op with EXPLICIT padding is benchmarked against a tf.pad with the
    same padding followed by an equivalent Conv2D op with VALID padding.
    """
    if not test.is_gpu_available():
      return

    with ops.Graph().as_default():
      burn_iters = 15
      num_iters = 300
      batch_size = 64
      # The input and filter correspond to the first layer of Resnet50.
      input = variables.Variable(  # pylint: disable=redefined-builtin
          random_ops.random_uniform([
              batch_size,
              3,
              224,
              224
          ]))
      filter = variables.Variable(random_ops.random_uniform([7, 7, 3, 64]))  # pylint: disable=redefined-builtin
      strides = [1, 1, 2, 2]
      padding = [(0, 0), (0, 0), (3, 3), (3, 3)]
      output_explicit_pad = nn_ops.conv2d(
          input, filter, strides, padding=padding, data_format="NCHW")
      input_padded = array_ops.pad(input, padding)
      output_manual_pad = nn_ops.conv2d(
          input_padded, filter, strides, padding="VALID", data_format="NCHW")
      # Benchmark just the forward pass.
      self._bench_op("explicit_pad_forward", output_explicit_pad.op, burn_iters,
                     num_iters)
      self._bench_op("manual_pad_forward", output_manual_pad.op, burn_iters,
                     num_iters)

      # Benchmark both the forward and backwards passes.
      input_grad_explicit_pad, filter_grad_explicit_pad = (
          gradients_impl.gradients(output_explicit_pad, [input, filter]))
      self._bench_op(
          "explicit_pad_backward",
          control_flow_ops.group(input_grad_explicit_pad,
                                 filter_grad_explicit_pad), burn_iters,
          num_iters)
      input_grad_manual_pad, filter_grad_manual_pad = gradients_impl.gradients(
          output_manual_pad, [input, filter])
      self._bench_op(
          "manual_pad_backward",
          control_flow_ops.group(input_grad_manual_pad, filter_grad_manual_pad),
          burn_iters, num_iters)

  def benchmarkExplicitVsSamePaddingGraph(self):
    """Compare performance of EXPLICIT and SAME padding in graph mode.

    A Conv2D op with SAME padding is benchmarked, and an equivalent Conv2D op
    with explicit padding is benchmarked, where the padding is the same as in
    the SAME case. The purpose is to ensure EXPLICIT padding is just as
    efficient as the SAME case.
    """
    if not test.is_gpu_available():
      return

    with ops.Graph().as_default():
      burn_iters = 15
      num_convs = 20
      num_iters = 50
      batch_size = 64
      # The input and filter correspond to a middle layer of Resnet50.
      input = variables.Variable(  # pylint: disable=redefined-builtin
          random_ops.random_uniform([
              batch_size,
              256,
              14,
              14
          ]))
      filter = variables.Variable(random_ops.random_uniform([3, 3, 256, 256]))  # pylint: disable=redefined-builtin
      strides = [1, 1, 1, 1]
      padding = [(0, 0), (0, 0), (1, 1), (1, 1)]
      output_explicit_pad = input
      output_same_pad = input

      for _ in range(num_convs):
        output_explicit_pad = nn_ops.conv2d(
            output_explicit_pad,
            filter,
            strides,
            padding=padding,
            data_format="NCHW")
        output_same_pad = nn_ops.conv2d(
            output_same_pad,
            filter,
            strides,
            padding="SAME",
            data_format="NCHW")
      grad_explicit_pad, = gradients_impl.gradients(output_explicit_pad, filter)
      grad_same_pad, = gradients_impl.gradients(output_same_pad, filter)
      self._bench_op("graph_explicit_pad", grad_explicit_pad.op, burn_iters,
                     num_iters)
      self._bench_op("graph_same_pad", grad_same_pad.op, burn_iters, num_iters)

  def benchmarkExplicitVsSamePaddingEager(self):
    """Compare performance of EXPLICIT and SAME padding in eager mode.

    A Conv2D op with SAME padding is benchmarked, and an equivalent Conv2D op
    with explicit padding is benchmarked, where the padding is the same as in
    the SAME case. Currently, EXPLICIT padding is slightly slower because the
    Python padding list must be checked and processed before the Conv2D op can
    run.
    """
    # TODO(reedwm): Make EXPLICIT padding as fast as SAME padding.
    if not test.is_gpu_available():
      return

    with context.eager_mode():
      burn_iters = 15
      num_convs = 20
      num_iters = 50
      batch_size = 64
      # The input and filter correspond to a middle layer of Resnet50.
      input = variables.Variable(  # pylint: disable=redefined-builtin
          random_ops.random_uniform([
              batch_size,
              256,
              14,
              14
          ]))
      filter = variables.Variable(random_ops.random_uniform([3, 3, 256, 256]))  # pylint: disable=redefined-builtin
      strides = [1, 1, 1, 1]
      padding = [(0, 0), (0, 0), (1, 1), (1, 1)]
      output_explicit_pad = input
      output_same_pad = input
      for _ in range(burn_iters):
        output_explicit_pad = nn_ops.conv2d(
            output_explicit_pad,
            filter,
            strides,
            padding=padding,
            data_format="NCHW")
        output_same_pad = nn_ops.conv2d(
            output_same_pad,
            filter,
            strides,
            padding="SAME",
            data_format="NCHW")

      start = time.time()
      for _ in range(num_iters):
        with backprop.GradientTape() as tape:
          for _ in range(num_convs):
            output_explicit_pad = nn_ops.conv2d(
                output_explicit_pad,
                filter,
                strides,
                padding=padding,
                data_format="NCHW")
          tape.gradient(output_explicit_pad, filter)
      end = time.time()
      self.report_benchmark(
          name="eager_explicit_pad",
          wall_time=(end - start) / num_iters,
          iters=num_iters)

      start = time.time()
      for _ in range(num_iters):
        with backprop.GradientTape() as tape:
          for _ in range(num_convs):
            output_same_pad = nn_ops.conv2d(
                output_same_pad,
                filter,
                strides,
                padding="SAME",
                data_format="NCHW")
          tape.gradient(output_same_pad, filter)
      end = time.time()
      self.report_benchmark(
          name="eager_same_pad",
          wall_time=(end - start) / num_iters,
          iters=num_iters)


def GetInceptionFwdTest(input_size, filter_size, stride, padding,
                        gpu_only=False):
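  """Returns a test comparing forward Conv2D values for one Inception shape."""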

  def Test(self):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping InceptionFwd %s", (input_size, filter_size,
                                                   stride, padding))
      return
    tf_logging.info("Testing InceptionFwd %s", (input_size, filter_size, stride,
                                                padding))
    self._CompareFwdValues(input_size, filter_size, [stride, stride], padding)

  return Test


def GetInceptionFwdDilatedConvTest(input_size, filter_size, stride, padding):
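  """Returns a test verifying dilated Conv2D values for one Inception shape."""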

  def Test(self):
    if stride == 1:
      tf_logging.info("Testing InceptionFwd with dilations %s",
                      (input_size, filter_size, stride, padding))
      self._VerifyDilatedConvValues(
          tensor_in_sizes=input_size,
          filter_in_sizes=filter_size,
          strides=[stride, stride],
          dilations=[2, 2],
          padding=padding,
          rtol=5e-4)

  return Test


def GetInceptionBackInputTest(input_size, filter_size, output_size, stride,
                              padding,
                              gpu_only=False):
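  """Returns a test comparing Conv2D input-gradient values for one shape."""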

  def Test(self):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping InceptionBackInput %s",
                      (input_size, filter_size, output_size, stride, padding))
      return
    tf_logging.info("Testing InceptionBackInput %s",
                    (input_size, filter_size, output_size, stride, padding))
    self._CompareBackpropInput(input_size, filter_size, output_size,
                               [stride, stride], padding)

  return Test


def GetInceptionBackFilterTest(input_size, filter_size, output_size, strides,
                               padding, gpu_only=False):
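  """Returns a test comparing Conv2D filter-gradient values for one shape."""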

  def Test(self):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping InceptionBackFilter %s",
                      (input_size, filter_size, output_size, strides, padding))
      return
    tf_logging.info("Testing InceptionBackFilter %s",
                    (input_size, filter_size, output_size, strides, padding))
    self._CompareBackFilter(input_size, filter_size, output_size, strides,
                            padding)

  return Test


@test_util.run_all_without_tensor_float_32("Avoid TF32 conv on GPU")
class FusedConv2DTest(test.TestCase):
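  # The tests below build a Conv2D+BiasAdd+Add pattern that the graph
  # optimizer may fuse into a single op; they check that tensor forwarding of
  # the Add input produces correct results under various refcount scenarios.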

  def _CreateNumpyTensor(self, shape):
    total_size = np.prod(shape)
    return np.arange(1, total_size + 1, dtype=np.float32).reshape(shape)

  def _CreateConv2D(self,
                    input_values,
                    filters,
                    strides=[1, 1],
                    padding="SAME"):
    return nn_ops.convolution(
        input_values, filters, strides=strides, padding=padding)

  # Tests tensor forwarding of a fused Conv2D+BiasAdd+Add op when the input to
  # Add has refcount 1.
  @test_util.run_in_graph_and_eager_modes(use_gpu=False)
  def testAddWithRefCountOne(self):
    expected_output = [
        113377, 125570, 77305, 86738, 19433, 22226, 60681, 70722, 36291, 43718,
        7143, 9206, 9785, 12098, 4783, 6366, 779, 1134
    ]
    tensor_in_sizes = [1, 3, 3, 2]
    filter_in_sizes = [2, 2, 2, 2]
    bias_in_sizes = [2]

    x = self._CreateNumpyTensor(tensor_in_sizes)
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    bias_in = self._CreateNumpyTensor(bias_in_sizes)
    # Use an offset so each convolution gets different filter weights.
    offset = 1

    conv1 = self._CreateConv2D(x, filter_in)
    conv2 = self._CreateConv2D(conv1, filter_in + offset)

    conv = self._CreateConv2D(conv1, filter_in - offset)
    bias_add = nn_ops.bias_add(conv, bias_in)
    add = math_ops.add_n([bias_add, conv2])

    self.assertAllEqual(
        np.rint(expected_output),
        self.evaluate(add).reshape(-1))

  # Tests tensor forwarding of a fused Conv2D+BiasAdd+Add op when the input to
  # Add has a total refcount of 2, and Add is its last consumer.
  @test_util.run_in_graph_and_eager_modes(use_gpu=False)
  def testAddWithRefCountTwoAndRunAddLast(self):
    expected_output = [
        1.907175e+06, 2.253505e+06, 7.809210e+05, 9.537180e+05, 1.184170e+05,
        1.523070e+05, 5.367010e+05, 6.803700e+05, 1.867090e+05, 2.529460e+05,
        2.362300e+04, 3.522600e+04, 5.121700e+04, 7.168300e+04, 1.494300e+04,
        2.347400e+04, 1.558000e+03, 2.903000e+03
    ]
    tensor_in_sizes = [1, 3, 3, 2]
    filter_in_sizes = [2, 2, 2, 2]
    bias_in_sizes = [2]

    x = self._CreateNumpyTensor(tensor_in_sizes)
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    bias_in = self._CreateNumpyTensor(bias_in_sizes)
    # Use an offset so each convolution gets different filter weights.
    offset = 1

    conv1 = self._CreateConv2D(x, filter_in)
    conv2 = self._CreateConv2D(conv1, filter_in + offset)

    conv = self._CreateConv2D(conv2, filter_in - offset)
    bias_add = nn_ops.bias_add(conv, bias_in)
    add = math_ops.add_n([bias_add, conv1])

    self.assertAllEqual(
        np.rint(expected_output),
        self.evaluate(add).reshape(-1))

  # Tests tensor forwarding of a fused Conv2D+BiasAdd+Add op when the input to
  # Add has refcount 2 and Add (in the fused Conv2D op) is its first consumer.
  @test_util.run_in_graph_and_eager_modes(use_gpu=False)
  def testAddWithRefCountTwoAndRunAddFirst(self):
    expected_output = [
        176161, 194450, 120673, 134822, 30545, 34734, 96041, 111102, 58149,
        69289, 11745, 14839, 15833, 19302, 7965, 10339, 1345, 1877
    ]
    tensor_in_sizes = [1, 3, 3, 2]
    filter_in_sizes = [2, 2, 2, 2]
    bias_in_sizes = [2]

    x = self._CreateNumpyTensor(tensor_in_sizes)
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    bias_in = self._CreateNumpyTensor(bias_in_sizes)
    # Use an offset so each convolution gets different filter weights.
    offset = 1

    conv1 = self._CreateConv2D(x, filter_in)
    conv2 = self._CreateConv2D(conv1, filter_in + offset)

    conv = self._CreateConv2D(conv1, filter_in - offset)
    bias_add = nn_ops.bias_add(conv, bias_in)
    add = math_ops.add_n([bias_add, conv2])

    relu = nn_ops.relu(add)
    output = math_ops.add_n([relu, conv2])

    self.assertAllEqual(
        np.rint(expected_output),
        self.evaluate(output).reshape(-1))

  # Tests tensor forwarding of a fused Conv2D+BiasAdd+Add op when the input to
  # Add has refcount 2, and there is no dependency between its two consumers.
  @test_util.run_in_graph_and_eager_modes(use_gpu=False)
  def testAddWithRefCountTwoAndNoDependence(self):
    expected_output = [
        176161, 194450, 120673, 134822, 30545, 34734, 96041, 111102, 58149,
        69289, 11745, 14839, 15833, 19302, 7965, 10339, 1345, 1877
    ]
    tensor_in_sizes = [1, 3, 3, 2]
    filter_in_sizes = [2, 2, 2, 2]
    bias_in_sizes = [2]

    x = self._CreateNumpyTensor(tensor_in_sizes)
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    bias_in = self._CreateNumpyTensor(bias_in_sizes)
    # Use an offset so each convolution gets different filter weights.
    offset = 1

    conv1 = self._CreateConv2D(x, filter_in)
    conv2 = self._CreateConv2D(conv1, filter_in + offset)

    conv = self._CreateConv2D(conv1, filter_in - offset)
    bias_add = nn_ops.bias_add(conv, bias_in)
    add = math_ops.add_n([bias_add, conv2])

    relu1 = nn_ops.relu(add)
    relu2 = nn_ops.relu(conv2)
    output = math_ops.add_n([relu1, relu2])

    self.assertAllEqual(
        np.rint(expected_output),
        self.evaluate(output).reshape(-1))

  # Tests tensor forwarding of a fused Conv2D+BiasAdd+Add op when the input to
  # Add is the same as the input to the fused Conv2D op and needs a tensor
  # buffer.
  @test_util.run_in_graph_and_eager_modes(use_gpu=False)
  def testAddWithSameSrcAndAddTensorBuffer(self):
    expected_output = [
        57157, 63298, 39249, 44026, 9971, 11402, 31193, 36306, 19126, 22948,
        3970, 5060, 5135, 6350, 2666, 3524, 461, 674
    ]
    tensor_in_sizes = [1, 3, 3, 2]
    filter_in_sizes = [2, 2, 2, 2]
    bias_in_sizes = [2]

    x = self._CreateNumpyTensor(tensor_in_sizes)
    filter_in = self._CreateNumpyTensor(filter_in_sizes)
    bias_in = self._CreateNumpyTensor(bias_in_sizes)

    conv1 = self._CreateConv2D(x, filter_in)

    conv = self._CreateConv2D(conv1, filter_in)
    bias_add = nn_ops.bias_add(conv, bias_in)
    add = math_ops.add_n([bias_add, conv1])

    self.assertAllEqual(
        np.rint(expected_output),
        self.evaluate(add).reshape(-1))


if __name__ == "__main__":
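  # Dynamically attach one forward, dilated-forward, backprop-input, and
  # backprop-filter test per shrunken Inception shape to Conv2DTest before
  # running the suite.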
  for index, (input_size_, filter_size_, output_size_, stride_,
              padding_) in enumerate(GetShrunkInceptionShapes()):
    setattr(Conv2DTest, "testInceptionFwd_" + str(index),
            test_util.run_in_graph_and_eager_modes(
                GetInceptionFwdTest(input_size_, filter_size_, stride_,
                                    padding_)))
    setattr(
        Conv2DTest, "testInceptionFwdDilatedConv_" + str(index),
        test_util.run_in_graph_and_eager_modes(GetInceptionFwdDilatedConvTest(
            input_size_, filter_size_, stride_, padding_)))
    setattr(Conv2DTest, "testInceptionBackInput_" + str(index),
            test_util.run_in_graph_and_eager_modes(
                GetInceptionBackInputTest(input_size_, filter_size_,
                                          output_size_, stride_, padding_)))
    setattr(Conv2DTest, "testInceptionBackFilter_" + str(index),
            test_util.run_in_graph_and_eager_modes(
                GetInceptionBackFilterTest(input_size_, filter_size_,
                                           output_size_, [stride_, stride_],
                                           padding_)))

  # TODO(b/35359731)
  # The Fwd, BackInput, and BackFilter tests below check that, for certain
  # input parameter sets, the winograd nonfused algorithm is excluded from
  # conv autotuning. If the winograd nonfused algorithm were offered as an
  # autotune option while the cuDNN version is smaller than 7, the following
  # tests would fail.
  ishape = [1, 400, 400, 1]
  fshape = [1, 1, 1, 256]
  oshape = [1, 400, 400, 256]
  setattr(Conv2DTest, "testInceptionFwd_No_Winograd_Nonfused",
          test_util.run_in_graph_and_eager_modes(
              GetInceptionFwdTest(ishape, fshape, 1, "SAME", gpu_only=True)))
  setattr(Conv2DTest, "testInceptionFwdDilatedConv_No_Winograd_Nonfused",
          test_util.run_in_graph_and_eager_modes(
              GetInceptionFwdDilatedConvTest(ishape, fshape, 1, "SAME")))
  setattr(Conv2DTest, "testInceptionBackInput_No_Winograd_Nonfused",
          test_util.run_in_graph_and_eager_modes(
              GetInceptionBackInputTest(ishape, fshape, oshape, 1, "SAME",
                                        gpu_only=True)))
  setattr(Conv2DTest, "testInceptionBackFilter_No_Winograd_Nonfused",
          test_util.run_in_graph_and_eager_modes(
              GetInceptionBackFilterTest(ishape, fshape, oshape, [1, 1], "SAME",
                                         gpu_only=True)))
  test.main()