# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functional tests for BiasAdd."""

import numpy as np

from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import gradient_checker_v2
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import nn_ops
import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
from tensorflow.python.platform import test


@test_util.run_all_in_graph_and_eager_modes
class BiasAddTestBase(test.TestCase):

  def _npBias(self, inputs, bias):
    assert len(bias.shape) == 1
    assert inputs.shape[-1] == bias.shape[0]
    return inputs + bias.reshape(([1] *
                                  (len(inputs.shape) - 1)) + [bias.shape[0]])

  def testNpBias(self):
    self.assertAllClose(
        np.array([[11, 22, 33], [41, 52, 63]]),
        self._npBias(
            np.array([[10, 20, 30], [40, 50, 60]]), np.array([1, 2, 3])))

  def _testBias(self, np_inputs, np_bias, use_gpu=False):
    np_val = self._npBias(np_inputs, np_bias)
    with self.cached_session(use_gpu=use_gpu):
      tf_val = self.evaluate(nn_ops.bias_add(np_inputs, np_bias))
    self.assertAllCloseAccordingToType(np_val, tf_val)

  def _AtLeast3d(self, np_value):
    # Pad the input with size-1 leading dimensions until it is at least 3-D.
    if np_value.ndim < 3:
      return np.reshape(np_value, (1,) * (3 - np_value.ndim) + np_value.shape)
    return np_value

  def _NHWCToNCHW(self, np_value):
    # Pad the input to at least 3-D, then move the last (channel) dimension
    # to the second position.
    np_value = self._AtLeast3d(np_value)
    np_dim = list(range(np_value.ndim))
    np_dim_new = list(np_dim[0:1]) + list(np_dim[-1:]) + list(np_dim[1:-1])
    return np.transpose(np_value, np_dim_new)

  def _NCHWToNHWC(self, np_value):
    assert len(np_value.shape) >= 3
    np_dim = list(range(np_value.ndim))
    # Move the second (channel) dimension to the last position.
    np_dim_new = list(np_dim[0:1]) + list(np_dim[2:]) + list(np_dim[1:2])
    return np.transpose(np_value, np_dim_new)

  def _testBiasNCHW(self, np_inputs, np_bias, use_gpu):
    np_val = self._npBias(np_inputs, np_bias)
    np_inputs = self._NHWCToNCHW(np_inputs)
    with self.cached_session(use_gpu=use_gpu):
      tf_val = self.evaluate(
          nn_ops.bias_add(np_inputs, np_bias, data_format="NCHW"))
    tf_val = self._NCHWToNHWC(tf_val)
    self.assertAllCloseAccordingToType(self._AtLeast3d(np_val), tf_val)

  def _testAll(self, np_inputs, np_bias):
    self._testBias(np_inputs, np_bias, use_gpu=False)
    self._testBiasNCHW(np_inputs, np_bias, use_gpu=False)
    if np_inputs.dtype in [np.float16, np.float32, np.float64, np.int32]:
      self._testBias(np_inputs, np_bias, use_gpu=True)
      self._testBiasNCHW(np_inputs, np_bias, use_gpu=True)

  def _expectedException(self):
    if context.executing_eagerly():
      return errors_impl.InvalidArgumentError
    else:
      return ValueError

  def testInputDims(self):
    with self.assertRaises(self._expectedException()):
      nn_ops.bias_add([1, 2], [1])

  def testBiasVec(self):
    with self.assertRaises(self._expectedException()):
      nn_ops.bias_add(
          array_ops.reshape([1, 2], shape=[1, 2]),
          array_ops.reshape([1, 2], shape=[1, 2]))

  def testBiasInputsMatch(self):
    with self.assertRaises(self._expectedException()):
      nn_ops.bias_add(
          array_ops.reshape([1, 2], shape=[1, 2]),
          array_ops.reshape([1], shape=[1]))

  def testIntTypes(self):
    for t in [np.int8, np.int16, np.int32, np.int64]:
      self._testAll(
          np.array([[10, 20, 30], [40, 50, 60]]).astype(t),
          np.array([1, 2, 3]).astype(t))

  def testFloatTypes(self):
    for t in [np.float16, np.float32, np.float64]:
      self._testAll(
          np.random.rand(4, 3, 3).astype(t),
          np.random.rand(3).astype(t))

  def test4DFloatTypes(self):
    for t in [np.float16, np.float32, np.float64]:
      self._testAll(
          np.random.rand(4, 3, 2, 3).astype(t),
          np.random.rand(3).astype(t))
      self._testAll(
          np.random.rand(2048, 4, 4, 4).astype(t),
          np.random.rand(4).astype(t))
      self._testAll(
          np.random.rand(4, 4, 4, 2048).astype(t),
          np.random.rand(2048).astype(t))

  def test5DFloatTypes(self):
    for t in [np.float16, np.float32, np.float64]:
      self._testAll(
          np.random.rand(4, 3, 2, 3, 4).astype(t),
          np.random.rand(4).astype(t))

  def _random_tensor(self, shape, dtype):
    return constant_op.constant(2 * np.random.rand(*shape) - 1, dtype=dtype)

  def _computeGradient(self, np_input, bias, dtype, data_format):
    input_shape = output_shape = np_input.shape
    bias_shape = bias.shape
    input_tensor = constant_op.constant(
        np_input, shape=input_shape, dtype=dtype)
    bias_tensor = constant_op.constant(bias, shape=bias_shape, dtype=dtype)

    if context.executing_eagerly():

      def bias_add(input_tensor, bias_tensor):
        return nn_ops.bias_add(
            input_tensor, bias_tensor, data_format=data_format)

      # The following is a work-around for TF issue 33660. Instead of
      # calculating the analytical and numerical gradients for both
      # inputs in a single call to compute_gradient, compute_gradient
      # is called for each input separately.
      def bias_add_1(input_tensor):
        return bias_add(input_tensor, bias_tensor)

      def bias_add_2(bias_tensor):
        return bias_add(input_tensor, bias_tensor)

      input_jacob_a, input_jacob_n = gradient_checker_v2.compute_gradient(
          bias_add_1, [input_tensor])
      bias_jacob_a, bias_jacob_n = gradient_checker_v2.compute_gradient(
          bias_add_2, [bias_tensor])

      # Test gradient of BiasAddGrad
      def bias_add_grad_function(upstream_gradients):
        with backprop.GradientTape() as tape:
          tape.watch(bias_tensor)
          bias_add_output = bias_add(input_tensor, bias_tensor)
          gradient_injector_output = bias_add_output * upstream_gradients
          return tape.gradient(gradient_injector_output, bias_tensor)

      upstream_tensor = self._random_tensor(output_shape, dtype)
      grad_jacob_a, grad_jacob_n = gradient_checker_v2.compute_gradient(
          bias_add_grad_function, [upstream_tensor])
    else:
      output_tensor = nn_ops.bias_add(
          input_tensor, bias_tensor, data_format=data_format)
      jacobians = gradient_checker.compute_gradient([input_tensor, bias_tensor],
                                                    [input_shape, bias_shape],
                                                    output_tensor, output_shape)
      (input_jacob_a, input_jacob_n), (bias_jacob_a, bias_jacob_n) = jacobians
      # Test gradient of BiasAddGrad
      bias_add_grad = gradients_impl.gradients(
          nn_ops.l2_loss(output_tensor), bias_tensor)[0]
      grad_jacob_a, grad_jacob_n = gradient_checker.compute_gradient(
          output_tensor, output_shape, bias_add_grad, bias_shape)

    return ((input_jacob_a, bias_jacob_a, grad_jacob_a),
            (input_jacob_n, bias_jacob_n, grad_jacob_n))

  def _testGradient(self, np_input, bias, dtype, data_format, use_gpu):
    with self.cached_session(use_gpu=use_gpu):
      if data_format == "NCHW":
        np_input = self._NHWCToNCHW(np_input)
      jacob_a, jacob_n = self._computeGradient(np_input, bias, dtype,
                                               data_format)
      input_jacob_a, bias_jacob_a, grad_jacob_a = jacob_a
      input_jacob_n, bias_jacob_n, grad_jacob_n = jacob_n

      if dtype == np.float16:
        # Compare fp16 analytical gradients to fp32 numerical gradients,
        # since fp16 numerical gradients are too imprecise unless great
        # care is taken with choosing the inputs and the delta. This is
        # a weaker, but pragmatic, check (in particular, it does not test
        # the op itself, only its gradient).
        _, jacob_n = self._computeGradient(np_input, bias, np.float32,
                                           data_format)
        input_jacob_n, bias_jacob_n, grad_jacob_n = jacob_n

      if dtype == dtypes.float64:
        threshold = 1e-10
      elif np_input.size >= 512:
        # The 5e-3 threshold seems to have been marginal in these cases, and
        # small changes in the test were pushing it over the limit.
        threshold = 5e-2
      else:
        threshold = 5e-3
      self.assertAllClose(input_jacob_a, input_jacob_n, threshold, threshold)
      self.assertAllClose(bias_jacob_a, bias_jacob_n, threshold, threshold)
      self.assertAllClose(grad_jacob_a, grad_jacob_n, threshold, threshold)

  def testGradientTensor2D(self):
    for (data_format, use_gpu) in ("NHWC", False), ("NHWC", True):
      for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
        np_input = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                            dtype=dtype.as_numpy_dtype).reshape(3, 2)
        bias = np.array([1.3, 2.4], dtype=dtype.as_numpy_dtype)
        self._testGradient(np_input, bias, dtype, data_format, use_gpu)

  def testGradientTensor3D(self):
    for (data_format, use_gpu) in [("NHWC", False), ("NHWC", True),
                                   ("NCHW", False), ("NCHW", True)]:
      for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
        # pylint: disable=too-many-function-args
        np_input = np.array(
            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
            dtype=dtype.as_numpy_dtype).reshape(1, 3, 2)
        bias = np.array([1.3, 2.4], dtype=dtype.as_numpy_dtype)
        self._testGradient(np_input, bias, dtype, data_format, use_gpu)

  def testGradientTensor4D(self):
    for (data_format, use_gpu) in [("NHWC", False)]:
      for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
        np_input = np.arange(
            1.0, 49.0,
            dtype=dtype.as_numpy_dtype).reshape([2, 3, 4, 2]).astype(np.float32)
        bias = np.array([1.3, 2.4], dtype=dtype.as_numpy_dtype)
        self._testGradient(np_input, bias, dtype, data_format, use_gpu)
        np_input = np.arange(
            1.0, 513.0,
            dtype=dtype.as_numpy_dtype).reshape([64, 2, 2,
                                                 2]).astype(np.float32)
        self._testGradient(np_input, bias, dtype, data_format, use_gpu)
        np_input = np.arange(
            1.0, 513.0,
            dtype=dtype.as_numpy_dtype).reshape([2, 2, 2,
                                                 64]).astype(np.float32)
        self._testGradient(np_input,
                           np.random.rand(64).astype(dtype.as_numpy_dtype),
                           dtype, data_format, use_gpu)

  def testGradientTensor5D(self):
    for (data_format, use_gpu) in [("NHWC", False), ("NHWC", True),
                                   ("NCHW", False), ("NCHW", True)]:
      for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
        np_input = np.arange(
            1.0, 49.0,
            dtype=dtype.as_numpy_dtype).reshape([1, 2, 3, 4,
                                                 2]).astype(np.float32)
        bias = np.array([1.3, 2.4], dtype=dtype.as_numpy_dtype)
        self._testGradient(np_input, bias, dtype, data_format, use_gpu)

  def test1x1Image(self):
    for (data_format, use_gpu) in [("NHWC", False), ("NCHW", False)]:
      np_input = np.arange(1.0, 129.0).reshape([4, 1, 1, 32]).astype(np.float32)
      self._testGradient(np_input,
                         np.random.rand(32).astype(np.float32), dtypes.float32,
                         data_format, use_gpu)

  def testEmpty(self):
    np.random.seed(7)
    for shape in (0, 0), (2, 0), (0, 2), (4, 3, 0), (4, 0, 3), (0, 4, 3):
      self._testAll(np.random.randn(*shape), np.random.randn(shape[-1]))

  def testEmptyGradient(self):
    for (data_format, use_gpu) in ("NHWC", False), ("NHWC", True):
      for shape in (0, 0), (2, 0), (0, 2):
        self._testGradient(
            np.random.randn(*shape), np.random.randn(shape[-1]), dtypes.float64,
            data_format, use_gpu)

    for (data_format, use_gpu) in [("NHWC", False), ("NHWC", True),
                                   ("NCHW", False), ("NCHW", True)]:
      for shape in (4, 3, 0), (4, 0, 3), (0, 4, 3):
        self._testGradient(
            np.random.randn(*shape), np.random.randn(shape[-1]), dtypes.float64,
            data_format, use_gpu)
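

# Note: this module defines only a test base class and deliberately has no
# `test.main()` entry point. A concrete test file is expected to subclass
# BiasAddTestBase and invoke the standard TensorFlow test runner. A minimal
# sketch follows; the import path `bias_op_base` is an assumption for
# illustration, not something defined in this file:
#
#   from tensorflow.python.platform import test
#   import bias_op_base  # hypothetical import path for this module
#
#   class BiasAddTest(bias_op_base.BiasAddTestBase):
#     pass  # inherits all test* methods unchanged
#
#   if __name__ == "__main__":
#     test.main()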