# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Gradient checker for functions.

The gradient checker verifies numerically that a function properly
computes its gradients.
"""
import numpy as np

from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import indexed_slices
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import tf_export


def _product(t):
  if isinstance(t, int):
    return t
  else:
    y = 1
    for x in t:
      y *= x
    return y


def _eval_indexed_slices(a):
  """Converts IndexedSlices to IndexedSlicesValue with numpy indices/values.

  When eager execution is enabled, converts IndexedSlices
  to IndexedSlicesValue with numpy indices/values.

  Args:
    a: any value.

  Returns:
    If a is IndexedSlices and eager execution is enabled, calls numpy() on a's
    fields. Otherwise returns a unchanged.
  """
  if (isinstance(a, indexed_slices.IndexedSlices) and
      context.executing_eagerly()):
    return indexed_slices.IndexedSlicesValue(
        indices=[x.numpy() for x in a.indices],
        values=[x.numpy() for x in a.values],
        dense_shape=a.dense_shape)
  return a


def _to_numpy(a):
  """Converts Tensors, EagerTensors, and IndexedSlicesValue to numpy arrays.

  Args:
    a: any value.

  Returns:
    If a is EagerTensor or Tensor, returns the evaluation of a by calling
    numpy() or run(). If a is IndexedSlicesValue, constructs the corresponding
    dense numpy array. Otherwise returns a unchanged.
  """
  if isinstance(a, ops.EagerTensor):
    return a.numpy()
  if isinstance(a, ops.Tensor):
    sess = ops.get_default_session()
    return sess.run(a)
  if isinstance(a, indexed_slices.IndexedSlicesValue):
    arr = np.zeros(a.dense_shape)
    assert len(a.values) == len(a.indices), (
        "IndexedSlicesValue has %s value slices but %s indices\n%s" %
        (a.values, a.indices, a))
    for values_slice, index in zip(a.values, a.indices):
      assert 0 <= index < len(arr), (
          "IndexedSlicesValue has invalid index %s\n%s" % (index, a))
      arr[index] += values_slice
    return arr
  return a


def _prepare(f, xs_dtypes, xs_shapes):
  """Return a function that executes 'f'.

  In TF 2.x, this is the same as `f`.
  In TF 1.x, returns a Python function that executes the graph defined by `f`
  in a Session.

  Args:
    f: the function.
    xs_dtypes: dtypes of f's arguments.
    xs_shapes: shapes of f's arguments.

  Returns:
    A function that takes the same arguments as `f` and evaluates it, either
    eagerly (TF 2.x) or by running the corresponding graph in the default
    Session (TF 1.x).
  """
  if context.executing_eagerly():

    def decorated_eager(*xs_data):
      return f(*map(ops.convert_to_tensor, xs_data))

    return decorated_eager
  xs = [
      array_ops.placeholder(x_dtype, shape=x_shape)
      for x_dtype, x_shape in zip(xs_dtypes, xs_shapes)
  ]
  y = f(*xs)
  sess = ops.get_default_session()

  def decorated_graph(*xs_data):
    xs_data = [_to_numpy(a) for a in xs_data]
    return sess.run(y, feed_dict=dict(zip(xs, xs_data)))

  return decorated_graph


def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param):
  """Computes the theoretical Jacobian for f with respect to xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_shape: the shape of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
  """
  x = xs[param]
  # Complex vectors are treated as vectors of twice as many reals.
  x_shape = tuple(x.shape) + (2,) if x.dtype.is_complex else x.shape
  y_factor = 2 if y_dtype.is_complex else 1

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape)
  x_val_size = _product(x_shape[1:])  # This is used for sparse gradients
  y_size = _product(y_shape) * y_factor

  # Allocate 2-D Jacobian, with y dimensions smashed into the first
  # dimension and x dimensions smashed into the second.
  jacobian = np.zeros((y_size, x_size),
                      dtype=x.dtype.real_dtype.as_numpy_dtype)

  # For each entry of dy, we set it to 1 and everything else to 0, then
  # compute the gradients -- this gives us one row of the Jacobian matrix.
  dy_data = np.zeros(y_shape, dtype=y_dtype.as_numpy_dtype)
  dy_data_flat = dy_data.ravel().view(y_dtype.real_dtype.as_numpy_dtype)
  grad_fn_unprep = backprop.gradients_function(f, [param])
  grad_fn = _prepare(lambda dy, *xs: grad_fn_unprep(*xs, dy=dy),
                     [y_dtype] + [z.dtype for z in xs],
                     [None] + [z.shape for z in xs])
  for row in range(y_size):
    dy_data_flat[row] = 1
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    grad = _eval_indexed_slices(grad)
    if isinstance(grad, indexed_slices.IndexedSlicesValue):
      for i, v in zip(grad.indices, grad.values):
        c_begin = i * x_val_size
        c_end = c_begin + x_val_size
        jacobian[row, c_begin:c_end] += v.flat
    elif grad is not None:
      jacobian[row, :] = grad.ravel().view(jacobian.dtype)
    # This reset of `dy_data_flat` needs to happen after `grad` is copied to
    # `jacobian` because `grad` and `dy_data_flat` may share memory.
    dy_data_flat[row] = 0

  # If the output is empty, run the gradients at least once and make sure
  # they produce zeros.
  if y_size == 0:  # don't use 'not y_size', because y_size may not be an int
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    if grad.shape != x.shape:
      raise ValueError("Empty gradient has wrong shape: expected %s, got %s" %
                       (x.shape, grad.shape))
    if np.any(grad):
      raise ValueError("Empty tensor with nonzero gradients")

  logging.vlog(1, "Theoretical Jacobian =\n%s", jacobian)
  return jacobian


def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta):
  """Computes the numeric Jacobian for f with respect to xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_size: the number of elements of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.
    delta: the amount of perturbation we give to the input.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.
  """
  x_shape = xs[param].shape
  x_dtype = xs[param].dtype

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape) * (2 if x_dtype.is_complex else 1)
  y_size = y_size * (2 if y_dtype.is_complex else 1)
  x_dtype = x_dtype.real_dtype.as_numpy_dtype
  y_dtype = y_dtype.real_dtype.as_numpy_dtype

  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  # Converts xs to numpy arrays to do in-place perturbation.
  # Calls asarray() to avoid copying in ravel() later.
  xs = [np.asarray(_to_numpy(x)) for x in xs]
  x = xs[param]

  # Make sure we have the right types.
  scale = np.asarray(2 * delta, dtype=y_dtype)[()]

  jacobian = np.zeros((y_size, x_size), dtype=x_dtype)

  # For each entry of x, we slightly perturb it by adding and subtracting a
  # delta, then compute the difference between the outputs. This gives us one
  # column of the Jacobian matrix.
  f = _prepare(f, xs_dtypes, xs_shapes)
  for col in range(x_size):
    original = x.ravel().view(x_dtype)[col]
    x.ravel().view(x_dtype)[col] += delta
    y_pos = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    x.ravel().view(x_dtype)[col] -= delta
    y_neg = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    diff = (y_pos - y_neg) / scale
    jacobian[:, col] = diff.ravel().view(y_dtype)

  logging.vlog(1, "Numeric Jacobian =\n%s", jacobian)
  return jacobian
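
# A worked illustration of the central-difference rule used above (an
# illustrative aside, not logic relied on by this module): each Jacobian
# column approximates a directional derivative as
#
#   df/dx[col]  ~=  (f(x + delta * e_col) - f(x - delta * e_col)) / (2 * delta)
#
# where e_col is the unit vector for that entry. For example, for f(x) = x**2
# at x = 3 with delta = 1/1024 this gives ((3 + d)**2 - (3 - d)**2) / (2 * d)
# = 6.0 exactly, since central differences are exact for quadratics; for
# general smooth functions the truncation error is O(delta**2).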


def _compute_gradient(f, y_shape, y_dtype, xs, param, delta):
  """Computes the theoretical and numeric Jacobians."""
  x = xs[param]
  t = x.dtype
  allowed_types = [
      dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64,
      dtypes.complex64, dtypes.complex128
  ]
  assert t.base_dtype in allowed_types, ("Cannot compute gradient for "
                                         "unsupported type %s of argument %s" %
                                         (t.name, param))
  t2 = y_dtype
  assert t2.base_dtype in allowed_types, ("Cannot compute gradient for "
                                          "unsupported type %s of y" % t2.name)
  y_size = _product(y_shape)
  jacob_t = _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param)
  jacob_n = _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta)
  return jacob_t, jacob_n


def _compute_gradient_list(f, xs, delta):
  """Compute gradients for a list of x values."""
  # Convert xs to tensors so that dtype and shape have uniform types.
  xs = [ops.convert_to_tensor(x) for x in xs]
  # Run the function to get info of the result.
  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  f_temp = _prepare(f, xs_dtypes, xs_shapes)
  y = f_temp(*xs)
  return tuple(
      zip(*[
          _compute_gradient(f, y.shape, dtypes.as_dtype(y.dtype), xs, i, delta)
          for i in range(len(xs))
      ]))


@tf_export("test.compute_gradient", v1=[])
def compute_gradient(f, x, delta=None):
  """Computes the theoretical and numeric Jacobian of `f`.

  With y = f(x), computes the theoretical and numeric Jacobian dy/dx.

  Args:
    f: the function.
    x: the arguments for the function as a list or tuple of values convertible
      to a Tensor.
    delta: (optional) perturbation used to compute numeric Jacobian.

  Returns:
    A pair of lists, where the first is a list of 2-d numpy arrays representing
    the theoretical Jacobians for each argument, and the second list is the
    numerical ones. Each 2-d array has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in the
    corresponding argument and "y_size" is the number of elements in f(x).

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
    ValueError: If `x` is not a list or a tuple.

  Example:

  >>> @tf.function
  ... def test_func(x):
  ...   return x*x
  ...
  >>>
  >>> class MyTest(tf.test.TestCase):
  ...
  ...   def test_gradient_of_test_func(self):
  ...     theoretical, numerical = tf.test.compute_gradient(test_func, [1.0])
  ...     # ((array([[2.]], dtype=float32),),
  ...     #  (array([[2.000004]], dtype=float32),))
  ...     self.assertAllClose(theoretical, numerical)

  """
  if not isinstance(x, (list, tuple)):
    raise ValueError(
        "`x` must be a list or tuple of values convertible to a Tensor "
        "(arguments to `f`), not a %s" % type(x))
  if delta is None:
    # By default, we use a step size for the central finite difference
    # approximation that is exactly representable as a binary floating
    # point number, since this reduces the amount of noise due to rounding
    # in the approximation of some functions.
    delta = 1.0 / 1024
  return _compute_gradient_list(f, x, delta)


def max_error(grad1, grad2):
  """Computes maximum elementwise gap.

  Computes the maximum elementwise gap between two lists of tensors of the same
  shape.

  Args:
    grad1: a list of tensors.
    grad2: a list of tensors with the same shape as grad1.

  Returns:
    The maximum elementwise gap between the two.
  """
  error = 0
  for j_t, j_n in zip(grad1, grad2):
    if j_t.size or j_n.size:  # Handle zero size tensors correctly
      error = np.maximum(error, np.fabs(j_t - j_n).max())
  return error
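

if __name__ == "__main__":
  # A minimal, self-contained smoke-test sketch (illustrative only; the test
  # function and tolerance below are arbitrary choices, not part of the
  # TensorFlow API). It checks that the theoretical and numeric Jacobians of
  # x**2 agree at x = 1.0, mirroring the `compute_gradient` docstring example,
  # and combines `compute_gradient` with `max_error` the way tests usually do.
  theoretical, numerical = compute_gradient(lambda z: z * z, [1.0])
  err = max_error(theoretical, numerical)
  print("max elementwise Jacobian error: %g" % err)
  assert err < 1e-3, "theoretical and numeric Jacobians disagree"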