# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Gradient checker for functions.

The gradient checker verifies numerically that a function properly
computes its gradients.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import tf_export


def _product(t):
  if isinstance(t, int):
    return t
  else:
    y = 1
    for x in t:
      y *= x
    return y


def _eval_indexed_slices(a):
  """Converts IndexedSlices to IndexedSlicesValue with numpy indices/values.

  When eager execution is enabled, converts IndexedSlices
  to IndexedSlicesValue with numpy indices/values.

  Args:
    a: any value.

  Returns:
    If a is IndexedSlices and eager execution is enabled, calls numpy() on a's
    fields. Otherwise returns a unchanged.
  """
  if isinstance(a, ops.IndexedSlices) and context.executing_eagerly():
    return ops.IndexedSlicesValue(
        indices=[x.numpy() for x in a.indices],
        values=[x.numpy() for x in a.values],
        dense_shape=a.dense_shape)
  return a


def _to_numpy(a):
  """Converts Tensors, EagerTensors, and IndexedSlicesValue to numpy arrays.

  Args:
    a: any value.

  Returns:
    If a is EagerTensor or Tensor, returns the evaluation of a by calling
    numpy() or run(). If a is IndexedSlicesValue, constructs the corresponding
    dense numpy array. Otherwise returns a unchanged.
  """
  if isinstance(a, ops.EagerTensor):
    return a.numpy()
  if isinstance(a, ops.Tensor):
    sess = ops.get_default_session()
    return sess.run(a)
  if isinstance(a, ops.IndexedSlicesValue):
    arr = np.zeros(a.dense_shape)
    assert len(a.values) == len(a.indices), (
        "IndexedSlicesValue has %s value slices but %s indices\n%s" %
        (a.values, a.indices, a))
    for values_slice, index in zip(a.values, a.indices):
      assert 0 <= index < len(arr), (
          "IndexedSlicesValue has invalid index %s\n%s" % (index, a))
      arr[index] += values_slice
    return arr
  return a


def _prepare(f, xs_dtypes, xs_shapes):
  """Return a function that executes 'f'.

  In TF 2.x, this is the same as `f`.
  In TF 1.x, returns a Python function that executes the graph defined by `f`
  in a Session.

  Args:
    f: the function.
    xs_dtypes: dtypes of f's arguments.
    xs_shapes: shapes of f's arguments.

  Returns:
    A function that executes `f` on the given arguments.
  """
  if context.executing_eagerly():

    def decorated_eager(*xs_data):
      return f(*map(ops.convert_to_tensor, xs_data))

    return decorated_eager
  xs = [
      array_ops.placeholder(x_dtype, shape=x_shape)
      for x_dtype, x_shape in zip(xs_dtypes, xs_shapes)
  ]
  y = f(*xs)
  sess = ops.get_default_session()

  def decorated_graph(*xs_data):
    xs_data = [_to_numpy(a) for a in xs_data]
    return sess.run(y, feed_dict=dict(zip(xs, xs_data)))

  return decorated_graph


def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param):
  """Computes the theoretical Jacobian of f with respect to xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_shape: the shape of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
  """
  x = xs[param]
  # Complex vectors are treated as vectors of twice as many reals.
  x_shape = tuple(x.shape) + (2,) if x.dtype.is_complex else x.shape
  y_factor = 2 if y_dtype.is_complex else 1

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape)
  x_val_size = _product(x_shape[1:])  # This is used for sparse gradients
  y_size = _product(y_shape) * y_factor

  # Allocate 2-D Jacobian, with y dimensions smashed into the first
  # dimension and x dimensions smashed into the second.
  jacobian = np.zeros((y_size, x_size), dtype=x.dtype.real_dtype.as_numpy_dtype)

  # For each entry of dy, we set it to 1 and everything else to 0, then
  # compute the gradients -- this gives us one row of the Jacobian matrix.
  dy_data = np.zeros(y_shape, dtype=y_dtype.as_numpy_dtype)
  dy_data_flat = dy_data.ravel().view(y_dtype.real_dtype.as_numpy_dtype)
  grad_fn_unprep = backprop.gradients_function(f, [param])
  grad_fn = _prepare(lambda dy, *xs: grad_fn_unprep(*xs, dy=dy),
                     [y_dtype] + [z.dtype for z in xs],
                     [None] + [z.shape for z in xs])
  for row in range(y_size):
    dy_data_flat[row] = 1
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    grad = _eval_indexed_slices(grad)
    if isinstance(grad, ops.IndexedSlicesValue):
      for i, v in zip(grad.indices, grad.values):
        c_begin = i * x_val_size
        c_end = c_begin + x_val_size
        jacobian[row, c_begin:c_end] += v.flat
    elif grad is not None:
      jacobian[row, :] = grad.ravel().view(jacobian.dtype)
    # This reset of `dy_data_flat` needs to happen after `grad` is copied to
    # `jacobian` because `grad` and `dy_data_flat` may share memory.
    dy_data_flat[row] = 0

  # If the output is empty, run the gradients at least once and make sure
  # they produce zeros.
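  # (With an empty output the loop above never executes, so this is the only
  # place where the gradient function is exercised for zero-sized results.)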
  if y_size == 0:  # don't use 'not y_size', because y_size may not be an int
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    if grad.shape != x.shape:
      raise ValueError("Empty gradient has wrong shape: expected %s, got %s" %
                       (x.shape, grad.shape))
    if np.any(grad):
      raise ValueError("Empty tensor with nonzero gradients")

  logging.vlog(1, "Theoretical Jacobian =\n%s", jacobian)
  return jacobian


def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta):
  """Computes the numeric Jacobian of f with respect to xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_size: the number of elements of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.
    delta: the amount of perturbation we give to the input.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.
  """
  x_shape = xs[param].shape
  x_dtype = xs[param].dtype

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape) * (2 if x_dtype.is_complex else 1)
  y_size = y_size * (2 if y_dtype.is_complex else 1)
  x_dtype = x_dtype.real_dtype.as_numpy_dtype
  y_dtype = y_dtype.real_dtype.as_numpy_dtype

  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  # Converts xs to numpy arrays to do in-place perturbation.
  # Calls asarray() to avoid copying in ravel() later.
  xs = [np.asarray(_to_numpy(x)) for x in xs]
  x = xs[param]

  # Make sure we have the right types.
  scale = np.asarray(2 * delta, dtype=y_dtype)[()]

  jacobian = np.zeros((y_size, x_size), dtype=x_dtype)

  # For each entry of x, we slightly perturb it by adding and subtracting a
  # delta, then compute the difference between the two outputs. This gives us
  # one column of the Jacobian matrix.
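  # Concretely, column `col` of the Jacobian is approximated by the central
  # difference (f(x + delta * e_col) - f(x - delta * e_col)) / (2 * delta),
  # where e_col is the unit vector for flattened entry `col` of x.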
  f = _prepare(f, xs_dtypes, xs_shapes)
  for col in range(x_size):
    original = x.ravel().view(x_dtype)[col]
    x.ravel().view(x_dtype)[col] += delta
    y_pos = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    x.ravel().view(x_dtype)[col] -= delta
    y_neg = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    diff = (y_pos - y_neg) / scale
    jacobian[:, col] = diff.ravel().view(y_dtype)

  logging.vlog(1, "Numeric Jacobian =\n%s", jacobian)
  return jacobian


def _compute_gradient(f, y_shape, y_dtype, xs, param, delta):
  """Computes the theoretical and numerical jacobian."""
  x = xs[param]
  t = x.dtype
  allowed_types = [
      dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64,
      dtypes.complex64, dtypes.complex128
  ]
  assert t.base_dtype in allowed_types, ("Cannot compute gradient for "
                                         "unsupported type %s of argument %s" %
                                         (t.name, param))
  t2 = y_dtype
  assert t2.base_dtype in allowed_types, ("Cannot compute gradient for "
                                          "unsupported type %s of y" % t2.name)
  y_size = _product(y_shape)
  jacob_t = _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param)
  jacob_n = _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta)
  return jacob_t, jacob_n


def _compute_gradient_list(f, xs, delta):
  """Compute gradients for a list of x values."""
  # convert xs to tensors so that dtype and shape have uniform types
  xs = [ops.convert_to_tensor(x) for x in xs]
  # run the function to get info of the result
  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  f_temp = _prepare(f, xs_dtypes, xs_shapes)
  y = f_temp(*xs)
  return tuple(zip(*[
      _compute_gradient(f, y.shape, dtypes.as_dtype(y.dtype), xs, i, delta)
      for i in range(len(xs))
  ]))


@tf_export("test.compute_gradient", v1=[])
def compute_gradient(f, x, delta=None):
  """Computes the theoretical and numeric Jacobian of `f`.

  With y = f(x), computes the theoretical and numeric Jacobian dy/dx.

  Args:
    f: the function.
    x: the arguments for the function as a list or tuple of values convertible
      to a Tensor.
    delta: (optional) perturbation used to compute numeric Jacobian.

  Returns:
    A pair of lists, where the first is a list of 2-d numpy arrays representing
    the theoretical Jacobians for each argument, and the second list is the
    numerical ones. Each 2-d array has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in the
    corresponding argument and "y_size" is the number of elements in f(x).

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
    ValueError: If x is not a list or tuple.

  Example:
  ```python
  @tf.function
  def test_func(x):
    return x*x

  theoretical, numerical = tf.test.compute_gradient(test_func, [1.0])
  theoretical, numerical
  # ((array([[2.]], dtype=float32),), (array([[2.000004]], dtype=float32),))
  ```
  """
  if not isinstance(x, (list, tuple)):
    raise ValueError(
        "`x` must be a list or tuple of values convertible to a Tensor "
        "(arguments to `f`), not a %s" % type(x))
  if delta is None:
    # By default, we use a step size for the central finite difference
    # approximation that is exactly representable as a binary floating
    # point number, since this reduces the amount of noise due to rounding
    # in the approximation of some functions.
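    # (1.0 / 1024 == 2**-10, so it is an exact binary fraction.)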
    delta = 1.0 / 1024
  return _compute_gradient_list(f, x, delta)


def max_error(grad1, grad2):
  """Computes maximum elementwise gap.

  Computes the maximum elementwise gap between two lists of tensors of the same
  shape.

  Args:
    grad1: a list of tensors.
    grad2: a list of tensors with the same shape as grad1.

  Returns:
    The maximum elementwise gap between the two.
  """
  error = 0
  for j_t, j_n in zip(grad1, grad2):
    if j_t.size or j_n.size:  # Handle zero size tensors correctly
      error = np.maximum(error, np.fabs(j_t - j_n).max())
  return error
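

# A minimal usage sketch (illustrative only; it mirrors the example in
# `compute_gradient`'s docstring and assumes eager execution):
#
#   def f(x):
#     return x * x
#
#   theoretical, numerical = compute_gradient(f, [1.0])
#   assert max_error(theoretical, numerical) < 1e-4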