# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
15"""Gradient checker for functions.
16
17The gradient checker verifies numerically that an function properly
18computes the gradients
19"""
import numpy as np

from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import indexed_slices
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import tf_export


def _product(t):
  if isinstance(t, int):
    return t
  else:
    y = 1
    for x in t:
      y *= x
    return y


def _eval_indexed_slices(a):
  """Converts IndexedSlices to IndexedSlicesValue with numpy indices/values.

  When eager execution is enabled, converts IndexedSlices
  to IndexedSlicesValue with numpy indices/values.

  Args:
    a: any value.

  Returns:
    If a is IndexedSlices and eager execution is enabled, calls numpy() on a's
    fields. Otherwise returns a unchanged.
  """
  if (isinstance(a, indexed_slices.IndexedSlices) and
      context.executing_eagerly()):
    return indexed_slices.IndexedSlicesValue(
        indices=[x.numpy() for x in a.indices],
        values=[x.numpy() for x in a.values],
        dense_shape=a.dense_shape)
  return a


def _to_numpy(a):
  """Converts Tensors, EagerTensors, and IndexedSlicesValue to numpy arrays.

  Args:
    a: any value.

  Returns:
    If a is EagerTensor or Tensor, returns the evaluation of a by calling
    numpy() or run(). If a is IndexedSlicesValue, constructs the corresponding
    dense numpy array. Otherwise returns a unchanged.
  """
  if isinstance(a, ops.EagerTensor):
    return a.numpy()
  if isinstance(a, ops.Tensor):
    sess = ops.get_default_session()
    return sess.run(a)
  if isinstance(a, indexed_slices.IndexedSlicesValue):
    arr = np.zeros(a.dense_shape)
    assert len(a.values) == len(a.indices), (
        "IndexedSlicesValue has %s value slices but %s indices\n%s" %
        (a.values, a.indices, a))
    for values_slice, index in zip(a.values, a.indices):
      assert 0 <= index < len(arr), (
          "IndexedSlicesValue has invalid index %s\n%s" % (index, a))
      arr[index] += values_slice
    return arr
  return a


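# Illustrative sketch (hypothetical `_example_densify_indexed_slices` helper,
# not part of this module's API): _to_numpy scatter-adds each value slice of an
# IndexedSlicesValue into a dense array of shape `dense_shape`, as documented
# above.
def _example_densify_indexed_slices():
  """Densifies a tiny IndexedSlicesValue into a (3, 2) array (sketch only)."""
  slices_value = indexed_slices.IndexedSlicesValue(
      indices=np.array([0, 2]),
      values=np.array([[1.0, 1.0], [2.0, 2.0]]),
      dense_shape=np.array([3, 2]))
  return _to_numpy(slices_value)  # [[1., 1.], [0., 0.], [2., 2.]]

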
def _prepare(f, xs_dtypes, xs_shapes):
  """Return a function that executes 'f'.

  In TF 2.x, this is the same as `f`.
  In TF 1.x, returns a Python function that executes the graph defined by `f`
  in a Session.

  Args:
    f: the function.
    xs_dtypes: dtypes of f's arguments.
    xs_shapes: shapes of f's arguments.

  Returns:
    A function that takes the same arguments as `f` and evaluates it, running
    the graph in the default Session when executing in TF 1.x graph mode.
  """
  if context.executing_eagerly():

    def decorated_eager(*xs_data):
      return f(*map(ops.convert_to_tensor, xs_data))

    return decorated_eager
  xs = [
      array_ops.placeholder(x_dtype, shape=x_shape)
      for x_dtype, x_shape in zip(xs_dtypes, xs_shapes)
  ]
  y = f(*xs)
  sess = ops.get_default_session()

  def decorated_graph(*xs_data):
    xs_data = [_to_numpy(a) for a in xs_data]
    return sess.run(y, feed_dict=dict(zip(xs, xs_data)))

  return decorated_graph


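# Illustrative sketch (hypothetical `_example_prepared_call` helper, eager mode
# only): in TF 2.x the prepared function simply forwards its numpy arguments to
# `f` as tensors, as described in the docstring above.
def _example_prepared_call():
  """Evaluates a prepared doubling function on a numpy input (sketch only)."""
  prepared = _prepare(lambda a: a * 2.0, [dtypes.float32], [(2,)])
  return _to_numpy(prepared(np.array([1.0, 2.0], dtype=np.float32)))  # [2., 4.]

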
def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param):
  """Computes the theoretical Jacobian of f with respect to xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_shape: the shape of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
  """
  x = xs[param]
  # Complex vectors are treated as vectors of twice as many reals.
  x_shape = tuple(x.shape) + (2,) if x.dtype.is_complex else x.shape
  y_factor = 2 if y_dtype.is_complex else 1

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape)
  x_val_size = _product(x_shape[1:])  # This is used for sparse gradients.
  y_size = _product(y_shape) * y_factor

  # Allocate 2-D Jacobian, with y dimensions smashed into the first
  # dimension and x dimensions smashed into the second.
  jacobian = np.zeros((y_size, x_size), dtype=x.dtype.real_dtype.as_numpy_dtype)

  # For each entry of dy, we set it to 1 (and everything else to 0) and
  # compute the gradients -- this gives us one row of the Jacobian matrix.
  dy_data = np.zeros(y_shape, dtype=y_dtype.as_numpy_dtype)
  dy_data_flat = dy_data.ravel().view(y_dtype.real_dtype.as_numpy_dtype)
  grad_fn_unprep = backprop.gradients_function(f, [param])
  grad_fn = _prepare(lambda dy, *xs: grad_fn_unprep(*xs, dy=dy),
                     [y_dtype] + [z.dtype for z in xs],
                     [None] + [z.shape for z in xs])
  for row in range(y_size):
    dy_data_flat[row] = 1
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    grad = _eval_indexed_slices(grad)
    if isinstance(grad, indexed_slices.IndexedSlicesValue):
      for i, v in zip(grad.indices, grad.values):
        c_begin = i * x_val_size
        c_end = c_begin + x_val_size
        jacobian[row, c_begin:c_end] += v.flat
    elif grad is not None:
      jacobian[row, :] = grad.ravel().view(jacobian.dtype)
    # This reset of `dy_data_flat` needs to happen after `grad` is copied to
    # `jacobian` because `grad` and `dy_data_flat` may share memory.
    dy_data_flat[row] = 0

  # If the output is empty, run the gradients at least once and make sure
  # they produce zeros.
  if y_size == 0:  # don't use 'not y_size', because y_size may not be an int
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    if grad.shape != x.shape:
      raise ValueError("Empty gradient has wrong shape: expected %s, got %s" %
                       (x.shape, grad.shape))
    if np.any(grad):
      raise ValueError("Empty tensor with nonzero gradients")

  logging.vlog(1, "Theoretical Jacobian =\n%s", jacobian)
  return jacobian


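# Illustrative sketch (hypothetical `_example_jacobian_row` helper, eager mode,
# real dtypes only): one row of the theoretical Jacobian is the gradient of f
# backpropagated from a one-hot upstream gradient `dy`, exactly as in the loop
# above.
def _example_jacobian_row(f, x, row):
  """Returns row `row` of the Jacobian of `f` at `x`, shaped like `x` (sketch)."""
  x = ops.convert_to_tensor(x)
  flat_f = lambda t: array_ops.reshape(f(t), [-1])
  y = flat_f(x)
  # One-hot cotangent selecting a single output element.
  dy = np.zeros(int(y.shape[0]), dtype=y.dtype.as_numpy_dtype)
  dy[row] = 1
  grad_fn = backprop.gradients_function(flat_f)
  return _to_numpy(grad_fn(x, dy=ops.convert_to_tensor(dy))[0])

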
def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta):
  """Computes the numeric Jacobian of f with respect to xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_size: the number of elements of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.
    delta: the amount of perturbation we give to the input.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.
  """
  x_shape = xs[param].shape
  x_dtype = xs[param].dtype

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape) * (2 if x_dtype.is_complex else 1)
  y_size = y_size * (2 if y_dtype.is_complex else 1)
  x_dtype = x_dtype.real_dtype.as_numpy_dtype
  y_dtype = y_dtype.real_dtype.as_numpy_dtype

  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  # Converts xs to numpy arrays to do in-place perturbation.
  # Calls asarray() to avoid copying in ravel() later.
  xs = [np.asarray(_to_numpy(x)) for x in xs]
  x = xs[param]

  # Make sure we have the right types.
  scale = np.asarray(2 * delta, dtype=y_dtype)[()]

  jacobian = np.zeros((y_size, x_size), dtype=x_dtype)

  # For each entry of x, we slightly perturb it by adding and subtracting
  # delta and then compute the difference between the two outputs. This
  # gives us one column of the Jacobian matrix.
  f = _prepare(f, xs_dtypes, xs_shapes)
  for col in range(x_size):
    original = x.ravel().view(x_dtype)[col]
    x.ravel().view(x_dtype)[col] += delta
    y_pos = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    x.ravel().view(x_dtype)[col] -= delta
    y_neg = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    diff = (y_pos - y_neg) / scale
    jacobian[:, col] = diff.ravel().view(y_dtype)

  logging.vlog(1, "Numeric Jacobian =\n%s", jacobian)
  return jacobian


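# Illustrative sketch (hypothetical `_example_numeric_column` helper, plain
# numpy, real dtypes only): a single column of the numeric Jacobian is the
# central difference (f(x + delta*e_col) - f(x - delta*e_col)) / (2 * delta),
# mirroring the perturbation loop above.
def _example_numeric_column(f, x, col, delta=1.0 / 1024):
  """Returns column `col` of the numeric Jacobian of `f` at `x` (sketch only)."""
  x = np.array(x, dtype=np.float64)
  flat = x.ravel()  # A view into x, so the writes below perturb x in place.
  original = flat[col]
  flat[col] = original + delta
  y_pos = np.asarray(f(x)).ravel()
  flat[col] = original - delta
  y_neg = np.asarray(f(x)).ravel()
  flat[col] = original
  return (y_pos - y_neg) / (2 * delta)

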
def _compute_gradient(f, y_shape, y_dtype, xs, param, delta):
  """Computes the theoretical and numeric Jacobians."""
  x = xs[param]
  t = x.dtype
  allowed_types = [
      dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64,
      dtypes.complex64, dtypes.complex128
  ]
  assert t.base_dtype in allowed_types, ("Cannot compute gradient for "
                                         "unsupported type %s of argument %s" %
                                         (t.name, param))
  t2 = y_dtype
  assert t2.base_dtype in allowed_types, ("Cannot compute gradient for "
                                          "unsupported type %s of y" % t2.name)
  y_size = _product(y_shape)
  jacob_t = _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param)
  jacob_n = _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta)
  return jacob_t, jacob_n


def _compute_gradient_list(f, xs, delta):
  """Compute gradients for a list of x values."""
  # Convert xs to tensors so that dtype and shape have uniform types.
  xs = [ops.convert_to_tensor(x) for x in xs]
  # Run the function to get the shape and dtype of the result.
  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  f_temp = _prepare(f, xs_dtypes, xs_shapes)
  y = f_temp(*xs)
  return tuple(
      zip(*[
          _compute_gradient(f, y.shape, dtypes.as_dtype(y.dtype), xs, i, delta)
          for i in range(len(xs))
      ]))


@tf_export("test.compute_gradient", v1=[])
def compute_gradient(f, x, delta=None):
  """Computes the theoretical and numeric Jacobian of `f`.

  With y = f(x), computes the theoretical and numeric Jacobian dy/dx.

  Args:
    f: the function.
    x: the arguments for the function as a list or tuple of values convertible
      to a Tensor.
    delta: (optional) perturbation used to compute numeric Jacobian.

  Returns:
    A pair of lists, where the first is a list of 2-d numpy arrays representing
    the theoretical Jacobians for each argument, and the second list is the
    numerical ones. Each 2-d array has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in the
    corresponding argument and "y_size" is the number of elements in f(x).

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
    ValueError: If x is not a list or tuple.

  Example:

  >>> @tf.function
  ... def test_func(x):
  ...   return x*x
  ...
  >>>
  >>> class MyTest(tf.test.TestCase):
  ...
  ...   def test_gradient_of_test_func(self):
  ...     theoretical, numerical = tf.test.compute_gradient(test_func, [1.0])
  ...     # ((array([[2.]], dtype=float32),),
  ...     #  (array([[2.000004]], dtype=float32),))
  ...     self.assertAllClose(theoretical, numerical)

  """
  if not isinstance(x, (list, tuple)):
    raise ValueError(
        "`x` must be a list or tuple of values convertible to a Tensor "
        "(arguments to `f`), not a %s" % type(x))
  if delta is None:
    # By default, we use a step size for the central finite difference
    # approximation that is exactly representable as a binary floating
    # point number, since this reduces the amount of noise due to rounding
    # in the approximation of some functions.
    delta = 1.0 / 1024
  return _compute_gradient_list(f, x, delta)


def max_error(grad1, grad2):
  """Computes maximum elementwise gap.

  Computes the maximum elementwise gap between two lists of tensors of the same
  shape.

  Args:
    grad1: a list of tensors.
    grad2: a list of tensors with the same shape as grad1.

  Returns:
    The maximum elementwise gap between the two.
  """
  error = 0
  for j_t, j_n in zip(grad1, grad2):
    if j_t.size or j_n.size:  # Handle zero size tensors correctly
      error = np.maximum(error, np.fabs(j_t - j_n).max())
  return error
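

# Usage sketch (hypothetical `_example_check_square_gradient` helper, eager
# mode): pair `compute_gradient` with `max_error` to bound the gap between the
# theoretical and numeric Jacobians of a function.
def _example_check_square_gradient():
  """Checks d(x*x)/dx at x=2 numerically against backprop (sketch only)."""
  def func(x):
    return x * x

  theoretical, numerical = compute_gradient(func, [np.float32(2.0)])
  # Both Jacobians are close to [[4.]], so the returned error is tiny.
  return max_error(theoretical, numerical)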