# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
15"""Gradient checker for functions.
16
17The gradient checker verifies numerically that an function properly
18computes the gradients
19"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import tf_export


def _product(t):
  if isinstance(t, int):
    return t
  else:
    y = 1
    for x in t:
      y *= x
    return y

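# Illustrative note (not part of the original module): _product collapses a
# shape into an element count, e.g. _product((2, 3)) == 6, _product(5) == 5,
# and _product(()) == 1 for a scalar shape.
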
def _eval_indexed_slices(a):
  """Converts IndexedSlices to IndexedSlicesValue with numpy indices/values.

  When eager execution is enabled, converts IndexedSlices
  to IndexedSlicesValue with numpy indices/values.

  Args:
    a: any value.

  Returns:
    If a is IndexedSlices and eager execution is enabled, calls numpy() on a's
    fields. Otherwise returns a unchanged.
  """
  if isinstance(a, ops.IndexedSlices) and context.executing_eagerly():
    return ops.IndexedSlicesValue(
        indices=[x.numpy() for x in a.indices],
        values=[x.numpy() for x in a.values],
        dense_shape=a.dense_shape)
  return a

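# Illustrative sketch (assumes eager execution; `slices` is a hypothetical
# IndexedSlices such as one produced by differentiating a tf.gather):
#
#   value = _eval_indexed_slices(slices)
#   # value is an IndexedSlicesValue whose `indices` and `values` hold plain
#   # numpy data, so it can be densified by _to_numpy below.
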
def _to_numpy(a):
  """Converts Tensors, EagerTensors, and IndexedSlicesValue to numpy arrays.

  Args:
    a: any value.

  Returns:
    If a is EagerTensor or Tensor, returns the evaluation of a by calling
    numpy() or run(). If a is IndexedSlicesValue, constructs the corresponding
    dense numpy array. Otherwise returns a unchanged.
  """
  if isinstance(a, ops.EagerTensor):
    return a.numpy()
  if isinstance(a, ops.Tensor):
    sess = ops.get_default_session()
    return sess.run(a)
  if isinstance(a, ops.IndexedSlicesValue):
    arr = np.zeros(a.dense_shape)
    assert len(a.values) == len(a.indices), (
        "IndexedSlicesValue has %s value slices but %s indices\n%s" %
        (a.values, a.indices, a))
    for values_slice, index in zip(a.values, a.indices):
      assert 0 <= index < len(arr), (
          "IndexedSlicesValue has invalid index %s\n%s" % (index, a))
      arr[index] += values_slice
    return arr
  return a

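# Illustrative sketch (hypothetical values): an IndexedSlicesValue with
# dense_shape (4, 2), indices [1, 3] and values [[1., 1.], [2., 2.]]
# densifies to
#
#   [[0., 0.],
#    [1., 1.],
#    [0., 0.],
#    [2., 2.]]
#
# Repeated indices accumulate because of the `+=` above.
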
def _prepare(f, xs_dtypes, xs_shapes):
  """Return a function that executes 'f'.

  In TF 2.x, this is the same as `f`.
  In TF 1.x, returns a Python function that executes the graph defined by `f`
  in a Session.

  Args:
    f: the function.
    xs_dtypes: dtypes of f's arguments.
    xs_shapes: shapes of f's arguments.

  Returns:
    A Python function that, given numpy values for f's arguments, evaluates
    `f` and returns its result (running the graph in the default Session when
    executing in graph mode).
  """
  if context.executing_eagerly():

    def decorated_eager(*xs_data):
      return f(*map(ops.convert_to_tensor, xs_data))

    return decorated_eager
  xs = [
      array_ops.placeholder(x_dtype, shape=x_shape)
      for x_dtype, x_shape in zip(xs_dtypes, xs_shapes)
  ]
  y = f(*xs)
  sess = ops.get_default_session()

  def decorated_graph(*xs_data):
    xs_data = [_to_numpy(a) for a in xs_data]
    return sess.run(y, feed_dict=dict(zip(xs, xs_data)))

  return decorated_graph

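# Illustrative sketch (hypothetical values; in graph mode a default Session
# must already be active for ops.get_default_session() to succeed):
#
#   g = _prepare(lambda a, b: a + b,
#                [dtypes.float32, dtypes.float32],
#                [(2,), (2,)])
#   g(np.ones(2, np.float32), np.ones(2, np.float32))  # -> array([2., 2.])
#
# Under eager execution the same call simply converts the numpy inputs to
# tensors and invokes the lambda directly.
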
def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param):
  """Computes the theoretical Jacobian of f with respect to xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_shape: the shape of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.

  Raises:
    ValueError: If the result is empty but the gradient is nonzero.
  """
  x = xs[param]
  # Complex vectors are treated as vectors of twice as many reals.
  x_shape = tuple(x.shape) + (2,) if x.dtype.is_complex else x.shape
  y_factor = 2 if y_dtype.is_complex else 1

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape)
  x_val_size = _product(x_shape[1:])  # This is used for sparse gradients.
  y_size = _product(y_shape) * y_factor

  # Allocate 2-D Jacobian, with y dimensions smashed into the first
  # dimension and x dimensions smashed into the second.
  jacobian = np.zeros((y_size, x_size), dtype=x.dtype.real_dtype.as_numpy_dtype)

  # For each entry of dy, we set it to 1 and everything else to 0, then
  # compute the gradients -- this gives us one row of the Jacobian matrix.
  dy_data = np.zeros(y_shape, dtype=y_dtype.as_numpy_dtype)
  dy_data_flat = dy_data.ravel().view(y_dtype.real_dtype.as_numpy_dtype)
  grad_fn_unprep = backprop.gradients_function(f, [param])
  grad_fn = _prepare(lambda dy, *xs: grad_fn_unprep(*xs, dy=dy),
                     [y_dtype] + [z.dtype for z in xs],
                     [None] + [z.shape for z in xs])
  for row in range(y_size):
    dy_data_flat[row] = 1
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    grad = _eval_indexed_slices(grad)
    if isinstance(grad, ops.IndexedSlicesValue):
      for i, v in zip(grad.indices, grad.values):
        c_begin = i * x_val_size
        c_end = c_begin + x_val_size
        jacobian[row, c_begin:c_end] += v.flat
    elif grad is not None:
      jacobian[row, :] = grad.ravel().view(jacobian.dtype)
    # This reset of `dy_data_flat` needs to happen after `grad` is copied to
    # `jacobian` because `grad` and `dy_data_flat` may share memory.
    dy_data_flat[row] = 0

  # If the output is empty, run the gradients at least once and make sure
  # they produce zeros.
  if y_size == 0:  # don't use 'not y_size', because y_size may not be an int
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    if grad.shape != x.shape:
      raise ValueError("Empty gradient has wrong shape: expected %s, got %s" %
                       (x.shape, grad.shape))
    if np.any(grad):
      raise ValueError("Empty tensor with nonzero gradients")

  logging.vlog(1, "Theoretical Jacobian =\n%s", jacobian)
  return jacobian

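# Worked example (illustrative, not part of the module): for f(x) = 2 * x with
# x of shape (3,), each backprop pass uses a one-hot dy to fill one row, so the
# loop above produces the 3x3 theoretical Jacobian 2 * np.eye(3).
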
def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta):
  """Computes the numeric Jacobian of f with respect to xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_size: the number of elements of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.
    delta: the amount of perturbation we give to the input.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.
  """
  x_shape = xs[param].shape
  x_dtype = xs[param].dtype

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape) * (2 if x_dtype.is_complex else 1)
  y_size = y_size * (2 if y_dtype.is_complex else 1)
  x_dtype = x_dtype.real_dtype.as_numpy_dtype
  y_dtype = y_dtype.real_dtype.as_numpy_dtype

  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  # Converts xs to numpy arrays to do in-place perturbation.
  # Calls asarray() to avoid copying in ravel() later.
  xs = [np.asarray(_to_numpy(x)) for x in xs]
  x = xs[param]

  # Make sure we have the right types.
  scale = np.asarray(2 * delta, dtype=y_dtype)[()]

  jacobian = np.zeros((y_size, x_size), dtype=x_dtype)

  # For each entry of x, we slightly perturb it by adding and subtracting a
  # delta, then compute the difference between the outputs. This gives us one
  # column of the Jacobian matrix.
  f = _prepare(f, xs_dtypes, xs_shapes)
  for col in range(x_size):
    original = x.ravel().view(x_dtype)[col]
    x.ravel().view(x_dtype)[col] += delta
    y_pos = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    x.ravel().view(x_dtype)[col] -= delta
    y_neg = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    diff = (y_pos - y_neg) / scale
    jacobian[:, col] = diff.ravel().view(y_dtype)

  logging.vlog(1, "Numeric Jacobian =\n%s", jacobian)
  return jacobian

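# Worked example (illustrative): the loop above forms the central difference
# (f(x + delta) - f(x - delta)) / (2 * delta) one input element at a time.
# For f(x) = x * x at x = 1.0, that quotient is ((1 + d)**2 - (1 - d)**2) /
# (2 * d) == 2 exactly in real arithmetic; the small deviation seen in practice
# (e.g. 2.000004 in the compute_gradient docstring below) is float32 rounding.
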
def _compute_gradient(f, y_shape, y_dtype, xs, param, delta):
  """Computes the theoretical and numeric Jacobians for xs[param]."""
  x = xs[param]
  t = x.dtype
  allowed_types = [
      dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64,
      dtypes.complex64, dtypes.complex128
  ]
  assert t.base_dtype in allowed_types, ("Cannot compute gradient for "
                                         "unsupported type %s of argument %s" %
                                         (t.name, param))
  t2 = y_dtype
  assert t2.base_dtype in allowed_types, ("Cannot compute gradient for "
                                          "unsupported type %s of y" % t2.name)
  y_size = _product(y_shape)
  jacob_t = _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param)
  jacob_n = _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta)
  return jacob_t, jacob_n

def _compute_gradient_list(f, xs, delta):
  """Compute gradients for a list of x values."""
  # Convert xs to tensors so that dtype and shape have uniform types.
  xs = [ops.convert_to_tensor(x) for x in xs]
  # Run the function to get the shape and dtype of the result.
  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  f_temp = _prepare(f, xs_dtypes, xs_shapes)
  y = f_temp(*xs)
  return tuple(zip(*[
      _compute_gradient(f, y.shape, dtypes.as_dtype(y.dtype), xs, i, delta)
      for i in range(len(xs))
  ]))

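# Note on the return value (illustrative): each _compute_gradient call yields a
# (theoretical, numeric) pair, and the tuple(zip(*...)) above transposes the
# per-argument pairs into a pair of per-argument tuples:
#
#   ((jacob_t_0, jacob_t_1, ...), (jacob_n_0, jacob_n_1, ...))
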
@tf_export("test.compute_gradient", v1=[])
def compute_gradient(f, x, delta=None):
  """Computes the theoretical and numeric Jacobian of `f`.

  With y = f(x), computes the theoretical and numeric Jacobian dy/dx.

  Args:
    f: the function.
    x: the arguments for the function as a list or tuple of values convertible
      to a Tensor.
    delta: (optional) perturbation used to compute numeric Jacobian.

  Returns:
    A pair of lists, where the first is a list of 2-d numpy arrays representing
    the theoretical Jacobians for each argument, and the second list is the
    numerical ones. Each 2-d array has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in the
    corresponding argument and "y_size" is the number of elements in f(x).

  Raises:
    ValueError: If the result is empty but the gradient is nonzero.
    ValueError: If `x` is not a list or tuple.

  Example:
  ```python
  @tf.function
  def test_func(x):
    return x*x

  theoretical, numerical = tf.test.compute_gradient(test_func, [1.0])
  theoretical, numerical
  # ((array([[2.]], dtype=float32),), (array([[2.000004]], dtype=float32),))
  ```
  """
  if not isinstance(x, (list, tuple)):
    raise ValueError(
        "`x` must be a list or tuple of values convertible to a Tensor "
        "(arguments to `f`), not a %s" % type(x))
  if delta is None:
    # By default, we use a step size for the central finite difference
    # approximation that is exactly representable as a binary floating
    # point number, since this reduces the amount of noise due to rounding
    # in the approximation of some functions.
    delta = 1.0 / 1024
  return _compute_gradient_list(f, x, delta)

def max_error(grad1, grad2):
  """Computes maximum elementwise gap.

  Computes the maximum elementwise gap between two lists of tensors of the same
  shape.

  Args:
    grad1: a list of tensors.
    grad2: a list of tensors with the same shape as grad1.

  Returns:
    The maximum elementwise gap between the two.
  """
  error = 0
  for j_t, j_n in zip(grad1, grad2):
    if j_t.size or j_n.size:  # Handle zero size tensors correctly
      error = np.maximum(error, np.fabs(j_t - j_n).max())
  return error

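# Illustrative end-to-end check (hypothetical tolerance), combining
# compute_gradient and max_error the way gradient tests typically do:
#
#   theoretical, numerical = compute_gradient(lambda x: x * x, [1.0])
#   assert max_error(theoretical, numerical) < 1e-4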