# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Gradient checker for functions.

The gradient checker verifies numerically that a function properly
computes its gradients.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import tf_export


def _product(t):
  """Returns the product of t's elements, or t itself if t is an int."""
  if isinstance(t, int):
    return t
  else:
    y = 1
    for x in t:
      y *= x
    return y
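
# For example, _product((2, 3, 4)) == 24 and _product(5) == 5; shapes are
# flattened to element counts this way when sizing the Jacobians below.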


def _eval_indexed_slices(a):
  """Converts IndexedSlices to IndexedSlicesValue with numpy indices/values.

  When eager execution is enabled, converts IndexedSlices
  to IndexedSlicesValue with numpy indices/values.

  Args:
    a: any value.

  Returns:
    If a is IndexedSlices and eager execution is enabled, calls numpy() on a's
    fields. Otherwise returns a unchanged.
  """
  if isinstance(a, ops.IndexedSlices) and context.executing_eagerly():
    return ops.IndexedSlicesValue(
        indices=[x.numpy() for x in a.indices],
        values=[x.numpy() for x in a.values],
        dense_shape=a.dense_shape)
  return a


def _to_numpy(a):
  """Converts Tensors, EagerTensors, and IndexedSlicesValue to numpy arrays.

  Args:
    a: any value.

  Returns:
    If a is EagerTensor or Tensor, returns the evaluation of a by calling
    numpy() or run(). If a is IndexedSlicesValue, constructs the corresponding
    dense numpy array. Otherwise returns a unchanged.
  """
  if isinstance(a, ops.EagerTensor):
    return a.numpy()
  if isinstance(a, ops.Tensor):
    sess = ops.get_default_session()
    return sess.run(a)
  if isinstance(a, ops.IndexedSlicesValue):
    arr = np.zeros(a.dense_shape)
    assert len(a.values) == len(a.indices), (
        "IndexedSlicesValue has %d value slices but %d indices\n%s" %
        (len(a.values), len(a.indices), a))
    for values_slice, index in zip(a.values, a.indices):
      assert 0 <= index < len(arr), (
          "IndexedSlicesValue has invalid index %s\n%s" % (index, a))
      arr[index] += values_slice
    return arr
  return a
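
# For instance, an IndexedSlicesValue with dense_shape=[4, 2], indices=[0, 2]
# and values=[[1., 1.], [2., 2.]] densifies to
#   [[1., 1.],
#    [0., 0.],
#    [2., 2.],
#    [0., 0.]]
# with slices at duplicate indices accumulated via +=.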


def _prepare(f, xs_dtypes):
  """Returns a function that executes 'f'.

  In TF 2.x, this is the same as `f`.
  In TF 1.x, returns a Python function that executes the graph defined by `f`
  in a Session.

  Args:
    f: the function.
    xs_dtypes: dtypes of f's arguments.

  Returns:
    A function that will be evaluated in both graph and eager mode.
  """
  if context.executing_eagerly():

    def decorated_eager(*xs_data):
      return f(*map(ops.convert_to_tensor, xs_data))

    return decorated_eager
  xs = [array_ops.placeholder(x_dtype) for x_dtype in xs_dtypes]
  y = f(*xs)
  sess = ops.get_default_session()

  def decorated_graph(*xs_data):
    xs_data = [_to_numpy(a) for a in xs_data]
    return sess.run(y, feed_dict=dict(zip(xs, xs_data)))

  return decorated_graph
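
# A minimal graph-mode sketch (assumes a default Session is active; `session`
# here stands for tensorflow.python.client.session, which this module does not
# import):
#
#   with ops.Graph().as_default(), session.Session().as_default():
#     double = _prepare(lambda t: t * 2.0, [dtypes.float32])
#     double(np.array([1.0, 2.0]))  # => array([2., 4.], dtype=float32)
#
# Under eager execution, _prepare instead returns a thin wrapper that converts
# its arguments to tensors and calls `f` directly.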


def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param):
  """Computes the theoretical Jacobian for f with respect to xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_shape: the shape of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "x_size" rows
    and "y_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
  """
  x = xs[param]
  # Complex vectors are treated as vectors of twice as many reals.
  x_shape = tuple(x.shape) + (2,) if x.dtype.is_complex else x.shape
  y_factor = 2 if y_dtype.is_complex else 1

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape)
  x_val_size = _product(x_shape[1:])  # This is used for sparse gradients.
  y_size = _product(y_shape) * y_factor

  # Allocate 2-D Jacobian, with x dimensions smashed into the first
  # dimension and y dimensions smashed into the second.
  jacobian = np.zeros((x_size, y_size), dtype=x.dtype.real_dtype.as_numpy_dtype)

  # For each entry of dy, we set it to 1 (and everything else to 0) and
  # compute the gradients -- this gives us one column of the Jacobian matrix.
  dy_data = np.zeros(y_shape, dtype=y_dtype.as_numpy_dtype)
  dy_data_flat = dy_data.ravel().view(y_dtype.real_dtype.as_numpy_dtype)
  grad_fn_unprep = backprop.gradients_function(f, [param])
  grad_fn = _prepare(lambda dy, *xs: grad_fn_unprep(*xs, dy=dy),
                     [y_dtype] + [x.dtype for x in xs])
  for col in range(y_size):
    dy_data_flat[col] = 1
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    grad = _eval_indexed_slices(grad)
    dy_data_flat[col] = 0
    if isinstance(grad, ops.IndexedSlicesValue):
      for i, v in zip(grad.indices, grad.values):
        r_begin = i * x_val_size
        r_end = r_begin + x_val_size
        jacobian[r_begin:r_end, col] += v.flat
    else:
      jacobian[:, col] = grad.ravel().view(jacobian.dtype)

  # If the output is empty, run the gradients at least once and make sure
  # they produce zeros.
  if y_size == 0:  # don't use 'not y_size', because y_size may not be an int
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    if grad.shape != x.shape:
      raise ValueError("Empty gradient has wrong shape: expected %s, got %s" %
                       (x.shape, grad.shape))
    if np.any(grad):
      raise ValueError("Empty tensor with nonzero gradients")

  logging.vlog(1, "Theoretical Jacobian =\n%s", jacobian)
  return jacobian
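
# E.g., for f(x) = x**2 with x = [1., 2.], backpropagating dy = [1., 0.] fills
# column 0 with [2., 0.] and dy = [0., 1.] fills column 1 with [0., 4.], so
# the theoretical Jacobian is diag(2., 4.).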


def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta):
  """Computes the numeric Jacobian for f with respect to xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_size: the number of elements of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.
    delta: the amount of perturbation we give to the input.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "x_size" rows
    and "y_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.
  """
  # bfloat16 doesn't have enough bits to represent high precision numbers such
  # as delta. Convert to float32 here. Since numeric_jacobian is expected to
  # be the ground truth to compare against, it shouldn't lose any information.
  x_shape = xs[param].shape
  x_dtype = xs[param].dtype
  if y_dtype == dtypes.bfloat16:
    # Bind the original f to a new name; referencing `f` inside the lambda
    # after rebinding it would recurse infinitely.
    f_orig = f
    f = lambda *xs: math_ops.cast(f_orig(*xs), dtypes.float32)
    y_dtype = dtypes.float32

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape) * (2 if x_dtype.is_complex else 1)
  y_size = y_size * (2 if y_dtype.is_complex else 1)
  x_dtype = x_dtype.real_dtype.as_numpy_dtype
  y_dtype = y_dtype.real_dtype.as_numpy_dtype

  xs_dtypes = [x.dtype for x in xs]
  # Converts xs to numpy arrays to do in-place perturbation.
  # Calls asarray() to avoid copying in ravel() later.
  xs = [np.asarray(_to_numpy(x)) for x in xs]
  x = xs[param]

  # Make sure we have the right types.
  scale = np.asarray(2 * delta, dtype=y_dtype)[()]

  jacobian = np.zeros((x_size, y_size), dtype=x_dtype)

  # For each entry of x, we slightly perturb it by adding and subtracting
  # delta, then compute the difference between the two outputs. This gives us
  # one row of the Jacobian matrix.
  f = _prepare(f, xs_dtypes)
  for row in range(x_size):
    original = x.ravel().view(x_dtype)[row]
    x.ravel().view(x_dtype)[row] += delta
    y_pos = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[row] = original
    x.ravel().view(x_dtype)[row] -= delta
    y_neg = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[row] = original
    diff = (y_pos - y_neg) / scale
    jacobian[row, :] = diff.ravel().view(y_dtype)

  logging.vlog(1, "Numeric Jacobian =\n%s", jacobian)
  return jacobian
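
# E.g., for f(x) = x**2 at x = 3., an entry is approximated by the centered
# difference ((3. + delta)**2 - (3. - delta)**2) / (2 * delta), which equals
# the analytic derivative 2 * 3. = 6. exactly for this quadratic.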


def _compute_gradient(f, y_shape, y_dtype, xs, param, delta):
  """Computes the theoretical and numeric Jacobians."""
  x = xs[param]
  t = x.dtype
  allowed_types = [dtypes.float16, dtypes.bfloat16, dtypes.float32,
                   dtypes.float64, dtypes.complex64, dtypes.complex128]
  assert t.base_dtype in allowed_types, ("Cannot compute gradient for "
                                         "unsupported type %s of argument %s" %
                                         (t.name, param))
  t2 = y_dtype
  assert t2.base_dtype in allowed_types, ("Cannot compute gradient for "
                                          "unsupported type %s of y" % t2.name)
  y_size = _product(y_shape)
  jacob_t = _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param)
  jacob_n = _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta)
  return jacob_t, jacob_n


def _compute_gradient_list(f, xs, delta):
  """Computes gradients for a list of x values."""
  # Convert xs to tensors so that dtype and shape have uniform types.
  xs = list(map(ops.convert_to_tensor, xs))
  # Run the function to get info about the result.
  xs_dtypes = [x.dtype for x in xs]
  f_temp = _prepare(f, xs_dtypes)
  y = f_temp(*xs)
  return zip(*[_compute_gradient(f, y.shape, dtypes.as_dtype(y.dtype),
                                 xs, i, delta) for i in range(len(xs))])


@tf_export("test.compute_gradient", v1=[])
def compute_gradient(f, x, delta=1e-3):
  """Computes the theoretical and numeric Jacobian of f.

  With y = f(x), computes the theoretical and numeric Jacobian dy/dx.

  Args:
    f: the function.
    x: a list of tensors.
    delta: (optional) perturbation used to compute numeric Jacobian.

  Returns:
    A pair of lists, where the first is a list of 2-d numpy arrays representing
    the theoretical Jacobians for each argument, and the second list is the
    numerical ones. Each 2-d array has "x_size" rows
    and "y_size" columns where "x_size" is the number of elements in the
    corresponding argument and "y_size" is the number of elements in f(x).

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
    ValueError: If x is not a list.
  """
  if not isinstance(x, list):
    raise ValueError(
        "`x` must be a list of Tensors (arguments to `f`), not a %s" % type(x))
  return _compute_gradient_list(f, x, delta)
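
# Example usage (a sketch; assumes eager execution, e.g. TF 2.x, with `tf`
# bound to the tensorflow package):
#
#   theoretical, numeric = tf.test.compute_gradient(
#       tf.math.square, [tf.constant([1.0, 2.0])])
#   # Each Jacobian is a 2x2 array, approximately diag(2.0, 4.0).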


def max_error(grad1, grad2):
  """Computes the maximum elementwise gap.

  Computes the maximum elementwise gap between two lists of tensors of the same
  shape.

  Args:
    grad1: a list of tensors.
    grad2: a list of tensors with the same shape as grad1.

  Returns:
    The maximum elementwise gap between the two.
  """
  error = 0
  for j_t, j_n in zip(grad1, grad2):
    if j_t.size or j_n.size:  # Handle zero-size tensors correctly.
      error = np.maximum(error, np.fabs(j_t - j_n).max())
  return error
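
# A typical gradient-check test combines compute_gradient with max_error
# (a sketch; assumes a tf.test.TestCase method, with `constant_op` standing
# for tensorflow.python.framework.constant_op, not imported here):
#
#   def testSquareGradient(self):
#     theoretical, numeric = compute_gradient(
#         lambda x: x * x, [constant_op.constant([1.0, 2.0])])
#     self.assertLess(max_error(theoretical, numeric), 1e-4)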