/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/cc/framework/gradient_checker.h"

#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/framework/gradients.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/type_traits.h"
#include "tensorflow/core/lib/core/errors.h"

namespace tensorflow {
namespace {

// TODO(andydavis) Support returning relative error (as opposed to max error)
// between theoretical and numerical jacobians:
// fabs(jac_t - jac_n) / max(fabs(jac_t), fabs(jac_n))

// TODO(andydavis) Vectorize and/or multi-thread Jacobian computations if
// performance becomes an issue.

// BaseUnitsForType provides a list of typed unit values for each basis in the
// requested type.
// When T is real,
// BaseUnitsForType<T>::values() is just a single-entry vector [1]
// When T is complex,
// BaseUnitsForType<T>::values() is a two-entry vector [1, i] - the unit
// values in each of its two bases.
template <typename T>
struct BaseUnitsForType {};  // Specializations below

// Template specialization for BaseUnitsForType
#define SET_BASE_UNITS_FOR_TYPE(TYPE, INIT)                         \
  template <>                                                       \
  struct BaseUnitsForType<TYPE> {                                   \
    static const std::vector<TYPE>& values() {                      \
      static std::vector<TYPE>* units = new std::vector<TYPE> INIT; \
      return *units;                                                \
    }                                                               \
  }

SET_BASE_UNITS_FOR_TYPE(float, {1});
SET_BASE_UNITS_FOR_TYPE(double, {1});
SET_BASE_UNITS_FOR_TYPE(complex64, ({{1, 0}, {0, 1}}));
SET_BASE_UNITS_FOR_TYPE(complex128, ({{1, 0}, {0, 1}}));
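
// For example, BaseUnitsForType<complex64>::values() holds
// {complex64(1, 0), complex64(0, 1)}, i.e. 1 and i, so that the real and
// imaginary dimensions can each be probed one at a time.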

// SetJacobian sets the jacobian value at the provided row and column from a
// tensor entry with type T.
// When T is real, this is a simple assignment that casts the entry into the
// jacobian type.
// When T is complex, it assigns the real and imaginary parts to successive
// rows or columns in the matrix, depending on the expand_by_row parameter.
template <typename T, typename JAC_T>
typename std::enable_if<std::is_floating_point<T>::value>::type SetJacobian(
    typename TTypes<JAC_T>::Matrix* jacobian, const int row, const int col,
    const T& value, const bool expand_by_row) {
  (*jacobian)(row, col) = JAC_T{value};
}

template <typename T, typename JAC_T>
typename std::enable_if<is_complex<T>::value>::type SetJacobian(
    typename TTypes<JAC_T>::Matrix* jacobian, const int row, const int col,
    const T& value, const bool expand_by_row) {
  (*jacobian)(row, col) = JAC_T{value.real()};
  if (expand_by_row) {
    (*jacobian)(row + 1, col) = JAC_T{value.imag()};
  } else {
    (*jacobian)(row, col + 1) = JAC_T{value.imag()};
  }
}

// JacobianStride<T>::value holds the number of Jacobian elements needed to
// represent one element of the given type.
// When T is real the stride is 1, and when T is complex the stride is 2.
template <typename T>
struct JacobianStride {};  // Specializations below

#define SET_JACOBIAN_STRIDE(TYPE, VALUE) \
  template <>                            \
  struct JacobianStride<TYPE> {          \
    static constexpr int value = VALUE;  \
  }

SET_JACOBIAN_STRIDE(float, 1);
SET_JACOBIAN_STRIDE(double, 1);
SET_JACOBIAN_STRIDE(complex64, 2);
SET_JACOBIAN_STRIDE(complex128, 2);
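
// For example, a complex64 tensor with 3 elements maps to 3 * 2 = 6 Jacobian
// rows (or columns), since each element contributes one real and one
// imaginary entry.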

template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeTheoreticalJacobianTranspose(
    const Scope& scope, const OutputList& xs,
    const std::vector<TensorShape>& x_shapes,
    const std::vector<Tensor>& x_datas, const OutputList& ys,
    const std::vector<TensorShape>& y_shapes,
    std::vector<Tensor>* jacobian_ts) {
  size_t y_num = y_shapes.size();
  size_t x_num = x_shapes.size();
  // Call AddSymbolicGradients to get 'dxs' (we will feed 'dys').
  OutputList dys;
  dys.reserve(y_shapes.size());
  for (const auto& y_shape : y_shapes) {
    // TODO(suharshs): This currently assumes that all y's are the same type.
    dys.push_back(
        ops::Cast(scope, ops::Const(scope, 1.0, y_shape), ys[0].type()));
  }
  OutputList dxs;
  TF_RETURN_IF_ERROR(AddSymbolicGradients(scope, ys, xs, dys, &dxs));

  // Initialize 'dy_data' to zeros.
  std::vector<Tensor> dy_datas(y_num);
  for (int i = 0; i < y_num; i++) {
    dy_datas[i] = Tensor(ys[i].type(), y_shapes[i]);
    auto dy_data_flat = dy_datas[i].flat<Y_T>();
    dy_data_flat.setZero();
  }

  // Create the feed list.
  ClientSession::FeedType feed_list;
  for (int i = 0; i < x_num; i++) {
    feed_list.insert({xs[i], x_datas[i]});
  }
  for (int i = 0; i < y_num; i++) {
    feed_list.insert({dys[i], dy_datas[i]});
  }

  // x_stride and y_stride are used to calculate the correct jacobian row and
  // column position for a pair of elements at positions r, c within the x and y
  // tensors respectively.
  const int x_stride = JacobianStride<X_T>::value;
  const int y_stride = JacobianStride<Y_T>::value;
  ClientSession session(scope);
  for (int y_idx = 0; y_idx < y_num; y_idx++) {
    auto dy_data_flat = dy_datas[y_idx].flat<Y_T>();
    const int64 dy_size = y_shapes[y_idx].num_elements();

    // Compute the theoretical Jacobians one row at a time by back propagating
    // '1.0' (or '1' and 'i' if y is complex) for each element of 'dy', while
    // holding all other elements of 'dy' at zero.
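    // Feeding dy = e_c (one at position c, zero elsewhere) makes the gradient
    // graph return dx_r = sum_i dy_i * dy_i/dx_r = dy_c/dx_r, i.e. column c
    // of the transposed Jacobian filled in below.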
    for (int c = 0; c < dy_size; ++c) {
      int unit_dimension = 0;
      for (Y_T unit : BaseUnitsForType<Y_T>::values()) {
        dy_data_flat(c) = unit;

        std::vector<Tensor> dxout;
        TF_RETURN_IF_ERROR(session.Run(feed_list, dxs, &dxout));

        for (int x_idx = 0; x_idx < x_num; x_idx++) {
          const int64 x_size = x_shapes[x_idx].num_elements();
          auto jacobian = (*jacobian_ts)[x_idx * y_num + y_idx].matrix<JAC_T>();
          auto dx_flat = dxout[x_idx].flat<X_T>();
          for (int r = 0; r < x_size; ++r) {
            SetJacobian<X_T, JAC_T>(&jacobian, r * x_stride,
                                    c * y_stride + unit_dimension, dx_flat(r),
                                    true /* expand_by_row=true */);
          }
        }

        dy_data_flat(c) = Y_T{0};
        unit_dimension++;
      }
    }
  }
  return Status::OK();
}

Status EvaluateGraph(ClientSession* session, const OutputList& xs,
                     const OutputList& ys, std::vector<Tensor>* x_datas,
                     std::vector<Tensor>* y_datas) {
  // Create the feed list.
  ClientSession::FeedType feed_list;
  for (int i = 0; i < x_datas->size(); i++) {
    feed_list.insert({xs[i], (*x_datas)[i]});
  }

  TF_RETURN_IF_ERROR(session->Run(feed_list, ys, y_datas));
  for (int y_idx = 0; y_idx < y_datas->size(); y_idx++) {
    for (int x_idx = 0; x_idx < x_datas->size(); x_idx++) {
      Tensor y_data = (*y_datas)[y_idx];
      if (y_data.SharesBufferWith((*x_datas)[x_idx])) {
        // Create copies of outputs that share a buffer with any inputs, since
        // the underlying buffers of the input Tensors are not copied by some
        // operations (e.g. Identity), which can lead to incorrect results for
        // the centered difference calculation.
        (*y_datas)[y_idx] = tensor::DeepCopy(y_data);
      }
    }
  }
  return Status::OK();
}

template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeNumericJacobianTranspose(const Scope& scope, const OutputList& xs,
                                       const std::vector<TensorShape>& x_shapes,
                                       const OutputList& ys,
                                       const std::vector<TensorShape>& y_shapes,
                                       const JAC_T delta,
                                       std::vector<Tensor>* x_datas,
                                       std::vector<Tensor>* jacobian_ts) {
  size_t y_num = y_shapes.size();
  size_t x_num = x_shapes.size();
  // x_stride and y_stride are used to calculate the correct jacobian row and
  // column position for a pair of elements at positions r, c within the x and y
  // tensors respectively.
  const int x_stride = JacobianStride<X_T>::value;
  const int y_stride = JacobianStride<Y_T>::value;

  ClientSession session(scope);
  for (int x_idx = 0; x_idx < x_num; x_idx++) {
    auto x_data_flat = (*x_datas)[x_idx].flat<X_T>();
    const int64 x_size = x_shapes[x_idx].num_elements();

    // Compute the numeric Jacobian one column at a time by perturbing each
    // element of 'x_data' (positively and negatively) by 'delta', and
    // updating the jacobian with the centered difference. When x_data is
    // complex-valued, we perturb its real and imaginary parts separately.
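    // The centered difference approximates the derivative as
    //   dy/dx ~= (f(x + delta) - f(x - delta)) / (2 * delta),
    // whose truncation error is O(delta^2), compared to O(delta) for a
    // one-sided difference.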
    for (int r = 0; r < x_size; ++r) {
      int unit_dimension = 0;
      for (X_T unit : BaseUnitsForType<X_T>::values()) {
        X_T x_delta = unit * X_T{delta};
        // Store current value of 'x' at 'r'.
        X_T v = x_data_flat(r);
        // Evaluate at positive delta.
        x_data_flat(r) = v + x_delta;
        std::vector<Tensor> y_pos;
        TF_RETURN_IF_ERROR(EvaluateGraph(&session, xs, ys, x_datas, &y_pos));
        // Evaluate at negative delta.
        x_data_flat(r) = v - x_delta;
        std::vector<Tensor> y_neg;
        TF_RETURN_IF_ERROR(EvaluateGraph(&session, xs, ys, x_datas, &y_neg));

        for (int y_idx = 0; y_idx < y_num; y_idx++) {
          // Compute element-wise centered difference and store in each
          // Jacobian.
          auto y_pos_flat = y_pos[y_idx].flat<Y_T>();
          auto y_neg_flat = y_neg[y_idx].flat<Y_T>();
          const int64 y_size = y_shapes[y_idx].num_elements();
          const Y_T scale = 2 * delta;
          auto jacobian = (*jacobian_ts)[x_idx * y_num + y_idx].matrix<JAC_T>();
          for (int c = 0; c < y_size; ++c) {
            SetJacobian<Y_T, JAC_T>(&jacobian, r * x_stride + unit_dimension,
                                    c * y_stride,
                                    (y_pos_flat(c) - y_neg_flat(c)) / scale,
                                    false /* expand_by_row=false */);
          }
        }
        // Restore pre-perturbation value.
        x_data_flat(r) = v;
        unit_dimension++;
      }
    }
  }
  return Status::OK();
}

// The Jacobian is always a real-valued matrix.
// Given y = f(x) for tensors y and x, it contains the derivatives dy_i/dx_j for
// every pair y_i in y and x_j in x. Note that the Jacobian is defined directly
// over the elements of tensors y and x, and doesn't depend on their shapes.
//
// If x = (x_1, x_2, ..., x_m) and y = (y_1, y_2, ..., y_n), the matrix
// evaluated is actually the Jacobian transpose, defined as this m x n matrix:
//    dy_1/dx_1 dy_2/dx_1 ... dy_n/dx_1
//    dy_1/dx_2 dy_2/dx_2 ... dy_n/dx_2
//    .
//    .
//    .
//    dy_1/dx_m dy_2/dx_m ... dy_n/dx_m
//
// If x or y is complex, each complex entry is "expanded" into a real and
// imaginary entry, and the Jacobian is organized as above on the expanded list.
// e.g.
// [y1, y2] = Square([x1, x2]) where x and y are complex.
// Writing
//    x = [x1_real, x1_imag, x2_real, x2_imag]
//    y = [y1_real, y1_imag, y2_real, y2_imag]
// the Jacobian transpose is the 4x4 matrix:
//    dy1_real/dx1_real dy1_imag/dx1_real dy2_real/dx1_real dy2_imag/dx1_real
//    dy1_real/dx1_imag dy1_imag/dx1_imag dy2_real/dx1_imag dy2_imag/dx1_imag
//    dy1_real/dx2_real dy1_imag/dx2_real dy2_real/dx2_real dy2_imag/dx2_real
//    dy1_real/dx2_imag dy1_imag/dx2_imag dy2_real/dx2_imag dy2_imag/dx2_imag
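//
// As a sizing example (hypothetical shapes): two complex64 x tensors of 2
// elements each and one float y tensor of 3 elements yield 2 Jacobians, each
// of shape 4 x 3 (2 elements * stride 2 rows, 3 elements * stride 1 columns).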
template <typename X_T, typename Y_T, typename JAC_T>
void InitJacobians(const OutputList& xs,
                   const std::vector<TensorShape>& x_shapes,
                   const std::vector<TensorShape>& y_shapes,
                   std::vector<Tensor>* jacobians) {
  const size_t y_num = y_shapes.size();
  const size_t x_num = x_shapes.size();
  const DataType jacobian_type = DataTypeToEnum<JAC_T>::v();

  jacobians->resize(y_num * x_num);
  for (int x_idx = 0; x_idx < x_num; x_idx++) {
    // The number of rows is the number of elements in the x tensor multiplied
    // by the number of Jacobian entries needed to represent each x type.
    const int64 x_size =
        x_shapes[x_idx].num_elements() * JacobianStride<X_T>::value;
    for (int y_idx = 0; y_idx < y_num; y_idx++) {
      // The number of columns is the number of elements in the y tensor
      // multiplied by the number of Jacobian entries needed to represent each
      // y type.
      const int64 y_size =
          y_shapes[y_idx].num_elements() * JacobianStride<Y_T>::value;
      Tensor jacobian_t(jacobian_type, {x_size, y_size});
      auto jacobian_t_flat = jacobian_t.flat<JAC_T>();
      jacobian_t_flat.setZero();
      (*jacobians)[x_idx * y_num + y_idx] = std::move(jacobian_t);
    }
  }
}

template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeGradientErrorInternal(const Scope& scope, const OutputList& xs,
                                    const std::vector<TensorShape>& x_shapes,
                                    const OutputList& ys,
                                    const std::vector<TensorShape>& y_shapes,
                                    std::vector<Tensor>* x_datas,
                                    JAC_T* max_error) {
  // Initialize theoretical Jacobians to zeros.
  std::vector<Tensor> jacobian_ts;
  InitJacobians<X_T, Y_T, JAC_T>(xs, x_shapes, y_shapes, &jacobian_ts);

  // Compute theoretical Jacobian.
  TF_RETURN_IF_ERROR((ComputeTheoreticalJacobianTranspose<X_T, Y_T, JAC_T>(
      scope, xs, x_shapes, *x_datas, ys, y_shapes, &jacobian_ts)));

  // Initialize numeric Jacobian to zeros.
  std::vector<Tensor> jacobian_ns;
  InitJacobians<X_T, Y_T, JAC_T>(xs, x_shapes, y_shapes, &jacobian_ns);

  // Compute numeric Jacobian.
  TF_RETURN_IF_ERROR((ComputeNumericJacobianTranspose<X_T, Y_T, JAC_T>(
      scope, xs, x_shapes, ys, y_shapes, JAC_T{1e-3f}, x_datas, &jacobian_ns)));

  // Compute the maximum error over all pairs of theoretical and numeric
  // Jacobians. Initialize 'max_error' once, outside the loop, so the maximum
  // is taken across all pairs rather than only the last one.
  *max_error = 0.0;
  for (int i = 0; i < jacobian_ts.size(); i++) {
    auto jac_t = jacobian_ts[i].matrix<JAC_T>();
    auto jac_n = jacobian_ns[i].matrix<JAC_T>();
    for (int r = 0; r < jacobian_ts[i].dim_size(0); ++r) {
      for (int c = 0; c < jacobian_ts[i].dim_size(1); ++c) {
        auto cur_error = std::fabs(jac_t(r, c) - jac_n(r, c));
        // Treat any NaN as max_error and immediately return.
        // (Note that std::max may ignore NaN arguments.)
        if (std::isnan(cur_error)) {
          *max_error = cur_error;
          return Status::OK();
        }
        *max_error = std::max(*max_error, cur_error);
      }
    }
  }
  return Status::OK();
}

}  // namespace

template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeGradientError(const Scope& scope, const OutputList& xs,
                            const std::vector<TensorShape>& x_shapes,
                            const OutputList& ys,
                            const std::vector<TensorShape>& y_shapes,
                            JAC_T* max_error) {
  if (xs.size() != x_shapes.size()) {
    return errors::InvalidArgument("xs(size ", xs.size(),
                                   ") and x_shapes(size ", x_shapes.size(),
                                   ") must be the same size.");
  }
  if (ys.size() != y_shapes.size()) {
    return errors::InvalidArgument("ys(size ", ys.size(),
                                   ") and y_shapes(size ", y_shapes.size(),
                                   ") must be the same size.");
  }
  // Initialize 'x_datas' to random values.
  std::vector<Tensor> x_datas(x_shapes.size());
  for (int i = 0; i < x_shapes.size(); i++) {
    x_datas[i] = Tensor(xs[i].type(), x_shapes[i]);
    auto x_data_flat = x_datas[i].flat<X_T>();
    x_data_flat.setRandom();
  }
  // Compute gradient error.
  return ComputeGradientErrorInternal<X_T, Y_T, JAC_T>(
      scope, xs, x_shapes, ys, y_shapes, &x_datas, max_error);
}

template <typename X_T, typename Y_T, typename JAC_T>
Status ComputeGradientError(const Scope& scope, const Output& x,
                            const Tensor& x_init_value, const Output& y,
                            const TensorShape& y_shape, JAC_T* max_error) {
  // Initialize 'x_data' from 'x_init_value'.
  std::vector<Tensor> x_datas(1, Tensor(x_init_value));
  // Compute gradient error.
  return ComputeGradientErrorInternal<X_T, Y_T, JAC_T>(
      scope, {x}, {x_datas[0].shape()}, {y}, {y_shape}, &x_datas, max_error);
}
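
// A minimal usage sketch (illustrative only; 'Square' and the shapes are
// example choices, not part of this file):
//
//   Scope scope = Scope::NewRootScope();
//   auto x = ops::Placeholder(scope, DT_FLOAT);
//   auto y = ops::Square(scope, x);
//   float max_error;
//   TF_CHECK_OK((ComputeGradientError<float, float, float>(
//       scope, {x}, {TensorShape({2, 2})}, {y}, {TensorShape({2, 2})},
//       &max_error)));
//
// Gradient tests typically check max_error against a small threshold
// (e.g. 1e-3); a small value indicates the symbolic gradient agrees with the
// numeric one.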

#define INSTANTIATE_GRAD_ERR_TYPE(X_T, Y_T, JAC_T)                      \
  template Status ComputeGradientError<X_T, Y_T, JAC_T>(                \
      const Scope& scope, const OutputList& xs,                         \
      const std::vector<TensorShape>& x_shapes, const OutputList& ys,   \
      const std::vector<TensorShape>& y_shapes, JAC_T* max_error);      \
  template Status ComputeGradientError<X_T, Y_T, JAC_T>(                \
      const Scope& scope, const Output& x, const Tensor& x_init_value,  \
      const Output& y, const TensorShape& y_shape, JAC_T* max_error);

INSTANTIATE_GRAD_ERR_TYPE(float, float, float);
INSTANTIATE_GRAD_ERR_TYPE(double, float, double);
INSTANTIATE_GRAD_ERR_TYPE(double, double, double);
INSTANTIATE_GRAD_ERR_TYPE(complex64, float, float);
INSTANTIATE_GRAD_ERR_TYPE(float, complex64, float);
INSTANTIATE_GRAD_ERR_TYPE(complex64, complex64, float);
INSTANTIATE_GRAD_ERR_TYPE(complex128, complex128, double);

}  // namespace tensorflow