• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/cc/framework/gradient_checker.h"
17 
18 #include "tensorflow/cc/client/client_session.h"
19 #include "tensorflow/cc/framework/gradients.h"
20 #include "tensorflow/cc/ops/standard_ops.h"
21 #include "tensorflow/core/framework/tensor_util.h"
22 #include "tensorflow/core/framework/type_traits.h"
23 #include "tensorflow/core/lib/core/errors.h"
24 
25 namespace tensorflow {
26 namespace {
27 
28 // TODO(andydavis) Support returning relative error (as opposed to max error)
29 // between theoretical and numerical jacobians:
30 //   fabs(jac_t - jac_n) / max(fabs(jac_t), fabs(jac_n))
31 
32 // TODO(andydavis) Vectorize and/or multi-thread Jacobian computations if
33 // performance becomes an issue.
34 
35 // BaseUnitsForType provides a list of typed unit values for each basis in the
36 // requested type.
37 // When T is real,
38 // BaseUnitsForType<T>::values() is just a single-entry vector [1]
39 // When T is complex,
40 // BaseUnitsForType<T>::values() is a two-entry vector [1, i] - the unit
41 // values in each of its two bases.
42 template <typename T>
43 struct BaseUnitsForType {};  // Specializations below
44 
45 // Template specialization for BaseUnitsForType
46 #define SET_BASE_UNITS_FOR_TYPE(TYPE, INIT)                         \
47   template <>                                                       \
48   struct BaseUnitsForType<TYPE> {                                   \
49     static const std::vector<TYPE>& values() {                      \
50       static std::vector<TYPE>* units = new std::vector<TYPE> INIT; \
51       return *units;                                                \
52     }                                                               \
53   }
54 
55 SET_BASE_UNITS_FOR_TYPE(float, {1});
56 SET_BASE_UNITS_FOR_TYPE(double, {1});
57 SET_BASE_UNITS_FOR_TYPE(complex64, ({{1, 0}, {0, 1}}));
58 SET_BASE_UNITS_FOR_TYPE(complex128, ({{1, 0}, {0, 1}}));
59 
60 // SetJacobian sets the jacobian value at the provided row and column from a
61 // tensor entry with type T.
62 // When T is real, this is a simple assignment that casts the entry into the
63 // jacobian type.
64 // When T is complex, it assigns the real and complex values to successive rows
65 // or columns in the matrix depending on the expand_by_row parameter
66 template <typename T, typename JAC_T>
SetJacobian(typename TTypes<JAC_T>::Matrix * jacobian,const int row,const int col,const T & value,const bool expand_by_row)67 typename std::enable_if<std::is_floating_point<T>::value>::type SetJacobian(
68     typename TTypes<JAC_T>::Matrix* jacobian, const int row, const int col,
69     const T& value, const bool expand_by_row) {
70   (*jacobian)(row, col) = JAC_T{value};
71 }
72 
73 template <typename T, typename JAC_T>
SetJacobian(typename TTypes<JAC_T>::Matrix * jacobian,const int row,const int col,const T & value,const bool expand_by_row)74 typename std::enable_if<is_complex<T>::value>::type SetJacobian(
75     typename TTypes<JAC_T>::Matrix* jacobian, const int row, const int col,
76     const T& value, const bool expand_by_row) {
77   (*jacobian)(row, col) = JAC_T{value.real()};
78   if (expand_by_row) {
79     (*jacobian)(row + 1, col) = JAC_T{value.imag()};
80   } else {
81     (*jacobian)(row, col + 1) = JAC_T{value.imag()};
82   }
83 }
84 
85 // JacobianStride<T>::value holds the number of Jacobian elements needed to
86 // represent one element of the given type.
87 // When T is real the stride is 1, and when T is complex the stride is 2.
88 template <typename T>
89 struct JacobianStride {};  // Specializations below
90 
91 #define SET_JACOBIAN_STRIDE(TYPE, VALUE) \
92   template <>                            \
93   struct JacobianStride<TYPE> {          \
94     static constexpr int value = VALUE;  \
95   }
96 
97 SET_JACOBIAN_STRIDE(float, 1);
98 SET_JACOBIAN_STRIDE(double, 1);
99 SET_JACOBIAN_STRIDE(complex64, 2);
100 SET_JACOBIAN_STRIDE(complex128, 2);
101 
102 template <typename X_T, typename Y_T, typename JAC_T>
ComputeTheoreticalJacobianTranspose(const Scope & scope,const OutputList & xs,const std::vector<TensorShape> & x_shapes,const std::vector<Tensor> & x_datas,const OutputList & ys,const std::vector<TensorShape> & y_shapes,std::vector<Tensor> * jacobian_ts)103 Status ComputeTheoreticalJacobianTranspose(
104     const Scope& scope, const OutputList& xs,
105     const std::vector<TensorShape>& x_shapes,
106     const std::vector<Tensor>& x_datas, const OutputList& ys,
107     const std::vector<TensorShape>& y_shapes,
108     std::vector<Tensor>* jacobian_ts) {
109   size_t y_num = y_shapes.size();
110   size_t x_num = x_shapes.size();
111   // Call AddSymbolicGradients to get 'dxs' (we will feed 'dys').
112   OutputList dys;
113   dys.reserve(y_shapes.size());
114   for (const auto& y_shape : y_shapes) {
115     // TODO(suharshs): This currently assumes that all y's are the same type.
116     dys.push_back(
117         ops::Cast(scope, ops::Const(scope, 1.0, y_shape), ys[0].type()));
118   }
119   OutputList dxs;
120   TF_RETURN_IF_ERROR(AddSymbolicGradients(scope, ys, xs, dys, &dxs));
121 
122   // Initialize 'dy_data' to zeros.
123   std::vector<Tensor> dy_datas(y_num);
124   for (int i = 0; i < y_num; i++) {
125     dy_datas[i] = Tensor(ys[i].type(), y_shapes[i]);
126     auto dy_data_flat = dy_datas[i].flat<Y_T>();
127     dy_data_flat.setZero();
128   }
129 
130   // Create the feed list.
131   ClientSession::FeedType feed_list;
132   for (int i = 0; i < x_num; i++) {
133     feed_list.insert({xs[i], x_datas[i]});
134   }
135   for (int i = 0; i < y_num; i++) {
136     feed_list.insert({dys[i], dy_datas[i]});
137   }
138 
139   // x_stride and y_stride are used to calculate the correct jacobian row and
140   // column position for a pair of elements at positions r, c within the x and y
141   // tensors respectively.
142   const int x_stride = JacobianStride<X_T>::value;
143   const int y_stride = JacobianStride<Y_T>::value;
144   ClientSession session(scope);
145   for (int y_idx = 0; y_idx < y_num; y_idx++) {
146     auto dy_data_flat = dy_datas[y_idx].flat<Y_T>();
147     const int64 dy_size = y_shapes[y_idx].num_elements();
148 
149     // Compute the theoretical Jacobians one row at a time by back propagating
150     // '1.0' (or '1' and 'i' if y is complex) for each element of 'dy', while
151     // holding all other elements of 'dy' at zero.
152     for (int c = 0; c < dy_size; ++c) {
153       int unit_dimension = 0;
154       for (Y_T unit : BaseUnitsForType<Y_T>::values()) {
155         dy_data_flat(c) = unit;
156 
157         std::vector<Tensor> dxout;
158         TF_RETURN_IF_ERROR(session.Run(feed_list, dxs, &dxout));
159 
160         for (int x_idx = 0; x_idx < x_num; x_idx++) {
161           const int64 x_size = x_shapes[x_idx].num_elements();
162           auto jacobian = (*jacobian_ts)[x_idx * y_num + y_idx].matrix<JAC_T>();
163           auto dx_flat = dxout[x_idx].flat<X_T>();
164           for (int r = 0; r < x_size; ++r) {
165             SetJacobian<X_T, JAC_T>(&jacobian, r * x_stride,
166                                     c * y_stride + unit_dimension, dx_flat(r),
167                                     true /* expand_by_row=true */);
168           }
169         }
170 
171         dy_data_flat(c) = Y_T{0};
172         unit_dimension++;
173       }
174     }
175   }
176   return Status::OK();
177 }
178 
EvaluateGraph(ClientSession * session,const OutputList & xs,const OutputList & ys,std::vector<Tensor> * x_datas,std::vector<Tensor> * y_datas)179 Status EvaluateGraph(ClientSession* session, const OutputList& xs,
180                      const OutputList& ys, std::vector<Tensor>* x_datas,
181                      std::vector<Tensor>* y_datas) {
182   // Create the feed list.
183   ClientSession::FeedType feed_list;
184   for (int i = 0; i < x_datas->size(); i++) {
185     feed_list.insert({xs[i], (*x_datas)[i]});
186   }
187 
188   TF_RETURN_IF_ERROR(session->Run(feed_list, ys, y_datas));
189   for (int y_idx = 0; y_idx < y_datas->size(); y_idx++) {
190     for (int x_idx = 0; x_idx < x_datas->size(); x_idx++) {
191       Tensor y_data = (*y_datas)[y_idx];
192       if (y_data.SharesBufferWith((*x_datas)[x_idx])) {
193         // Create copies of outputs that share a buffer with any inputs since
194         // the underlying buffer of the input Tensors are not copied for some
195         // operations (i.e. Identity), which can lead to incorrect results for
196         // the centered difference calculation.
197         (*y_datas)[y_idx] = tensor::DeepCopy(y_data);
198       }
199     }
200   }
201   return Status::OK();
202 }
203 
204 template <typename X_T, typename Y_T, typename JAC_T>
ComputeNumericJacobianTranspose(const Scope & scope,const OutputList & xs,const std::vector<TensorShape> & x_shapes,const OutputList & ys,const std::vector<TensorShape> & y_shapes,const JAC_T delta,std::vector<Tensor> * x_datas,std::vector<Tensor> * jacobian_ts)205 Status ComputeNumericJacobianTranspose(const Scope& scope, const OutputList& xs,
206                                        const std::vector<TensorShape>& x_shapes,
207                                        const OutputList& ys,
208                                        const std::vector<TensorShape>& y_shapes,
209                                        const JAC_T delta,
210                                        std::vector<Tensor>* x_datas,
211                                        std::vector<Tensor>* jacobian_ts) {
212   size_t y_num = y_shapes.size();
213   size_t x_num = x_shapes.size();
214   // x_stride and y_stride are used to calculate the correct jacobian row and
215   // column position for a pair of elements at positions r, c within the x and y
216   // tensors respectively.
217   const int x_stride = JacobianStride<X_T>::value;
218   const int y_stride = JacobianStride<Y_T>::value;
219 
220   ClientSession session(scope);
221   for (int x_idx = 0; x_idx < x_num; x_idx++) {
222     auto x_data_flat = (*x_datas)[x_idx].flat<X_T>();
223     const int64 x_size = x_shapes[x_idx].num_elements();
224 
225     // Compute the numeric Jacobian one column at a time by perturbing each
226     // element of 'x_data' (positively and negatively) by 'delta', and
227     // updating the jacobian with the centered difference. When x_data is
228     // complex-valued, we perturb its real and complex parts separately.
229     for (int r = 0; r < x_size; ++r) {
230       int unit_dimension = 0;
231       for (X_T unit : BaseUnitsForType<X_T>::values()) {
232         X_T x_delta = unit * X_T{delta};
233         // Store current value of 'x' at 'r'.
234         X_T v = x_data_flat(r);
235         // Evaluate at positive delta.
236         x_data_flat(r) = v + x_delta;
237         std::vector<Tensor> y_pos;
238         TF_RETURN_IF_ERROR(EvaluateGraph(&session, xs, ys, x_datas, &y_pos));
239         // Evaluate at negative delta.
240         x_data_flat(r) = v - x_delta;
241         std::vector<Tensor> y_neg;
242         TF_RETURN_IF_ERROR(EvaluateGraph(&session, xs, ys, x_datas, &y_neg));
243 
244         for (int y_idx = 0; y_idx < y_num; y_idx++) {
245           // Compute element-wise centered difference and store in each
246           // Jacobian.
247           auto y_pos_flat = y_pos[y_idx].flat<Y_T>();
248           auto y_neg_flat = y_neg[y_idx].flat<Y_T>();
249           const int64 y_size = y_shapes[y_idx].num_elements();
250           const Y_T scale = 2 * delta;
251           auto jacobian = (*jacobian_ts)[x_idx * y_num + y_idx].matrix<JAC_T>();
252           for (int c = 0; c < y_size; ++c) {
253             SetJacobian<Y_T, JAC_T>(&jacobian, r * x_stride + unit_dimension,
254                                     c * y_stride,
255                                     (y_pos_flat(c) - y_neg_flat(c)) / scale,
256                                     false /* expand_by_row=false */);
257           }
258         }
259         // Restore pre-perturbation value.
260         x_data_flat(r) = v;
261         unit_dimension++;
262       }
263     }
264   }
265   return Status::OK();
266 }
267 
268 // The Jacobian is always a real-valued matrix.
269 // Given y = f(x) for tensors y and x, it contains the derivatives dy_i/dx_j for
270 // every pair y_i in y and x_j in x.  Note that the Jacobian is defined directly
271 // over the elements of tensors y and x, and doesn't depend on their shapes.
272 //
273 // If x = (x_1, x_2, ..., x_m) and y = (y_1, y_2, .., y_n) the matrix evaluated
274 // is actually the Jacobian transpose, defined as this mxn matrix:
275 // dy_1/d_x1 dy_2/dx_1 ... dy_n/dx_1
276 // dy_1/dx_2 dy_2/dx_2 ... dy_n/dx_2
277 //     .
278 //     .
279 //     .
280 // dy_1/dx_m dy_2/dx_m ... dy_n/dx_m
281 //
282 // If x or y is complex, each complex entry is "expanded" into a real and
283 // imaginary entry, and the Jacobian is organized as above on the expanded list.
284 // e.g.
285 // [y1, y2] = Square([x1, x2]) where x and y are complex.
286 // Writing
287 // x = [x1_real, x1_imag, x2_real, x2_imag]
288 // y = [y1_real, y1_imag, y2_real, y2_imag]
289 // the Jacobian transpose is
290 // the 4x4 matrix:
291 // dy1_real/dx1_real dy1_imag/dx1_real dy2_real/dx1_real dy2_imag/dx1_real
292 // dy1_real/dx1_imag dy1_imag/dx1_imag dy2_real/dx1_imag dy2_imag/dx1_imag
293 // dy1_real/dx2_real dy1_imag/dx2_real dy2_real/dx2_real dy2_imag/dx2_real
294 // dy1_real/dx2_imag dy1_imag/dx2_imag dy2_real/dx2_imag dy2_imag/dx2_imag
295 template <typename X_T, typename Y_T, typename JAC_T>
InitJacobians(const OutputList & xs,const std::vector<TensorShape> & x_shapes,const std::vector<TensorShape> & y_shapes,std::vector<Tensor> * jacobians)296 void InitJacobians(const OutputList& xs,
297                    const std::vector<TensorShape>& x_shapes,
298                    const std::vector<TensorShape>& y_shapes,
299                    std::vector<Tensor>* jacobians) {
300   const size_t y_num = y_shapes.size();
301   const size_t x_num = x_shapes.size();
302   const DataType jacobian_type = DataTypeToEnum<JAC_T>::v();
303 
304   jacobians->resize(y_num * x_num);
305   for (int x_idx = 0; x_idx < x_num; x_idx++) {
306     // The number of rows is the number of elements in the x tensor multiplied
307     // by the number of Jacobian entries needed to represent each x type.
308     const int64 x_size =
309         x_shapes[x_idx].num_elements() * JacobianStride<X_T>::value;
310     for (int y_idx = 0; y_idx < y_num; y_idx++) {
311       // The number of columns is the number of elements in the y tensor
312       // multiplied by the number of Jacobian entries needed to represent each
313       // y type.
314       const int64 y_size =
315           y_shapes[y_idx].num_elements() * JacobianStride<Y_T>::value;
316       Tensor jacobian_t(jacobian_type, {x_size, y_size});
317       auto jacobian_t_flat = jacobian_t.flat<JAC_T>();
318       jacobian_t_flat.setZero();
319       (*jacobians)[x_idx * y_num + y_idx] = std::move(jacobian_t);
320     }
321   }
322 }
323 
324 template <typename X_T, typename Y_T, typename JAC_T>
ComputeGradientErrorInternal(const Scope & scope,const OutputList & xs,const std::vector<TensorShape> & x_shapes,const OutputList & ys,const std::vector<TensorShape> & y_shapes,std::vector<Tensor> * x_datas,JAC_T * max_error)325 Status ComputeGradientErrorInternal(const Scope& scope, const OutputList& xs,
326                                     const std::vector<TensorShape>& x_shapes,
327                                     const OutputList& ys,
328                                     const std::vector<TensorShape>& y_shapes,
329                                     std::vector<Tensor>* x_datas,
330                                     JAC_T* max_error) {
331   // Initialize theoretical Jacobians to zeros.
332   std::vector<Tensor> jacobian_ts;
333   InitJacobians<X_T, Y_T, JAC_T>(xs, x_shapes, y_shapes, &jacobian_ts);
334 
335   // Compute theoretical Jacobian.
336   TF_RETURN_IF_ERROR((ComputeTheoreticalJacobianTranspose<X_T, Y_T, JAC_T>(
337       scope, xs, x_shapes, *x_datas, ys, y_shapes, &jacobian_ts)));
338 
339   // Initialize numeric Jacobian to zeros.
340   std::vector<Tensor> jacobian_ns;
341   InitJacobians<X_T, Y_T, JAC_T>(xs, x_shapes, y_shapes, &jacobian_ns);
342 
343   // Compute numeric Jacobian.
344   TF_RETURN_IF_ERROR((ComputeNumericJacobianTranspose<X_T, Y_T, JAC_T>(
345       scope, xs, x_shapes, ys, y_shapes, JAC_T{1e-3f}, x_datas, &jacobian_ns)));
346 
347   for (int i = 0; i < jacobian_ts.size(); i++) {
348     // Compute the maximum error between theoretical and numeric Jacobians.
349     *max_error = 0.0;
350     auto jac_t = jacobian_ts[i].matrix<JAC_T>();
351     auto jac_n = jacobian_ns[i].matrix<JAC_T>();
352     for (int r = 0; r < jacobian_ts[i].dim_size(0); ++r) {
353       for (int c = 0; c < jacobian_ts[i].dim_size(1); ++c) {
354         auto cur_error = std::fabs(jac_t(r, c) - jac_n(r, c));
355         // Treat any NaN as max_error and immediately return.
356         // (Note that std::max may ignore NaN arguments.)
357         if (std::isnan(cur_error)) {
358           *max_error = cur_error;
359           return Status::OK();
360         }
361         *max_error = std::max(*max_error, cur_error);
362       }
363     }
364   }
365   return Status::OK();
366 }
367 
368 }  // namespace
369 
370 template <typename X_T, typename Y_T, typename JAC_T>
ComputeGradientError(const Scope & scope,const OutputList & xs,const std::vector<TensorShape> & x_shapes,const OutputList & ys,const std::vector<TensorShape> & y_shapes,JAC_T * max_error)371 Status ComputeGradientError(const Scope& scope, const OutputList& xs,
372                             const std::vector<TensorShape>& x_shapes,
373                             const OutputList& ys,
374                             const std::vector<TensorShape>& y_shapes,
375                             JAC_T* max_error) {
376   if (xs.size() != x_shapes.size()) {
377     return errors::InvalidArgument("xs(size ", xs.size(),
378                                    ") and x_shapes(size ", x_shapes.size(),
379                                    ") must be the same size.");
380   }
381   if (ys.size() != y_shapes.size()) {
382     return errors::InvalidArgument("ys(size ", ys.size(),
383                                    ") and y_shapes(size ", y_shapes.size(),
384                                    ") must be the same size.");
385   }
386   // Initialize 'x_datas' to random values.
387   std::vector<Tensor> x_datas(x_shapes.size());
388   for (int i = 0; i < x_shapes.size(); i++) {
389     x_datas[i] = Tensor(xs[i].type(), x_shapes[i]);
390     auto x_data_flat = x_datas[i].flat<X_T>();
391     x_data_flat.setRandom();
392   }
393   // Compute gradient error.
394   return ComputeGradientErrorInternal<X_T, Y_T, JAC_T>(
395       scope, xs, x_shapes, ys, y_shapes, &x_datas, max_error);
396 }
397 
398 template <typename X_T, typename Y_T, typename JAC_T>
ComputeGradientError(const Scope & scope,const Output & x,const Tensor & x_init_value,const Output & y,const TensorShape & y_shape,JAC_T * max_error)399 Status ComputeGradientError(const Scope& scope, const Output& x,
400                             const Tensor& x_init_value, const Output& y,
401                             const TensorShape& y_shape, JAC_T* max_error) {
402   // Initialize 'x_data' from 'x_init_value'.
403   std::vector<Tensor> x_datas(1, Tensor(x_init_value));
404   // Compute gradient error.
405   return ComputeGradientErrorInternal<X_T, Y_T, JAC_T>(
406       scope, {x}, {x_datas[0].shape()}, {y}, {y_shape}, &x_datas, max_error);
407 }
408 
409 #define INSTANTIATE_GRAD_ERR_TYPE(X_T, Y_T, JAC_T)                     \
410   template Status ComputeGradientError<X_T, Y_T, JAC_T>(               \
411       const Scope& scope, const OutputList& xs,                        \
412       const std::vector<TensorShape>& x_shapes, const OutputList& ys,  \
413       const std::vector<TensorShape>& y_shapes, JAC_T* max_error);     \
414   template Status ComputeGradientError<X_T, Y_T, JAC_T>(               \
415       const Scope& scope, const Output& x, const Tensor& x_init_value, \
416       const Output& y, const TensorShape& y_shape, JAC_T* max_error);
417 
418 INSTANTIATE_GRAD_ERR_TYPE(float, float, float);
419 INSTANTIATE_GRAD_ERR_TYPE(double, float, double);
420 INSTANTIATE_GRAD_ERR_TYPE(double, double, double);
421 INSTANTIATE_GRAD_ERR_TYPE(complex64, float, float);
422 INSTANTIATE_GRAD_ERR_TYPE(float, complex64, float);
423 INSTANTIATE_GRAD_ERR_TYPE(complex64, complex64, float);
424 INSTANTIATE_GRAD_ERR_TYPE(complex128, complex128, double);
425 
426 }  // namespace tensorflow
427