# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

16"""Implementation of Numerical gradients checking."""
# pylint: disable=missing-docstring

from typing import Callable, List, Any

import numpy as np
import mindspore._c_expression as _c_expression

from mindspore import ParameterTuple
from mindspore import Tensor
from mindspore import context
from mindspore.ops.composite import GradOperation
from .block_util import get_output_cell, gen_net, gen_grad_net, \
    get_uniform_with_shape, set_block_phase, get_output_reduce_cell, set_block_param_with_rand


class _GradChecker:
    """
    Check the theoretical Jacobian against its numeric estimate.

    Args:
        fn: The function under test.
        grad_wraper: The GradOperation used to build the derivative function.
        args: The point in the function's domain where we want
            to estimate the gradient.

    """

    def __init__(self,
                 fn: Callable,
                 grad_wraper: GradOperation,
                 args: List[Any],
                 delta: float = 1e-3,
                 max_error: float = 1e-3,
                 input_selector=None,
                 output_selector=None,
                 sampling_times=-1,
                 reduce_output=False) -> None:
        """Initialize a GradChecker."""
        self.delta = delta
        self.scale = 2 * delta
        self.max_error = max_error
        self.sampling_times = sampling_times

        self.fn = self.prepare_func(fn)

        self.args = args
        out = self.fn(*self.args)
        self.out = self.wrap(out)

        self.nin = len(self.args)
        self.nout = len(self.out)
        self.gfns = []

        if reduce_output:
            fn = get_output_reduce_cell(fn, self.nout)
            self.fn = self.prepare_func(fn)
            out = self.fn(*self.args)
            self.out = self.wrap(out)

        if self.nout == 0:
            raise Exception(f'number of outputs expected to be >=1, but got {self.nout}')

        if self.nout == 1:
            self.gfns.append(self.prepare_func(fn, grad_wraper))
        else:
            for i in range(self.nout):
                cell = get_output_cell(fn, self.nin, i)
                self.gfns.append(self.prepare_func(cell, grad_wraper))

        self.input_selector = input_selector
        self.adjust_input_selector()
        if output_selector:
            self.output_selector = output_selector
        else:
            self.output_selector = [i for i in range(self.nout)]

    def adjust_input_selector(self):
        raise Exception('Not implemented')

    def sampling(self, superset):
        # -1 stands for all
        if self.sampling_times == -1 or self.sampling_times >= len(superset):
            return superset
        np.random.seed(0)
        ret = np.random.choice(superset, self.sampling_times, replace=False)
        return list(ret)

    def prepare_func(self, f, grad_wraper=None):
        """Return a function that executes 'f'.

        Args:
            f: the function.
            grad_wraper: the GradOperation used to wrap 'f'; if None, the plain
                forward function is returned.

        Returns:
            A function that evaluates 'f' in either graph or PyNative mode,
            depending on the current context.
        """
        set_block_param_with_rand(f, get_uniform_with_shape)

        if context.get_context("mode") == context.PYNATIVE_MODE:
            if grad_wraper:
                def func_backward_pynative(*inputs):
                    net = gen_grad_net(f, grad_wraper, len(inputs) - 1, inputs[-1])

                    def _func_pynative(*inputs):
                        return net(*inputs)

                    return _func_pynative(*(inputs[:-1]))

                return func_backward_pynative

            def func_forward_pynative(*inputs):
                net = gen_net(f, len(inputs))

                def _func_pynative(*inputs):
                    return net(*inputs)

                return _func_pynative(*inputs)

            return func_forward_pynative

        if grad_wraper:
            def func_backward_graph(*inputs):
                set_block_phase(f, 'train')
                net = gen_grad_net(f, grad_wraper, len(inputs) - 1, inputs[-1])
                return net(*(inputs[:-1]))

            return func_backward_graph

        def func_forward_graph(*inputs):
            set_block_phase(f, 'predict')
            net = gen_net(f, len(inputs))
            return net(*inputs)

        return func_forward_graph

    def to_numpy(self, x):
        if isinstance(x, (Tensor, _c_expression.Tensor)):
            return x.asnumpy()
        return x

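    # Note (added for clarity): both the numeric central differences and the
    # theoretical gradients are passed through this delta-scaling, so the two
    # sides remain directly comparable and `max_error` acts as an absolute
    # tolerance on the delta-scaled values.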
    def to_numpy_and_scale(self, x):
        if isinstance(x, (Tensor, _c_expression.Tensor)):
            return x.asnumpy() * self.delta
        return x * self.delta

    def wrap(self, x):
        if isinstance(x, tuple):
            return x
        return (x,)

    def get_sens(self, i):
        raise Exception('Not implemented')

    def get_ith_elem(self, c, i):
        if isinstance(c, (list, tuple)):
            return c[i]
        return c

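    # Note (added for clarity): compute_theoretical appends the sensitivity for
    # output `i` to the arguments and runs the corresponding grad function, so
    # self.gout holds the delta-scaled vector-Jacobian product with respect to
    # each checked input (the inputs for OperationGradChecker, the trainable
    # parameters for NNGradChecker), evaluated at self.args.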
    def compute_theoretical(self, i):
        args = list(self.args)
        args.append(self.get_sens(i))

        print('GradChecker.compute_theoretical.args', args)
        gout = self.gfns[i](*args)
        gout = self.wrap(gout)
        self.gout = [self.to_numpy_and_scale(g) if isinstance(g, _c_expression.Tensor) \
                         else self.to_numpy_and_scale(np.array(g)) for g in gout]
        print('GradChecker.compute_theoretical.gout', self.gout)

    def check_against_numeric(self, out_index):
        raise Exception('Not implemented')

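    # Note (added for clarity): for every sampled element of the checked input,
    # perturb it by +/- delta, evaluate the delta-scaled output `out_index`, and
    # form the central difference (y_pos - y_neg) / (2 * delta). Its sum is then
    # compared against the matching element of self.gout, i.e. the delta-scaled
    # VJP computed with an all-ones sensitivity in compute_theoretical.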
    def check_against_numeric_one_step(self, args, index, out_index):
        if isinstance(args, ParameterTuple):
            x = args[index].data.asnumpy()
        else:
            x = args[index]
        x_shape = x.shape
        x_size = np.product(x_shape)
        for row in self.sampling(list(range(x_size))):
            original = x.ravel().view()[row]
            x.ravel().view()[row] += self.delta
            y_pos = self.to_numpy_and_scale(self.get_ith_elem(self.fn(*self.args), out_index))
            x.ravel().view()[row] = original
            x.ravel().view()[row] -= self.delta
            y_neg = self.to_numpy_and_scale(self.get_ith_elem(self.fn(*self.args), out_index))
            x.ravel().view()[row] = original
            diff = (y_pos - y_neg) / self.scale
            numeric_grad = diff.sum()
            insert_virtual_grad = False
            if numeric_grad == 0 and not insert_virtual_grad:
                self.gout.insert(0, 0)
                insert_virtual_grad = True
                continue
            theoretical_grad = self.gout[index].ravel().view()[row]

            if np.fabs(numeric_grad - theoretical_grad).max() > self.max_error:
                raise Exception(f'Gradients of df{out_index}/darg{index},{row} do not match, '
                                f'expect {numeric_grad}, actual {theoretical_grad}')

            print(f'GradChecker.check_against_numeric.numeric df{out_index}/darg{index}: '
                  f'{numeric_grad}, theoretical: {theoretical_grad}')

    # approximate accuracy, but efficient
    def assert_match(self):
        print(f'==========================={self.fn.__name__}==================================')
        print('GradChecker.delta', self.delta)
        print('GradChecker.max_error', self.max_error)
        print('GradChecker.args', self.args)
        print('GradChecker.out', self.out)
        print('GradChecker.nin', self.nin)
        print('GradChecker.nout', self.nout)
        for i in self.output_selector:
            self.compute_theoretical(i)
            self.check_against_numeric(i)

    def check_against_numeric_jacobian(self, out_index):
        raise Exception('Not implemented')

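    # Note (added for clarity): the numeric Jacobian of output `out_index` with
    # respect to input `index` is built row by row (one row per perturbed input
    # element, using the same central difference as above) and compared entrywise
    # against the theoretical Jacobian, which is assembled column by column from
    # VJPs with a one-hot sensitivity (see compute_theoretical_jacobian).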
    def check_against_numeric_jacobian_one_step(self, args, index, out_index):
        if isinstance(args, ParameterTuple):
            x = args[index].data.asnumpy()
        else:
            x = args[index]
        x_shape = x.shape
        x_size = np.product(x_shape)
        dy = self.to_numpy(self.get_sens(out_index))
        dy_size = np.product(dy.shape)
        numeric_jacobian = np.zeros((x_size, dy_size), dtype=self.to_numpy(x).dtype)
        for row in range(x_size):
            original = x.ravel().view()[row]
            x.ravel().view()[row] += self.delta
            y_pos = self.to_numpy_and_scale(self.get_ith_elem(self.fn(*self.args), out_index))
            x.ravel().view()[row] = original
            x.ravel().view()[row] -= self.delta
            y_neg = self.to_numpy_and_scale(self.get_ith_elem(self.fn(*self.args), out_index))
            x.ravel().view()[row] = original
            diff = (y_pos - y_neg) / self.scale
            numeric_jacobian[row, :] = diff.ravel().view(numeric_jacobian.dtype)

        dy_mask = np.zeros(dy.shape, dtype=dy.dtype)
        theoretical_jacobian = np.zeros((x_size, dy_size), dtype=self.to_numpy(x).dtype)
        for col in range(dy_size):
            col_jacobian = self.compute_theoretical_jacobian(index, out_index, dy_mask, col)
            theoretical_jacobian[:, col] = col_jacobian.ravel().view(theoretical_jacobian.dtype)

        if np.fabs(numeric_jacobian - theoretical_jacobian).max() > self.max_error:
            raise Exception(f'GradChecker.check_against_numeric_jacobian_one_step expect {out_index}/darg{index}: '
                            f'{numeric_jacobian}, actual: {theoretical_jacobian}')

        print(f'GradChecker.check_against_numeric_jacobian_one_step.numeric jacobian of output{out_index}/darg{index}: '
              f'{numeric_jacobian}, theoretical: {theoretical_jacobian}')

    def compute_theoretical_jacobian(self, index, out_index, dy_mask, jacobian_col):
        if (out_index, jacobian_col, index) in self.theoretical_jacobian_cache:
            return self.theoretical_jacobian_cache[(out_index, jacobian_col, index)]

        dy_mask.ravel().view()[jacobian_col] = 1.0
        args = list(self.args)
        args.append(Tensor(dy_mask))
        print('GradChecker.compute_theoretical_jacobian.args', args)
        gout = self.wrap(self.gfns[out_index](*args))
        gout = [self.to_numpy_and_scale(g) if isinstance(g, _c_expression.Tensor) \
                    else self.to_numpy_and_scale(np.array(g)) for g in gout]
        print('GradChecker.compute_theoretical_jacobian.gout', gout)
        dy_mask.ravel().view()[jacobian_col] = 0.0

        for i, g in enumerate(gout):
            self.theoretical_jacobian_cache[(out_index, jacobian_col, i)] = g

        return gout[index]

    # more accurate, but inefficient
    def assert_match_jacobian(self):
        print(f'==========================={self.fn.__name__}==================================')
        print('GradChecker.delta', self.delta)
        print('GradChecker.max_error', self.max_error)
        print('GradChecker.args', self.args)
        print('GradChecker.out', self.out)
        print('GradChecker.nin', self.nin)
        print('GradChecker.nout', self.nout)

        self.theoretical_jacobian_cache = {}
        for i in self.output_selector:
            self.check_against_numeric_jacobian(i)


class ScalarGradChecker(_GradChecker):
    def __init__(self,
                 fn: Callable,
                 args: List[Any],
                 delta: float = 1e-3,
                 max_error: float = 1e-3,
                 input_selector=None,
                 output_selector=None,
                 sampling_times=-1,
                 reduce_output=False) -> None:
        grad_op = GradOperation(get_all=True, sens_param=True)
        super(ScalarGradChecker, self).__init__(fn, grad_op, args, delta, max_error, input_selector, \
                                                output_selector, sampling_times, reduce_output)

    def adjust_input_selector(self):
        if not self.input_selector:
            self.input_selector = [i for i in range(self.nin)]

    def get_sens(self, i):
        return 1.0

    def check_against_numeric(self, out_index):
        args = list(self.args)
        for i in self.sampling(self.input_selector):
            print(f'GradChecker.check_against_numeric.args[{i}]', args[i])
            args_pos = args[:i] + [args[i] + self.delta] + args[i + 1:]
            args_neg = args[:i] + [args[i] - self.delta] + args[i + 1:]
            y_pos = self.to_numpy_and_scale(self.get_ith_elem(self.fn(*args_pos), out_index))
            y_neg = self.to_numpy_and_scale(self.get_ith_elem(self.fn(*args_neg), out_index))
            diff = (y_pos - y_neg) / self.scale

            if np.fabs(diff - self.gout[i]).max() > self.max_error:
                raise Exception(f'Gradients of df{out_index}/darg{i} do not match, '
                                f'expect {diff}, actual {self.gout[i]}')

            print(f'GradChecker.check_against_numeric.numeric df{out_index}/darg{i}: {diff}, '
                  f'theoretical: {self.gout[i]}')

    # for a scalar function, the Jacobian is the same as the gradient
    def assert_match_jacobian(self):
        self.assert_match()


class OperationGradChecker(_GradChecker):
    def __init__(self,
                 fn: Callable,
                 args: List[Any],
                 delta: float = 1e-3,
                 max_error: float = 1e-3,
                 input_selector=None,
                 output_selector=None,
                 sampling_times=-1,
                 reduce_output=False) -> None:
        grad_op = GradOperation(get_all=True, sens_param=True)
        super(OperationGradChecker, self).__init__(fn, grad_op, args, delta, max_error, input_selector, \
                                                   output_selector, sampling_times, reduce_output)

    def get_sens(self, i):
        return Tensor(np.ones_like(self.out[i].asnumpy()))

    def adjust_input_selector(self):
        if not self.input_selector:
            self.input_selector = [i for i in range(self.nin)]

    def check_against_numeric(self, out_index):
        args = [self.to_numpy(arg) for arg in self.args]
        for i in self.input_selector:
            self.check_against_numeric_one_step(args, i, out_index)

    def check_against_numeric_jacobian(self, out_index):
        args = [self.to_numpy(arg) for arg in self.args]
        for i in self.input_selector:
            self.check_against_numeric_jacobian_one_step(args, i, out_index)


class NNGradChecker(_GradChecker):
    def __init__(self,
                 fn: Callable,
                 args: List[Any],
                 delta: float = 1e-3,
                 max_error: float = 1e-3,
                 input_selector=None,
                 output_selector=None,
                 sampling_times=-1,
                 reduce_output=False) -> None:
        grad_op = GradOperation(get_by_list=True, sens_param=True)
        self.params = ParameterTuple(fn.trainable_params())
        super(NNGradChecker, self).__init__(fn, grad_op, args, delta, max_error, input_selector, \
                                            output_selector, sampling_times, reduce_output)

    def get_sens(self, i):
        return Tensor(np.ones_like(self.out[i].asnumpy()))

    def adjust_input_selector(self):
        if not self.input_selector:
            self.input_selector = [i for i in range(len(self.params))]

    def check_against_numeric(self, out_index):
        for i in self.input_selector:
            self.check_against_numeric_one_step(self.params, i, out_index)

    def check_against_numeric_jacobian(self, out_index):
        for i in self.input_selector:
            self.check_against_numeric_jacobian_one_step(self.params, i, out_index)


def check_gradient(fn, *args, delta=1e-3, max_error=1e-3,
                   grad_checker_class=OperationGradChecker,
                   input_selector=None,
                   output_selector=None,
                   sampling_times=-1,
                   reduce_output=False):
    """Check the theoretical gradients of `fn` against numeric estimates.

    Args:
        fn: the function, which may be a scalar function, an operation, or a neural network.
        args: the arguments for the function.
        delta: (optional) perturbation used to compute the numeric gradient.
        max_error: (optional) maximum error allowed between theoretical and numeric values.
        grad_checker_class: (optional) checker class, default OperationGradChecker.
        input_selector: list of input indices that will be checked against numeric estimates.
        output_selector: list of output indices that will be checked against numeric estimates.
        sampling_times: (optional) number of randomly sampled input elements to check; -1 checks all.
        reduce_output: (optional) whether to wrap `fn` so that its outputs are reduced before checking.
    """
    grad_checker = grad_checker_class(fn=fn,
                                      args=list(args),
                                      delta=delta,
                                      max_error=max_error,
                                      input_selector=input_selector,
                                      output_selector=output_selector,
                                      sampling_times=sampling_times,
                                      reduce_output=reduce_output)
    grad_checker.assert_match()


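# The sketch below (added for illustration; not part of the original test utility)
# shows one way `check_gradient` might be called on a small elementwise cell with
# `OperationGradChecker`. It assumes a working MindSpore install providing
# `mindspore.nn`; the cell, shapes, and tolerances are illustrative only, and the
# function is never invoked by this module.
def _example_check_mul_gradient():
    """Illustrative only: numerically check d(x * y) for an elementwise multiply."""
    from mindspore import nn  # local import so the example stays self-contained

    class MulNet(nn.Cell):
        """Hypothetical example cell computing an elementwise product."""
        def construct(self, x, y):
            return x * y

    x = Tensor(np.random.rand(2, 3).astype(np.float32))
    y = Tensor(np.random.rand(2, 3).astype(np.float32))
    check_gradient(MulNet(), x, y,
                   delta=1e-3,
                   max_error=1e-3,
                   grad_checker_class=OperationGradChecker,
                   sampling_times=4)

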
def check_jacobian(fn, *args, delta=1e-3, max_error=1e-3,
                   grad_checker_class=OperationGradChecker,
                   input_selector=None,
                   output_selector=None):
    """Check the theoretical Jacobian of `fn` against a numeric estimate.

    Args:
        fn: the function, which may be a scalar function, an operation, or a neural network.
        args: the arguments for the function.
        delta: (optional) perturbation used to compute the numeric Jacobian.
        max_error: (optional) maximum error allowed between theoretical and numeric values.
        grad_checker_class: (optional) checker class, default OperationGradChecker.
        input_selector: list of input indices that will be checked against numeric estimates.
        output_selector: list of output indices that will be checked against numeric estimates.
    """
    grad_checker = grad_checker_class(fn=fn,
                                      args=list(args),
                                      delta=delta,
                                      max_error=max_error,
                                      input_selector=input_selector,
                                      output_selector=output_selector)
    grad_checker.assert_match_jacobian()

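
# The sketch below (added for illustration; not part of the original test utility)
# shows how `check_jacobian` might be used with `NNGradChecker` to compare the full
# Jacobian of a small dense layer with respect to its trainable parameters. It
# assumes a standard MindSpore install providing `mindspore.nn.Dense`; the layer
# size and input shape are illustrative only, and the function is never invoked
# by this module.
def _example_check_dense_jacobian():
    """Illustrative only: Jacobian check of a Dense layer w.r.t. its parameters."""
    from mindspore import nn  # local import so the example stays self-contained

    net = nn.Dense(4, 3)
    data = Tensor(np.random.rand(2, 4).astype(np.float32))
    check_jacobian(net, data,
                   delta=1e-3,
                   max_error=1e-3,
                   grad_checker_class=NNGradChecker)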