# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Implementation of numerical gradient checking."""
# pylint: disable=missing-docstring

from typing import Callable, List, Any

import numpy as np
import mindspore._c_expression as _c_expression

from mindspore import ParameterTuple
from mindspore import Tensor
from mindspore import context
from mindspore.ops.composite import GradOperation
from .block_util import get_output_cell, gen_net, gen_grad_net, \
    get_uniform_with_shape, set_block_phase, get_output_reduce_cell, set_block_param_with_rand


class _GradChecker:
    """
    Check the theoretical Jacobian against a numeric estimate.

    Arguments:
        fn: The function under test.
        grad_wraper: The grad operation used to build the derivative function.
        args: The point in the function's domain where the gradient is estimated.
    """

    def __init__(self,
                 fn: Callable,
                 grad_wraper: GradOperation,
                 args: List[Any],
                 delta: float = 1e-3,
                 max_error: float = 1e-3,
                 input_selector=None,
                 output_selector=None,
                 sampling_times=-1,
                 reduce_output=False) -> None:
        """Initialize a GradChecker."""
        self.delta = delta
        self.scale = 2 * delta
        self.max_error = max_error
        self.sampling_times = sampling_times

        self.fn = self.prepare_func(fn)

        self.args = args
        out = self.fn(*self.args)
        self.out = self.wrap(out)

        self.nin = len(self.args)
        self.nout = len(self.out)
        self.gfns = []

        if reduce_output:
            fn = get_output_reduce_cell(fn, self.nout)
            self.fn = self.prepare_func(fn)
            out = self.fn(*self.args)
            self.out = self.wrap(out)

        if self.nout == 0:
            raise Exception(f'number of outputs expected to be >=1, but got {self.nout}')

        if self.nout == 1:
            self.gfns.append(self.prepare_func(fn, grad_wraper))
        else:
            for i in range(self.nout):
                cell = get_output_cell(fn, self.nin, i)
                self.gfns.append(self.prepare_func(cell, grad_wraper))

        self.input_selector = input_selector
        self.adjust_input_selector()
        if output_selector:
            self.output_selector = output_selector
        else:
            self.output_selector = [i for i in range(self.nout)]

    def adjust_input_selector(self):
        raise Exception('Not implemented')

    def sampling(self, superset):
        # -1 stands for all
        if self.sampling_times == -1 or self.sampling_times >= len(superset):
            return superset
        np.random.seed(0)
        ret = np.random.choice(superset, self.sampling_times, replace=False)
        return list(ret)

    def prepare_func(self, f, grad_wraper=None):
        """Return a function that executes 'f'.

        Args:
            f: the function.
            grad_wraper: the grad operation used to build the backward network.

        Returns:
            a function that evaluates 'f' in either Graph or PyNative mode,
            depending on the current execution context.
        """
        set_block_param_with_rand(f, get_uniform_with_shape)

        if context.get_context("mode") == context.PYNATIVE_MODE:
            if grad_wraper:
                def func_backward_pynative(*inputs):
                    net = gen_grad_net(f, grad_wraper, len(inputs) - 1, inputs[-1])

                    def _func_pynative(*inputs):
                        return net(*inputs)

                    return _func_pynative(*(inputs[:-1]))

                return func_backward_pynative

            def func_forward_pynative(*inputs):
                net = gen_net(f, len(inputs))

                def _func_pynative(*inputs):
                    return net(*inputs)

                return _func_pynative(*inputs)

            return func_forward_pynative

        if grad_wraper:
            def func_backward_graph(*inputs):
                set_block_phase(f, 'train')
                net = gen_grad_net(f, grad_wraper, len(inputs) - 1, inputs[-1])
                return net(*(inputs[:-1]))

            return func_backward_graph

        def func_forward_graph(*inputs):
            set_block_phase(f, 'predict')
            net = gen_net(f, len(inputs))
            return net(*inputs)

        return func_forward_graph

    def to_numpy(self, x):
        if isinstance(x, (Tensor, _c_expression.Tensor)):
            return x.asnumpy()
        return x

    def to_numpy_and_scale(self, x):
        # scale by delta so numeric and theoretical values are compared on the same scale
        if isinstance(x, (Tensor, _c_expression.Tensor)):
            return x.asnumpy() * self.delta
        return x * self.delta

    def wrap(self, x):
        if isinstance(x, tuple):
            return x
        return (x,)

    def get_sens(self, i):
        raise Exception('Not implemented')

    def get_ith_elem(self, c, i):
        if isinstance(c, (list, tuple)):
            return c[i]
        return c

    def compute_theoretical(self, i):
        args = list(self.args)
        args.append(self.get_sens(i))

        print('GradChecker.compute_theoretical.args', args)
        gout = self.gfns[i](*args)
        gout = self.wrap(gout)
        self.gout = [self.to_numpy_and_scale(g) if isinstance(g, _c_expression.Tensor)
                     else self.to_numpy_and_scale(np.array(g)) for g in gout]
        print('GradChecker.compute_theoretical.gout', self.gout)

    def check_against_numeric(self, out_index):
        raise Exception('Not implemented')

    def check_against_numeric_one_step(self, args, index, out_index):
        if isinstance(args, ParameterTuple):
            x = args[index].data.asnumpy()
        else:
            x = args[index]
        x_shape = x.shape
        x_size = np.prod(x_shape)
        for row in self.sampling(list(range(x_size))):
            # central difference: df/dx is estimated as (f(x + delta) - f(x - delta)) / (2 * delta)
            original = x.ravel().view()[row]
            x.ravel().view()[row] += self.delta
            y_pos = self.to_numpy_and_scale(self.get_ith_elem(self.fn(*self.args), out_index))
            x.ravel().view()[row] = original
            x.ravel().view()[row] -= self.delta
            y_neg = self.to_numpy_and_scale(self.get_ith_elem(self.fn(*self.args), out_index))
            x.ravel().view()[row] = original
            diff = (y_pos - y_neg) / self.scale
            numeric_grad = diff.sum()
            insert_virtual_grad = False
            if numeric_grad == 0 and not insert_virtual_grad:
                self.gout.insert(0, 0)
                insert_virtual_grad = True
                continue
            theoretical_grad = self.gout[index].ravel().view()[row]

            if np.fabs(numeric_grad - theoretical_grad).max() > self.max_error:
                raise Exception(f'Gradients of df{out_index}/darg{index},{row} do not match, '
                                f'expect {numeric_grad}, actual {theoretical_grad}')

            print(f'GradChecker.check_against_numeric.numeric df{out_index}/darg{index}: '
                  f'{numeric_grad}, theoretical: {theoretical_grad}')

    # approximate accuracy, but efficient
    def assert_match(self):
        print(f'==========================={self.fn.__name__}==================================')
        print('GradChecker.delta', self.delta)
        print('GradChecker.max_error', self.max_error)
        print('GradChecker.args', self.args)
        print('GradChecker.out', self.out)
        print('GradChecker.nin', self.nin)
        print('GradChecker.nout', self.nout)
        for i in self.output_selector:
            self.compute_theoretical(i)
            self.check_against_numeric(i)

    def check_against_numeric_jacobian(self, out_index):
        raise Exception('Not implemented')

    def check_against_numeric_jacobian_one_step(self, args, index, out_index):
        if isinstance(args, ParameterTuple):
            x = args[index].data.asnumpy()
        else:
            x = args[index]
        x_shape = x.shape
        x_size = np.prod(x_shape)
        dy = self.to_numpy(self.get_sens(out_index))
        dy_size = np.prod(dy.shape)
        # perturb one input entry at a time and record one row of the numeric Jacobian
        numeric_jacobian = np.zeros((x_size, dy_size), dtype=self.to_numpy(x).dtype)
        for row in range(x_size):
            original = x.ravel().view()[row]
            x.ravel().view()[row] += self.delta
            y_pos = self.to_numpy_and_scale(self.get_ith_elem(self.fn(*self.args), out_index))
            x.ravel().view()[row] = original
            x.ravel().view()[row] -= self.delta
            y_neg = self.to_numpy_and_scale(self.get_ith_elem(self.fn(*self.args), out_index))
            x.ravel().view()[row] = original
            diff = (y_pos - y_neg) / self.scale
            numeric_jacobian[row, :] = diff.ravel().view(numeric_jacobian.dtype)

        # back-propagate a one-hot sens vector per output entry to get one theoretical column
        dy_mask = np.zeros(dy.shape, dtype=dy.dtype)
        theoretical_jacobian = np.zeros((x_size, dy_size), dtype=self.to_numpy(x).dtype)
        for col in range(dy_size):
            col_jacobian = self.compute_theoretical_jacobian(index, out_index, dy_mask, col)
            theoretical_jacobian[:, col] = col_jacobian.ravel().view(theoretical_jacobian.dtype)

        if np.fabs(numeric_jacobian - theoretical_jacobian).max() > self.max_error:
            raise Exception(f'Jacobians of d(output{out_index})/darg{index} do not match, '
                            f'expect {numeric_jacobian}, actual {theoretical_jacobian}')

        print(f'GradChecker.check_against_numeric_jacobian_one_step.numeric jacobian of output{out_index}/darg{index}: '
              f'{numeric_jacobian}, theoretical: {theoretical_jacobian}')

    def compute_theoretical_jacobian(self, index, out_index, dy_mask, jacobian_col):
        if (out_index, jacobian_col, index) in self.theoretical_jacobian_cache:
            return self.theoretical_jacobian_cache[(out_index, jacobian_col, index)]

        dy_mask.ravel().view()[jacobian_col] = 1.0
        args = list(self.args)
        args.append(Tensor(dy_mask))
        print('GradChecker.compute_theoretical_jacobian.args', args)
        gout = self.wrap(self.gfns[out_index](*args))
        gout = [self.to_numpy_and_scale(g) if isinstance(g, _c_expression.Tensor)
                else self.to_numpy_and_scale(np.array(g)) for g in gout]
        print('GradChecker.compute_theoretical_jacobian.gout', gout)
        dy_mask.ravel().view()[jacobian_col] = 0.0

        for i, g in enumerate(gout):
            self.theoretical_jacobian_cache[(out_index, jacobian_col, i)] = g

        return gout[index]

    # more accurate, but inefficient
    def assert_match_jacobian(self):
        print(f'==========================={self.fn.__name__}==================================')
        print('GradChecker.delta', self.delta)
        print('GradChecker.max_error', self.max_error)
        print('GradChecker.args', self.args)
        print('GradChecker.out', self.out)
        print('GradChecker.nin', self.nin)
        print('GradChecker.nout', self.nout)

        self.theoretical_jacobian_cache = {}
        for i in self.output_selector:
            self.check_against_numeric_jacobian(i)


class ScalarGradChecker(_GradChecker):
    def __init__(self,
                 fn: Callable,
                 args: List[Any],
                 delta: float = 1e-3,
                 max_error: float = 1e-3,
                 input_selector=None,
                 output_selector=None,
                 sampling_times=-1,
                 reduce_output=False) -> None:
        grad_op = GradOperation(get_all=True, sens_param=True)
        super(ScalarGradChecker, self).__init__(fn, grad_op, args, delta, max_error, input_selector,
                                                output_selector, sampling_times, reduce_output)

    def adjust_input_selector(self):
        if not self.input_selector:
            self.input_selector = [i for i in range(self.nin)]

    def get_sens(self, i):
        return 1.0

    def check_against_numeric(self, out_index):
        args = list(self.args)
        for i in self.sampling(self.input_selector):
            print(f'GradChecker.check_against_numeric.args[{i}]', args[i])
            args_pos = args[:i] + [args[i] + self.delta] + args[i + 1:]
            args_neg = args[:i] + [args[i] - self.delta] + args[i + 1:]
            y_pos = self.to_numpy_and_scale(self.get_ith_elem(self.fn(*args_pos), out_index))
            y_neg = self.to_numpy_and_scale(self.get_ith_elem(self.fn(*args_neg), out_index))
            diff = (y_pos - y_neg) / self.scale

            if np.fabs(diff - self.gout[i]).max() > self.max_error:
                raise Exception(f'Gradients of df{out_index}/darg{i} do not match, '
                                f'expect {diff}, actual {self.gout[i]}')

            print(f'GradChecker.check_against_numeric.numeric df{out_index}/darg{i}: {diff}, '
                  f'theoretical: {self.gout[i]}')

    # for a scalar function, the Jacobian is the same as the gradient
    def assert_match_jacobian(self):
        self.assert_match()


class OperationGradChecker(_GradChecker):
    def __init__(self,
                 fn: Callable,
                 args: List[Any],
                 delta: float = 1e-3,
                 max_error: float = 1e-3,
                 input_selector=None,
                 output_selector=None,
                 sampling_times=-1,
                 reduce_output=False) -> None:
        grad_op = GradOperation(get_all=True, sens_param=True)
        super(OperationGradChecker, self).__init__(fn, grad_op, args, delta, max_error, input_selector,
                                                   output_selector, sampling_times, reduce_output)

    def get_sens(self, i):
        return Tensor(np.ones_like(self.out[i].asnumpy()))

    def adjust_input_selector(self):
        if not self.input_selector:
            self.input_selector = [i for i in range(self.nin)]

    def check_against_numeric(self, out_index):
        args = [self.to_numpy(arg) for arg in self.args]
        for i in self.input_selector:
            self.check_against_numeric_one_step(args, i, out_index)

    def check_against_numeric_jacobian(self, out_index):
        args = [self.to_numpy(arg) for arg in self.args]
        for i in self.input_selector:
            self.check_against_numeric_jacobian_one_step(args, i, out_index)


class NNGradChecker(_GradChecker):
    def __init__(self,
                 fn: Callable,
                 args: List[Any],
                 delta: float = 1e-3,
                 max_error: float = 1e-3,
                 input_selector=None,
                 output_selector=None,
                 sampling_times=-1,
                 reduce_output=False) -> None:
        grad_op = GradOperation(get_by_list=True, sens_param=True)
        # check gradients with respect to the trainable parameters of the network
        self.params = ParameterTuple(fn.trainable_params())
        super(NNGradChecker, self).__init__(fn, grad_op, args, delta, max_error, input_selector,
                                            output_selector, sampling_times, reduce_output)

    def get_sens(self, i):
        return Tensor(np.ones_like(self.out[i].asnumpy()))

    def adjust_input_selector(self):
        if not self.input_selector:
            self.input_selector = [i for i in range(len(self.params))]

    def check_against_numeric(self, out_index):
        for i in self.input_selector:
            self.check_against_numeric_one_step(self.params, i, out_index)

    def check_against_numeric_jacobian(self, out_index):
        for i in self.input_selector:
            self.check_against_numeric_jacobian_one_step(self.params, i, out_index)


def check_gradient(fn, *args, delta=1e-3, max_error=1e-3,
                   grad_checker_class=OperationGradChecker,
                   input_selector=None,
                   output_selector=None,
                   sampling_times=-1,
                   reduce_output=False):
    """Check the theoretical gradient of `fn` against a numeric estimate.

    Args:
        fn: the function under test; it may be a scalar function, an operation, or a neural network.
        args: a list of arguments for the function.
        delta: (optional) perturbation used to compute the numeric gradient.
        max_error: (optional) maximum error allowed between theoretical and numeric values.
        grad_checker_class: (optional) checker class, default OperationGradChecker.
        input_selector: list of input indices that will be checked against numeric estimates.
        output_selector: list of output indices that will be checked against numeric estimates.
        sampling_times: (optional) number of entries sampled per input; -1 checks all entries.
        reduce_output: (optional) whether to reduce the outputs before checking.
    """
    grad_checker = grad_checker_class(fn=fn,
                                      args=list(args),
                                      delta=delta,
                                      max_error=max_error,
                                      input_selector=input_selector,
                                      output_selector=output_selector,
                                      sampling_times=sampling_times,
                                      reduce_output=reduce_output)
    grad_checker.assert_match()


def check_jacobian(fn, *args, delta=1e-3, max_error=1e-3,
                   grad_checker_class=OperationGradChecker,
                   input_selector=None,
                   output_selector=None):
    """Check the theoretical Jacobian of `fn` against a numeric estimate.

    Args:
        fn: the function under test; it may be a scalar function, an operation, or a neural network.
        args: a list of arguments for the function.
        delta: (optional) perturbation used to compute the numeric Jacobian.
        max_error: (optional) maximum error allowed between theoretical and numeric values.
        grad_checker_class: (optional) checker class, default OperationGradChecker.
        input_selector: list of input indices that will be checked against numeric estimates.
        output_selector: list of output indices that will be checked against numeric estimates.
    """
    grad_checker = grad_checker_class(fn=fn,
                                      args=list(args),
                                      delta=delta,
                                      max_error=max_error,
                                      input_selector=input_selector,
                                      output_selector=output_selector)
    grad_checker.assert_match_jacobian()
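

if __name__ == '__main__':
    # Usage sketch: a minimal, hedged example of driving the public checkers above,
    # assuming a configured MindSpore backend. P.MatMul, nn.Dense, the input shapes,
    # and the tolerances are illustrative choices, not part of the original module.
    from mindspore import nn
    from mindspore.ops import operations as P

    # Check an operation's gradients with respect to its inputs using OperationGradChecker.
    check_gradient(P.MatMul(),
                   Tensor(np.random.rand(2, 3).astype(np.float32)),
                   Tensor(np.random.rand(3, 4).astype(np.float32)),
                   delta=1e-3,
                   max_error=1e-3,
                   grad_checker_class=OperationGradChecker,
                   sampling_times=3)

    # Check a small network's gradients with respect to its trainable parameters
    # using NNGradChecker.
    check_gradient(nn.Dense(3, 2),
                   Tensor(np.random.rand(1, 3).astype(np.float32)),
                   delta=1e-3,
                   max_error=1e-3,
                   grad_checker_class=NNGradChecker,
                   sampling_times=3)

    # Check the full Jacobian (slower, but entry-by-entry) with check_jacobian.
    check_jacobian(P.MatMul(),
                   Tensor(np.random.rand(2, 3).astype(np.float32)),
                   Tensor(np.random.rand(3, 4).astype(np.float32)),
                   grad_checker_class=OperationGradChecker)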