# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
import pytest

import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common.api import ms_function
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common import dtype as mstype
from mindspore.common.parameter import Parameter

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")


class Net(nn.Cell):
    """One AdamWeightDecay step spelled out as primitive ops, so the GPU
    backend can pattern-match and fuse them (the subject of this test)."""

    def __init__(self, decay_flag=True):
        super(Net, self).__init__()
        self.decay_flag = decay_flag
        self.op_mul = P.Mul()
        self.op_square = P.Square()
        self.op_sqrt = P.Sqrt()
        self.op_cast = P.Cast()
        self.op_reshape = P.Reshape()
        self.op_shape = P.Shape()
        self.param = Parameter(Tensor(np.array([0.1, 0.3, 0.5]).astype(np.float32)), name='param')
        self.m = Parameter(Tensor(np.array([0.1, 0.3, 0.5]).astype(np.float32)), name='m')
        self.v = Parameter(Tensor(np.array([0.1, 0.3, 0.5]).astype(np.float32)), name='v')

    @ms_function
    def construct(self, beta1, beta2, gradient, eps, weight_decay_tensor, lr):
        param_fp32 = self.op_cast(self.param, mstype.float32)
        m_fp32 = self.op_cast(self.m, mstype.float32)
        v_fp32 = self.op_cast(self.v, mstype.float32)
        gradient_fp32 = self.op_cast(gradient, mstype.float32)

        # First and second moment updates:
        # m' = beta1 * m + (1 - beta1) * g
        # v' = beta2 * v + (1 - beta2) * g^2
        next_m = self.op_mul(beta1, m_fp32) + \
            self.op_mul(self.op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient_fp32)
        next_v = self.op_mul(beta2, v_fp32) + self.op_mul(self.op_cast(F.tuple_to_array((1.0,)), mstype.float32) -
                                                          beta2, self.op_square(gradient_fp32))
        update = next_m / (eps + self.op_sqrt(next_v))
        if self.decay_flag:
            # Decoupled weight decay is folded into the update before lr scaling.
            update = self.op_mul(weight_decay_tensor, param_fp32) + update
        update_with_lr = self.op_mul(lr, update)
        next_param = param_fp32 - self.op_reshape(update_with_lr, self.op_shape(param_fp32))

        # Chain the assignments through depend so they execute before return.
        next_v = F.depend(next_v, F.assign(self.param, next_param))
        next_v = F.depend(next_v, F.assign(self.m, next_m))
        next_v = F.depend(next_v, F.assign(self.v, next_v))
        return next_v


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_adam_fusion():
    beta1 = Tensor(np.array([0.9]).astype(np.float32))
    beta2 = Tensor(np.array([0.999]).astype(np.float32))
    lr = Tensor(np.array([0.001]).astype(np.float32))
    eps = Tensor(np.array([1e-6]).astype(np.float32))
    weight_decay_tensor = Tensor(np.array([0.001]).astype(np.float32))

    gradient = Tensor(np.array([0.01, 0.03, 0.05]).astype(np.float32))
    opt = Net(True)
    _ = opt(beta1, beta2, gradient, eps, weight_decay_tensor, lr)

    param_expect = np.array([0.09971199, 0.29950103, 0.4993557]).astype(np.float32)
    m_expect = np.array([0.091, 0.273, 0.45499998]).astype(np.float32)
    v_expect = np.array([0.0999001, 0.29970092, 0.4995025]).astype(np.float32)
    assert np.allclose(opt.param.data.asnumpy(), param_expect)
    assert np.allclose(opt.m.data.asnumpy(), m_expect)
    assert np.allclose(opt.v.data.asnumpy(), v_expect)
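

# Minimal NumPy sketch (not part of the original test) showing how the
# expected values above can be reproduced independently of MindSpore.
# The helper name np_adam_step is hypothetical; it mirrors the op-by-op
# computation in Net.construct with decay_flag=True.
def np_adam_step(param, m, v, grad, beta1=0.9, beta2=0.999, lr=0.001,
                 eps=1e-6, weight_decay=0.001):
    next_m = beta1 * m + (1.0 - beta1) * grad
    next_v = beta2 * v + (1.0 - beta2) * np.square(grad)
    update = next_m / (eps + np.sqrt(next_v)) + weight_decay * param
    next_param = param - lr * update
    return next_param, next_m, next_v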