# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
import pytest
import mindspore.context as context
from mindspore import Tensor
from mindspore.nn import Cell
import mindspore.ops.operations as P
import mindspore.ops.operations._grad_ops as G


class GeluNet(Cell):
    """Forward GeLU network."""
    def __init__(self):
        super(GeluNet, self).__init__()
        self.gelu = P.GeLU()

    def construct(self, x):
        return self.gelu(x)


class GeluGradNet(Cell):
    """Backward network wrapping the raw GeLUGrad op."""
    def __init__(self):
        super(GeluGradNet, self).__init__()
        self.gelu_grad = G.GeLUGrad()

    def construct(self, dy, x, y):
        return self.gelu_grad(dy, x, y)


def cal_gelu(x):
    # NumPy reference: tanh approximation of GeLU,
    # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))).
    tmp = np.sqrt(2.0 / np.pi) * (x + 0.044715 * x * x * x)
    expect = 0.5 * x * (1.0 + np.tanh(tmp))
    return expect


def gelu(x, enable_graph_kernel=False):
    # Run the forward net with graph kernel fusion enabled or disabled.
    context.set_context(enable_graph_kernel=enable_graph_kernel)
    net = GeluNet()
    result = net(Tensor(x))
    return result


def test_gelu():
    # Compare forward results with graph kernel fusion off (expect) vs. on (result).
    np.random.seed(0)
    input_x = np.random.normal(0, 1, [2, 3, 4, 3]).astype(np.float32)
    expect = gelu(input_x, False)
    result = gelu(input_x, True)
    res = np.allclose(expect.asnumpy(), result.asnumpy(), rtol=1.e-4, atol=1.e-4, equal_nan=True)
    assert res


def cal_gelu_grad(input_dy, input_x):
    # NumPy reference for the GeLU gradient (kept as a reference; the test below
    # compares fusion on vs. off). The constants are sqrt(2/pi) ~= 0.7978845608
    # and 3 * 0.044715 * sqrt(2/pi) ~= 0.1070322244.
    tanh_res = np.tanh(0.7978845608 * (input_x + 0.044715 * input_x * input_x * input_x))
    mul_right = 0.7978845608 + 0.1070322244 * input_x * input_x
    dx = 0.5 * (1.0 + tanh_res) + 0.5 * input_x * (1.0 - tanh_res * tanh_res) * mul_right
    expect = input_dy * dx
    return expect


def gelu_grad(input_dy, input_x, input_y, enable_graph_kernel=False):
    # Run the backward net with graph kernel fusion enabled or disabled.
    context.set_context(enable_graph_kernel=enable_graph_kernel)
    net = GeluGradNet()
    result = net(Tensor(input_dy), Tensor(input_x), Tensor(input_y))
    return result


def test_gelu_grad():
    # Compare backward results with graph kernel fusion off (expect) vs. on (result).
    np.random.seed(0)
    input_dy = np.random.normal(0, 1, [2, 3, 4, 3]).astype(np.float32)
    input_x = np.random.normal(0, 1, [2, 3, 4, 3]).astype(np.float32)
    input_y = cal_gelu(input_x)

    expect = gelu_grad(input_dy, input_x, input_y, False)
    result = gelu_grad(input_dy, input_x, input_y, True)
    res = np.allclose(expect.asnumpy(), result.asnumpy(), rtol=1.e-4, atol=1.e-4, equal_nan=True)
    assert res


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_gelu_gpu():
    context.set_context(mode=context.GRAPH_MODE, enable_graph_kernel=True, device_target="GPU")
    test_gelu()


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_gelu_ascend():
    context.set_context(mode=context.GRAPH_MODE, enable_graph_kernel=True, device_target="Ascend")
    test_gelu()


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_gelu_grad_gpu():
    context.set_context(mode=context.GRAPH_MODE, enable_graph_kernel=True, device_target="GPU")
    test_gelu_grad()


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_gelu_grad_ascend():
    context.set_context(mode=context.GRAPH_MODE, enable_graph_kernel=True, device_target="Ascend")
    test_gelu_grad()
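
# The cases above are normally collected on CI through the pytest platform
# markers. On a machine with a matching backend, a single case can also be run
# by its pytest node id; the file name "test_gelu.py" below is a placeholder
# for wherever this module is saved:
#
#   pytest -sv test_gelu.py::test_gelu_gpu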