# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import math
import pytest
import numpy as np
from mindspore import context
from mindspore import nn
from mindspore import Tensor
from mindspore.common.initializer import initializer
from mindspore.common.parameter import ParameterTuple
from mindspore.common.parameter import Parameter
from mindspore.ops import composite as c


class GradOfAllInputsAndParams(nn.Cell):
    """Wraps a network and returns the gradients w.r.t. all inputs and all trainable parameters."""
    def __init__(self, network, sens_param):
        super().__init__()
        self.grad = c.GradOperation(get_all=True, get_by_list=True, sens_param=sens_param)
        self.network = network
        self.params = ParameterTuple(self.network.trainable_params())

    def construct(self, *inputs):
        gout = self.grad(self.network, self.params)(*inputs)
        return gout


class LSTM(nn.Cell):
    """Thin wrapper around nn.LSTM so the cell takes (input, h0, c0) as separate arguments."""
    def __init__(self, input_s, hidden_s, num_layers, has_bias, batch_first, bidirectional, dropout):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_s, hidden_size=hidden_s, num_layers=num_layers, has_bias=has_bias,
                            batch_first=batch_first, bidirectional=bidirectional, dropout=dropout)

    def construct(self, inp, h0, c0):
        return self.lstm(inp, (h0, c0))


class LSTMWeightBias():
    """Builds the weight and bias ParameterTuples for an LSTM layer stack."""
    def __init__(self, num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional):
        self.num_layers = num_layers
        self.has_bias = has_bias
        self.input_s = input_s
        self.num_directions = num_directions
        self.hidden_s = hidden_s
        self.bidirectional = bidirectional

    def get_weight_bias(self):
        stdv = 1 / math.sqrt(self.hidden_s)
        gate_size = 4 * self.hidden_s  # input, forget, cell and output gates
        w_list_value = []
        b_list_value = []

        for i in range(self.num_layers):
            b0 = np.zeros(gate_size, dtype=np.float16)
            # Layer 0 consumes the raw input; deeper layers consume the (possibly bidirectional) hidden state.
            w_shape = self.input_s if i == 0 else (self.num_directions * self.hidden_s)
            w_np = np.random.uniform(-stdv, stdv, (w_shape + self.hidden_s, gate_size)).astype(np.float16)
            w_list_value.append(Parameter(initializer(Tensor(w_np), [w_shape + self.hidden_s, gate_size]),
                                          name="weight_fw" + str(i)))

            if self.has_bias:
                b_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16)
                b_list_value.append(Parameter(initializer(Tensor(b_np), [gate_size]), name="bias_fw" + str(i)))
            else:
                b_list_value.append(Parameter(initializer(Tensor(b0), [gate_size]), name="bias_fw" + str(i)))

            if self.bidirectional:
                # Backward-direction weight goes into the weight list, not the bias list.
                w_bw_np = np.random.uniform(-stdv, stdv, (w_shape + self.hidden_s, gate_size)).astype(np.float16)
                w_list_value.append(Parameter(initializer(Tensor(w_bw_np), [w_shape + self.hidden_s, gate_size]),
                                              name="weight_bw" + str(i)))
                b_bw_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16) if self.has_bias else b0
                b_list_value.append(Parameter(initializer(Tensor(b_bw_np), [gate_size]), name="bias_bw" + str(i)))
        w_list_value = ParameterTuple(w_list_value)
        b_list_value = ParameterTuple(b_list_value)
        return w_list_value, b_list_value


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sit_lstm_forward_input_3_32_32_is_32_hs_16():
    input_s = 32
    hidden_s = 16
    has_bias = True
    bidirectional = False
    num_layers = 1
    num_directions = 1

    fact = LSTMWeightBias(num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional)
    w_list_value, b_list_value = fact.get_weight_bias()

    h0 = Tensor(np.random.randn(num_layers * num_directions, 32, hidden_s).astype(np.float32))
    c0 = Tensor(np.random.randn(num_layers * num_directions, 32, hidden_s).astype(np.float32))
    input_ms = Tensor(np.random.randn(3, 32, input_s).astype(np.float32))

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    net = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias, batch_first=False,
               bidirectional=bidirectional, dropout=0.0)
    net.lstm.w_list = w_list_value
    net.lstm.b_list = b_list_value
    out, (hy, cy) = net(input_ms, h0, c0)

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    net_pynative = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias,
                        batch_first=False, bidirectional=bidirectional, dropout=0.0)
    net_pynative.lstm.w_list = w_list_value
    net_pynative.lstm.b_list = b_list_value
    out_pynative, (hy_pynative, cy_pynative) = net_pynative(input_ms, h0, c0)

    assert np.allclose(out.asnumpy(), out_pynative.asnumpy(), 0.0001, 0.0001)
    assert np.allclose(hy.asnumpy(), hy_pynative.asnumpy(), 0.0001, 0.0001)
    assert np.allclose(cy.asnumpy(), cy_pynative.asnumpy(), 0.0001, 0.0001)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sit_lstm_grad_input_3_32_32_is_32_hs_16():
    input_s = 32
    hidden_s = 16
    has_bias = True
    bidirectional = False
    num_layers = 1
    num_directions = 1

    fact = LSTMWeightBias(num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional)
    w_list_value, b_list_value = fact.get_weight_bias()

    h0 = Tensor(np.random.randn(num_layers * num_directions, 32, hidden_s).astype(np.float32))
    c0 = Tensor(np.random.randn(num_layers * num_directions, 32, hidden_s).astype(np.float32))
    input_ms = Tensor(np.random.randn(3, 32, input_s).astype(np.float32))

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    net = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias, batch_first=False,
               bidirectional=bidirectional, dropout=0.0)
    net.lstm.w_list = w_list_value
    net.lstm.b_list = b_list_value

    grad_net_inp = GradOfAllInputsAndParams(net, sens_param=False)
    grad_net_inp.set_train()
    out_grad, _ = grad_net_inp(input_ms, h0, c0)
    x_grad = out_grad[0].asnumpy()
    h_grad = out_grad[1].asnumpy()
    c_grad = out_grad[2].asnumpy()

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    net_pynative = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias,
                        batch_first=False, bidirectional=bidirectional, dropout=0.0)
    net_pynative.lstm.w_list = w_list_value
    net_pynative.lstm.b_list = b_list_value

    grad_net_inp_pynative = GradOfAllInputsAndParams(net_pynative, sens_param=False)
    grad_net_inp_pynative.set_train()
    out_grad_pynative, _ = grad_net_inp_pynative(input_ms, h0, c0)
    x_grad_pynative = out_grad_pynative[0].asnumpy()
    h_grad_pynative = out_grad_pynative[1].asnumpy()
    c_grad_pynative = out_grad_pynative[2].asnumpy()

    assert np.allclose(x_grad, x_grad_pynative, 0.0001, 0.0001)
    assert np.allclose(h_grad, h_grad_pynative, 0.0001, 0.0001)
    assert np.allclose(c_grad, c_grad_pynative, 0.0001, 0.0001)
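

# Minimal direct-run entry point: a convenience sketch, assuming MindSpore and a
# device that supports nn.LSTM are available, so both consistency cases can be
# executed as a plain script without invoking pytest.
if __name__ == "__main__":
    test_sit_lstm_forward_input_3_32_32_is_32_hs_16()
    test_sit_lstm_grad_input_3_32_32_is_32_hs_16()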