# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import math

import pytest
import numpy as np

from mindspore import context
from mindspore import nn
from mindspore import Tensor
from mindspore.common.initializer import initializer
from mindspore.common.parameter import ParameterTuple
from mindspore.common.parameter import Parameter
from mindspore.ops import composite as c


class GradOfAllInputsAndParams(nn.Cell):
    """Computes gradients with respect to all network inputs and all trainable parameters."""

    def __init__(self, network, sens_param):
        super().__init__()
        self.grad = c.GradOperation(get_all=True, get_by_list=True, sens_param=sens_param)
        self.network = network
        self.params = ParameterTuple(self.network.trainable_params())

    def construct(self, *inputs):
        gout = self.grad(self.network, self.params)(*inputs)
        return gout


class LSTM(nn.Cell):
    """Thin wrapper around nn.LSTM that packs (h0, c0) into the tuple expected by the layer."""

    def __init__(self, input_s, hidden_s, num_layers, has_bias, batch_first, bidirectional, dropout):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_s, hidden_size=hidden_s, num_layers=num_layers,
                            has_bias=has_bias, batch_first=batch_first,
                            bidirectional=bidirectional, dropout=dropout)

    def construct(self, inp, h0, c0):
        return self.lstm(inp, (h0, c0))


class LSTMWeightBias():
    """Builds the weight and bias ParameterTuples used to initialize the LSTM layer."""

    def __init__(self, num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional):
        self.num_layers = num_layers
        self.has_bias = has_bias
        self.input_s = input_s
        self.num_directions = num_directions
        self.hidden_s = hidden_s
        self.bidirectional = bidirectional

    def get_weight_bias(self):
        stdv = 1 / math.sqrt(self.hidden_s)
        gate_size = 4 * self.hidden_s
        w_list_value = []
        b_list_value = []

        for i in range(self.num_layers):
            b0 = np.zeros(gate_size, dtype=np.float16)
            # The first layer consumes the raw input; deeper layers consume the previous layer's output.
            w_shape = self.input_s if i == 0 else (self.num_directions * self.hidden_s)
            w_np = np.random.uniform(-stdv, stdv, (w_shape + self.hidden_s, gate_size)).astype(np.float16)
            w_list_value.append(Parameter(initializer(Tensor(w_np), [w_shape + self.hidden_s, gate_size]),
                                          name="weight_fw" + str(i)))

            if self.has_bias:
                b_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16)
                b_list_value.append(Parameter(initializer(Tensor(b_np), [gate_size]),
                                              name="bias_fw" + str(i)))
            else:
                b_list_value.append(Parameter(initializer(Tensor(b0), [gate_size]),
                                              name="bias_fw" + str(i)))

            if self.bidirectional:
                # Backward-direction weight goes into the weight list, not the bias list.
                w_bw_np = np.random.uniform(-stdv, stdv, (w_shape + self.hidden_s, gate_size)).astype(np.float16)
                w_list_value.append(Parameter(initializer(Tensor(w_bw_np), [w_shape + self.hidden_s, gate_size]),
                                              name="weight_bw" + str(i)))
                b_bw_np = np.random.uniform(-stdv, stdv, (4 * self.hidden_s)).astype(
                    np.float16) if self.has_bias else b0
                b_list_value.append(Parameter(initializer(Tensor(b_bw_np), [gate_size]),
                                              name="bias_bw" + str(i)))

        w_list_value = ParameterTuple(w_list_value)
        b_list_value = ParameterTuple(b_list_value)
        return w_list_value, b_list_value


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sit_lstm_forward_input_3_32_32_is_32_hs_16():
    input_s = 32
    hidden_s = 16
    has_bias = True
    bidirectional = False
    num_layers = 1
    num_directions = 1
    fact = LSTMWeightBias(num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional)
    w_list_value, b_list_value = fact.get_weight_bias()

    h0 = Tensor(np.random.randn(num_layers * 1, 32, 16).astype(np.float32))
    c0 = Tensor(np.random.randn(num_layers * 1, 32, 16).astype(np.float32))
    input_ms = Tensor(np.random.randn(3, 32, 32).astype(np.float32))

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    net = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias,
               batch_first=False, bidirectional=bidirectional, dropout=0.0)
    net.lstm.w_list = w_list_value
    net.lstm.b_list = b_list_value
    out, (hy, cy) = net(input_ms, h0, c0)

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    net_pynative = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias,
                        batch_first=False, bidirectional=bidirectional, dropout=0.0)
    net_pynative.lstm.w_list = w_list_value
    net_pynative.lstm.b_list = b_list_value
    out_pynative, (hy_pynative, cy_pynative) = net_pynative(input_ms, h0, c0)

    # graph mode and pynative mode should agree on the forward outputs
    assert np.allclose(out.asnumpy(), out_pynative.asnumpy(), 0.0001, 0.0001)
    assert np.allclose(hy.asnumpy(), hy_pynative.asnumpy(), 0.0001, 0.0001)
    assert np.allclose(cy.asnumpy(), cy_pynative.asnumpy(), 0.0001, 0.0001)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sit_lstm_grad_input_3_32_32_is_32_hs_16():
    input_s = 32
    hidden_s = 16
    has_bias = True
    bidirectional = False
    num_layers = 1
    num_directions = 1
    fact = LSTMWeightBias(num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional)
    w_list_value, b_list_value = fact.get_weight_bias()

    h0 = Tensor(np.random.randn(num_layers * 1, 32, 16).astype(np.float32))
    c0 = Tensor(np.random.randn(num_layers * 1, 32, 16).astype(np.float32))
    input_ms = Tensor(np.random.randn(3, 32, 32).astype(np.float32))

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    net = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias,
               batch_first=False, bidirectional=bidirectional, dropout=0.0)
    net.lstm.w_list = w_list_value
    net.lstm.b_list = b_list_value
    grad_net_inp = GradOfAllInputsAndParams(net, sens_param=False)
    grad_net_inp.set_train()
    out_grad, _ = grad_net_inp(input_ms, h0, c0)
    x_grad = out_grad[0].asnumpy()
    h_grad = out_grad[1].asnumpy()
    c_grad = out_grad[2].asnumpy()

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    net_pynative = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias,
                        batch_first=False, bidirectional=bidirectional, dropout=0.0)
    net_pynative.lstm.w_list = w_list_value
    net_pynative.lstm.b_list = b_list_value
    grad_net_inp_pynative = GradOfAllInputsAndParams(net_pynative, sens_param=False)
    grad_net_inp_pynative.set_train()
    out_grad_pynative, _ = grad_net_inp_pynative(input_ms, h0, c0)
    x_grad_pynative = out_grad_pynative[0].asnumpy()
    h_grad_pynative = out_grad_pynative[1].asnumpy()
    c_grad_pynative = out_grad_pynative[2].asnumpy()

    # graph mode and pynative mode should agree on the input, h0, and c0 gradients
    assert np.allclose(x_grad, x_grad_pynative, 0.0001, 0.0001)
    assert np.allclose(h_grad, h_grad_pynative, 0.0001, 0.0001)
    assert np.allclose(c_grad, c_grad_pynative, 0.0001, 0.0001)