# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import math
import pytest
import numpy as np
from mindspore import context
from mindspore import nn
from mindspore import Tensor
from mindspore.common.initializer import initializer
from mindspore.common.parameter import ParameterTuple
from mindspore.common.parameter import Parameter
from mindspore.ops import composite as c


class GradOfAllInputsAndParams(nn.Cell):
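    """Computes the gradients of `network` with respect to all of its inputs and all trainable parameters."""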
    def __init__(self, network, sens_param):
        super().__init__()
        self.grad = c.GradOperation(get_all=True, get_by_list=True, sens_param=sens_param)
        self.network = network
        self.params = ParameterTuple(self.network.trainable_params())

    def construct(self, *inputs):
        gout = self.grad(self.network, self.params)(*inputs)
        return gout


class LSTM(nn.Cell):
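    """Thin wrapper around nn.LSTM so the cell can be called as net(inp, h0, c0)."""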
    def __init__(self, input_s, hidden_s, num_layers, has_bias, batch_first, bidirectional, dropout):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_s, hidden_size=hidden_s, num_layers=num_layers, has_bias=has_bias,
                            batch_first=batch_first, bidirectional=bidirectional, dropout=dropout)

    def construct(self, inp, h0, c0):
        return self.lstm(inp, (h0, c0))


class LSTMWeightBias():
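    """Builds random weight and bias ParameterTuples shaped for nn.LSTM."""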
    def __init__(self, num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional):
        self.num_layers = num_layers
        self.has_bias = has_bias
        self.input_s = input_s
        self.num_directions = num_directions
        self.hidden_s = hidden_s
        self.bidirectional = bidirectional

    def get_weight_bias(self):
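        """Returns (weights, biases) as ParameterTuples with one entry per layer and direction;
        values are drawn uniformly from [-1/sqrt(hidden_s), 1/sqrt(hidden_s)], with zero biases
        when has_bias is False."""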
        stdv = 1 / math.sqrt(self.hidden_s)
        gate_size = 4 * self.hidden_s  # four gates: input, forget, cell, output
        w_list_value = []
        b_list_value = []

        for i in range(self.num_layers):
            b0 = np.zeros(gate_size, dtype=np.float16)
            # layer 0 consumes the raw input; deeper layers consume the previous layer's output
            w_shape = self.input_s if i == 0 else (self.num_directions * self.hidden_s)
            w_np = np.random.uniform(-stdv, stdv, (w_shape + self.hidden_s, gate_size)).astype(np.float16)
            w_list_value.append(Parameter(initializer(Tensor(w_np), [w_shape + self.hidden_s, gate_size]),
                                          name="weight_fw" + str(i)))

            if self.has_bias:
                b_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16)
                b_list_value.append(Parameter(initializer(Tensor(b_np), [gate_size]), name="bias_fw" + str(i)))
            else:
                b_list_value.append(Parameter(initializer(Tensor(b0), [gate_size]), name="bias_fw" + str(i)))

            if self.bidirectional:
                # backward-direction weight and bias
                w_bw_np = np.random.uniform(-stdv, stdv, (w_shape + self.hidden_s, gate_size)).astype(np.float16)
                w_list_value.append(Parameter(initializer(Tensor(w_bw_np), [w_shape + self.hidden_s, gate_size]),
                                              name="weight_bw" + str(i)))
                b_bw_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16) if self.has_bias else b0
                b_list_value.append(Parameter(initializer(Tensor(b_bw_np), [gate_size]), name="bias_bw" + str(i)))
        w_list_value = ParameterTuple(w_list_value)
        b_list_value = ParameterTuple(b_list_value)
        return w_list_value, b_list_value


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sit_lstm_forward_input_3_32_32_is_32_hs_16():
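    """Forward pass of a single-layer unidirectional LSTM on input (seq_len=3, batch=32, input_size=32,
    hidden_size=16); graph mode and pynative mode must produce matching outputs."""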
    input_s = 32
    hidden_s = 16
    has_bias = True
    bidirectional = False
    num_layers = 1
    num_directions = 1

    fact = LSTMWeightBias(num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional)
    w_list_value, b_list_value = fact.get_weight_bias()

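    # h0/c0 shape: (num_layers * num_directions, batch_size, hidden_size)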
    h0 = Tensor(np.random.randn(num_layers * 1, 32, 16).astype(np.float32))
    c0 = Tensor(np.random.randn(num_layers * 1, 32, 16).astype(np.float32))
    input_ms = Tensor(np.random.randn(3, 32, 32).astype(np.float32))

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    net = LSTM(input_s=input_s, hidden_s=16, num_layers=num_layers, has_bias=has_bias, batch_first=False,
               bidirectional=bidirectional, dropout=0.0)
    net.lstm.w_list = w_list_value
    net.lstm.b_list = b_list_value
    out, (hy, cy) = net(input_ms, h0, c0)

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    net_pynative = LSTM(input_s=input_s, hidden_s=16, num_layers=num_layers, has_bias=has_bias, batch_first=False,
                        bidirectional=bidirectional, dropout=0.0)
    net_pynative.lstm.w_list = w_list_value
    net_pynative.lstm.b_list = b_list_value
    out_pynative, (hy_pynative, cy_pynative) = net_pynative(input_ms, h0, c0)

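    # graph-mode and pynative-mode results should agree within a 1e-4 tolerance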
    assert np.allclose(out.asnumpy(), out_pynative.asnumpy(), 0.0001, 0.0001)
    assert np.allclose(hy.asnumpy(), hy_pynative.asnumpy(), 0.0001, 0.0001)
    assert np.allclose(cy.asnumpy(), cy_pynative.asnumpy(), 0.0001, 0.0001)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sit_lstm_grad_input_3_32_32_is_32_hs_16():
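    """Gradients of the same LSTM configuration with respect to the input, h0 and c0;
    graph mode and pynative mode must produce matching gradients."""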
    input_s = 32
    hidden_s = 16
    has_bias = True
    bidirectional = False
    num_layers = 1
    num_directions = 1

    fact = LSTMWeightBias(num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional)
    w_list_value, b_list_value = fact.get_weight_bias()

    h0 = Tensor(np.random.randn(num_layers * 1, 32, 16).astype(np.float32))
    c0 = Tensor(np.random.randn(num_layers * 1, 32, 16).astype(np.float32))
    input_ms = Tensor(np.random.randn(3, 32, 32).astype(np.float32))

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    net = LSTM(input_s=input_s, hidden_s=16, num_layers=num_layers, has_bias=has_bias, batch_first=False,
               bidirectional=bidirectional, dropout=0.0)
    net.lstm.w_list = w_list_value
    net.lstm.b_list = b_list_value

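    # GradOfAllInputsAndParams returns (input gradients, parameter gradients); only input gradients are compared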
    grad_net_inp = GradOfAllInputsAndParams(net, sens_param=False)
    grad_net_inp.set_train()
    out_grad, _ = grad_net_inp(input_ms, h0, c0)
    x_grad = out_grad[0].asnumpy()
    h_grad = out_grad[1].asnumpy()
    c_grad = out_grad[2].asnumpy()

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    net_pynative = LSTM(input_s=input_s, hidden_s=16, num_layers=num_layers, has_bias=has_bias, batch_first=False,
                        bidirectional=bidirectional, dropout=0.0)
    net_pynative.lstm.w_list = w_list_value
    net_pynative.lstm.b_list = b_list_value

    grad_net_inp_pynative = GradOfAllInputsAndParams(net_pynative, sens_param=False)
    grad_net_inp_pynative.set_train()
    out_grad_pynative, _ = grad_net_inp_pynative(input_ms, h0, c0)
    x_grad_pynative = out_grad_pynative[0].asnumpy()
    h_grad_pynative = out_grad_pynative[1].asnumpy()
    c_grad_pynative = out_grad_pynative[2].asnumpy()

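    # gradients from graph mode and pynative mode should agree within a 1e-4 tolerance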
    assert np.allclose(x_grad, x_grad_pynative, 0.0001, 0.0001)
    assert np.allclose(h_grad, h_grad_pynative, 0.0001, 0.0001)
    assert np.allclose(c_grad, c_grad_pynative, 0.0001, 0.0001)