# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import math

import pytest
import numpy as np
import mindspore.nn as nn
import mindspore.context as context
from mindspore.common.api import ms_function
from mindspore.common.initializer import initializer
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor
from mindspore.common.parameter import ParameterTuple, Parameter

context.set_context(mode=context.GRAPH_MODE, device_target='CPU')


class StackLSTM(nn.Cell):
    """
    Stack multiple LSTM layers together.
    """

    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 has_bias=True,
                 batch_first=False,
                 dropout=0.0,
                 bidirectional=False):
        super(StackLSTM, self).__init__()
        self.num_layers = num_layers
        self.batch_first = batch_first
        self.transpose = P.Transpose()

        # direction number
        num_directions = 2 if bidirectional else 1

        # per-layer input sizes: layer 0 consumes input_size, each later layer
        # consumes the previous layer's output of width hidden_size * num_directions
        input_size_list = [input_size]
        for i in range(num_layers - 1):
            input_size_list.append(hidden_size * num_directions)

        # layers
        layers = []
        for i in range(num_layers):
            layers.append(nn.LSTMCell(input_size=input_size_list[i],
                                      hidden_size=hidden_size,
                                      has_bias=has_bias,
                                      batch_first=batch_first,
                                      bidirectional=bidirectional,
                                      dropout=dropout))

        # weights
        weights = []
        for i in range(num_layers):
            # weight size
            weight_size = (input_size_list[i] + hidden_size) * num_directions * hidden_size * 4
            if has_bias:
                bias_size = num_directions * hidden_size * 4
                weight_size = weight_size + bias_size
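            # e.g. with input_size=3, hidden_size=2, num_directions=1 and bias:
            # (3 + 2) * 1 * 2 * 4 + 1 * 2 * 4 = 48 flattened weight elements,
            # which is what the hand-built wih/whh/bih blocks in the tests below add up to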

            # numpy weight
            stdv = 1 / math.sqrt(hidden_size)
            w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)

            # lstm weight
            weights.append(Parameter(initializer(Tensor(w_np), w_np.shape), name="weight" + str(i)))

        self.lstms = layers
        self.weight = ParameterTuple(tuple(weights))

    def construct(self, x, hx):
        """construct"""
        if self.batch_first:
            x = self.transpose(x, (1, 0, 2))
        # stack lstm
        h, c = hx
        hn = cn = None
        for i in range(self.num_layers):
            x, hn, cn, _, _ = self.lstms[i](x, h[i], c[i], self.weight[i])
        if self.batch_first:
            x = self.transpose(x, (1, 0, 2))
        return x, (hn, cn)


class LstmNet(nn.Cell):
    def __init__(self, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(LstmNet, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        # pass keyword arguments so bidirectional/dropout are not bound to batch_first by position
        self.lstm = StackLSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                              has_bias=has_bias, bidirectional=bidirectional, dropout=dropout)
        input_np = np.array([[[0.6755, -1.6607, 0.1367], [0.4276, -0.7850, -0.3758]],
                             [[-0.6424, -0.6095, 0.6639], [0.7918, 0.4147, -0.5089]],
                             [[-1.5612, 0.0120, -0.7289], [-0.6656, -0.6626, -0.5883]],
                             [[-0.9667, -0.6296, -0.7310], [0.1026, -0.6821, -0.4387]],
                             [[-0.4710, 0.6558, -0.3144], [-0.8449, -0.2184, -0.1806]]
                             ]).astype(np.float32)
        self.x = Tensor(input_np)

        self.h = Tensor(np.array([0., 0., 0., 0.]).reshape((num_directions, batch_size, hidden_size)).astype(
            np.float32))

        self.c = Tensor(np.array([0., 0., 0., 0.]).reshape((num_directions, batch_size, hidden_size)).astype(
            np.float32))
        self.h = tuple((self.h,))
        self.c = tuple((self.c,))
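        # StackLSTM.construct indexes h and c per layer, so the single-layer states
        # are wrapped in one-element tuples here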
        wih = np.array([[3.4021e-01, -4.6622e-01, 4.5117e-01],
                        [-6.4257e-02, -2.4807e-01, 1.3550e-02],  # i
                        [-3.2140e-01, 5.5578e-01, 6.3589e-01],
                        [1.6547e-01, -7.9030e-02, -2.0045e-01],
                        [-6.9863e-01, 5.9773e-01, -3.9062e-01],
                        [-3.0253e-01, -1.9464e-01, 7.0591e-01],
                        [-4.0835e-01, 3.6751e-01, 4.7989e-01],
                        [-5.6894e-01, -5.0359e-01, 4.7491e-01]]).astype(np.float32).reshape([1, -1])
        whh = np.array([[-0.4820, -0.2350],
                        [-0.1195, 0.0519],
                        [0.2162, -0.1178],
                        [0.6237, 0.0711],
                        [0.4511, -0.3961],
                        [-0.5962, 0.0906],
                        [0.1867, -0.1225],
                        [0.1831, 0.0850]]).astype(np.float32).reshape([1, -1])
        bih = np.zeros((1, 8)).astype(np.float32)
        w_np = np.concatenate((wih, whh, bih), axis=1).reshape([-1, 1, 1])
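        # wih (8x3 = 24), whh (8x2 = 16) and bih (8) flatten to the 48-element
        # weight vector StackLSTM expects for input_size=3, hidden_size=2, one direction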
        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')
        self.lstm.weight = ParameterTuple((self.w,))

    @ms_function
    def construct(self):
        return self.lstm(self.x, (self.h, self.c))


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_lstm():
    seq_len = 5
    batch_size = 2
    input_size = 3
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = False
    dropout = 0.0
    num_directions = 1
    if bidirectional:
        num_directions = 2
    net = LstmNet(batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
    y, (h, c) = net()
    print(y)
    print(c)
    print(h)
    expect_y = [[[-0.17992045, 0.07819052],
                 [-0.10745212, -0.06291768]],

                [[-0.28830513, 0.30579978],
                 [-0.07570618, -0.08868407]],

                [[-0.00814095, 0.16889746],
                 [0.02814853, -0.11208838]],

                [[0.08157863, 0.06088024],
                 [-0.04227093, -0.11514835]],

                [[0.18908429, -0.02963362],
                 [0.09106826, -0.00602506]]]
    expect_h = [[[0.18908429, -0.02963362],
                 [0.09106826, -0.00602506]]]
    expect_c = [[[0.3434288, -0.06561527],
                 [0.16838229, -0.00972614]]]

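    # the paired one-sided asserts below bound |actual - expected| < 1e-4 elementwise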
    diff_y = y.asnumpy() - expect_y
    error_y = np.ones([seq_len, batch_size, hidden_size]) * 1.0e-4
    assert np.all(diff_y < error_y)
    assert np.all(-diff_y < error_y)
    diff_h = h.asnumpy() - expect_h
    error_h = np.ones([num_layers * num_directions, batch_size, hidden_size]) * 1.0e-4
    assert np.all(diff_h < error_h)
    assert np.all(-diff_h < error_h)
    diff_c = c.asnumpy() - expect_c
    error_c = np.ones([num_layers * num_directions, batch_size, hidden_size]) * 1.0e-4
    assert np.all(diff_c < error_c)
    assert np.all(-diff_c < error_c)


class MultiLayerBiLstmNet(nn.Cell):
    def __init__(self, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(MultiLayerBiLstmNet, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = StackLSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, has_bias=has_bias,
                              bidirectional=bidirectional, dropout=dropout)

        input_np = np.array([[[-0.1887, -0.4144, -0.0235, 0.7489, 0.7522, 0.5969, 0.3342, 1.2198, 0.6786, -0.9404],
                              [-0.8643, -1.6835, -2.4965, 2.8093, 0.1741, 0.2707, 0.7387, -0.0939, -1.7990, 0.4765]],

                             [[-0.5963, -1.2598, -0.7226, 1.1365, -1.7320, -0.7302, 0.1221, -0.2111, -1.6173, -0.0706],
                              [0.8964, 0.1737, -1.0077, -0.1389, 0.4889, 0.4391, 0.7911, 0.3614, -1.9533, -0.9936]],

                             [[0.3260, -1.3312, 0.0601, 1.0726, -1.6010, -1.8733, -1.5775, 1.1579, -0.8801, -0.5742],
                              [-2.2998, -0.6344, -0.5409, -0.9221, -0.6500, 0.1206, 1.5215, 0.7517, 1.3691, 2.0021]],

                             [[-0.1245, -0.3690, 2.1193, 1.3852, -0.1841, -0.8899, -0.3646, -0.8575, -0.3131, 0.2026],
                              [1.0218, -1.4331, 0.1744, 0.5442, -0.7808, 0.2527, 0.1566, 1.1484, -0.7766, -0.6747]],

                             [[-0.6752, 0.9906, -0.4973, 0.3471, -0.1202, -0.4213, 2.0213, 0.0441, 0.9016, 1.0365],
                              [1.2223, -1.3248, 0.1207, -0.8256, 0.1816, 0.7057, -0.3105, 0.5713, 0.2804,
                               -1.0685]]]).astype(np.float32)

        self.x = Tensor(input_np)

        self.h0 = Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32))
        self.c0 = Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32))
        self.h1 = Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32))
        self.c1 = Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32))

        self.h = tuple((self.h0, self.h1))
        self.c = tuple((self.c0, self.c1))
        input_size_list = [input_size, hidden_size * num_directions]
        weights = []
        bias_size = 0 if not has_bias else num_directions * hidden_size * 4
        for i in range(num_layers):
            weight_size = (input_size_list[i] + hidden_size) * num_directions * hidden_size * 4
            w_np = np.ones([weight_size, 1, 1]).astype(np.float32) * 0.02
            if has_bias:
                bias_np = np.zeros([bias_size, 1, 1]).astype(np.float32)
                w_np = np.concatenate([w_np, bias_np], axis=0)
            weights.append(Parameter(initializer(Tensor(w_np), w_np.shape), name='weight' + str(i)))
        self.lstm.weight = weights

    @ms_function
    def construct(self):
        return self.lstm(self.x, (self.h, self.c))


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_multi_layer_bilstm():
    batch_size = 2
    input_size = 10
    hidden_size = 2
    num_layers = 2
    has_bias = True
    bidirectional = True
    dropout = 0.0

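    # forward-only smoke check: run the stacked bidirectional LSTM and print its outputs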
    net = MultiLayerBiLstmNet(batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional,
                              dropout)
    y, (h, c) = net()
    print(y)
    print(h)
    print(c)


class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
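        # get_by_list=True makes GradOperation return gradients with respect to
        # self.weights; sens_param=True means the caller supplies the output
        # sensitivity (output_grad) as the extra input in construct below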

    @ms_function
    def construct(self, output_grad):
        weights = self.weights
        grads = self.grad(self.network, weights)(output_grad)
        return grads


class Net(nn.Cell):
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(Net, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2
        input_np = np.array([[[0.6755, -1.6607, 0.1367], [0.4276, -0.7850, -0.3758]],
                             [[-0.6424, -0.6095, 0.6639], [0.7918, 0.4147, -0.5089]],
                             [[-1.5612, 0.0120, -0.7289], [-0.6656, -0.6626, -0.5883]],
                             [[-0.9667, -0.6296, -0.7310], [0.1026, -0.6821, -0.4387]],
                             [[-0.4710, 0.6558, -0.3144], [-0.8449, -0.2184, -0.1806]]
                             ]).astype(np.float32)
        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')
        self.hlist = []
        self.clist = []
        self.hlist.append(Parameter(initializer(
            Tensor(
                np.array([0.1, 0.1, 0.1, 0.1]).reshape((num_directions, batch_size, hidden_size)).astype(
                    np.float32)),
            [num_directions, batch_size, hidden_size]), name='h'))
        self.clist.append(Parameter(initializer(
            Tensor(
                np.array([0.2, 0.2, 0.2, 0.2]).reshape((num_directions, batch_size, hidden_size)).astype(
                    np.float32)),
            [num_directions, batch_size, hidden_size]), name='c'))
        self.h = ParameterTuple(tuple(self.hlist))
        self.c = ParameterTuple(tuple(self.clist))
        wih = np.array([[3.4021e-01, -4.6622e-01, 4.5117e-01],
                        [-6.4257e-02, -2.4807e-01, 1.3550e-02],  # i
                        [-3.2140e-01, 5.5578e-01, 6.3589e-01],
                        [1.6547e-01, -7.9030e-02, -2.0045e-01],
                        [-6.9863e-01, 5.9773e-01, -3.9062e-01],
                        [-3.0253e-01, -1.9464e-01, 7.0591e-01],
                        [-4.0835e-01, 3.6751e-01, 4.7989e-01],
                        [-5.6894e-01, -5.0359e-01, 4.7491e-01]]).astype(np.float32).reshape([1, -1])
        whh = np.array([[-0.4820, -0.2350],
                        [-0.1195, 0.0519],
                        [0.2162, -0.1178],
                        [0.6237, 0.0711],
                        [0.4511, -0.3961],
                        [-0.5962, 0.0906],
                        [0.1867, -0.1225],
                        [0.1831, 0.0850]]).astype(np.float32).reshape([1, -1])
        bih = np.zeros((1, 8)).astype(np.float32)
        w_np = np.concatenate((wih, whh, bih), axis=1).reshape([-1, 1, 1])
        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='weight0')
        self.lstm = StackLSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                              has_bias=has_bias, bidirectional=bidirectional, dropout=dropout)
        self.lstm.weight = ParameterTuple(tuple([self.w]))

    @ms_function
    def construct(self):
        return self.lstm(self.x, (self.h, self.c))[0]


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_grad():
    seq_len = 5
    batch_size = 2
    input_size = 3
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = False
    dropout = 0.0
    net = Grad(Net(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout))
    dy = np.array([[[-3.5471e-01, 7.0540e-01],
                    [2.7161e-01, 1.0865e+00]],

                   [[-4.2431e-01, 1.4955e+00],
                    [-4.0418e-01, -2.3282e-01]],

                   [[-1.3654e+00, 1.9251e+00],
                    [-4.6481e-01, 1.3138e+00]],

                   [[1.2914e+00, -2.3753e-01],
                    [5.3589e-01, -1.0981e-01]],

                   [[-1.6032e+00, -1.8818e-01],
                    [1.0065e-01, 9.2045e-01]]]).astype(np.float32)
    dx, dhx, dcx, dw = net(Tensor(dy))
    print(dx)
    print(dhx)
    print(dcx)
    print(dw)

if __name__ == '__main__':
    test_multi_layer_bilstm()
    test_lstm()
    test_grad()