1# Copyright 2019 Huawei Technologies Co., Ltd
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ============================================================================
15
16import numpy as np
17import pytest
18
19import mindspore.context as context
20import mindspore.nn as nn
21from mindspore.common.api import ms_function
22from mindspore.common.initializer import initializer
23from mindspore.common.parameter import ParameterTuple, Parameter
24from mindspore.common.tensor import Tensor
25from mindspore.ops import composite as C
26from mindspore.ops import operations as P
27
28context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')
29
30
class LstmNet(nn.Cell):
    """Single-layer unidirectional LSTM test network.

    Wraps the raw ``P.LSTM`` primitive with hard-coded input data, all-ones
    initial states and a fixed flattened weight vector, so that the forward
    output is deterministic and can be compared against precomputed values.
    """

    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(LstmNet, self).__init__()

        # One parameter/state set per direction (2 when bidirectional).
        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        # Fixed input, shape (seq_len=5, batch_size=2, input_size=10).
        input_np = np.array([[[0.6755, -1.6607, 0.1367, -0.9209, -1.7088, 0.3953, 2.7120, 0.1103, 0.1504, -0.3611],
                              [0.4276, -0.7850, -0.3758, 0.8604, -0.1361, -1.3618, -0.6251, -0.8391, 0.8142, 0.4068]],

                             [[-0.6424, -0.6095, 0.6639, -0.7253, 2.1190, -0.2840, 0.3858, 0.1691, 0.6764, 1.2903],
                              [0.7918, 0.4147, -0.5089, -0.3582, -1.4279, -0.7975, -0.0390, -0.4718, 0.4322, -0.7995]],

                             [[-1.5612, 0.0120, -0.7289, -1.2479, -0.6197, -0.6099, 0.9543, 0.4362, -1.3141, 0.4273],
                              [-0.6656, -0.6626, -0.5883, -0.6922, 0.5512, 1.7031, -1.2812, -0.2004, -0.9224, 0.4106]],

                             [[-0.9667, -0.6296, -0.7310, 1.2503, -0.1650, 1.2050, -0.1704, -0.5215, 0.1595, 0.3904],
                              [0.1026, -0.6821, -0.4387, -1.1637, -0.5000, 0.0590, 0.5219, -0.6835, 2.4406, 0.7135]],

                             [[-0.4710, 0.6558, -0.3144, -1.2213, 0.1556, -0.3836, -0.1081, -0.1440, -1.1231, 0.6279],
                              [-0.8449, -0.2184, -0.1806, -0.0615, -0.5660, -0.3556, 1.6891, -1.0286, 1.3361,
                               -0.4313]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        # Initial hidden state, all ones: (num_layers * num_directions, batch, hidden).
        self.h = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        # Initial cell state, all ones (same shape as h).
        self.c = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        # Input-hidden weights (4 gates x hidden_size rows, input_size cols),
        # flattened to a single row vector.
        wih = np.array([[3.4021e-01, -4.6622e-01, 4.5117e-01, 2.3627e-01, 3.7844e-01,
                         2.8770e-01, 4.1631e-01, -6.2628e-01, -4.8008e-01, -4.9148e-01],
                        [-6.4257e-02, -2.4807e-01, 1.3550e-02, 6.8946e-01, -1.2608e-02,
                         -7.1719e-02, -1.3566e-01, -4.9215e-01, 2.8509e-01, -6.3540e-01],
                        [-6.9863e-01, 5.9773e-01, -3.9062e-01, -7.6151e-02, 5.6803e-04,
                         -7.0420e-01, -6.1822e-01, 4.1854e-01, 4.0596e-01, 6.4867e-01],
                        [-3.0253e-01, -1.9464e-01, 7.0591e-01, 4.9368e-01, -5.9758e-01,
                         1.3251e-02, 3.5685e-01, -3.7640e-01, -4.4612e-01, 5.1794e-01],
                        [-3.2140e-01, 5.5578e-01, 6.3589e-01, -6.4249e-01, 5.7258e-01,
                         2.4256e-01, -2.7954e-01, 2.5202e-01, 2.9235e-01, -3.9979e-01],
                        [1.6547e-01, -7.9030e-02, -2.0045e-01, 6.2484e-01, -1.0727e-01,
                         -5.0010e-01, -2.9165e-01, -1.7620e-01, 1.5939e-01, -2.2744e-01],
                        [-4.0835e-01, 3.6751e-01, 4.7989e-01, 5.8886e-01, 5.3598e-01,
                         -2.9055e-01, -2.8129e-01, 6.0219e-01, 4.9193e-01, 3.3115e-01],
                        [-5.6894e-01, -5.0359e-01, 4.7491e-01, 5.8110e-01, -5.4921e-01,
                         -6.1343e-01, -5.8236e-02, -3.7682e-01, 4.8338e-01, -2.1551e-01]]).astype(np.float32).reshape(
                             [1, -1])

        # Hidden-hidden weights, flattened the same way.
        whh = np.array([[-0.4820, -0.2350],
                        [-0.1195, 0.0519],
                        [0.4511, -0.3961],
                        [-0.5962, 0.0906],
                        [0.2162, -0.1178],
                        [0.6237, 0.0711],
                        [0.1867, -0.1225],
                        [0.1831, 0.0850]]).astype(np.float32).reshape([1, -1])

        # Input-hidden and hidden-hidden biases.
        bih = np.array([-0.2862, 0.0034, 0.2059, -0.6544, 0.3244, -0.2472, 0.0852, -0.3050]).astype(np.float32).reshape(
            [1, -1])
        bhh = np.array([-0.6575, 0.1562, -0.6434, 0.0212, -0.2493, -0.5626, 0.1530, -0.5235]).astype(
            np.float32).reshape([1, -1])

        # P.LSTM consumes one flat weight tensor: weights first, then biases,
        # reshaped to (-1, 1, 1).
        w_np = np.concatenate((wih, whh, bih, bhh), axis=1).reshape([-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    @ms_function
    def construct(self):
        """Run the LSTM; returns a 5-tuple whose first three items are (y, h_n, c_n)."""
        return self.lstm(self.x, self.h, self.c, self.w)
106
107
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_lstm():
    """Check the forward output of a single-layer unidirectional LSTM
    (output sequence, final hidden state, final cell state) against
    precomputed expected values within an elementwise tolerance."""
    seq_len = 5
    batch_size = 2

    input_size = 10
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = False
    dropout = 0.0

    num_directions = 1
    if bidirectional:
        num_directions = 2

    net = LstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
    y, h, c, _, _ = net()
    expect_y = np.array([[[-2.1429e-02, 1.1760e-01],
                          [3.1144e-01, 6.3090e-01]],

                         [[-5.0190e-04, -4.5812e-02],
                          [2.0324e-02, 2.0392e-01]],

                         [[-1.0370e-02, -6.0141e-02],
                          [6.0931e-02, -1.8913e-02]],

                         [[-1.6031e-01, -2.3428e-01],
                          [4.1886e-02, -2.2162e-01]],

                         [[-3.9243e-02, -3.2950e-02],
                          [-4.1257e-02, -4.5276e-01]]])

    # Tolerance tensor must match expect_y's shape
    # (seq_len, batch, hidden * num_directions); the previous
    # [num_layers, batch_size, hidden_size] shape only worked because
    # num_layers == 1 let NumPy broadcast it over the seq_len axis.
    error = np.ones([seq_len, batch_size, hidden_size * num_directions]) * 1.0e-4
    diff = y.asnumpy() - expect_y
    assert np.all(diff < error)
    assert np.all(-diff < error)

    # Final hidden state: (num_layers * num_directions, batch, hidden).
    expect_h = np.array([[[-0.0392, -0.0329],
                          [-0.0413, -0.4528]]])
    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
    diff = h.asnumpy() - expect_h
    assert np.all(diff < error)
    assert np.all(-diff < error)

    # Final cell state, same shape as the hidden state.
    expect_c = np.array([[[-0.0984, -0.3665],
                          [-0.1010, -0.6792]]])
    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
    diff = c.asnumpy() - expect_c
    assert np.all(diff < error)
    assert np.all(-diff < error)
161
162
class BiLstmNet(nn.Cell):
    """Single-layer bidirectional LSTM test network.

    Same structure as ``LstmNet`` but bidirectional: the flat weight vector
    additionally carries a reverse-direction set of weights and biases.
    """

    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(BiLstmNet, self).__init__()

        # One parameter/state set per direction (2 when bidirectional).
        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        # Fixed input, shape (seq_len=5, batch_size=2, input_size=10).
        input_np = np.array([[[-1.7322, 1.6642, -1.1861, 0.2955, -0.7907, 0.2982, -1.3413, 1.0665, -0.0436, -0.1883],
                              [0.2195, 0.5917, -0.6739, 0.2388, -0.5364, -1.3309, -0.6018, -0.3081, -0.9648, -1.1627]],

                             [[-0.5094, -2.6025, -0.9302, -1.1937, 0.6501, -0.1903, -0.0661, 0.1080, 0.9829, -0.2280],
                              [1.3961, 0.2239, -0.1947, -0.3206, 0.5791, 0.3396, 0.1728, -1.2007, -1.0994, -1.3278]],

                             [[0.1870, -1.1090, -0.9705, 0.2207, 0.3743, 0.1158, -0.5443, -0.5559, 0.1538, -0.3975],
                              [-0.2347, -0.1245, -0.2335, 0.3164, 1.0997, -0.3928, -1.8517, 1.1136, -1.5051, -0.0071]],

                             [[1.2739, 2.5438, -0.4289, -0.7981, -1.3682, -2.2509, 0.2028, 1.3410, 2.9502, -1.1650],
                              [0.1254, 0.2726, 0.0251, 0.9323, 0.7315, 0.8231, -0.2123, -0.6885, 0.9893, -0.2047]],

                             [[0.1870, -0.9066, 0.7155, 0.5438, -0.9757, -0.5828, -0.3417, 1.5681, 1.0326, -0.0179],
                              [-0.7746, -1.0695, -0.5278, 2.5307, -0.1002, -1.5773, 0.7717, 1.0266, -0.0798,
                               1.2333]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        # Initial hidden state, all ones: (num_layers * num_directions, batch, hidden).
        self.h = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        # Initial cell state, all ones (same shape as h).
        self.c = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        # Forward-direction input-hidden weights, flattened to a row vector.
        wih = np.array([[-0.2959, -0.1142, 0.3662, 0.5406, 0.1738, 0.2697, -0.6960, -0.0464, 0.3486, 0.1888],
                        [0.3043, 0.1505, -0.1207, -0.2456, 0.2735, 0.6673, -0.3352, -0.6153, -0.5731, -0.2726],
                        [-0.2657, -0.5570, 0.6785, -0.1861, -0.0652, 0.5757, 0.6442, -0.4068, -0.3260, 0.7054],
                        [0.6607, 0.6927, -0.1354, 0.2484, 0.2053, 0.5743, -0.0212, 0.3340, -0.5685, -0.5668],
                        [0.6701, -0.3013, -0.1202, -0.4200, -0.4280, -0.6329, -0.6074, -0.4997, -0.6215, -0.6259],
                        [0.0299, -0.6071, -0.4683, -0.3363, -0.0044, -0.0007, 0.2700, 0.0202, -0.2880, -0.6869],
                        [0.3025, -0.2461, -0.5128, 0.6327, -0.1438, -0.5100, 0.1924, 0.2023, 0.3129, 0.2271],
                        [0.3777, 0.0546, 0.4790, -0.1895, 0.3588, 0.4490, 0.6850, 0.6240, -0.2739, -0.4474]]).astype(
                            np.float32).reshape([1, -1])

        # Forward-direction hidden-hidden weights.
        whh = np.array([[0.6346, -0.6366],
                        [-0.0248, -0.6156],
                        [-0.3821, 0.6327],
                        [-0.6132, -0.5071],
                        [0.4029, 0.0906],
                        [-0.5671, 0.2556],
                        [0.0268, -0.4347],
                        [0.1152, -0.3124]]).astype(np.float32).reshape([1, -1])

        # Forward-direction biases.
        bih = np.array([-0.3839, -0.5365, -0.6691, 0.1697, -0.1564, -0.0451, -0.5921, -0.5367]).astype(
            np.float32).reshape([1, -1])
        bhh = np.array([0.5952, -0.4905, 0.0423, -0.0293, -0.6638, 0.4348, -0.4291, -0.5541]).astype(
            np.float32).reshape([1, -1])

        # Reverse-direction input-hidden weights.
        wih_reverse = np.array([[-0.2938, 0.0048, 0.2704, -0.3387, -0.4529, -0.2586, 0.1352, -0.1208, -0.1423, -0.0220],
                                [-0.3701, 0.0201, -0.0255, 0.1340, -0.1938, -0.7056, -0.2303, 0.4814, 0.3636, -0.5018],
                                [-0.0284, -0.0108, -0.5788, 0.2389, 0.2604, 0.6774, -0.5525, 0.6265, -0.6126, 0.3197],
                                [-0.6906, 0.6991, -0.6138, 0.0044, 0.5714, 0.4176, 0.5451, -0.5114, -0.2286, 0.1105],
                                [0.3547, 0.6233, -0.4543, -0.6799, 0.1109, 0.5601, 0.0212, 0.6926, 0.0597, -0.4383],
                                [-0.1370, -0.5852, 0.0596, 0.5494, 0.5789, -0.0534, 0.1092, 0.3544, -0.1571, 0.4444],
                                [-0.5886, -0.4765, -0.3837, -0.6634, 0.0963, -0.1385, -0.0837, -0.1354, 0.0547,
                                 -0.2870],
                                [0.2049, -0.7057, -0.1736, 0.4724, 0.1957, -0.3037, 0.4626, -0.6465, 0.4575,
                                 0.4230]]).astype(np.float32).reshape([1, -1])

        # Reverse-direction hidden-hidden weights.
        whh_reverse = np.array([[0.2339, -0.0307],
                                [-0.5850, 0.6328],
                                [0.5856, -0.5601],
                                [0.4875, -0.6929],
                                [0.0314, 0.2531],
                                [-0.2523, 0.3244],
                                [0.5199, 0.5146],
                                [0.3968, 0.4511]]).astype(np.float32).reshape([1, -1])

        # Reverse-direction biases.
        bih_reverse = np.array([-0.1760, 0.2828, 0.2450, -0.4016, -0.4664, 0.4031, -0.1945, -0.1509]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse = np.array([0.6427, 0.4806, 0.6278, 0.1596, 0.0038, -0.3418, 0.0549, -0.3900]).astype(
            np.float32).reshape([1, -1])

        # Flat layout: all weights (forward then reverse), then all biases
        # (forward then reverse), reshaped to (-1, 1, 1).
        w_np = np.concatenate((wih, whh, wih_reverse, whh_reverse, bih, bhh, bih_reverse, bhh_reverse), axis=1).reshape(
            [-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    @ms_function
    def construct(self):
        """Run the LSTM; returns a 5-tuple whose first three items are (y, h_n, c_n)."""
        return self.lstm(self.x, self.h, self.c, self.w)
256
257
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_bilstm():
    """Check the forward output of a single-layer bidirectional LSTM
    (output sequence, final hidden state, final cell state) against
    precomputed expected values within an elementwise tolerance."""
    seq_len = 5
    batch_size = 2

    input_size = 10
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = True
    dropout = 0.0

    num_directions = 1
    if bidirectional:
        num_directions = 2

    net = BiLstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
    y, h, c, _, _ = net()
    expect_y = np.array([[[-0.0826, 0.0209, 0.1715, -0.0072],
                          [0.1035, 0.0594, -0.0867, -0.1077]],

                         [[-0.1647, 0.0293, -0.2189, 0.3809],
                          [0.0466, 0.4461, 0.0784, 0.0905]],

                         [[-0.0182, 0.0512, 0.1758, -0.1147],
                          [0.0460, 0.1588, -0.0314, 0.0886]],

                         [[-0.0330, 0.0551, 0.2084, -0.1154],
                          [-0.1641, 0.1118, -0.0122, 0.4916]],

                         [[-0.2997, 0.0223, 0.1328, 0.3377],
                          [-0.6669, 0.0089, 0.1138, 0.7786]]])

    # Tolerance tensor must match expect_y's shape
    # (seq_len, batch, hidden * num_directions); the previous
    # [num_layers, ...] leading dimension only worked because
    # num_layers == 1 let NumPy broadcast it over the seq_len axis.
    error = np.ones([seq_len, batch_size, hidden_size * num_directions]) * 1.0e-4
    diff = y.asnumpy() - expect_y
    assert np.all(diff < error)
    assert np.all(-diff < error)

    # Final hidden state: (num_layers * num_directions, batch, hidden);
    # forward direction first, then reverse.
    expect_h = np.array([[[-0.2997, 0.0223],
                          [-0.6669, 0.0089]],

                         [[0.1715, -0.0072],
                          [-0.0867, -0.1077]]])
    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
    diff = h.asnumpy() - expect_h
    assert np.all(diff < error)
    assert np.all(-diff < error)

    # Final cell state; looser 1e-3 tolerance than y/h.
    expect_c = np.array([[[-0.6049, 0.0825],
                          [-0.9433, 0.1006]],

                         [[0.3037, -0.2036],
                          [-0.1633, -0.5663]]])

    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-3
    diff = c.asnumpy() - expect_c
    assert np.all(diff < error)
    assert np.all(-diff < error)
318
319
class MultiLayerBiLstmNet(nn.Cell):
    """Two-layer bidirectional LSTM test network.

    Like ``BiLstmNet`` but stacked: the flat weight vector carries a
    forward and reverse set of weights/biases for each layer (layout
    documented in the block comment before the concatenation below).
    """

    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(MultiLayerBiLstmNet, self).__init__()

        # One parameter/state set per direction (2 when bidirectional).
        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        # Fixed input, shape (seq_len=5, batch_size=2, input_size=10).
        input_np = np.array([[[-0.1887, -0.4144, -0.0235, 0.7489, 0.7522, 0.5969, 0.3342, 1.2198, 0.6786, -0.9404],
                              [-0.8643, -1.6835, -2.4965, 2.8093, 0.1741, 0.2707, 0.7387, -0.0939, -1.7990, 0.4765]],

                             [[-0.5963, -1.2598, -0.7226, 1.1365, -1.7320, -0.7302, 0.1221, -0.2111, -1.6173, -0.0706],
                              [0.8964, 0.1737, -1.0077, -0.1389, 0.4889, 0.4391, 0.7911, 0.3614, -1.9533, -0.9936]],

                             [[0.3260, -1.3312, 0.0601, 1.0726, -1.6010, -1.8733, -1.5775, 1.1579, -0.8801, -0.5742],
                              [-2.2998, -0.6344, -0.5409, -0.9221, -0.6500, 0.1206, 1.5215, 0.7517, 1.3691, 2.0021]],

                             [[-0.1245, -0.3690, 2.1193, 1.3852, -0.1841, -0.8899, -0.3646, -0.8575, -0.3131, 0.2026],
                              [1.0218, -1.4331, 0.1744, 0.5442, -0.7808, 0.2527, 0.1566, 1.1484, -0.7766, -0.6747]],

                             [[-0.6752, 0.9906, -0.4973, 0.3471, -0.1202, -0.4213, 2.0213, 0.0441, 0.9016, 1.0365],
                              [1.2223, -1.3248, 0.1207, -0.8256, 0.1816, 0.7057, -0.3105, 0.5713, 0.2804,
                               -1.0685]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        # Initial hidden state, all ones: (num_layers * num_directions, batch, hidden).
        self.h = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        # Initial cell state, all ones (same shape as h).
        self.c = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        # Layer 0, forward direction: input-hidden weights.
        wih_l0 = np.array([[0.3715, -0.0723, 0.6017, 0.5115, -0.5357, 0.3794, -0.3752, -0.6205, -0.0370, -0.2904],
                           [0.7055, -0.4156, -0.3650, -0.0964, 0.4141, -0.2584, -0.4765, -0.0045, 0.2943, -0.2648],
                           [0.1355, 0.1697, 0.1883, 0.3754, 0.3744, -0.6128, 0.2328, -0.1275, 0.6604, 0.6498],
                           [-0.0266, 0.5805, -0.5358, -0.0929, 0.0797, 0.3744, 0.3299, -0.3825, 0.5804, -0.0855],
                           [0.1141, 0.2587, -0.4370, 0.6430, -0.0017, 0.4865, 0.2814, 0.6213, -0.6415, 0.4574],
                           [-0.3958, -0.5827, -0.1056, 0.6987, -0.6591, -0.1326, 0.5237, 0.4667, -0.7001, -0.2326],
                           [0.3074, -0.3118, -0.4591, 0.2481, -0.2978, -0.1850, 0.4770, -0.0126, 0.3655, -0.4306],
                           [0.3033, -0.6264, -0.6551, 0.0069, -0.5238, -0.3950, 0.5681, -0.4931, -0.6258,
                            0.4079]]).astype(np.float32).reshape([1, -1])

        # Layer 0, forward direction: hidden-hidden weights.
        whh_l0 = np.array([[-0.3870, 0.0238],
                           [-0.3758, 0.2490],
                           [0.5437, -0.4117],
                           [0.1181, -0.2043],
                           [-0.5335, 0.1188],
                           [-0.0822, 0.2154],
                           [0.5844, -0.3239],
                           [-0.6537, 0.0278]]).astype(np.float32).reshape([1, -1])

        # Layer 0, forward direction: biases.
        bih_l0 = np.array([0.5440, 0.5995, 0.0155, -0.6254, 0.5114, 0.3364, -0.1824, -0.6262]).astype(
            np.float32).reshape([1, -1])
        bhh_l0 = np.array([0.4139, -0.2513, -0.4023, 0.4222, 0.6387, -0.6147, 0.0677, 0.5355]).astype(
            np.float32).reshape([1, -1])

        # Layer 0, reverse direction: input-hidden weights.
        wih_reverse_l0 = np.array([[6.5219e-01, 5.6162e-01, -1.8653e-01, 6.8789e-01, 1.3240e-01, 1.7699e-01, 1.2940e-01,
                                    -1.8520e-01, -5.5439e-01, -3.4946e-01],
                                   [3.7645e-01, 6.5475e-01, 3.5964e-01, 2.2433e-01, -1.7869e-01, -2.9047e-01,
                                    1.7615e-01, -5.3353e-01, -7.4204e-02, -2.5270e-01],
                                   [5.8095e-01, -4.6426e-04, 1.9262e-01, -5.1306e-01, -3.6811e-01, 4.4858e-01,
                                    6.2580e-01, 9.5494e-02, -6.9505e-01, 4.9500e-01],
                                   [-3.7810e-01, 1.5485e-01, -1.4735e-01, -1.5327e-01, -4.5702e-01, 3.0816e-01,
                                    -3.4280e-01, 2.1604e-01, 1.4087e-01, -5.7707e-01],
                                   [-3.8700e-01, -6.4653e-01, 6.0653e-01, -4.7297e-01, 6.8413e-02, -1.2681e-01,
                                    6.8464e-02, 6.7011e-01, 3.9950e-01, -2.0577e-01],
                                   [-1.8648e-01, -6.7198e-01, 3.8017e-01, -3.3147e-01, 5.3193e-01, -5.4952e-01,
                                    2.1774e-01, -4.6271e-01, 3.2611e-01, 6.3554e-02],
                                   [-4.5403e-01, -1.5910e-01, -7.5886e-02, 2.6313e-01, 6.8093e-01, -3.9960e-01,
                                    5.5428e-01, 1.0429e-01, 5.1322e-01, 1.9406e-01],
                                   [3.9698e-01, -5.2101e-01, 5.1372e-01, -3.9866e-01, 1.0115e-01, -4.1290e-02,
                                    -3.0980e-01, 2.1607e-01, 4.8420e-01, -1.9267e-01]]).astype(np.float32).reshape(
                                        [1, -1])

        # Layer 0, reverse direction: hidden-hidden weights.
        whh_reverse_l0 = np.array([[-0.3231, -0.3960],
                                   [-0.1625, -0.3032],
                                   [0.3892, -0.0666],
                                   [0.0159, -0.4870],
                                   [-0.4953, 0.2278],
                                   [-0.5380, -0.5250],
                                   [0.0371, -0.4534],
                                   [-0.5452, 0.5012]]).astype(np.float32).reshape([1, -1])

        # Layer 0, reverse direction: biases.
        bih_reverse_l0 = np.array([0.0469, -0.0107, 0.3783, -0.2657, -0.0089, 0.5032, -0.0757, -0.2022]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse_l0 = np.array([-0.6584, 0.3977, 0.5597, -0.4784, 0.5360, -0.2532, 0.5362, -0.1063]).astype(
            np.float32).reshape([1, -1])

        # Layer 1, forward direction: input-hidden weights (input is the
        # layer-0 output, so 4 columns = hidden_size * num_directions).
        wih_l1 = np.array([[0.0602, 0.6977, -0.3882, 0.3734],
                           [-0.6896, -0.6014, -0.2311, 0.6433],
                           [-0.6778, -0.5100, -0.1496, 0.5774],
                           [-0.5824, 0.4656, -0.2835, -0.5688],
                           [0.5623, 0.3599, 0.1731, 0.3124],
                           [0.1492, -0.6663, -0.1099, -0.5282],
                           [0.4696, -0.1795, -0.6712, -0.3903],
                           [0.4995, 0.0709, -0.1738, 0.2822]]).astype(np.float32).reshape([1, -1])

        # Layer 1, forward direction: hidden-hidden weights.
        whh_l1 = np.array([[0.3770, 0.4139],
                           [0.5351, 0.6394],
                           [0.3901, -0.1072],
                           [0.1106, 0.1331],
                           [0.3970, 0.4693],
                           [0.2958, -0.3813],
                           [-0.3064, 0.5519],
                           [-0.2827, 0.5844]]).astype(np.float32).reshape([1, -1])

        # Layer 1, forward direction: biases.
        bih_l1 = np.array([0.5242, 0.5896, 0.3709, 0.6202, 0.5008, 0.2674, 0.4356, -0.3261]).astype(np.float32).reshape(
            [1, -1])
        bhh_l1 = np.array([-0.6648, 0.6680, 0.2510, -0.1245, -0.0524, 0.5439, -0.1650, 0.5303]).astype(
            np.float32).reshape([1, -1])

        # Layer 1, reverse direction: input-hidden weights.
        wih_reverse_l1 = np.array([[0.6477, 0.4416, 0.3803, -0.4708],
                                   [0.4497, 0.2833, -0.4739, -0.6361],
                                   [-0.5573, -0.3867, -0.0349, -0.4128],
                                   [-0.1545, 0.3720, 0.2354, -0.6090],
                                   [0.5965, 0.6301, -0.4591, -0.0120],
                                   [-0.1253, -0.1881, -0.4388, 0.4335],
                                   [0.1944, -0.1230, -0.6170, 0.1043],
                                   [-0.6700, 0.4343, 0.6474, 0.0113]]).astype(np.float32).reshape([1, -1])

        # Layer 1, reverse direction: hidden-hidden weights.
        whh_reverse_l1 = np.array([[0.6576, 0.5573],
                                   [0.2318, 0.0187],
                                   [-0.6365, 0.5744],
                                   [-0.6494, -0.1820],
                                   [0.6461, -0.3344],
                                   [0.0906, -0.5405],
                                   [-0.5999, 0.5571],
                                   [-0.0488, 0.5345]]).astype(np.float32).reshape([1, -1])

        # Layer 1, reverse direction: biases.
        bih_reverse_l1 = np.array([-0.6058, -0.2812, -0.4449, -0.0802, 0.4931, 0.4066, 0.5960, 0.1968]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse_l1 = np.array([-0.2490, -0.3402, -0.5089, -0.3875, 0.4852, -0.0402, -0.0072, -0.1017]).astype(
            np.float32).reshape([1, -1])

        '''
        weight
            layer0
                forward
                    wih
                    whh
                reverse
                    wih
                    whh
            layer1
                forward
                    wih
                    whh
                reverse
                    wih
                    whh
            ... ...
        bias:
            layer0
                forward
                    bih
                    bhh
                reverse
                    bih
                    bhh
            layer1
                forward
                    bih
                    bhh
                reverse
                    bih
                    bhh
            ... ...
        '''
        w_np = np.concatenate(
            (wih_l0, whh_l0, wih_reverse_l0, whh_reverse_l0, wih_l1, whh_l1, wih_reverse_l1, whh_reverse_l1,
             bih_l0, bhh_l0, bih_reverse_l0, bhh_reverse_l0, bih_l1, bhh_l1, bih_reverse_l1, bhh_reverse_l1),
            axis=1).reshape([-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    @ms_function
    def construct(self):
        """Run the LSTM; returns a 5-tuple whose first three items are (y, h_n, c_n)."""
        return self.lstm(self.x, self.h, self.c, self.w)
502
503
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_multi_layer_bilstm():
    """Check the forward output of a two-layer bidirectional LSTM
    (output sequence, final hidden state, final cell state) against
    precomputed expected values within an elementwise tolerance."""
    seq_len = 5
    batch_size = 2

    input_size = 10
    hidden_size = 2
    num_layers = 2
    has_bias = True
    bidirectional = True
    dropout = 0.0

    num_directions = 2 if bidirectional else 1

    net = MultiLayerBiLstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional,
                              dropout)
    y, h, c, _, _ = net()
    expect_y = np.array([[[0.5186, 0.5419, 0.2710, 0.0384],
                          [0.6196, 0.5539, 0.3266, 0.0866]],

                         [[0.5244, 0.5276, 0.3042, 0.0510],
                          [0.5143, 0.4937, 0.2828, 0.0387]],

                         [[0.5124, 0.5079, 0.2951, 0.0548],
                          [0.4051, 0.4493, 0.2369, 0.0077]],

                         [[0.4532, 0.4749, 0.2557, 0.0611],
                          [0.4879, 0.4812, 0.3160, 0.0368]],

                         [[0.4535, 0.4806, 0.3880, 0.0462],
                          [0.4674, 0.4849, 0.3890, 0.1008]]])

    # |actual - expected| must stay strictly below the tolerance everywhere.
    tol_y = np.ones([seq_len, batch_size, hidden_size * num_directions]) * 1.0e-4
    assert np.all(np.abs(y.asnumpy() - expect_y) < tol_y)

    expect_h = np.array([[[0.4730, 0.1638],
                          [0.1406, -0.0697]],

                         [[0.3887, -0.0518],
                          [-0.3988, -0.0071]],

                         [[0.4535, 0.4806],
                          [0.4674, 0.4849]],

                         [[0.2710, 0.0384],
                          [0.3266, 0.0866]]])
    tol_h = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
    assert np.all(np.abs(h.asnumpy() - expect_h) < tol_h)

    expect_c = np.array([[[0.8713, 0.2694],
                          [0.2075, -0.2201]],

                         [[0.5084, -0.0964],
                          [-0.5155, -0.2452]],

                         [[1.1724, 1.0334],
                          [1.2003, 1.1058]],

                         [[0.5179, 0.0750],
                          [0.5309, 0.2012]]])

    # Cell state uses a looser 1e-3 tolerance than y/h.
    tol_c = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-3
    assert np.all(np.abs(c.asnumpy() - expect_c) < tol_c)
577
578
class Grad(nn.Cell):
    """Wraps a network and computes gradients of its trainable parameters.

    ``construct`` receives the sensitivity (gradient of the loss w.r.t.
    the network output) and returns the parameter gradients produced by
    ``C.GradOperation``.
    """

    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        # get_by_list: differentiate w.r.t. the parameter tuple;
        # sens_param: the caller supplies the output gradient explicitly.
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)

    @ms_function
    def construct(self, output_grad):
        grad_fn = self.grad(self.network, self.weights)
        return grad_fn(output_grad)
592
593
class Net(nn.Cell):
    """Forward LSTM fixture with fully hard-coded inputs, states and weights.

    Every tensor is a fixed literal so the forward output -- and the
    gradients the surrounding test computes against it -- are deterministic
    and comparable to precomputed expected values. The test drives this with
    seq_len=5, batch_size=2, input_size=10, hidden_size=2, num_layers=2,
    bidirectional=True.
    """

    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(Net, self).__init__()

        # A bidirectional LSTM carries a forward and a reverse weight set per
        # layer, doubling the leading dimension of the h/c state tensors.
        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        # Fixed network input, shape [seq_len, batch_size, input_size].
        input_np = np.array([[[-0.5907, 1.0557, 1.7283, 0.6706, -1.2550, -0.5298, -0.2290, -0.6735, 0.8555, 1.4836],
                              [-1.7070, -0.5347, -0.9105, -0.2598, 0.0588, 1.5496, 1.0757, 0.3760, -1.2020, -0.2868]],

                             [[0.0151, 0.2126, 0.8090, -0.5292, -2.5590, 0.4279, -0.3081, -1.4706, -0.0498, 1.2301],
                              [0.4165, -0.5391, -0.0996, 0.1928, -0.4909, -0.1255, 0.4444, -1.3687, 1.3096, 0.6553]],

                             [[-0.7802, -0.2083, -0.6388, 1.3757, 0.4293, 0.5363, 0.3202, -0.6687, -1.3864, -0.2953],
                              [1.0799, -0.7204, 0.1130, -0.5857, -0.4855, -1.1068, 1.0126, 0.8716, 1.5460, -0.7392]],

                             [[2.2645, -0.6586, -0.2227, 1.4290, -0.5006, -1.6576, -0.1793, 0.5319, 0.1360, 0.2707],
                              [-0.4071, 0.1575, 1.4199, -0.9156, 0.1855, 0.4947, 1.0460, -0.6365, 0.1191, -0.6374]],

                             [[0.2468, 1.0815, -0.4893, 0.0664, 0.6405, -2.2967, 0.7612, 0.8759, 0.5685, -1.0999],
                              [-0.7272, -1.7750, -0.1164, -0.7159, 0.0061, -0.7839, -1.8329, 0.3434, -0.5634,
                               0.5384]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        # Initial hidden and cell states: all ones, shape
        # [num_layers * num_directions, batch_size, hidden_size].
        self.h = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        self.c = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        # Per-layer / per-direction weight and bias pieces. Each is flattened
        # to a single row so they can all be concatenated into the one packed
        # weight tensor P.LSTM consumes (layout documented in the string
        # below). Naming: w/b, i=input h=hidden, l0/l1 = layer index,
        # "reverse" = backward direction of the bidirectional pass.
        wih_l0 = np.array([[0.2300, 0.6668, 0.4703, 0.0425, 0.0464, 0.6825, 0.2249, -0.4315, -0.2449, 0.2964],
                           [-0.2811, -0.3444, 0.2557, -0.5137, -0.5518, 0.1652, -0.6720, 0.1066, 0.3586, 0.6299],
                           [0.5728, -0.1784, 0.5661, 0.4012, 0.3856, -0.1899, 0.3102, 0.3717, -0.5651, 0.1952],
                           [0.1026, -0.0527, 0.1198, -0.3080, 0.2292, 0.5757, -0.3567, -0.2731, -0.0586, -0.2849],
                           [0.2194, -0.1622, 0.3219, -0.3008, -0.3713, -0.3034, -0.2385, 0.0412, -0.5205, 0.0280],
                           [-0.5499, -0.0733, -0.5236, -0.6753, -0.7045, -0.1839, -0.1037, -0.5026, -0.4055, -0.3416],
                           [0.1573, -0.1301, -0.2882, -0.3464, 0.6643, 0.1980, -0.6804, 0.5359, 0.5996, 0.0124],
                           [-0.6436, 0.0587, -0.6520, -0.0471, 0.1667, 0.6042, 0.5752, -0.6296, -0.2976,
                            -0.3757]]).astype(np.float32).reshape([1, -1])

        whh_l0 = np.array([[0.3358, 0.2790],
                           [-0.5355, 0.0989],
                           [-0.1402, 0.5120],
                           [0.1335, 0.1653],
                           [0.3533, -0.3531],
                           [0.4166, -0.4420],
                           [-0.5454, -0.1720],
                           [0.0041, -0.0799]]).astype(np.float32).reshape([1, -1])

        bih_l0 = np.array([0.5518, 0.1083, 0.4829, 0.0607, -0.1770, -0.6944, 0.3059, 0.5354]).astype(
            np.float32).reshape([1, -1])
        bhh_l0 = np.array([0.5025, -0.1261, -0.5405, 0.3220, -0.3441, 0.6488, -0.0284, -0.2334]).astype(
            np.float32).reshape([1, -1])

        wih_reverse_l0 = np.array(
            [[-0.7048, -0.1768, 0.2288, -0.0760, -0.1319, 0.0820, -0.4132, 0.3644, 0.3919, 0.2449],
             [0.0551, -0.0530, -0.5883, 0.0799, -0.5025, 0.1500, -0.4067, -0.3764, -0.3018, 0.2467],
             [-0.2279, 0.3144, 0.5705, 0.4617, 0.1729, 0.6539, -0.2086, 0.5355, 0.4439, 0.0122],
             [0.6967, -0.5245, 0.3527, 0.3386, 0.0429, -0.3803, -0.4328, -0.4767, 0.4481, -0.2405],
             [0.6744, -0.2776, 0.0798, 0.1543, 0.6421, 0.6102, 0.3591, -0.4431, -0.6327, -0.0075],
             [-0.4520, 0.4201, -0.2374, -0.1556, -0.4175, -0.6834, 0.3096, -0.1581, 0.0127, 0.6872],
             [0.1788, -0.5442, -0.3675, -0.2887, -0.3004, 0.5813, 0.1618, 0.6875, -0.4678, 0.0071],
             [-0.6453, -0.2528, 0.5675, -0.5154, -0.4129, -0.0214, 0.5539, 0.0343, 0.1712, 0.5644]]).astype(
                 np.float32).reshape([1, -1])

        whh_reverse_l0 = np.array([[-0.6657, 0.6330],
                                   [-0.2290, 0.6556],
                                   [0.4808, -0.2712],
                                   [0.0407, -0.2587],
                                   [0.3837, 0.0382],
                                   [0.2268, 0.1217],
                                   [-0.6404, -0.3336],
                                   [0.5461, -0.0764]]).astype(np.float32).reshape([1, -1])

        bih_reverse_l0 = np.array([0.0314, 0.1009, 0.3664, -0.6732, -0.6944, 0.5098, -0.1251, 0.2644]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse_l0 = np.array([-0.1961, -0.3836, 0.1191, -0.7022, -0.0961, 0.5493, -0.6979, 0.0017]).astype(
            np.float32).reshape([1, -1])

        # Layer-1 input weights have 4 columns: hidden_size(2) * 2 directions
        # of the layer-0 output feed into layer 1.
        wih_l1 = np.array([[1.2746e-01, -3.3346e-01, 1.5589e-01, -4.7986e-01],
                           [6.5835e-01, 3.8135e-01, -3.8409e-01, -3.6499e-01],
                           [-6.0374e-04, -1.2227e-01, -1.5955e-01, 4.2772e-01],
                           [-1.8281e-01, -5.0484e-01, 7.0204e-01, 6.5872e-01],
                           [3.7765e-01, -4.3494e-01, 3.1503e-01, -4.2504e-02],
                           [6.3506e-01, -4.3049e-02, -5.7413e-01, -2.5134e-01],
                           [8.7181e-02, -5.5216e-01, 5.5436e-01, -3.9599e-01],
                           [4.4611e-01, -4.2690e-01, 6.6142e-01, 6.3882e-01]]).astype(np.float32).reshape([1, -1])

        whh_l1 = np.array([[-0.0049, -0.3267],
                           [0.0863, -0.6277],
                           [0.4815, -0.2236],
                           [0.5996, -0.3441],
                           [0.3959, -0.0249],
                           [0.3986, -0.0922],
                           [-0.5321, 0.0877],
                           [0.2811, -0.0483]]).astype(np.float32).reshape([1, -1])

        bih_l1 = np.array([0.0032, -0.0893, 0.5706, 0.3712, 0.0590, 0.0044, 0.2417, 0.1291]).astype(np.float32).reshape(
            [1, -1])
        bhh_l1 = np.array([-0.0704, 0.3908, -0.1121, 0.6970, -0.6216, 0.6340, -0.2945, 0.5224]).astype(
            np.float32).reshape([1, -1])

        wih_reverse_l1 = np.array([[-0.2693, 0.3487, 0.0692, 0.0047],
                                   [0.6187, 0.5649, 0.0680, 0.5110],
                                   [-0.5262, -0.3307, -0.3892, 0.5382],
                                   [-0.2925, 0.5185, -0.1385, 0.3431],
                                   [-0.3252, 0.3809, -0.4680, 0.3379],
                                   [0.4763, -0.5465, 0.0033, -0.5144],
                                   [0.3826, -0.3879, -0.2439, 0.2571],
                                   [-0.0422, -0.0359, -0.4197, -0.2209]]).astype(np.float32).reshape([1, -1])

        whh_reverse_l1 = np.array([[-0.4691, 0.5944],
                                   [-0.6885, 0.1708],
                                   [0.6391, -0.3690],
                                   [-0.5919, 0.1805],
                                   [-0.6853, -0.6215],
                                   [-0.4635, -0.6714],
                                   [-0.2050, 0.0513],
                                   [0.3411, -0.2833]]).astype(np.float32).reshape([1, -1])

        bih_reverse_l1 = np.array([0.5764, -0.7010, -0.0831, -0.3779, -0.2743, 0.0480, -0.2707, -0.5583]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse_l1 = np.array([0.3379, -0.2671, -0.2789, -0.6611, -0.5542, -0.0188, 0.1831, 0.3612]).astype(
            np.float32).reshape([1, -1])

        '''
        weight
            layer0
                forward
                    wih
                    whh
                reverse
                    wih
                    whh
            layer1
                forward
                    wih
                    whh
                reverse
                    wih
                    whh
            ... ...
        bias:
            layer0
                forward
                    bih
                    bhh
                reverse
                    bih
                    bhh
            layer1
                forward
                    bih
                    bhh
                reverse
                    bih
                    bhh
            ... ...
        '''
        # Pack all weights first, then all biases, in the layer/direction
        # order shown in the string above, into one flat [-1, 1, 1] tensor.
        w_np = np.concatenate(
            (wih_l0, whh_l0, wih_reverse_l0, whh_reverse_l0, wih_l1, whh_l1, wih_reverse_l1, whh_reverse_l1,
             bih_l0, bhh_l0, bih_reverse_l0, bhh_reverse_l0, bih_l1, bhh_l1, bih_reverse_l1, bhh_reverse_l1),
            axis=1).reshape([-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    @ms_function
    def construct(self):
        # Keep only the first LSTM result (the per-step output sequence);
        # the hn/cn state outputs are dropped.
        return self.lstm(self.x, self.h, self.c, self.w)[0]
769
770
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_grad():
    """Backward pass of a bidirectional 2-layer LSTM on GPU.

    Feeds a fixed upstream gradient `dy` through `Grad(Net(...))` and checks
    the resulting input gradient `dx` and state gradients `dh`, `dc` against
    precomputed reference values to a 1e-4 absolute tolerance.
    """
    seq_len = 5
    batch_size = 2

    input_size = 10
    hidden_size = 2
    num_layers = 2
    has_bias = True
    bidirectional = True
    dropout = 0.0

    net = Grad(Net(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout))

    def check_grad(actual, expect, tol=1.0e-4):
        # Element-wise |actual - expect| < tol, expressed as the same pair
        # of one-sided comparisons previously repeated inline for dx/dh/dc.
        diff = actual.asnumpy() - expect
        assert np.all(diff < tol)
        assert np.all(-diff < tol)

    # Upstream gradient w.r.t. the LSTM output, shape
    # [seq_len, batch_size, hidden_size * num_directions].
    dy = np.array([[[-3.5471e-01, 7.0540e-01, -7.5945e-01, -1.2322e+00],
                    [2.7161e-01, 1.0865e+00, -2.1827e-03, 8.8031e-01]],

                   [[-4.2431e-01, 1.4955e+00, 4.6576e-01, -2.7230e+00],
                    [-4.0418e-01, -2.3282e-01, 9.1253e-01, -2.7379e-01]],

                   [[-1.3654e+00, 1.9251e+00, -1.6808e+00, -3.2642e-02],
                    [-4.6481e-01, 1.3138e+00, 1.2956e-02, 1.0198e+00]],

                   [[1.2914e+00, -2.3753e-01, 9.4763e-01, 1.7930e-02],
                    [5.3589e-01, -1.0981e-01, 1.5377e+00, 6.2709e-01]],

                   [[-1.6032e+00, -1.8818e-01, 7.0441e-01, -2.8765e+00],
                    [1.0065e-01, 9.2045e-01, 2.7426e-01, 2.6196e-01]]]).astype(np.float32)

    dx, dh, dc, _ = net(Tensor(dy))

    # Expected gradient w.r.t. the input, [seq_len, batch_size, input_size].
    expect_dx = np.array([[[0.01697153, -0.0096909, 0.01306139, 0.00863109, -0.00122794, -0.00746152, -0.00879683,
                            0.00643571, 0.0015958, 0.01480642],
                           [0.05794962, -0.02326604, 0.01862703, 0.02053947, 0.02607713, -0.01278067, 0.04250786,
                            -0.02686035, -0.07441005, 0.00806021]],

                          [[-0.026675, -0.01024149, -0.02492021, -0.00457492, -0.0085863, 0.02341479, 0.02188834,
                            -0.04139283, -0.01367766, -0.00305065],
                           [-0.00762213, -0.01914341, -0.03233681, -0.03580827, -0.02201782, -0.00153102, -0.00097455,
                            -0.02708411, -0.03711082, -0.02804472]],

                          [[-0.0040581, -0.00116989, 0.01652471, 0.02182668, -0.02547193, -0.04171437, 0.04185125,
                            0.01589275, -0.00517019, 0.06554792],
                           [-0.02294365, -0.00589715, -0.01425684, -0.01499153, -0.05327821, -0.03133425, 0.00755623,
                            -0.04192506, -0.02122675, -0.01214214]],

                          [[-0.00041491, 0.00240709, -0.00942589, 0.00719656, 0.01438523, 0.00931082, 0.00534746,
                            -0.0004002, 0.01299422, 0.00181135],
                           [-0.01704482, -0.00887032, -0.01746774, -0.03289891, -0.04259495, -0.01928082, -0.01570587,
                            -0.01242383, -0.01799918, -0.00610236]],

                          [[0.00207505, -0.0008109, 0.00114241, 0.00251349, -0.00065676, 0.00151333, -0.00077485,
                            -0.00034354, -0.00028289, -0.0006986],
                           [-0.00240827, -0.0001309, 0.01401818, -0.01272261, -0.02665948, -0.01095799, -0.007761,
                            -0.0087831, 0.01038029, 0.02021475]]]).astype(np.float32)
    check_grad(dx, expect_dx)

    # Expected gradients w.r.t. the initial hidden/cell states,
    # [num_layers * num_directions, batch_size, hidden_size].
    expect_dh = np.array([[[-0.00696833, 0.00212885],
                           [0.01416209, 0.0002706]],

                          [[0.00297393, -0.0021012],
                           [0.00458834, 0.00400078]],

                          [[0.08658642, -0.10590762],
                           [0.1516603, -0.10525411]],

                          [[0.11888178, -0.04759264],
                           [0.05898442, -0.08082277]]]).astype(np.float32)
    check_grad(dh, expect_dh)

    expect_dc = np.array([[[0.00887521, -0.01391486],
                           [0.03858164, -0.04941981]],

                          [[0.00665188, 0.00184223],
                           [-0.00541833, 0.01410913]],

                          [[-0.2068854, 0.5585638],
                           [0.01735374, 0.3537254]],

                          [[0.20350647, -0.2792883],
                           [0.18456826, 0.02278761]]]).astype(np.float32)
    check_grad(dc, expect_dc)
866
867
class LstmNetWithDropout(nn.Cell):
    """Unidirectional single-layer LSTM fixture used by the dropout test.

    Like `Net` above, all inputs, initial states and weights are hard-coded
    literals so the forward output is deterministic and can be compared
    against precomputed expected values.
    """

    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(LstmNetWithDropout, self).__init__()

        # Bidirectional would double the leading dimension of h/c; the
        # driving test passes bidirectional=False, so this stays 1.
        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        # Fixed network input, shape [seq_len, batch_size, input_size].
        input_np = np.array([[[-2.48789445e-01, -2.18991071e-01, -8.41492534e-01, -5.73351622e-01, 8.20644796e-02,
                               4.14313585e-01, -1.30143976e+00, -4.43366140e-01, -1.21003680e-01, -2.11284861e-01],
                              [9.94045794e-01, 3.18840504e-01, 4.81898338e-01, -4.83986028e-02, -9.26419497e-02,
                               -2.57977694e-01, 1.82191110e+00, 5.95121741e-01, 6.30752742e-01, -6.01903737e-01]],

                             [[7.67166913e-01, 5.41202351e-02, -1.24094069e+00, 1.38814664e+00, 2.05845284e+00,
                               7.29744852e-01, -1.12405574e+00, 3.78702253e-01, 2.28524983e-01, 2.02445173e+00],
                              [-1.85264975e-01, -4.55119252e-01, 1.23624969e+00, 1.24347043e+00, -1.68316591e+00,
                               -3.55918944e-01, 3.07149738e-01, -3.44966322e-01, -1.08978853e-01, 1.80912763e-01]],

                             [[-6.47622466e-01, 1.31204927e+00, 6.47477210e-01, -7.93370783e-01, 3.08402872e-04,
                               -5.12097359e-01, -1.69133916e-01, 8.57838035e-01, -3.63963723e-01, 6.35978997e-01],
                              [-3.92911851e-01, 8.27334300e-02, -1.11347124e-01, 8.79961967e-01, 6.02812059e-02,
                               -3.76448452e-01, -1.48800862e+00, -9.48699772e-01, -1.24202335e+00, 1.65264118e+00]],

                             [[4.05404866e-01, 5.67396320e-02, -2.05705926e-01, -8.70196745e-02, -7.34854519e-01,
                               -1.07580565e-01, 1.33716142e+00, -1.18140256e+00, 2.66074872e+00, -3.26788813e-01],
                              [6.97183967e-01, -2.32625628e+00, 1.20393467e+00, -2.32532692e+00, 2.03347206e+00,
                               -7.58083522e-01, 1.35564697e+00, -2.32149422e-01, 9.85125721e-01, 1.00944638e+00]],

                             [[9.89606023e-01, -5.30669808e-01, -2.66087383e-01, 8.14819038e-01, 1.07067376e-01,
                               -1.76214290e+00, -5.04977465e-01, 1.94490123e+00, 5.10450959e-01, -2.29238123e-01],
                              [-1.32928836e+00, -1.18175328e-01, -5.17818272e-01, -1.45089477e-01, 7.13987231e-01,
                               -7.41293788e-01, -3.67817104e-01, 1.18039274e+00, -6.03745162e-01,
                               -5.83392143e-01]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        # Initial hidden/cell states. NOTE(review): the literal data is
        # shaped (1, 2, 2), so the declared shape only matches when
        # num_layers * num_directions == 1 -- true for the driving test,
        # but this class is not general in those parameters.
        self.h = Parameter(initializer(
            Tensor(np.array([[[-0.47240502, 1.6824378],
                              [-0.00978304, 0.8179632]]]).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        self.c = Parameter(initializer(
            Tensor(np.array([[[-0.85975164, -0.3198615],
                              [-0.9821871, 0.26311848]]]).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        # Single layer, single direction: only one wih/whh/bih/bhh set is
        # needed; each is flattened to a row and packed into the one weight
        # tensor P.LSTM consumes (weights first, then biases).
        wih = np.array([[0.4473, -0.5509, -0.1585, -0.6215, 0.6228, 0.3462, 0.3015, -0.3714, 0.3119, -0.1151],
                        [-0.6923, 0.1373, 0.2214, 0.2280, 0.6960, -0.6368, 0.5725, -0.1359, 0.0742, -0.6777],
                        [-0.4432, 0.6162, -0.1066, -0.6138, -0.2529, -0.5638, -0.0603, 0.3039, 0.1068, -0.5300],
                        [0.4337, -0.1215, -0.5088, -0.0045, 0.2828, 0.1411, 0.0741, 0.6936, -0.4603, 0.6986],
                        [-0.2079, -0.5518, 0.5375, -0.2168, 0.3662, 0.0948, -0.0564, -0.1808, -0.6672, -0.2410],
                        [0.5142, 0.0790, -0.1123, -0.2351, 0.3982, -0.6351, 0.5906, 0.3917, -0.0850, -0.5397],
                        [-0.4795, -0.6576, 0.5693, 0.0047, -0.6626, 0.1013, -0.4015, -0.4040, -0.2817, 0.4430],
                        [0.0251, -0.3035, -0.6026, 0.2693, -0.2749, 0.1501, -0.5778, 0.5570, -0.7065, -0.6196]]).astype(
                            np.float32).reshape([1, -1])

        whh = np.array([[-0.4344, -0.2529],
                        [0.0377, 0.7046],
                        [-0.0579, -0.5240],
                        [-0.4801, -0.1149],
                        [-0.4010, -0.5614],
                        [0.4721, 0.4366],
                        [-0.4282, 0.0816],
                        [0.1574, -0.3359]]).astype(np.float32).reshape([1, -1])

        bih = np.array([0.2431, 0.5967, -0.2417, -0.4169, -0.5326, 0.5685, -0.2971, -0.4326]).astype(
            np.float32).reshape([1, -1])
        bhh = np.array([-0.1751, -0.2270, -0.3980, -0.4983, -0.3527, -0.2774, 0.6371, -0.3330]).astype(
            np.float32).reshape([1, -1])

        w_np = np.concatenate((wih, whh, bih, bhh), axis=1).reshape([-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    def construct(self):
        # Returns the full LSTM output tuple (output, hn, cn, ...) -- the
        # caller unpacks five results and keeps only the first.
        return self.lstm(self.x, self.h, self.c, self.w)
946
947
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_lstm_dropout():
    """Forward pass of a unidirectional single-layer LSTM with dropout=1.0.

    NOTE(review): LSTM dropout is presumably applied between stacked layers,
    so with num_layers=1 it should not perturb the output -- which would be
    why an exact expected tensor can be asserted here. Confirm against the
    P.LSTM operator documentation.
    """
    seq_len = 5
    batch_size = 2

    input_size = 10
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = False
    dropout = 1.0

    net = LstmNetWithDropout(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
    y, _, _, _, _ = net()
    # Expected output sequence, [seq_len, batch_size, hidden_size]; cast to
    # float32 to match the expected arrays in the other tests of this file.
    expect_y = np.array([[[-0.45210335, -0.0844336],
                          [-0.14677924, 0.07140275]],

                         [[-0.18895914, -0.11084185],
                          [-0.26356253, -0.06367199]],

                         [[-0.33480304, 0.00812318],
                          [-0.0887147, -0.1564593]],

                         [[-0.33231455, 0.00743252],
                          [0.428218, 0.00723737]],

                         [[-0.20026046, 0.43491203],
                          [0.17739448, 0.5313992]]]).astype(np.float32)

    # Size the tolerance array to the actual output shape, as test_grad does.
    # The previous [num_layers, batch_size, hidden_size] shape, i.e. (1, 2, 2),
    # only matched the (5, 2, 2) output via broadcasting.
    error = np.ones(y.asnumpy().shape) * 1.0e-4
    diff = y.asnumpy() - expect_y
    assert np.all(diff < error)
    assert np.all(-diff < error)
983