# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
import pytest

import mindspore.context as context
import mindspore.nn as nn
from mindspore.common.api import ms_function
from mindspore.common.initializer import initializer
from mindspore.common.parameter import ParameterTuple, Parameter
from mindspore.common.tensor import Tensor
from mindspore.ops import composite as C
from mindspore.ops import operations as P

context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')


class LstmNet(nn.Cell):
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(LstmNet, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        input_np = np.array([[[0.6755, -1.6607, 0.1367, -0.9209, -1.7088, 0.3953, 2.7120, 0.1103, 0.1504, -0.3611],
                              [0.4276, -0.7850, -0.3758, 0.8604, -0.1361, -1.3618, -0.6251, -0.8391, 0.8142, 0.4068]],

                             [[-0.6424, -0.6095, 0.6639, -0.7253, 2.1190, -0.2840, 0.3858, 0.1691, 0.6764, 1.2903],
                              [0.7918, 0.4147, -0.5089, -0.3582, -1.4279, -0.7975, -0.0390, -0.4718, 0.4322, -0.7995]],

                             [[-1.5612, 0.0120, -0.7289, -1.2479, -0.6197, -0.6099, 0.9543, 0.4362, -1.3141, 0.4273],
                              [-0.6656, -0.6626, -0.5883, -0.6922, 0.5512, 1.7031, -1.2812, -0.2004, -0.9224, 0.4106]],

                             [[-0.9667, -0.6296, -0.7310, 1.2503, -0.1650, 1.2050, -0.1704, -0.5215, 0.1595, 0.3904],
                              [0.1026, -0.6821, -0.4387, -1.1637, -0.5000, 0.0590, 0.5219, -0.6835, 2.4406, 0.7135]],

                             [[-0.4710, 0.6558, -0.3144, -1.2213, 0.1556, -0.3836, -0.1081, -0.1440, -1.1231, 0.6279],
                              [-0.8449, -0.2184, -0.1806, -0.0615, -0.5660, -0.3556, 1.6891, -1.0286, 1.3361,
                               -0.4313]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        self.c = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

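        # P.LSTM takes all trainable parameters as one flattened tensor: the
        # input-hidden matrix (4 * hidden_size rows, one block per gate), the
        # hidden-hidden matrix, then the bih/bhh bias vectors, concatenated and
        # reshaped to [-1, 1, 1] (see w_np below).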
        wih = np.array([[3.4021e-01, -4.6622e-01, 4.5117e-01, 2.3627e-01, 3.7844e-01,
                         2.8770e-01, 4.1631e-01, -6.2628e-01, -4.8008e-01, -4.9148e-01],
                        [-6.4257e-02, -2.4807e-01, 1.3550e-02, 6.8946e-01, -1.2608e-02,
                         -7.1719e-02, -1.3566e-01, -4.9215e-01, 2.8509e-01, -6.3540e-01],
                        [-6.9863e-01, 5.9773e-01, -3.9062e-01, -7.6151e-02, 5.6803e-04,
                         -7.0420e-01, -6.1822e-01, 4.1854e-01, 4.0596e-01, 6.4867e-01],
                        [-3.0253e-01, -1.9464e-01, 7.0591e-01, 4.9368e-01, -5.9758e-01,
                         1.3251e-02, 3.5685e-01, -3.7640e-01, -4.4612e-01, 5.1794e-01],
                        [-3.2140e-01, 5.5578e-01, 6.3589e-01, -6.4249e-01, 5.7258e-01,
                         2.4256e-01, -2.7954e-01, 2.5202e-01, 2.9235e-01, -3.9979e-01],
                        [1.6547e-01, -7.9030e-02, -2.0045e-01, 6.2484e-01, -1.0727e-01,
                         -5.0010e-01, -2.9165e-01, -1.7620e-01, 1.5939e-01, -2.2744e-01],
                        [-4.0835e-01, 3.6751e-01, 4.7989e-01, 5.8886e-01, 5.3598e-01,
                         -2.9055e-01, -2.8129e-01, 6.0219e-01, 4.9193e-01, 3.3115e-01],
                        [-5.6894e-01, -5.0359e-01, 4.7491e-01, 5.8110e-01, -5.4921e-01,
                         -6.1343e-01, -5.8236e-02, -3.7682e-01, 4.8338e-01, -2.1551e-01]]).astype(np.float32).reshape(
            [1, -1])

        whh = np.array([[-0.4820, -0.2350],
                        [-0.1195, 0.0519],
                        [0.4511, -0.3961],
                        [-0.5962, 0.0906],
                        [0.2162, -0.1178],
                        [0.6237, 0.0711],
                        [0.1867, -0.1225],
                        [0.1831, 0.0850]]).astype(np.float32).reshape([1, -1])

        bih = np.array([-0.2862, 0.0034, 0.2059, -0.6544, 0.3244, -0.2472, 0.0852, -0.3050]).astype(np.float32).reshape(
            [1, -1])
        bhh = np.array([-0.6575, 0.1562, -0.6434, 0.0212, -0.2493, -0.5626, 0.1530, -0.5235]).astype(
            np.float32).reshape([1, -1])

        w_np = np.concatenate((wih, whh, bih, bhh), axis=1).reshape([-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    @ms_function
    def construct(self):
        return self.lstm(self.x, self.h, self.c, self.w)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_lstm():
    seq_len = 5
    batch_size = 2

    input_size = 10
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = False
    dropout = 0.0

    num_directions = 1
    if bidirectional:
        num_directions = 2

    net = LstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
    y, h, c, _, _ = net()
    expect_y = np.array([[[-2.1429e-02, 1.1760e-01],
                          [3.1144e-01, 6.3090e-01]],

                         [[-5.0190e-04, -4.5812e-02],
                          [2.0324e-02, 2.0392e-01]],

                         [[-1.0370e-02, -6.0141e-02],
                          [6.0931e-02, -1.8913e-02]],

                         [[-1.6031e-01, -2.3428e-01],
                          [4.1886e-02, -2.2162e-01]],

                         [[-3.9243e-02, -3.2950e-02],
                          [-4.1257e-02, -4.5276e-01]]])

    error = np.ones([seq_len, batch_size, hidden_size * num_directions]) * 1.0e-4
    diff = y.asnumpy() - expect_y
    assert np.all(diff < error)
    assert np.all(-diff < error)

    expect_h = np.array([[[-0.0392, -0.0329],
                          [-0.0413, -0.4528]]])
    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
    diff = h.asnumpy() - expect_h
    assert np.all(diff < error)
    assert np.all(-diff < error)

    expect_c = np.array([[[-0.0984, -0.3665],
                          [-0.1010, -0.6792]]])
    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
    diff = c.asnumpy() - expect_c
    assert np.all(diff < error)
    assert np.all(-diff < error)


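# Single-layer bidirectional LSTM: the reverse-direction weights (wih_reverse,
# whh_reverse, bih_reverse, bhh_reverse) are packed into w_np alongside the
# forward ones.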
class BiLstmNet(nn.Cell):
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(BiLstmNet, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        input_np = np.array([[[-1.7322, 1.6642, -1.1861, 0.2955, -0.7907, 0.2982, -1.3413, 1.0665, -0.0436, -0.1883],
                              [0.2195, 0.5917, -0.6739, 0.2388, -0.5364, -1.3309, -0.6018, -0.3081, -0.9648, -1.1627]],

                             [[-0.5094, -2.6025, -0.9302, -1.1937, 0.6501, -0.1903, -0.0661, 0.1080, 0.9829, -0.2280],
                              [1.3961, 0.2239, -0.1947, -0.3206, 0.5791, 0.3396, 0.1728, -1.2007, -1.0994, -1.3278]],

                             [[0.1870, -1.1090, -0.9705, 0.2207, 0.3743, 0.1158, -0.5443, -0.5559, 0.1538, -0.3975],
                              [-0.2347, -0.1245, -0.2335, 0.3164, 1.0997, -0.3928, -1.8517, 1.1136, -1.5051, -0.0071]],

                             [[1.2739, 2.5438, -0.4289, -0.7981, -1.3682, -2.2509, 0.2028, 1.3410, 2.9502, -1.1650],
                              [0.1254, 0.2726, 0.0251, 0.9323, 0.7315, 0.8231, -0.2123, -0.6885, 0.9893, -0.2047]],

                             [[0.1870, -0.9066, 0.7155, 0.5438, -0.9757, -0.5828, -0.3417, 1.5681, 1.0326, -0.0179],
                              [-0.7746, -1.0695, -0.5278, 2.5307, -0.1002, -1.5773, 0.7717, 1.0266, -0.0798,
                               1.2333]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        self.c = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        wih = np.array([[-0.2959, -0.1142, 0.3662, 0.5406, 0.1738, 0.2697, -0.6960, -0.0464, 0.3486, 0.1888],
                        [0.3043, 0.1505, -0.1207, -0.2456, 0.2735, 0.6673, -0.3352, -0.6153, -0.5731, -0.2726],
                        [-0.2657, -0.5570, 0.6785, -0.1861, -0.0652, 0.5757, 0.6442, -0.4068, -0.3260, 0.7054],
                        [0.6607, 0.6927, -0.1354, 0.2484, 0.2053, 0.5743, -0.0212, 0.3340, -0.5685, -0.5668],
                        [0.6701, -0.3013, -0.1202, -0.4200, -0.4280, -0.6329, -0.6074, -0.4997, -0.6215, -0.6259],
                        [0.0299, -0.6071, -0.4683, -0.3363, -0.0044, -0.0007, 0.2700, 0.0202, -0.2880, -0.6869],
                        [0.3025, -0.2461, -0.5128, 0.6327, -0.1438, -0.5100, 0.1924, 0.2023, 0.3129, 0.2271],
                        [0.3777, 0.0546, 0.4790, -0.1895, 0.3588, 0.4490, 0.6850, 0.6240, -0.2739, -0.4474]]).astype(
            np.float32).reshape([1, -1])

        whh = np.array([[0.6346, -0.6366],
                        [-0.0248, -0.6156],
                        [-0.3821, 0.6327],
                        [-0.6132, -0.5071],
                        [0.4029, 0.0906],
                        [-0.5671, 0.2556],
                        [0.0268, -0.4347],
                        [0.1152, -0.3124]]).astype(np.float32).reshape([1, -1])

        bih = np.array([-0.3839, -0.5365, -0.6691, 0.1697, -0.1564, -0.0451, -0.5921, -0.5367]).astype(
            np.float32).reshape([1, -1])
        bhh = np.array([0.5952, -0.4905, 0.0423, -0.0293, -0.6638, 0.4348, -0.4291, -0.5541]).astype(
            np.float32).reshape([1, -1])

        wih_reverse = np.array([[-0.2938, 0.0048, 0.2704, -0.3387, -0.4529, -0.2586, 0.1352, -0.1208, -0.1423, -0.0220],
                                [-0.3701, 0.0201, -0.0255, 0.1340, -0.1938, -0.7056, -0.2303, 0.4814, 0.3636, -0.5018],
                                [-0.0284, -0.0108, -0.5788, 0.2389, 0.2604, 0.6774, -0.5525, 0.6265, -0.6126, 0.3197],
                                [-0.6906, 0.6991, -0.6138, 0.0044, 0.5714, 0.4176, 0.5451, -0.5114, -0.2286, 0.1105],
                                [0.3547, 0.6233, -0.4543, -0.6799, 0.1109, 0.5601, 0.0212, 0.6926, 0.0597, -0.4383],
                                [-0.1370, -0.5852, 0.0596, 0.5494, 0.5789, -0.0534, 0.1092, 0.3544, -0.1571, 0.4444],
                                [-0.5886, -0.4765, -0.3837, -0.6634, 0.0963, -0.1385, -0.0837, -0.1354, 0.0547,
                                 -0.2870],
                                [0.2049, -0.7057, -0.1736, 0.4724, 0.1957, -0.3037, 0.4626, -0.6465, 0.4575,
                                 0.4230]]).astype(np.float32).reshape([1, -1])

        whh_reverse = np.array([[0.2339, -0.0307],
                                [-0.5850, 0.6328],
                                [0.5856, -0.5601],
                                [0.4875, -0.6929],
                                [0.0314, 0.2531],
                                [-0.2523, 0.3244],
                                [0.5199, 0.5146],
                                [0.3968, 0.4511]]).astype(np.float32).reshape([1, -1])

        bih_reverse = np.array([-0.1760, 0.2828, 0.2450, -0.4016, -0.4664, 0.4031, -0.1945, -0.1509]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse = np.array([0.6427, 0.4806, 0.6278, 0.1596, 0.0038, -0.3418, 0.0549, -0.3900]).astype(
            np.float32).reshape([1, -1])

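        # In the flattened layout, all weight matrices come first (forward, then
        # reverse) and all bias vectors follow.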
        w_np = np.concatenate((wih, whh, wih_reverse, whh_reverse, bih, bhh, bih_reverse, bhh_reverse), axis=1).reshape(
            [-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    @ms_function
    def construct(self):
        return self.lstm(self.x, self.h, self.c, self.w)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_bilstm():
    seq_len = 5
    batch_size = 2

    input_size = 10
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = True
    dropout = 0.0

    num_directions = 1
    if bidirectional:
        num_directions = 2

    net = BiLstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
    y, h, c, _, _ = net()
    expect_y = np.array([[[-0.0826, 0.0209, 0.1715, -0.0072],
                          [0.1035, 0.0594, -0.0867, -0.1077]],

                         [[-0.1647, 0.0293, -0.2189, 0.3809],
                          [0.0466, 0.4461, 0.0784, 0.0905]],

                         [[-0.0182, 0.0512, 0.1758, -0.1147],
                          [0.0460, 0.1588, -0.0314, 0.0886]],

                         [[-0.0330, 0.0551, 0.2084, -0.1154],
                          [-0.1641, 0.1118, -0.0122, 0.4916]],

                         [[-0.2997, 0.0223, 0.1328, 0.3377],
                          [-0.6669, 0.0089, 0.1138, 0.7786]]])

    error = np.ones([seq_len, batch_size, hidden_size * num_directions]) * 1.0e-4
    diff = y.asnumpy() - expect_y
    assert np.all(diff < error)
    assert np.all(-diff < error)

    expect_h = np.array([[[-0.2997, 0.0223],
                          [-0.6669, 0.0089]],

                         [[0.1715, -0.0072],
                          [-0.0867, -0.1077]]])
    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
    diff = h.asnumpy() - expect_h
    assert np.all(diff < error)
    assert np.all(-diff < error)

    expect_c = np.array([[[-0.6049, 0.0825],
                          [-0.9433, 0.1006]],

                         [[0.3037, -0.2036],
                          [-0.1633, -0.5663]]])

    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-3
    diff = c.asnumpy() - expect_c
    assert np.all(diff < error)
    assert np.all(-diff < error)


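# Two stacked bidirectional layers; weights are packed layer-major, as spelled
# out in the layout comment inside __init__ below.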
class MultiLayerBiLstmNet(nn.Cell):
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(MultiLayerBiLstmNet, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        input_np = np.array([[[-0.1887, -0.4144, -0.0235, 0.7489, 0.7522, 0.5969, 0.3342, 1.2198, 0.6786, -0.9404],
                              [-0.8643, -1.6835, -2.4965, 2.8093, 0.1741, 0.2707, 0.7387, -0.0939, -1.7990, 0.4765]],

                             [[-0.5963, -1.2598, -0.7226, 1.1365, -1.7320, -0.7302, 0.1221, -0.2111, -1.6173, -0.0706],
                              [0.8964, 0.1737, -1.0077, -0.1389, 0.4889, 0.4391, 0.7911, 0.3614, -1.9533, -0.9936]],

                             [[0.3260, -1.3312, 0.0601, 1.0726, -1.6010, -1.8733, -1.5775, 1.1579, -0.8801, -0.5742],
                              [-2.2998, -0.6344, -0.5409, -0.9221, -0.6500, 0.1206, 1.5215, 0.7517, 1.3691, 2.0021]],

                             [[-0.1245, -0.3690, 2.1193, 1.3852, -0.1841, -0.8899, -0.3646, -0.8575, -0.3131, 0.2026],
                              [1.0218, -1.4331, 0.1744, 0.5442, -0.7808, 0.2527, 0.1566, 1.1484, -0.7766, -0.6747]],

                             [[-0.6752, 0.9906, -0.4973, 0.3471, -0.1202, -0.4213, 2.0213, 0.0441, 0.9016, 1.0365],
                              [1.2223, -1.3248, 0.1207, -0.8256, 0.1816, 0.7057, -0.3105, 0.5713, 0.2804,
                               -1.0685]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        self.c = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        wih_l0 = np.array([[0.3715, -0.0723, 0.6017, 0.5115, -0.5357, 0.3794, -0.3752, -0.6205, -0.0370, -0.2904],
                           [0.7055, -0.4156, -0.3650, -0.0964, 0.4141, -0.2584, -0.4765, -0.0045, 0.2943, -0.2648],
                           [0.1355, 0.1697, 0.1883, 0.3754, 0.3744, -0.6128, 0.2328, -0.1275, 0.6604, 0.6498],
                           [-0.0266, 0.5805, -0.5358, -0.0929, 0.0797, 0.3744, 0.3299, -0.3825, 0.5804, -0.0855],
                           [0.1141, 0.2587, -0.4370, 0.6430, -0.0017, 0.4865, 0.2814, 0.6213, -0.6415, 0.4574],
                           [-0.3958, -0.5827, -0.1056, 0.6987, -0.6591, -0.1326, 0.5237, 0.4667, -0.7001, -0.2326],
                           [0.3074, -0.3118, -0.4591, 0.2481, -0.2978, -0.1850, 0.4770, -0.0126, 0.3655, -0.4306],
                           [0.3033, -0.6264, -0.6551, 0.0069, -0.5238, -0.3950, 0.5681, -0.4931, -0.6258,
                            0.4079]]).astype(np.float32).reshape([1, -1])

        whh_l0 = np.array([[-0.3870, 0.0238],
                           [-0.3758, 0.2490],
                           [0.5437, -0.4117],
                           [0.1181, -0.2043],
                           [-0.5335, 0.1188],
                           [-0.0822, 0.2154],
                           [0.5844, -0.3239],
                           [-0.6537, 0.0278]]).astype(np.float32).reshape([1, -1])

        bih_l0 = np.array([0.5440, 0.5995, 0.0155, -0.6254, 0.5114, 0.3364, -0.1824, -0.6262]).astype(
            np.float32).reshape([1, -1])
        bhh_l0 = np.array([0.4139, -0.2513, -0.4023, 0.4222, 0.6387, -0.6147, 0.0677, 0.5355]).astype(
            np.float32).reshape([1, -1])

        wih_reverse_l0 = np.array([[6.5219e-01, 5.6162e-01, -1.8653e-01, 6.8789e-01, 1.3240e-01, 1.7699e-01, 1.2940e-01,
                                    -1.8520e-01, -5.5439e-01, -3.4946e-01],
                                   [3.7645e-01, 6.5475e-01, 3.5964e-01, 2.2433e-01, -1.7869e-01, -2.9047e-01,
                                    1.7615e-01, -5.3353e-01, -7.4204e-02, -2.5270e-01],
                                   [5.8095e-01, -4.6426e-04, 1.9262e-01, -5.1306e-01, -3.6811e-01, 4.4858e-01,
                                    6.2580e-01, 9.5494e-02, -6.9505e-01, 4.9500e-01],
                                   [-3.7810e-01, 1.5485e-01, -1.4735e-01, -1.5327e-01, -4.5702e-01, 3.0816e-01,
                                    -3.4280e-01, 2.1604e-01, 1.4087e-01, -5.7707e-01],
                                   [-3.8700e-01, -6.4653e-01, 6.0653e-01, -4.7297e-01, 6.8413e-02, -1.2681e-01,
                                    6.8464e-02, 6.7011e-01, 3.9950e-01, -2.0577e-01],
                                   [-1.8648e-01, -6.7198e-01, 3.8017e-01, -3.3147e-01, 5.3193e-01, -5.4952e-01,
                                    2.1774e-01, -4.6271e-01, 3.2611e-01, 6.3554e-02],
                                   [-4.5403e-01, -1.5910e-01, -7.5886e-02, 2.6313e-01, 6.8093e-01, -3.9960e-01,
                                    5.5428e-01, 1.0429e-01, 5.1322e-01, 1.9406e-01],
                                   [3.9698e-01, -5.2101e-01, 5.1372e-01, -3.9866e-01, 1.0115e-01, -4.1290e-02,
                                    -3.0980e-01, 2.1607e-01, 4.8420e-01, -1.9267e-01]]).astype(np.float32).reshape(
            [1, -1])

        whh_reverse_l0 = np.array([[-0.3231, -0.3960],
                                   [-0.1625, -0.3032],
                                   [0.3892, -0.0666],
                                   [0.0159, -0.4870],
                                   [-0.4953, 0.2278],
                                   [-0.5380, -0.5250],
                                   [0.0371, -0.4534],
                                   [-0.5452, 0.5012]]).astype(np.float32).reshape([1, -1])

        bih_reverse_l0 = np.array([0.0469, -0.0107, 0.3783, -0.2657, -0.0089, 0.5032, -0.0757, -0.2022]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse_l0 = np.array([-0.6584, 0.3977, 0.5597, -0.4784, 0.5360, -0.2532, 0.5362, -0.1063]).astype(
            np.float32).reshape([1, -1])

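        # Layer 1 consumes the concatenated forward/reverse outputs of layer 0,
        # so its input-hidden matrices have hidden_size * num_directions = 4 columns.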
        wih_l1 = np.array([[0.0602, 0.6977, -0.3882, 0.3734],
                           [-0.6896, -0.6014, -0.2311, 0.6433],
                           [-0.6778, -0.5100, -0.1496, 0.5774],
                           [-0.5824, 0.4656, -0.2835, -0.5688],
                           [0.5623, 0.3599, 0.1731, 0.3124],
                           [0.1492, -0.6663, -0.1099, -0.5282],
                           [0.4696, -0.1795, -0.6712, -0.3903],
                           [0.4995, 0.0709, -0.1738, 0.2822]]).astype(np.float32).reshape([1, -1])

        whh_l1 = np.array([[0.3770, 0.4139],
                           [0.5351, 0.6394],
                           [0.3901, -0.1072],
                           [0.1106, 0.1331],
                           [0.3970, 0.4693],
                           [0.2958, -0.3813],
                           [-0.3064, 0.5519],
                           [-0.2827, 0.5844]]).astype(np.float32).reshape([1, -1])

        bih_l1 = np.array([0.5242, 0.5896, 0.3709, 0.6202, 0.5008, 0.2674, 0.4356, -0.3261]).astype(np.float32).reshape(
            [1, -1])
        bhh_l1 = np.array([-0.6648, 0.6680, 0.2510, -0.1245, -0.0524, 0.5439, -0.1650, 0.5303]).astype(
            np.float32).reshape([1, -1])

        wih_reverse_l1 = np.array([[0.6477, 0.4416, 0.3803, -0.4708],
                                   [0.4497, 0.2833, -0.4739, -0.6361],
                                   [-0.5573, -0.3867, -0.0349, -0.4128],
                                   [-0.1545, 0.3720, 0.2354, -0.6090],
                                   [0.5965, 0.6301, -0.4591, -0.0120],
                                   [-0.1253, -0.1881, -0.4388, 0.4335],
                                   [0.1944, -0.1230, -0.6170, 0.1043],
                                   [-0.6700, 0.4343, 0.6474, 0.0113]]).astype(np.float32).reshape([1, -1])

        whh_reverse_l1 = np.array([[0.6576, 0.5573],
                                   [0.2318, 0.0187],
                                   [-0.6365, 0.5744],
                                   [-0.6494, -0.1820],
                                   [0.6461, -0.3344],
                                   [0.0906, -0.5405],
                                   [-0.5999, 0.5571],
                                   [-0.0488, 0.5345]]).astype(np.float32).reshape([1, -1])

        bih_reverse_l1 = np.array([-0.6058, -0.2812, -0.4449, -0.0802, 0.4931, 0.4066, 0.5960, 0.1968]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse_l1 = np.array([-0.2490, -0.3402, -0.5089, -0.3875, 0.4852, -0.0402, -0.0072, -0.1017]).astype(
            np.float32).reshape([1, -1])

        '''
        weight
            layer0
                forward
                    wih
                    whh
                reverse
                    wih
                    whh
            layer1
                forward
                    wih
                    whh
                reverse
                    wih
                    whh
            ... ...
        bias:
            layer0
                forward
                    bih
                    bhh
                reverse
                    bih
                    bhh
            layer1
                forward
                    bih
                    bhh
                reverse
                    bih
                    bhh
            ... ...
        '''
        w_np = np.concatenate(
            (wih_l0, whh_l0, wih_reverse_l0, whh_reverse_l0, wih_l1, whh_l1, wih_reverse_l1, whh_reverse_l1,
             bih_l0, bhh_l0, bih_reverse_l0, bhh_reverse_l0, bih_l1, bhh_l1, bih_reverse_l1, bhh_reverse_l1),
            axis=1).reshape([-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    @ms_function
    def construct(self):
        return self.lstm(self.x, self.h, self.c, self.w)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_multi_layer_bilstm():
    seq_len = 5
    batch_size = 2

    input_size = 10
    hidden_size = 2
    num_layers = 2
    has_bias = True
    bidirectional = True
    dropout = 0.0

    num_directions = 1
    if bidirectional:
        num_directions = 2

    net = MultiLayerBiLstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional,
                              dropout)
    y, h, c, _, _ = net()
    expect_y = np.array([[[0.5186, 0.5419, 0.2710, 0.0384],
                          [0.6196, 0.5539, 0.3266, 0.0866]],

                         [[0.5244, 0.5276, 0.3042, 0.0510],
                          [0.5143, 0.4937, 0.2828, 0.0387]],

                         [[0.5124, 0.5079, 0.2951, 0.0548],
                          [0.4051, 0.4493, 0.2369, 0.0077]],

                         [[0.4532, 0.4749, 0.2557, 0.0611],
                          [0.4879, 0.4812, 0.3160, 0.0368]],

                         [[0.4535, 0.4806, 0.3880, 0.0462],
                          [0.4674, 0.4849, 0.3890, 0.1008]]])

    error = np.ones([seq_len, batch_size, hidden_size * num_directions]) * 1.0e-4
    diff = y.asnumpy() - expect_y
    assert np.all(diff < error)
    assert np.all(-diff < error)

    expect_h = np.array([[[0.4730, 0.1638],
                          [0.1406, -0.0697]],

                         [[0.3887, -0.0518],
                          [-0.3988, -0.0071]],

                         [[0.4535, 0.4806],
                          [0.4674, 0.4849]],

                         [[0.2710, 0.0384],
                          [0.3266, 0.0866]]])
    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
    diff = h.asnumpy() - expect_h
    assert np.all(diff < error)
    assert np.all(-diff < error)

    expect_c = np.array([[[0.8713, 0.2694],
                          [0.2075, -0.2201]],

                         [[0.5084, -0.0964],
                          [-0.5155, -0.2452]],

                         [[1.1724, 1.0334],
                          [1.2003, 1.1058]],

                         [[0.5179, 0.0750],
                          [0.5309, 0.2012]]])

    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-3
    diff = c.asnumpy() - expect_c
    assert np.all(diff < error)
    assert np.all(-diff < error)


class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)

    @ms_function
    def construct(self, output_grad):
        weights = self.weights
        grads = self.grad(self.network, weights)(output_grad)
        return grads


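# Net returns only the output y (construct indexes [0]), so the sensitivity dy
# fed to Grad below has y's [seq_len, batch_size, hidden_size * num_directions]
# shape.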
class Net(nn.Cell):
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(Net, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        input_np = np.array([[[-0.5907, 1.0557, 1.7283, 0.6706, -1.2550, -0.5298, -0.2290, -0.6735, 0.8555, 1.4836],
                              [-1.7070, -0.5347, -0.9105, -0.2598, 0.0588, 1.5496, 1.0757, 0.3760, -1.2020, -0.2868]],

                             [[0.0151, 0.2126, 0.8090, -0.5292, -2.5590, 0.4279, -0.3081, -1.4706, -0.0498, 1.2301],
                              [0.4165, -0.5391, -0.0996, 0.1928, -0.4909, -0.1255, 0.4444, -1.3687, 1.3096, 0.6553]],

                             [[-0.7802, -0.2083, -0.6388, 1.3757, 0.4293, 0.5363, 0.3202, -0.6687, -1.3864, -0.2953],
                              [1.0799, -0.7204, 0.1130, -0.5857, -0.4855, -1.1068, 1.0126, 0.8716, 1.5460, -0.7392]],

                             [[2.2645, -0.6586, -0.2227, 1.4290, -0.5006, -1.6576, -0.1793, 0.5319, 0.1360, 0.2707],
                              [-0.4071, 0.1575, 1.4199, -0.9156, 0.1855, 0.4947, 1.0460, -0.6365, 0.1191, -0.6374]],

                             [[0.2468, 1.0815, -0.4893, 0.0664, 0.6405, -2.2967, 0.7612, 0.8759, 0.5685, -1.0999],
                              [-0.7272, -1.7750, -0.1164, -0.7159, 0.0061, -0.7839, -1.8329, 0.3434, -0.5634,
                               0.5384]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        self.c = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        wih_l0 = np.array([[0.2300, 0.6668, 0.4703, 0.0425, 0.0464, 0.6825, 0.2249, -0.4315, -0.2449, 0.2964],
                           [-0.2811, -0.3444, 0.2557, -0.5137, -0.5518, 0.1652, -0.6720, 0.1066, 0.3586, 0.6299],
                           [0.5728, -0.1784, 0.5661, 0.4012, 0.3856, -0.1899, 0.3102, 0.3717, -0.5651, 0.1952],
                           [0.1026, -0.0527, 0.1198, -0.3080, 0.2292, 0.5757, -0.3567, -0.2731, -0.0586, -0.2849],
                           [0.2194, -0.1622, 0.3219, -0.3008, -0.3713, -0.3034, -0.2385, 0.0412, -0.5205, 0.0280],
                           [-0.5499, -0.0733, -0.5236, -0.6753, -0.7045, -0.1839, -0.1037, -0.5026, -0.4055, -0.3416],
                           [0.1573, -0.1301, -0.2882, -0.3464, 0.6643, 0.1980, -0.6804, 0.5359, 0.5996, 0.0124],
                           [-0.6436, 0.0587, -0.6520, -0.0471, 0.1667, 0.6042, 0.5752, -0.6296, -0.2976,
                            -0.3757]]).astype(np.float32).reshape([1, -1])

        whh_l0 = np.array([[0.3358, 0.2790],
                           [-0.5355, 0.0989],
                           [-0.1402, 0.5120],
                           [0.1335, 0.1653],
                           [0.3533, -0.3531],
                           [0.4166, -0.4420],
                           [-0.5454, -0.1720],
                           [0.0041, -0.0799]]).astype(np.float32).reshape([1, -1])

        bih_l0 = np.array([0.5518, 0.1083, 0.4829, 0.0607, -0.1770, -0.6944, 0.3059, 0.5354]).astype(
            np.float32).reshape([1, -1])
        bhh_l0 = np.array([0.5025, -0.1261, -0.5405, 0.3220, -0.3441, 0.6488, -0.0284, -0.2334]).astype(
            np.float32).reshape([1, -1])

        wih_reverse_l0 = np.array(
            [[-0.7048, -0.1768, 0.2288, -0.0760, -0.1319, 0.0820, -0.4132, 0.3644, 0.3919, 0.2449],
             [0.0551, -0.0530, -0.5883, 0.0799, -0.5025, 0.1500, -0.4067, -0.3764, -0.3018, 0.2467],
             [-0.2279, 0.3144, 0.5705, 0.4617, 0.1729, 0.6539, -0.2086, 0.5355, 0.4439, 0.0122],
             [0.6967, -0.5245, 0.3527, 0.3386, 0.0429, -0.3803, -0.4328, -0.4767, 0.4481, -0.2405],
             [0.6744, -0.2776, 0.0798, 0.1543, 0.6421, 0.6102, 0.3591, -0.4431, -0.6327, -0.0075],
             [-0.4520, 0.4201, -0.2374, -0.1556, -0.4175, -0.6834, 0.3096, -0.1581, 0.0127, 0.6872],
             [0.1788, -0.5442, -0.3675, -0.2887, -0.3004, 0.5813, 0.1618, 0.6875, -0.4678, 0.0071],
             [-0.6453, -0.2528, 0.5675, -0.5154, -0.4129, -0.0214, 0.5539, 0.0343, 0.1712, 0.5644]]).astype(
            np.float32).reshape([1, -1])

        whh_reverse_l0 = np.array([[-0.6657, 0.6330],
                                   [-0.2290, 0.6556],
                                   [0.4808, -0.2712],
                                   [0.0407, -0.2587],
                                   [0.3837, 0.0382],
                                   [0.2268, 0.1217],
                                   [-0.6404, -0.3336],
                                   [0.5461, -0.0764]]).astype(np.float32).reshape([1, -1])

        bih_reverse_l0 = np.array([0.0314, 0.1009, 0.3664, -0.6732, -0.6944, 0.5098, -0.1251, 0.2644]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse_l0 = np.array([-0.1961, -0.3836, 0.1191, -0.7022, -0.0961, 0.5493, -0.6979, 0.0017]).astype(
            np.float32).reshape([1, -1])

        wih_l1 = np.array([[1.2746e-01, -3.3346e-01, 1.5589e-01, -4.7986e-01],
                           [6.5835e-01, 3.8135e-01, -3.8409e-01, -3.6499e-01],
                           [-6.0374e-04, -1.2227e-01, -1.5955e-01, 4.2772e-01],
                           [-1.8281e-01, -5.0484e-01, 7.0204e-01, 6.5872e-01],
                           [3.7765e-01, -4.3494e-01, 3.1503e-01, -4.2504e-02],
                           [6.3506e-01, -4.3049e-02, -5.7413e-01, -2.5134e-01],
                           [8.7181e-02, -5.5216e-01, 5.5436e-01, -3.9599e-01],
                           [4.4611e-01, -4.2690e-01, 6.6142e-01, 6.3882e-01]]).astype(np.float32).reshape([1, -1])

        whh_l1 = np.array([[-0.0049, -0.3267],
                           [0.0863, -0.6277],
                           [0.4815, -0.2236],
                           [0.5996, -0.3441],
                           [0.3959, -0.0249],
                           [0.3986, -0.0922],
                           [-0.5321, 0.0877],
                           [0.2811, -0.0483]]).astype(np.float32).reshape([1, -1])

        bih_l1 = np.array([0.0032, -0.0893, 0.5706, 0.3712, 0.0590, 0.0044, 0.2417, 0.1291]).astype(np.float32).reshape(
            [1, -1])
        bhh_l1 = np.array([-0.0704, 0.3908, -0.1121, 0.6970, -0.6216, 0.6340, -0.2945, 0.5224]).astype(
            np.float32).reshape([1, -1])

        wih_reverse_l1 = np.array([[-0.2693, 0.3487, 0.0692, 0.0047],
                                   [0.6187, 0.5649, 0.0680, 0.5110],
                                   [-0.5262, -0.3307, -0.3892, 0.5382],
                                   [-0.2925, 0.5185, -0.1385, 0.3431],
                                   [-0.3252, 0.3809, -0.4680, 0.3379],
                                   [0.4763, -0.5465, 0.0033, -0.5144],
                                   [0.3826, -0.3879, -0.2439, 0.2571],
                                   [-0.0422, -0.0359, -0.4197, -0.2209]]).astype(np.float32).reshape([1, -1])

        whh_reverse_l1 = np.array([[-0.4691, 0.5944],
                                   [-0.6885, 0.1708],
                                   [0.6391, -0.3690],
                                   [-0.5919, 0.1805],
                                   [-0.6853, -0.6215],
                                   [-0.4635, -0.6714],
                                   [-0.2050, 0.0513],
                                   [0.3411, -0.2833]]).astype(np.float32).reshape([1, -1])

        bih_reverse_l1 = np.array([0.5764, -0.7010, -0.0831, -0.3779, -0.2743, 0.0480, -0.2707, -0.5583]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse_l1 = np.array([0.3379, -0.2671, -0.2789, -0.6611, -0.5542, -0.0188, 0.1831, 0.3612]).astype(
            np.float32).reshape([1, -1])

        '''
        weight
            layer0
                forward
                    wih
                    whh
                reverse
                    wih
                    whh
            layer1
                forward
                    wih
                    whh
                reverse
                    wih
                    whh
            ... ...
        bias:
            layer0
                forward
                    bih
                    bhh
                reverse
                    bih
                    bhh
            layer1
                forward
                    bih
                    bhh
                reverse
                    bih
                    bhh
            ... ...
        '''
        w_np = np.concatenate(
            (wih_l0, whh_l0, wih_reverse_l0, whh_reverse_l0, wih_l1, whh_l1, wih_reverse_l1, whh_reverse_l1,
             bih_l0, bhh_l0, bih_reverse_l0, bhh_reverse_l0, bih_l1, bhh_l1, bih_reverse_l1, bhh_reverse_l1),
            axis=1).reshape([-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    @ms_function
    def construct(self):
        return self.lstm(self.x, self.h, self.c, self.w)[0]


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_grad():
    seq_len = 5
    batch_size = 2

    input_size = 10
    hidden_size = 2
    num_layers = 2
    has_bias = True
    bidirectional = True
    dropout = 0.0

    net = Grad(Net(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout))

    dy = np.array([[[-3.5471e-01, 7.0540e-01, -7.5945e-01, -1.2322e+00],
                    [2.7161e-01, 1.0865e+00, -2.1827e-03, 8.8031e-01]],

                   [[-4.2431e-01, 1.4955e+00, 4.6576e-01, -2.7230e+00],
                    [-4.0418e-01, -2.3282e-01, 9.1253e-01, -2.7379e-01]],

                   [[-1.3654e+00, 1.9251e+00, -1.6808e+00, -3.2642e-02],
                    [-4.6481e-01, 1.3138e+00, 1.2956e-02, 1.0198e+00]],

                   [[1.2914e+00, -2.3753e-01, 9.4763e-01, 1.7930e-02],
                    [5.3589e-01, -1.0981e-01, 1.5377e+00, 6.2709e-01]],

                   [[-1.6032e+00, -1.8818e-01, 7.0441e-01, -2.8765e+00],
                    [1.0065e-01, 9.2045e-01, 2.7426e-01, 2.6196e-01]]]).astype(np.float32)

    dx, dh, dc, _ = net(Tensor(dy))
    expect_dx = np.array([[[0.01697153, -0.0096909, 0.01306139, 0.00863109, -0.00122794, -0.00746152, -0.00879683,
                            0.00643571, 0.0015958, 0.01480642],
                           [0.05794962, -0.02326604, 0.01862703, 0.02053947, 0.02607713, -0.01278067, 0.04250786,
                            -0.02686035, -0.07441005, 0.00806021]],

                          [[-0.026675, -0.01024149, -0.02492021, -0.00457492, -0.0085863, 0.02341479, 0.02188834,
                            -0.04139283, -0.01367766, -0.00305065],
                           [-0.00762213, -0.01914341, -0.03233681, -0.03580827, -0.02201782, -0.00153102, -0.00097455,
                            -0.02708411, -0.03711082, -0.02804472]],

                          [[-0.0040581, -0.00116989, 0.01652471, 0.02182668, -0.02547193, -0.04171437, 0.04185125,
                            0.01589275, -0.00517019, 0.06554792],
                           [-0.02294365, -0.00589715, -0.01425684, -0.01499153, -0.05327821, -0.03133425, 0.00755623,
                            -0.04192506, -0.02122675, -0.01214214]],

                          [[-0.00041491, 0.00240709, -0.00942589, 0.00719656, 0.01438523, 0.00931082, 0.00534746,
                            -0.0004002, 0.01299422, 0.00181135],
                           [-0.01704482, -0.00887032, -0.01746774, -0.03289891, -0.04259495, -0.01928082, -0.01570587,
                            -0.01242383, -0.01799918, -0.00610236]],

                          [[0.00207505, -0.0008109, 0.00114241, 0.00251349, -0.00065676, 0.00151333, -0.00077485,
                            -0.00034354, -0.00028289, -0.0006986],
                           [-0.00240827, -0.0001309, 0.01401818, -0.01272261, -0.02665948, -0.01095799, -0.007761,
                            -0.0087831, 0.01038029, 0.02021475]]]).astype(np.float32)

    error = np.ones(dx.asnumpy().shape) * 1.0e-4
    diff = dx.asnumpy() - expect_dx
    assert np.all(diff < error)
    assert np.all(-diff < error)

    expect_dh = np.array([[[-0.00696833, 0.00212885],
                           [0.01416209, 0.0002706]],

                          [[0.00297393, -0.0021012],
                           [0.00458834, 0.00400078]],

                          [[0.08658642, -0.10590762],
                           [0.1516603, -0.10525411]],

                          [[0.11888178, -0.04759264],
                           [0.05898442, -0.08082277]]]).astype(np.float32)

    error = np.ones(dh.asnumpy().shape) * 1.0e-4
    diff = dh.asnumpy() - expect_dh
    assert np.all(diff < error)
    assert np.all(-diff < error)

    expect_dc = np.array([[[0.00887521, -0.01391486],
                           [0.03858164, -0.04941981]],

                          [[0.00665188, 0.00184223],
                           [-0.00541833, 0.01410913]],

                          [[-0.2068854, 0.5585638],
                           [0.01735374, 0.3537254]],

                          [[0.20350647, -0.2792883],
                           [0.18456826, 0.02278761]]]).astype(np.float32)

    error = np.ones(dc.asnumpy().shape) * 1.0e-4
    diff = dc.asnumpy() - expect_dc
    assert np.all(diff < error)
    assert np.all(-diff < error)


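# The dropout test below uses dropout=1.0 with a single layer; RNN dropout is
# applied between stacked layers (cuDNN semantics), so the outputs remain
# deterministic and can be compared against fixed expected values.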
class LstmNetWithDropout(nn.Cell):
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(LstmNetWithDropout, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        input_np = np.array([[[-2.48789445e-01, -2.18991071e-01, -8.41492534e-01, -5.73351622e-01, 8.20644796e-02,
                               4.14313585e-01, -1.30143976e+00, -4.43366140e-01, -1.21003680e-01, -2.11284861e-01],
                              [9.94045794e-01, 3.18840504e-01, 4.81898338e-01, -4.83986028e-02, -9.26419497e-02,
                               -2.57977694e-01, 1.82191110e+00, 5.95121741e-01, 6.30752742e-01, -6.01903737e-01]],

                             [[7.67166913e-01, 5.41202351e-02, -1.24094069e+00, 1.38814664e+00, 2.05845284e+00,
                               7.29744852e-01, -1.12405574e+00, 3.78702253e-01, 2.28524983e-01, 2.02445173e+00],
                              [-1.85264975e-01, -4.55119252e-01, 1.23624969e+00, 1.24347043e+00, -1.68316591e+00,
                               -3.55918944e-01, 3.07149738e-01, -3.44966322e-01, -1.08978853e-01, 1.80912763e-01]],

                             [[-6.47622466e-01, 1.31204927e+00, 6.47477210e-01, -7.93370783e-01, 3.08402872e-04,
                               -5.12097359e-01, -1.69133916e-01, 8.57838035e-01, -3.63963723e-01, 6.35978997e-01],
                              [-3.92911851e-01, 8.27334300e-02, -1.11347124e-01, 8.79961967e-01, 6.02812059e-02,
                               -3.76448452e-01, -1.48800862e+00, -9.48699772e-01, -1.24202335e+00, 1.65264118e+00]],

                             [[4.05404866e-01, 5.67396320e-02, -2.05705926e-01, -8.70196745e-02, -7.34854519e-01,
                               -1.07580565e-01, 1.33716142e+00, -1.18140256e+00, 2.66074872e+00, -3.26788813e-01],
                              [6.97183967e-01, -2.32625628e+00, 1.20393467e+00, -2.32532692e+00, 2.03347206e+00,
                               -7.58083522e-01, 1.35564697e+00, -2.32149422e-01, 9.85125721e-01, 1.00944638e+00]],

                             [[9.89606023e-01, -5.30669808e-01, -2.66087383e-01, 8.14819038e-01, 1.07067376e-01,
                               -1.76214290e+00, -5.04977465e-01, 1.94490123e+00, 5.10450959e-01, -2.29238123e-01],
                              [-1.32928836e+00, -1.18175328e-01, -5.17818272e-01, -1.45089477e-01, 7.13987231e-01,
                               -7.41293788e-01, -3.67817104e-01, 1.18039274e+00, -6.03745162e-01,
                               -5.83392143e-01]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h = Parameter(initializer(
            Tensor(np.array([[[-0.47240502, 1.6824378],
                              [-0.00978304, 0.8179632]]]).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        self.c = Parameter(initializer(
            Tensor(np.array([[[-0.85975164, -0.3198615],
                              [-0.9821871, 0.26311848]]]).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        wih = np.array([[0.4473, -0.5509, -0.1585, -0.6215, 0.6228, 0.3462, 0.3015, -0.3714, 0.3119, -0.1151],
                        [-0.6923, 0.1373, 0.2214, 0.2280, 0.6960, -0.6368, 0.5725, -0.1359, 0.0742, -0.6777],
                        [-0.4432, 0.6162, -0.1066, -0.6138, -0.2529, -0.5638, -0.0603, 0.3039, 0.1068, -0.5300],
                        [0.4337, -0.1215, -0.5088, -0.0045, 0.2828, 0.1411, 0.0741, 0.6936, -0.4603, 0.6986],
                        [-0.2079, -0.5518, 0.5375, -0.2168, 0.3662, 0.0948, -0.0564, -0.1808, -0.6672, -0.2410],
                        [0.5142, 0.0790, -0.1123, -0.2351, 0.3982, -0.6351, 0.5906, 0.3917, -0.0850, -0.5397],
                        [-0.4795, -0.6576, 0.5693, 0.0047, -0.6626, 0.1013, -0.4015, -0.4040, -0.2817, 0.4430],
                        [0.0251, -0.3035, -0.6026, 0.2693, -0.2749, 0.1501, -0.5778, 0.5570, -0.7065, -0.6196]]).astype(
            np.float32).reshape([1, -1])

        whh = np.array([[-0.4344, -0.2529],
                        [0.0377, 0.7046],
                        [-0.0579, -0.5240],
                        [-0.4801, -0.1149],
                        [-0.4010, -0.5614],
                        [0.4721, 0.4366],
                        [-0.4282, 0.0816],
                        [0.1574, -0.3359]]).astype(np.float32).reshape([1, -1])

        bih = np.array([0.2431, 0.5967, -0.2417, -0.4169, -0.5326, 0.5685, -0.2971, -0.4326]).astype(
            np.float32).reshape([1, -1])
        bhh = np.array([-0.1751, -0.2270, -0.3980, -0.4983, -0.3527, -0.2774, 0.6371, -0.3330]).astype(
            np.float32).reshape([1, -1])

        w_np = np.concatenate((wih, whh, bih, bhh), axis=1).reshape([-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

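    # Note: unlike the other nets in this file, construct is not wrapped in
    # @ms_function, so this net runs purely in PyNative mode.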
    def construct(self):
        return self.lstm(self.x, self.h, self.c, self.w)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_lstm_dropout():
    seq_len = 5
    batch_size = 2

    input_size = 10
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = False
    dropout = 1.0

    net = LstmNetWithDropout(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional,
                             dropout)
    y, _, _, _, _ = net()
    expect_y = np.array([[[-0.45210335, -0.0844336],
                          [-0.14677924, 0.07140275]],

                         [[-0.18895914, -0.11084185],
                          [-0.26356253, -0.06367199]],

                         [[-0.33480304, 0.00812318],
                          [-0.0887147, -0.1564593]],

                         [[-0.33231455, 0.00743252],
                          [0.428218, 0.00723737]],

                         [[-0.20026046, 0.43491203],
                          [0.17739448, 0.5313992]]])

    error = np.ones([seq_len, batch_size, hidden_size]) * 1.0e-4
    diff = y.asnumpy() - expect_y
    assert np.all(diff < error)
    assert np.all(-diff < error)