# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import math

import pytest
import numpy as np
import mindspore.nn as nn
import mindspore.context as context
from mindspore.common.api import ms_function
from mindspore.common.initializer import initializer
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor
from mindspore.common.parameter import ParameterTuple, Parameter

context.set_context(mode=context.GRAPH_MODE, device_target='CPU')


class StackLSTM(nn.Cell):
    """
    Stack multiple LSTM layers together.
    """

    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 has_bias=True,
                 batch_first=False,
                 dropout=0.0,
                 bidirectional=False):
        super(StackLSTM, self).__init__()
        self.num_layers = num_layers
        self.batch_first = batch_first
        self.transpose = P.Transpose()

        # direction number
        num_directions = 2 if bidirectional else 1

        # input_size list
        input_size_list = [input_size]
        for i in range(num_layers - 1):
            input_size_list.append(hidden_size * num_directions)

        # layers
        layers = []
        for i in range(num_layers):
            layers.append(nn.LSTMCell(input_size=input_size_list[i],
                                      hidden_size=hidden_size,
                                      has_bias=has_bias,
                                      batch_first=batch_first,
                                      bidirectional=bidirectional,
                                      dropout=dropout))

        # weights
        weights = []
        for i in range(num_layers):
            # weight size
            weight_size = (input_size_list[i] + hidden_size) * num_directions * hidden_size * 4
            if has_bias:
                bias_size = num_directions * hidden_size * 4
                weight_size = weight_size + bias_size

            # numpy weight
            stdv = 1 / math.sqrt(hidden_size)
            w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)

            # lstm weight
            weights.append(Parameter(initializer(Tensor(w_np), w_np.shape), name="weight" + str(i)))

        self.lstms = layers
        self.weight = ParameterTuple(tuple(weights))

    def construct(self, x, hx):
        """construct"""
        if self.batch_first:
            x = self.transpose(x, (1, 0, 2))
        # stack lstm
        h, c = hx
        hn = cn = None
        for i in range(self.num_layers):
            x, hn, cn, _, _ = self.lstms[i](x, h[i], c[i], self.weight[i])
        if self.batch_first:
            x = self.transpose(x, (1, 0, 2))
        return x, (hn, cn)


class LstmNet(nn.Cell):
    def __init__(self, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(LstmNet, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        # use keyword arguments so bidirectional/dropout are not silently bound to batch_first
        self.lstm = StackLSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                              has_bias=has_bias, bidirectional=bidirectional, dropout=dropout)
        input_np = np.array([[[0.6755, -1.6607, 0.1367], [0.4276, -0.7850, -0.3758]],
                             [[-0.6424, -0.6095, 0.6639], [0.7918, 0.4147, -0.5089]],
                             [[-1.5612, 0.0120, -0.7289], [-0.6656, -0.6626, -0.5883]],
                             [[-0.9667, -0.6296, -0.7310], [0.1026, -0.6821, -0.4387]],
                             [[-0.4710, 0.6558, -0.3144], [-0.8449, -0.2184, -0.1806]]
                             ]).astype(np.float32)
        self.x = Tensor(input_np)

        self.h = Tensor(np.array([0., 0., 0., 0.]).reshape((num_directions, batch_size, hidden_size)).astype(
            np.float32))

        self.c = Tensor(np.array([0., 0., 0., 0.]).reshape((num_directions, batch_size, hidden_size)).astype(
            np.float32))
        self.h = tuple((self.h,))
        self.c = tuple((self.c,))
        wih = np.array([[3.4021e-01, -4.6622e-01, 4.5117e-01],
                        [-6.4257e-02, -2.4807e-01, 1.3550e-02],  # i
                        [-3.2140e-01, 5.5578e-01, 6.3589e-01],
                        [1.6547e-01, -7.9030e-02, -2.0045e-01],
                        [-6.9863e-01, 5.9773e-01, -3.9062e-01],
                        [-3.0253e-01, -1.9464e-01, 7.0591e-01],
                        [-4.0835e-01, 3.6751e-01, 4.7989e-01],
                        [-5.6894e-01, -5.0359e-01, 4.7491e-01]]).astype(np.float32).reshape([1, -1])
        whh = np.array([[-0.4820, -0.2350],
                        [-0.1195, 0.0519],
                        [0.2162, -0.1178],
                        [0.6237, 0.0711],
                        [0.4511, -0.3961],
                        [-0.5962, 0.0906],
                        [0.1867, -0.1225],
                        [0.1831, 0.0850]]).astype(np.float32).reshape([1, -1])
        bih = np.zeros((1, 8)).astype(np.float32)
        w_np = np.concatenate((wih, whh, bih), axis=1).reshape([-1, 1, 1])
        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')
        self.lstm.weight = ParameterTuple((self.w,))

    @ms_function
    def construct(self):
        return self.lstm(self.x, (self.h, self.c))


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_lstm():
    seq_len = 5
    batch_size = 2
    input_size = 3
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = False
    dropout = 0.0
    num_directions = 1
    if bidirectional:
        num_directions = 2
    net = LstmNet(batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
    y, (h, c) = net()
    print(y)
    print(c)
    print(h)
    expect_y = [[[-0.17992045, 0.07819052],
                 [-0.10745212, -0.06291768]],

                [[-0.28830513, 0.30579978],
                 [-0.07570618, -0.08868407]],

                [[-0.00814095, 0.16889746],
                 [0.02814853, -0.11208838]],

                [[0.08157863, 0.06088024],
                 [-0.04227093, -0.11514835]],

                [[0.18908429, -0.02963362],
                 [0.09106826, -0.00602506]]]
    expect_h = [[[0.18908429, -0.02963362],
                 [0.09106826, -0.00602506]]]
    expect_c = [[[0.3434288, -0.06561527],
                 [0.16838229, -0.00972614]]]

    diff_y = y.asnumpy() - expect_y
    error_y = np.ones([seq_len, batch_size, hidden_size]) * 1.0e-4
    assert np.all(diff_y < error_y)
    assert np.all(-diff_y < error_y)
    diff_h = h.asnumpy() - expect_h
    error_h = np.ones([num_layers * num_directions, batch_size, hidden_size]) * 1.0e-4
    assert np.all(diff_h < error_h)
    assert np.all(-diff_h < error_h)
    diff_c = c.asnumpy() - expect_c
    error_c = np.ones([num_layers * num_directions, batch_size, hidden_size]) * 1.0e-4
    assert np.all(diff_c < error_c)
    assert np.all(-diff_c < error_c)


class MultiLayerBiLstmNet(nn.Cell):
    def __init__(self, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(MultiLayerBiLstmNet, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = StackLSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, has_bias=has_bias,
                              bidirectional=bidirectional, dropout=dropout)

        input_np = np.array([[[-0.1887, -0.4144, -0.0235, 0.7489, 0.7522, 0.5969, 0.3342, 1.2198, 0.6786, -0.9404],
                              [-0.8643, -1.6835, -2.4965, 2.8093, 0.1741, 0.2707, 0.7387, -0.0939, -1.7990, 0.4765]],

                             [[-0.5963, -1.2598, -0.7226, 1.1365, -1.7320, -0.7302, 0.1221, -0.2111, -1.6173, -0.0706],
                              [0.8964, 0.1737, -1.0077, -0.1389, 0.4889, 0.4391, 0.7911, 0.3614, -1.9533, -0.9936]],

                             [[0.3260, -1.3312, 0.0601, 1.0726, -1.6010, -1.8733, -1.5775, 1.1579, -0.8801, -0.5742],
                              [-2.2998, -0.6344, -0.5409, -0.9221, -0.6500, 0.1206, 1.5215, 0.7517, 1.3691, 2.0021]],

                             [[-0.1245, -0.3690, 2.1193, 1.3852, -0.1841, -0.8899, -0.3646, -0.8575, -0.3131, 0.2026],
                              [1.0218, -1.4331, 0.1744, 0.5442, -0.7808, 0.2527, 0.1566, 1.1484, -0.7766, -0.6747]],

                             [[-0.6752, 0.9906, -0.4973, 0.3471, -0.1202, -0.4213, 2.0213, 0.0441, 0.9016, 1.0365],
                              [1.2223, -1.3248, 0.1207, -0.8256, 0.1816, 0.7057, -0.3105, 0.5713, 0.2804,
                               -1.0685]]]).astype(np.float32)

        self.x = Tensor(input_np)

        self.h0 = Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32))
        self.c0 = Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32))
        self.h1 = Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32))
        self.c1 = Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32))

        self.h = tuple((self.h0, self.h1))
        self.c = tuple((self.c0, self.c1))
        input_size_list = [input_size, hidden_size * num_directions]
        weights = []
        bias_size = 0 if not has_bias else num_directions * hidden_size * 4
        for i in range(num_layers):
            weight_size = (input_size_list[i] + hidden_size) * num_directions * hidden_size * 4
            w_np = np.ones([weight_size, 1, 1]).astype(np.float32) * 0.02
            if has_bias:
                bias_np = np.zeros([bias_size, 1, 1]).astype(np.float32)
                w_np = np.concatenate([w_np, bias_np], axis=0)
            weights.append(Parameter(initializer(Tensor(w_np), w_np.shape), name='weight' + str(i)))
        self.lstm.weight = weights

    @ms_function
    def construct(self):
        return self.lstm(self.x, (self.h, self.c))


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_multi_layer_bilstm():
    batch_size = 2
    input_size = 10
    hidden_size = 2
    num_layers = 2
    has_bias = True
    bidirectional = True
    dropout = 0.0

    net = MultiLayerBiLstmNet(batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional,
                              dropout)
    y, (h, c) = net()
    print(y)
    print(h)
    print(c)


class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)

    @ms_function
    def construct(self, output_grad):
        weights = self.weights
        grads = self.grad(self.network, weights)(output_grad)
        return grads


class Net(nn.Cell):
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(Net, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2
        input_np = np.array([[[0.6755, -1.6607, 0.1367], [0.4276, -0.7850, -0.3758]],
                             [[-0.6424, -0.6095, 0.6639], [0.7918, 0.4147, -0.5089]],
                             [[-1.5612, 0.0120, -0.7289], [-0.6656, -0.6626, -0.5883]],
                             [[-0.9667, -0.6296, -0.7310], [0.1026, -0.6821, -0.4387]],
                             [[-0.4710, 0.6558, -0.3144], [-0.8449, -0.2184, -0.1806]]
                             ]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')
        self.hlist = []
        self.clist = []
        self.hlist.append(Parameter(initializer(
            Tensor(
                np.array([0.1, 0.1, 0.1, 0.1]).reshape((num_directions, batch_size, hidden_size)).astype(
                    np.float32)),
            [num_directions, batch_size, hidden_size]), name='h'))
        self.clist.append(Parameter(initializer(
            Tensor(
                np.array([0.2, 0.2, 0.2, 0.2]).reshape((num_directions, batch_size, hidden_size)).astype(
                    np.float32)),
            [num_directions, batch_size, hidden_size]), name='c'))
        self.h = ParameterTuple(tuple(self.hlist))
        self.c = ParameterTuple(tuple(self.clist))
        wih = np.array([[3.4021e-01, -4.6622e-01, 4.5117e-01],
                        [-6.4257e-02, -2.4807e-01, 1.3550e-02],  # i
                        [-3.2140e-01, 5.5578e-01, 6.3589e-01],
                        [1.6547e-01, -7.9030e-02, -2.0045e-01],
                        [-6.9863e-01, 5.9773e-01, -3.9062e-01],
                        [-3.0253e-01, -1.9464e-01, 7.0591e-01],
                        [-4.0835e-01, 3.6751e-01, 4.7989e-01],
                        [-5.6894e-01, -5.0359e-01, 4.7491e-01]]).astype(np.float32).reshape([1, -1])
        whh = np.array([[-0.4820, -0.2350],
                        [-0.1195, 0.0519],
                        [0.2162, -0.1178],
                        [0.6237, 0.0711],
                        [0.4511, -0.3961],
                        [-0.5962, 0.0906],
                        [0.1867, -0.1225],
                        [0.1831, 0.0850]]).astype(np.float32).reshape([1, -1])
        bih = np.zeros((1, 8)).astype(np.float32)
        w_np = np.concatenate((wih, whh, bih), axis=1).reshape([-1, 1, 1])
        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='weight0')
        self.lstm = StackLSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                              has_bias=has_bias, bidirectional=bidirectional, dropout=dropout)
        self.lstm.weight = ParameterTuple(tuple([self.w]))

    @ms_function
    def construct(self):
        return self.lstm(self.x, (self.h, self.c))[0]


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_grad():
    seq_len = 5
    batch_size = 2
    input_size = 3
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = False
    dropout = 0.0
    net = Grad(Net(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout))
    dy = np.array([[[-3.5471e-01, 7.0540e-01],
                    [2.7161e-01, 1.0865e+00]],

                   [[-4.2431e-01, 1.4955e+00],
                    [-4.0418e-01, -2.3282e-01]],

                   [[-1.3654e+00, 1.9251e+00],
                    [-4.6481e-01, 1.3138e+00]],

                   [[1.2914e+00, -2.3753e-01],
                    [5.3589e-01, -1.0981e-01]],

                   [[-1.6032e+00, -1.8818e-01],
                    [1.0065e-01, 9.2045e-01]]]).astype(np.float32)
    dx, dhx, dcx, dw = net(Tensor(dy))
    print(dx)
    print(dhx)
    print(dcx)
    print(dw)


test_multi_layer_bilstm()
test_lstm()
test_grad()