# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
import pytest

import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore.nn.optim import Momentum
from mindspore.ops import operations as P

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

batch_size = 64


def InitialLstmWeight(input_size, hidden_size, num_layers, bidirectional, has_bias=False):
    num_directions = 2 if bidirectional else 1

    # Size of the flat weight tensor expected by P.LSTM: every layer and
    # direction contributes an input-hidden and a hidden-hidden matrix of
    # gate_size rows (four gates per cell), plus two bias vectors if has_bias.
    weight_size = 0
    gate_size = 4 * hidden_size
    for layer in range(num_layers):
        for _ in range(num_directions):
            input_layer_size = input_size if layer == 0 else hidden_size * num_directions
            weight_size += gate_size * input_layer_size
            weight_size += gate_size * hidden_size
            if has_bias:
                weight_size += 2 * gate_size

    w_np = np.ones([weight_size, 1, 1]).astype(np.float32) * 0.01

    w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    # Initial hidden and cell states; batch_size is the module-level constant.
    h = Parameter(initializer(
        Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
        [num_layers * num_directions, batch_size, hidden_size]), name='h')

    c = Parameter(initializer(
        Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
        [num_layers * num_directions, batch_size, hidden_size]), name='c')

    return h, c, w


class SentimentNet(nn.Cell):
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 bidirectional, weight, labels, batch_size):
        super(SentimentNet, self).__init__()
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.batch_size = batch_size

        # Embedding table is initialized from the given weight matrix and frozen.
        self.embedding = nn.Embedding(vocab_size, embed_size, use_one_hot=False, embedding_table=Tensor(weight))
        self.embedding.embedding_table.requires_grad = False
        self.trans = P.Transpose()
        self.perm = (1, 0, 2)
        self.h, self.c, self.w = InitialLstmWeight(embed_size, num_hiddens, num_layers, bidirectional)
        self.encoder = P.LSTM(input_size=embed_size, hidden_size=self.num_hiddens,
                              num_layers=num_layers, has_bias=False,
                              bidirectional=self.bidirectional, dropout=0.0)
        self.concat = P.Concat(2)
        # The decoder sees the first and last time steps concatenated, so its
        # input width is 2 * num_hiddens per direction.
        if self.bidirectional:
            self.decoder = nn.Dense(num_hiddens * 4, labels)
        else:
            self.decoder = nn.Dense(num_hiddens * 2, labels)

        self.slice1 = P.Slice()
        self.slice2 = P.Slice()
        self.reshape = P.Reshape()

        self.num_direction = 2 if bidirectional else 1

    def construct(self, inputs):
        # (batch, seq_len) -> (batch, seq_len, embed) -> (seq_len, batch, embed)
        embeddings = self.embedding(inputs)
        embeddings = self.trans(embeddings, self.perm)
        output, hidden = self.encoder(embeddings, self.h, self.c, self.w)
        # Slice out the first and the last time step; the hardcoded sizes
        # correspond to batch_size = 64, num_hiddens * num_direction = 200
        # and max_len = 500 (last index 499).
        output0 = self.slice1(output, (0, 0, 0), (1, 64, 200))
        output1 = self.slice2(output, (499, 0, 0), (1, 64, 200))
        encoding = self.concat((output0, output1))
        encoding = self.reshape(encoding, (self.batch_size, self.num_hiddens * self.num_direction * 2))
        outputs = self.decoder(encoding)
        return outputs


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_LSTM():
    num_epochs = 5
    embed_size = 100
    num_hiddens = 100
    num_layers = 2
    bidirectional = True
    labels = 2
    vocab_size = 252193
    max_len = 500

    weight = np.ones((vocab_size + 1, embed_size)).astype(np.float32)

    net = SentimentNet(vocab_size=(vocab_size + 1), embed_size=embed_size,
                       num_hiddens=num_hiddens, num_layers=num_layers,
                       bidirectional=bidirectional, weight=weight,
                       labels=labels, batch_size=batch_size)

    learning_rate = 0.1
    momentum = 0.9

    # Only parameters that require gradients are trained (the embedding table is frozen).
    optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(net_with_criterion, optimizer)
    train_network.set_train()

    # A constant dummy batch; the loss should drop close to zero within a few steps.
    train_features = Tensor(np.ones([batch_size, max_len]).astype(np.int32))
    train_labels = Tensor(np.ones([batch_size]).astype(np.int32))
    losses = []
    for epoch in range(num_epochs):
        loss = train_network(train_features, train_labels)
        losses.append(loss)
        print("loss:", loss.asnumpy())
    assert losses[-1].asnumpy() < 0.01
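

# Optional entry point (an addition, not part of the pytest harness): allows
# running the test directly with `python`, assuming the GPU device configured
# above is available.
if __name__ == "__main__":
    test_LSTM()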