• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
15
16import numpy as np
17import pytest
18
19import mindspore.context as context
20import mindspore.nn as nn
21from mindspore import Tensor
22from mindspore.common.initializer import initializer
23from mindspore.common.parameter import Parameter
24from mindspore.nn import TrainOneStepCell, WithLossCell
25from mindspore.nn.optim import Momentum
26from mindspore.ops import operations as P
27
# Configure MindSpore once at import time: graph mode, GPU device target.
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
29
30
def InitialLstmWeight(input_size, hidden_size, num_layers, bidirectional, has_bias=False):
    """Build the initial hidden state, cell state and flat weight for an LSTM.

    Args:
        input_size: Feature size fed to the first LSTM layer.
        hidden_size: Hidden size of every LSTM layer.
        num_layers: Number of stacked layers.
        bidirectional: Truthy when each layer has two directions.
        has_bias: When true, room for two bias vectors per layer and
            direction is added to the flat weight.

    Returns:
        Tuple ``(h, c, w)`` of Parameters. ``h`` and ``c`` are all ones with
        shape ``(num_layers * num_directions, batch_size, hidden_size)``;
        ``w`` has shape ``(weight_size, 1, 1)`` and is filled with 0.01.

    Note:
        ``batch_size`` is not an argument; it is read from the module-level
        global of the same name, which must exist when this is called.
    """
    num_directions = 2 if bidirectional else 1
    gate_size = 4 * hidden_size
    bias_size = 2 * gate_size if has_bias else 0

    # Accumulate the element count layer by layer; only the first layer
    # consumes `input_size`, deeper layers consume the previous layer's output.
    weight_size = 0
    for layer_idx in range(num_layers):
        if layer_idx == 0:
            layer_input_size = input_size
        else:
            layer_input_size = hidden_size * num_directions
        per_direction = gate_size * layer_input_size + gate_size * hidden_size + bias_size
        weight_size += num_directions * per_direction

    flat_weights = np.ones([weight_size, 1, 1]).astype(np.float32) * 0.01
    w = Parameter(initializer(Tensor(flat_weights), flat_weights.shape), name='w')

    state_shape = (num_layers * num_directions, batch_size, hidden_size)

    def make_state(name):
        # h and c share the same all-ones initial value and shape.
        ones = Tensor(np.ones(state_shape).astype(np.float32))
        return Parameter(initializer(ones, list(state_shape)), name=name)

    h = make_state('h')
    c = make_state('c')

    return h, c, w
59
60
class SentimentNet(nn.Cell):
    """Embedding -> LSTM -> Dense classifier used by the GPU LSTM training test.

    The LSTM output is sliced at index 0 and at index ``seq_len - 1`` along
    axis 0 (presumably the time axis after the transpose), the two slices are
    concatenated along axis 2, flattened per sample and passed to a dense
    layer that produces ``labels`` outputs.

    Args:
        vocab_size: Number of rows of the embedding table.
        embed_size: Width of each embedding vector; also the LSTM input size.
        num_hiddens: LSTM hidden size.
        num_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
        weight: Array wrapped in a Tensor and used as the embedding table;
            the table is excluded from training.
        labels: Number of output units of the dense decoder.
        batch_size: Batch size used to size the slices and the reshape.
        seq_len: Length of axis 0 of the LSTM output; ``seq_len - 1`` is the
            index of the second slice. Defaults to 500, the value that was
            previously hard-coded.
    """

    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 bidirectional, weight, labels, batch_size, seq_len=500):
        super(SentimentNet, self).__init__()
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.batch_size = batch_size
        self.num_direction = 2 if bidirectional else 1

        self.embedding = nn.Embedding(vocab_size, embed_size, use_one_hot=False, embedding_table=Tensor(weight))
        # Keep the supplied embedding table fixed during training.
        self.embedding.embedding_table.requires_grad = False
        self.trans = P.Transpose()
        self.perm = (1, 0, 2)
        # NOTE: InitialLstmWeight shapes h and c from the module-level
        # `batch_size`, not from the constructor argument; the two must agree.
        self.h, self.c, self.w = InitialLstmWeight(embed_size, num_hiddens, num_layers, bidirectional)
        self.encoder = P.LSTM(input_size=embed_size, hidden_size=self.num_hiddens,
                              num_layers=num_layers, has_bias=False,
                              bidirectional=self.bidirectional, dropout=0.0)
        self.concat = P.Concat(2)

        # Two slices are concatenated, each `num_hiddens * num_direction` wide.
        feature_size = num_hiddens * self.num_direction
        self.decoder = nn.Dense(feature_size * 2, labels)

        self.slice1 = P.Slice()
        self.slice2 = P.Slice()
        self.reshape = P.Reshape()

        # Slice geometry derived from the configuration instead of the former
        # literals (1, 64, 200) and (499, 0, 0); identical for batch_size=64,
        # num_hiddens=100, bidirectional=True, seq_len=500.
        self.step_size = (1, batch_size, feature_size)
        self.first_step_begin = (0, 0, 0)
        self.last_step_begin = (seq_len - 1, 0, 0)
        self.encoding_shape = (batch_size, feature_size * 2)

    def construct(self, inputs):
        """Return the decoder output for a batch of token-id sequences."""
        embeddings = self.embedding(inputs)
        embeddings = self.trans(embeddings, self.perm)
        output, hidden = self.encoder(embeddings, self.h, self.c, self.w)

        output0 = self.slice1(output, self.first_step_begin, self.step_size)
        output1 = self.slice2(output, self.last_step_begin, self.step_size)
        encoding = self.concat((output0, output1))
        encoding = self.reshape(encoding, self.encoding_shape)
        outputs = self.decoder(encoding)
        return outputs
103
104
# Module-wide batch size: InitialLstmWeight reads it as a global to shape the
# initial h/c states, and test_LSTM passes it to SentimentNet.
batch_size = 64
106
107
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_LSTM():
    """Train SentimentNet for a few steps on constant data and check the loss.

    Builds a 2-layer bidirectional network, wraps it with a sparse softmax
    cross-entropy loss and a Momentum optimizer, runs ``num_epochs`` training
    steps on all-ones features and labels, and asserts that the final loss is
    below 0.01.
    """
    num_epochs = 5
    embed_size = 100
    num_hiddens = 100
    num_layers = 2
    bidirectional = True
    labels = 2
    vocab_size = 252193
    max_len = 500

    weight = np.ones((vocab_size + 1, embed_size)).astype(np.float32)

    net = SentimentNet(vocab_size=(vocab_size + 1), embed_size=embed_size,
                       num_hiddens=num_hiddens, num_layers=num_layers,
                       bidirectional=bidirectional, weight=weight,
                       labels=labels, batch_size=batch_size)

    learning_rate = 0.1
    momentum = 0.9

    # Only parameters with requires_grad set are handed to the optimizer.
    optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
    train_network.set_train()

    # Size the inputs from the module-level batch_size rather than repeating
    # the literal 64, so the data always matches the network configuration.
    train_features = Tensor(np.ones([batch_size, max_len]).astype(np.int32))
    train_labels = Tensor(np.ones([batch_size]).astype(np.int32))
    losses = []
    for _ in range(num_epochs):
        loss = train_network(train_features, train_labels)
        losses.append(loss)
        print("loss:", loss.asnumpy())
    assert losses[-1].asnumpy() < 0.01
145