# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
15""" test transformer"""
16import numpy as np
17import pytest
18from mindspore import Tensor
19from mindspore.common import dtype
20from mindspore.parallel.nn import MultiHeadAttention, FeedForward, TransformerEncoderLayer, TransformerEncoder, \
21    TransformerDecoder, TransformerDecoderLayer, Transformer, CrossEntropyLoss, AttentionMask, FixedSparseAttention
22from mindspore.common.api import _cell_graph_executor
23
24
def test_transformer_encoder_only():
    model = Transformer(batch_size=2,
                        src_seq_length=20,
                        tgt_seq_length=10,
                        encoder_layers=2,
                        decoder_layers=0,
                        hidden_size=64,
                        ffn_hidden_size=64)

    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)

    _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask)


def test_transformer_encoder_log_softmax():
    # 'logsoftmax' is not a supported hidden_act, so building the model should raise ValueError.
    with pytest.raises(ValueError):
        model = Transformer(batch_size=2,
                            src_seq_length=20,
                            tgt_seq_length=10,
                            encoder_layers=2,
                            decoder_layers=0,
                            hidden_act='logsoftmax',
                            hidden_size=64,
                            ffn_hidden_size=64)

        encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
        encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)

        _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask)


def test_transformer_encoder_leakyrelu():
    model = Transformer(batch_size=2,
                        src_seq_length=20,
                        tgt_seq_length=10,
                        encoder_layers=2,
                        decoder_layers=0,
                        hidden_act='leakyrelu',
                        hidden_size=64,
                        ffn_hidden_size=64)

    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)

    _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask)


def test_transformer_encoder_logsigmoid():
    model = Transformer(batch_size=2,
                        src_seq_length=20,
                        tgt_seq_length=10,
                        encoder_layers=2,
                        decoder_layers=0,
                        hidden_act='logsigmoid',
                        hidden_size=64,
                        ffn_hidden_size=64)

    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)

    _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask)


def test_encoder_and_decoder():
    model = Transformer(batch_size=2,
                        src_seq_length=20,
                        tgt_seq_length=10,
                        encoder_layers=1,
                        decoder_layers=2,
                        hidden_size=64,
                        ffn_hidden_size=64)

    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)

    decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32)
    decoder_input_mask = Tensor(np.ones((2, 10, 10)), dtype.float16)
    memory_mask = Tensor(np.ones((2, 10, 20)), dtype.float16)

    _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask,
                                 decoder_input_value,
                                 decoder_input_mask,
                                 memory_mask)


def test_transformer_encoder():
    model = TransformerEncoder(batch_size=2,
                               seq_length=16,
                               num_layers=2,
                               hidden_size=8,
                               ffn_hidden_size=64,
                               num_heads=2)

    encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 16, 16)), dtype.float16)

    _cell_graph_executor.compile(model,
                                 encoder_input_value,
                                 encoder_input_mask)


def test_transformer_encoder_layer():
    model = TransformerEncoderLayer(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16,
                                    num_heads=2)

    encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 16, 16)), dtype.float16)

    _cell_graph_executor.compile(model,
                                 encoder_input_value,
                                 encoder_input_mask)


def test_transformer_encoder_layer_post_true():
    model = TransformerEncoderLayer(batch_size=2,
                                    seq_length=16,
                                    hidden_size=8, ffn_hidden_size=64,
                                    num_heads=2, post_layernorm_residual=True)

    encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 16, 16)), dtype.float16)

    _cell_graph_executor.compile(model,
                                 encoder_input_value,
                                 encoder_input_mask)


def test_transformer_decoder():
    model = TransformerDecoder(num_layers=1,
                               batch_size=2,
                               src_seq_length=20,
                               tgt_seq_length=10,
                               hidden_size=64,
                               ffn_hidden_size=64,
                               num_heads=2)

    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)

    decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32)
    decoder_input_mask = Tensor(np.ones((2, 10, 10)), dtype.float16)
    memory_mask = Tensor(np.ones((2, 10, 20)), dtype.float16)

    _cell_graph_executor.compile(model, decoder_input_value, decoder_input_mask,
                                 encoder_input_value,
                                 memory_mask)


def test_transformer_decoder_layer():
    model = TransformerDecoderLayer(
        batch_size=2,
        src_seq_length=20,
        tgt_seq_length=10,
        hidden_size=64,
        ffn_hidden_size=64,
        num_heads=2)

    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)

    decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32)
    decoder_input_mask = Tensor(np.ones((2, 10, 10)), dtype.float16)
    memory_mask = Tensor(np.ones((2, 10, 20)), dtype.float16)

    _cell_graph_executor.compile(model, decoder_input_value, decoder_input_mask,
                                 encoder_input_value,
                                 memory_mask)


def test_multihead_attention():
    model = MultiHeadAttention(hidden_size=15,
                               src_seq_length=20,
                               tgt_seq_length=20,
                               batch_size=2,
                               num_heads=3)
    from_tensor = Tensor(np.ones((2, 20, 15)), dtype.float32)
    to_tensor = Tensor(np.ones((2, 20, 15)), dtype.float16)
    attention_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)

    _cell_graph_executor.compile(model, from_tensor, to_tensor, to_tensor, attention_mask)


def test_multihead_attention_wrong_batch():
    model = MultiHeadAttention(hidden_size=15,
                               src_seq_length=20,
                               tgt_seq_length=20,
                               batch_size=2,
                               num_heads=3)
    # The inputs use batch size 3, which does not match the configured batch_size=2,
    # so compilation should raise ValueError.
    from_tensor = Tensor(np.ones((3, 20, 15)), dtype.float32)
    to_tensor = Tensor(np.ones((3, 20, 15)), dtype.float16)
    attention_mask = Tensor(np.ones((3, 20, 20)), dtype.float16)

    with pytest.raises(ValueError):
        _cell_graph_executor.compile(model, from_tensor, to_tensor, to_tensor, attention_mask)


def test_feedforward_layer():
    model = FeedForward(hidden_size=15,
                        ffn_hidden_size=30,
                        dropout_rate=0.1,
                        hidden_act='relu')
    tensor = Tensor(np.ones((2, 20, 15)), dtype.float32)

    _cell_graph_executor.compile(model, tensor)


def test_cross_entropy():
    model = CrossEntropyLoss()
    logits = Tensor(np.array([[3, 5, 6, 9, 12, 33, 42, 12, 32, 72]]), dtype.float32)
    labels_np = np.array([1]).astype(np.int32)
    input_mask = Tensor(np.ones(1).astype(np.float32))
    labels = Tensor(labels_np)
    _cell_graph_executor.compile(model, logits, labels, input_mask)


def test_attention_mask():
    model = AttentionMask(seq_length=19)
    inputs = Tensor(np.ones((2, 19)), dtype.float32)
    _cell_graph_executor.compile(model, inputs)


def test_sparse_attention():
    model = FixedSparseAttention(batch_size=2,
                                 seq_length=1024,
                                 size_per_head=64,
                                 num_heads=8,
                                 block_size=64)
    q = Tensor(np.ones((2, 1024, 512)), dtype.float16)
    k = Tensor(np.ones((2, 1024, 512)), dtype.float16)
    v = Tensor(np.ones((2, 1024, 512)), dtype.float16)
    mask = Tensor(np.ones((2, 1024, 1024)), dtype.float32)
    _cell_graph_executor.compile(model, q, k, v, mask)
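

# A minimal sketch of a direct entry point, assuming pytest is available in the
# environment. These tests are normally collected and run with `pytest` from the
# command line; this block is only a convenience and not part of the original suite.
if __name__ == '__main__':
    pytest.main([__file__])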