# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Test transformer."""
import numpy as np
import pytest
from mindspore import Tensor
from mindspore.common import dtype
from mindspore.parallel.nn import MultiHeadAttention, FeedForward, TransformerEncoderLayer, TransformerEncoder, \
    TransformerDecoder, TransformerDecoderLayer, Transformer, CrossEntropyLoss, AttentionMask, FixedSparseAttention
from mindspore.common.api import _cell_graph_executor


def test_transformer_encoder_only():
    """Compile a Transformer configured with encoder layers only."""
    model = Transformer(batch_size=2,
                        src_seq_length=20,
                        tgt_seq_length=10,
                        encoder_layers=2,
                        decoder_layers=0,
                        hidden_size=64,
                        ffn_hidden_size=64)

    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)

    _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask)


def test_transformer_encoder_log_softmax():
    """An unsupported activation such as 'logsoftmax' should raise ValueError."""
    with pytest.raises(ValueError):
        model = Transformer(batch_size=2,
                            src_seq_length=20,
                            tgt_seq_length=10,
                            encoder_layers=2,
                            decoder_layers=0,
                            hidden_act='logsoftmax',
                            hidden_size=64,
                            ffn_hidden_size=64)

        encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
        encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)

        _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask)


def test_transformer_encoder_leakyrelu():
    """Compile an encoder-only Transformer with the 'leakyrelu' activation."""
    model = Transformer(batch_size=2,
                        src_seq_length=20,
                        tgt_seq_length=10,
                        encoder_layers=2,
                        decoder_layers=0,
                        hidden_act='leakyrelu',
                        hidden_size=64,
                        ffn_hidden_size=64)

    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)

    _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask)


def test_transformer_encoder_logsigmoid():
    """Compile an encoder-only Transformer with the 'logsigmoid' activation."""
    model = Transformer(batch_size=2,
                        src_seq_length=20,
                        tgt_seq_length=10,
                        encoder_layers=2,
                        decoder_layers=0,
                        hidden_act='logsigmoid',
                        hidden_size=64,
                        ffn_hidden_size=64)

    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)

    _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask)


def test_encoder_and_decoder():
    """Compile a Transformer with both encoder and decoder layers."""
    model = Transformer(batch_size=2,
                        src_seq_length=20,
                        tgt_seq_length=10,
                        encoder_layers=1,
                        decoder_layers=2,
                        hidden_size=64,
                        ffn_hidden_size=64)

    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)

    decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32)
    decoder_input_mask = Tensor(np.ones((2, 10, 10)), dtype.float16)
    memory_mask = Tensor(np.ones((2, 10, 20)), dtype.float16)

    _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask,
                                 decoder_input_value,
                                 decoder_input_mask,
                                 memory_mask)


def test_transformer_encoder():
    """Compile a standalone TransformerEncoder."""
    model = TransformerEncoder(batch_size=2,
                               seq_length=16,
                               num_layers=2,
                               hidden_size=8,
                               ffn_hidden_size=64,
                               num_heads=2)

    encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 16, 16)), dtype.float16)

    _cell_graph_executor.compile(model,
                                 encoder_input_value,
                                 encoder_input_mask)


def test_transformer_encoder_layer():
    """Compile a single TransformerEncoderLayer."""
    model = TransformerEncoderLayer(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16,
                                    num_heads=2)

    encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 16, 16)), dtype.float16)

    _cell_graph_executor.compile(model,
                                 encoder_input_value,
                                 encoder_input_mask)


def test_transformer_encoder_layer_post_true():
    """Compile a TransformerEncoderLayer with post_layernorm_residual=True."""
    model = TransformerEncoderLayer(batch_size=2,
                                    seq_length=16,
                                    hidden_size=8, ffn_hidden_size=64,
                                    num_heads=2, post_layernorm_residual=True)

    encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 16, 16)), dtype.float16)

    _cell_graph_executor.compile(model,
                                 encoder_input_value,
                                 encoder_input_mask)


def test_transformer_decoder():
    """Compile a standalone TransformerDecoder."""
    model = TransformerDecoder(num_layers=1,
                               batch_size=2,
                               src_seq_length=20,
                               tgt_seq_length=10,
                               hidden_size=64,
                               ffn_hidden_size=64,
                               num_heads=2)

    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)

    decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32)
    decoder_input_mask = Tensor(np.ones((2, 10, 10)), dtype.float16)
    memory_mask = Tensor(np.ones((2, 10, 20)), dtype.float16)

    _cell_graph_executor.compile(model, decoder_input_value, decoder_input_mask,
                                 encoder_input_value,
                                 memory_mask)


def test_transformer_decoder_layer():
    """Compile a single TransformerDecoderLayer."""
    model = TransformerDecoderLayer(
        batch_size=2,
        src_seq_length=20,
        tgt_seq_length=10,
        hidden_size=64,
        ffn_hidden_size=64,
        num_heads=2)

    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)

    decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32)
    decoder_input_mask = Tensor(np.ones((2, 10, 10)), dtype.float16)
    memory_mask = Tensor(np.ones((2, 10, 20)), dtype.float16)

    _cell_graph_executor.compile(model, decoder_input_value, decoder_input_mask,
                                 encoder_input_value,
                                 memory_mask)


def test_multihead_attention():
    """Compile MultiHeadAttention with matching batch and sequence sizes."""
    model = MultiHeadAttention(hidden_size=15,
                               src_seq_length=20,
                               tgt_seq_length=20,
                               batch_size=2,
                               num_heads=3)
    from_tensor = Tensor(np.ones((2, 20, 15)), dtype.float32)
    to_tensor = Tensor(np.ones((2, 20, 15)), dtype.float16)
    attention_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)

    _cell_graph_executor.compile(model, from_tensor, to_tensor, to_tensor, attention_mask)


def test_multihead_attention_wrong_batch():
    """Inputs whose batch size differs from the configured one should raise ValueError."""
    model = MultiHeadAttention(hidden_size=15,
                               src_seq_length=20,
                               tgt_seq_length=20,
                               batch_size=2,
                               num_heads=3)
    from_tensor = Tensor(np.ones((3, 20, 15)), dtype.float32)
    to_tensor = Tensor(np.ones((3, 20, 15)), dtype.float16)
    attention_mask = Tensor(np.ones((3, 20, 20)), dtype.float16)

    with pytest.raises(ValueError):
        _cell_graph_executor.compile(model,
                                     from_tensor, to_tensor, to_tensor, attention_mask)


def test_feedforward_layer():
    """Compile a FeedForward layer with the 'relu' activation."""
    model = FeedForward(hidden_size=15,
                        ffn_hidden_size=30,
                        dropout_rate=0.1,
                        hidden_act='relu')
    tensor = Tensor(np.ones((2, 20, 15)), dtype.float32)

    _cell_graph_executor.compile(model, tensor)


def test_cross_entropy():
    """Compile CrossEntropyLoss on a single logits/labels/input_mask triple."""
    model = CrossEntropyLoss()
    logits = Tensor(np.array([[3, 5, 6, 9, 12, 33, 42, 12, 32, 72]]), dtype.float32)
    labels_np = np.array([1]).astype(np.int32)
    input_mask = Tensor(np.ones(1).astype(np.float32))
    labels = Tensor(labels_np)
    _cell_graph_executor.compile(model, logits, labels, input_mask)


def test_attention_mask():
    """Compile AttentionMask for inputs of shape (batch_size, seq_length)."""
    model = AttentionMask(seq_length=19)
    inputs = Tensor(np.ones((2, 19)), dtype.float32)
    _cell_graph_executor.compile(model, inputs)


def test_sparse_attention():
    """Compile FixedSparseAttention; q/k/v have shape (batch, seq, num_heads * size_per_head)."""
    model = FixedSparseAttention(batch_size=2,
                                 seq_length=1024,
                                 size_per_head=64,
                                 num_heads=8,
                                 block_size=64)
    q = Tensor(np.ones((2, 1024, 512)), dtype.float16)
    k = Tensor(np.ones((2, 1024, 512)), dtype.float16)
    v = Tensor(np.ones((2, 1024, 512)), dtype.float16)
    mask = Tensor(np.ones((2, 1024, 1024)), dtype.float32)
    _cell_graph_executor.compile(model, q, k, v, mask)
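

# A hedged sketch of one more negative case, mirroring test_multihead_attention_wrong_batch
# above. It assumes MultiHeadAttention also validates the sequence length of its inputs at
# compile time and raises ValueError on a mismatch; this test is an illustrative addition,
# not part of the original suite.
def test_multihead_attention_wrong_seq_length():
    model = MultiHeadAttention(hidden_size=15,
                               src_seq_length=20,
                               tgt_seq_length=20,
                               batch_size=2,
                               num_heads=3)
    # Sequence length 30 does not match the configured src/tgt_seq_length of 20.
    from_tensor = Tensor(np.ones((2, 30, 15)), dtype.float32)
    to_tensor = Tensor(np.ones((2, 30, 15)), dtype.float16)
    attention_mask = Tensor(np.ones((2, 30, 30)), dtype.float16)

    with pytest.raises(ValueError):
        _cell_graph_executor.compile(model, from_tensor, to_tensor, to_tensor, attention_mask)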