1# Copyright 2023 Huawei Technologies Co., Ltd 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================ 15 16import numpy as np 17import pytest 18 19import mindspore as ms 20from mindspore import context 21from mindspore.nn import MultiheadAttention 22 23@pytest.mark.level1 24@pytest.mark.platform_x86_gpu_training 25@pytest.mark.platform_x86_cpu 26@pytest.mark.platform_arm_cpu 27@pytest.mark.env_onecard 28@pytest.mark.parametrize('dtype', [ms.float16, ms.float32, ms.float64]) 29@pytest.mark.parametrize('mode', [context.GRAPH_MODE, context.PYNATIVE_MODE]) 30def test_multihead_attention_cpu_gpu(dtype, mode): 31 """ 32 Feature: MultiheadAttention 33 Description: Verify the result of MultiheadAttention 34 Expectation: success 35 """ 36 context.set_context(mode=mode) 37 embed_dim, num_heads = 128, 8 38 seq_length, batch_size = 10, 8 39 query = ms.Tensor(np.random.randn(seq_length, batch_size, embed_dim), ms.float32) 40 key = ms.Tensor(np.random.randn(seq_length, batch_size, embed_dim), ms.float32) 41 value = ms.Tensor(np.random.randn(seq_length, batch_size, embed_dim), ms.float32) 42 multihead_attn = MultiheadAttention(embed_dim, num_heads) 43 attn_output, attn_output_weights = multihead_attn(query, key, value) 44 assert attn_output.shape == (10, 8, 128) 45 assert attn_output_weights.shape == (8, 10, 10) 46 47 48@pytest.mark.level0 49@pytest.mark.platform_arm_ascend_training 50@pytest.mark.platform_x86_ascend_training 51@pytest.mark.env_onecard 52@pytest.mark.parametrize('dtype', [ms.float16, ms.float32]) 53@pytest.mark.parametrize('mode', [context.GRAPH_MODE, context.PYNATIVE_MODE]) 54def test_multihead_attention_ascend(dtype, mode): 55 """ 56 Feature: MultiheadAttention 57 Description: Verify the result of MultiheadAttention 58 Expectation: success 59 """ 60 context.set_context(mode=mode) 61 embed_dim, num_heads = 128, 8 62 seq_length, batch_size = 10, 8 63 query = ms.Tensor(np.random.randn(seq_length, batch_size, embed_dim), ms.float32) 64 key = ms.Tensor(np.random.randn(seq_length, batch_size, embed_dim), ms.float32) 65 value = ms.Tensor(np.random.randn(seq_length, batch_size, embed_dim), ms.float32) 66 multihead_attn = MultiheadAttention(embed_dim, num_heads) 67 attn_output, attn_output_weights = multihead_attn(query, key, value) 68 assert attn_output.shape == (10, 8, 128) 69 assert attn_output_weights.shape == (8, 10, 10) 70