/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CORE_OPS_PROMPT_FLASH_ATTENTION_H_
#define MINDSPORE_CORE_OPS_PROMPT_FLASH_ATTENTION_H_
#include <map>
#include <memory>
#include <vector>
#include "ops/base_operator.h"
#include "mindapi/base/types.h"
#include "ops/primitive_c.h"
#include "abstract/abstract_value.h"
namespace mindspore {
namespace ops {
constexpr auto kNamePromptFlashAttention = "PromptFlashAttention";
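// Positional indices of the operator's inputs; the trailing enumerator
// kPromptFlashAttentionInputsNum doubles as the total input count.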
enum PromptFlashAttentionInputIndex : size_t {
  kPromptFlashAttentionInputQueryIndex = 0,
  kPromptFlashAttentionInputKeyIndex,
  kPromptFlashAttentionInputValueIndex,
  kPromptFlashAttentionInputAttnMaskIndex,
  kPromptFlashAttentionInputActualSeqLengthsIndex,
  kPromptFlashAttentionInputActualSeqLengthsKvIndex,
  kPromptFlashAttentionInputPaddingMaskIndex,
  kPromptFlashAttentionInputDeqScale1Index,
  kPromptFlashAttentionInputQuantScale1Index,
  kPromptFlashAttentionInputDeqScale2Index,
  kPromptFlashAttentionInputQuantScale2Index,
  kPromptFlashAttentionInputQuantOffset2Index,
  kPromptFlashAttentionInputsNum,
};
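// Positional indices of the operator's outputs; kPromptFlashAttentionOutputsNum
// doubles as the total output count.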
enum PromptFlashAttentionOutputIndex : size_t {
  kPromptFlashAttentionOutputAttentionOutIndex = 0,
  kPromptFlashAttentionOutputsNum,
};

/// \brief PromptFlashAttention.
/// Refer to Python API @ref mindspore.ops.PromptFlashAttention for more details.
class MIND_API PromptFlashAttention : public BaseOperator {
 public:
  MIND_API_BASE_MEMBER(PromptFlashAttention);
  /// \brief Constructor.
  PromptFlashAttention() : BaseOperator(kNamePromptFlashAttention) {
    InitIOName({"query", "key", "value", "attn_mask", "actual_seq_lengths", "actual_seq_lengths_kv", "padding_mask",
                "deq_scale1", "quant_scale1", "deq_scale2", "quant_scale2", "quant_offset2"},
               {"attention_out"});
  }
};
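
// A minimal construction sketch (illustrative, not part of this header's API
// surface): default-constructing the operator is sufficient to obtain a
// primitive that already carries the twelve input names and the single
// "attention_out" output name registered via InitIOName above.
//
//   auto pfa = std::make_shared<mindspore::ops::PromptFlashAttention>();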
// Shape and type inference entry point for PromptFlashAttention.
AbstractBasePtr PromptFlashAttentionInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                          const std::vector<AbstractBasePtr> &input_args);
using PromptFlashAttentionPtr = std::shared_ptr<PromptFlashAttention>;
}  // namespace ops
}  // namespace mindspore
#endif  // MINDSPORE_CORE_OPS_PROMPT_FLASH_ATTENTION_H_