1 /** 2 * Copyright 2023 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CORE_OPS_PROMPT_FLASH_ATTENTION_H_ 18 #define MINDSPORE_CORE_OPS_PROMPT_FLASH_ATTENTION_H_ 19 #include <map> 20 #include <memory> 21 #include <vector> 22 #include "ops/base_operator.h" 23 #include "mindapi/base/types.h" 24 #include "ops/primitive_c.h" 25 #include "abstract/abstract_value.h" 26 namespace mindspore { 27 namespace ops { 28 constexpr auto kNamePromptFlashAttention = "PromptFlashAttention"; 29 enum PromptFlashAttentionInputIndex : size_t { 30 kPromptFlashAttentionInputQueryIndex = 0, 31 kPromptFlashAttentionInputKeyIndex, 32 kPromptFlashAttentionInputValueIndex, 33 kPromptFlashAttentionInputAttnMaskIndex, 34 kPromptFlashAttentionInputActualSeqLengthsIndex, 35 kPromptFlashAttentionInputActualSeqLengthsKvIndex, 36 kPromptFlashAttentionInputPaddingMaskIndex, 37 kPromptFlashAttentionInputDeqScale1Index, 38 kPromptFlashAttentionInputQuantScale1Index, 39 kPromptFlashAttentionInputDeqScale2Index, 40 kPromptFlashAttentionInputQuantScale2Index, 41 kPromptFlashAttentionInputQuantOffset2Index, 42 kPromptFlashAttentionInputsNum, 43 }; 44 enum PromptFlashAttentionOutputIndex : size_t { 45 kPromptFlashAttentionOutputAttentionOutIndex = 0, 46 kPromptFlashAttentionOutputsNum, 47 }; 48 49 /// \brief PromptFlashAttention. 50 /// Refer to Python API @ref mindspore.ops.PromptFlashAttention for more details. 
/// \brief Primitive declaration for the PromptFlashAttention fused-attention
/// operator. This header only registers the operator's name and its I/O names;
/// shape/type inference is declared separately below.
class MIND_API PromptFlashAttention : public BaseOperator {
 public:
  MIND_API_BASE_MEMBER(PromptFlashAttention);
  /// \brief Constructor. Registers the operator under kNamePromptFlashAttention
  /// and binds the canonical input/output names. The input-name order must
  /// match PromptFlashAttentionInputIndex, and the single output corresponds to
  /// kPromptFlashAttentionOutputAttentionOutIndex.
  PromptFlashAttention() : BaseOperator(kNamePromptFlashAttention) {
    InitIOName({"query", "key", "value", "attn_mask", "actual_seq_lengths", "actual_seq_lengths_kv", "padding_mask",
                "deq_scale1", "quant_scale1", "deq_scale2", "quant_scale2", "quant_offset2"},
               {"attention_out"});
  }
};

/// \brief Shape/dtype inference entry point for PromptFlashAttention.
/// \param[in] primitive The primitive carrying the operator's attributes.
/// \param[in] input_args Abstract values of the inputs, ordered per
///            PromptFlashAttentionInputIndex.
/// \return The abstract value (shape/dtype) of the attention output.
AbstractBasePtr PromptFlashAttentionInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                          const std::vector<AbstractBasePtr> &input_args);
// Convenience shared-ownership handle for the primitive.
using PromptFlashAttentionPtr = std::shared_ptr<PromptFlashAttention>;
}  // namespace ops
}  // namespace mindspore
#endif  // MINDSPORE_CORE_OPS_PROMPT_FLASH_ATTENTION_H_