• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CORE_OPS_INCRE_FLASH_ATTENTION_H_
17 #define MINDSPORE_CORE_OPS_INCRE_FLASH_ATTENTION_H_
18 #include <map>
19 #include <memory>
20 #include <vector>
21 #include "ops/base_operator.h"
22 #include "mindapi/base/types.h"
23 #include "ops/primitive_c.h"
24 #include "abstract/abstract_value.h"
25 #include "mindspore/core/ops/op_name.h"
26 
27 namespace mindspore {
28 namespace ops {
29 constexpr auto kNameIncreFlashAttention = "IncreFlashAttention";
/// \brief Positional indices of the IncreFlashAttention inputs.
///
/// Enumerators are numbered consecutively from 0, in the same order as the input
/// names registered through InitIOName in the IncreFlashAttention constructor.
/// Not every enumerator carries the "Index" suffix; the names are kept as they are
/// because other code may refer to them.
enum IncreFlashAttentionInputIndex : size_t {
  kIncreFlashAttentionInputQueryIndex = 0,   // 0
  kIncreFlashAttentionInputKeyIndex,         // 1
  kIncreFlashAttentionInputValueIndex,       // 2
  kIncreFlashAttentionInputAttnMaskIndex,    // 3
  kIncreFlashAttentionInputActualSeqLengths, // 4
  kIncreFlashAttentionInputPseShiftIndex,    // 5
  kIncreFlashAttentionInputDequantScale1,    // 6
  kIncreFlashAttentionInputQuantScale1,      // 7
  kIncreFlashAttentionInputDequantScale2,    // 8
  kIncreFlashAttentionInputQuantScale2,      // 9
  kIncreFlashAttentionInputQuantOffset2,     // 10
  kIncreFlashAttentionInputAntiquantScale,   // 11
  kIncreFlashAttentionInputAntiquantOffset,  // 12
  kIncreFlashAttentionInputBlockTable,       // 13
  // Not an input position: equals the number of inputs (14) because it follows the last one.
  kIncreFlashAttentionInputsNum,
};
/// \brief Positional indices of the IncreFlashAttention outputs.
enum IncreFlashAttentionOutputIndex : size_t {
  kIncreFlashAttentionOutputAttentionOutIndex = 0,
  // Not an output position: equals the number of outputs (1) because it follows the last one.
  kIncreFlashAttentionOutputsNum,
};
51 
52 /// \brief IncreFlashAttention.
53 /// Refer to Python API @ref mindspore.ops.IncreFlashAttention for more details.
54 class MIND_API IncreFlashAttention : public BaseOperator {
55  public:
56   MIND_API_BASE_MEMBER(IncreFlashAttention);
57   /// \brief Constructor.
IncreFlashAttention()58   IncreFlashAttention() : BaseOperator(kNameIncreFlashAttention) {
59     InitIOName(
60       {"query", "key", "value", "attn_mask", "actual_seq_lengths", "padding_mask", "dequant_scale1", "quant_scale1",
61        "dequant_scale2", "quant_scale2", "quant_offset2", "antiquant_scale", "antiquant_offset", "block_table"},
62       {"attention_out"});
63   }
64 };
65 AbstractBasePtr IncreFlashAttentionInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
66                                          const std::vector<AbstractBasePtr> &input_args);
67 using IncreFlashAttentionPtr = std::shared_ptr<IncreFlashAttention>;
68 }  // namespace ops
69 }  // namespace mindspore
70 
71 #endif  // MINDSPORE_CORE_OPS_INCRE_FLASH_ATTENTION_H_
72