/**
 * Copyright 2024 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_FLASH_ATTENTION_SCORE_H_
#define MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_FLASH_ATTENTION_SCORE_H_

#include <vector>
#include "ops/ops_func_impl/op_func_impl.h"

namespace mindspore {
namespace ops {
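// Ordinal positions of the FlashAttentionScore inputs, in the same order as the
// input_args vector handed to InferShape/InferType below.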
enum FlashAttentionScoreInputIndex : size_t {
  kFlashAttentionScoreInputQueryIndex = 0,
  kFlashAttentionScoreInputKeyIndex,
  kFlashAttentionScoreInputValueIndex,
  kFlashAttentionScoreInputRealShiftIndex,
  kFlashAttentionScoreInputDropMaskIndex,
  kFlashAttentionScoreInputPaddingMaskIndex,
  kFlashAttentionScoreInputAttnMaskIndex,
  kFlashAttentionScoreInputPrefixIndex,
  kFlashAttentionScoreInputActualSeqQlenIndex,
  kFlashAttentionScoreInputActualSeqKVlenIndex,
  kFlashAttentionScoreInputHeadNumIndex,
  kFlashAttentionScoreInputKeepProbIndex,
  kFlashAttentionScoreInputScaleValueIndex,
  kFlashAttentionScoreInputPreTokensIndex,
  kFlashAttentionScoreInputNextTokensIndex,
  kFlashAttentionScoreInputInnerPreciseIndex,
  kFlashAttentionScoreInputLayoutIndex,
  kFlashAttentionScoreInputSparseModeIndex,
  kFlashAttentionScoreInputsNum,
};
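// Ordinal positions of the FlashAttentionScore outputs (softmax max/sum statistics,
// softmax output, attention output).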
enum FlashAttentionScoreOutputIndex : size_t {
  kFlashAttentionScoreOutputSoftmaxMaxIndex = 0,
  kFlashAttentionScoreOutputSoftmaxSumIndex,
  kFlashAttentionScoreOutputSoftmaxOutIndex,
  kFlashAttentionScoreOutputAttentionOutIndex,
  kFlashAttentionScoreOutputsNum,
};
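// Values accepted for the sparse_mode input; each value names an attention-mask
// pattern (default mask, full mask, causal variants, band, prefix, etc.).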
enum FlashAttentionScoreSparseMode : int64_t {
  kSparseDefaultMask = 0,
  kSparseAllMask,
  kSparseLeftUpCausal,
  kSparseRightDownCausal,
  kSparseBand,
  kSparsePrefix,
  kSparseGlobal,
  kSparseDilated,
  kSparseBlockLocal,
};
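// Shape and dtype inference for the FlashAttentionScore primitive. InferShape and
// InferType consume input_args indexed by FlashAttentionScoreInputIndex and describe
// the outputs enumerated by FlashAttentionScoreOutputIndex.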
class MIND_API FlashAttentionScoreFuncImpl : public OpFuncImpl {
 public:
  BaseShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) const override;
  TypePtr InferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) const override;
};
}  // namespace ops
}  // namespace mindspore
#endif  // MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_FLASH_ATTENTION_SCORE_H_