/**
 * Copyright 2024 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_FLASH_ATTENTION_SCORE_H_
#define MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_FLASH_ATTENTION_SCORE_H_

#include <vector>
#include "ops/ops_func_impl/op_func_impl.h"

namespace mindspore {
namespace ops {
// Positional indices of the FlashAttentionScore inputs; kFlashAttentionScoreInputsNum is the expected input count.
enum FlashAttentionScoreInputIndex : size_t {
  kFlashAttentionScoreInputQueryIndex = 0,
  kFlashAttentionScoreInputKeyIndex,
  kFlashAttentionScoreInputValueIndex,
  kFlashAttentionScoreInputRealShiftIndex,
  kFlashAttentionScoreInputDropMaskIndex,
  kFlashAttentionScoreInputPaddingMaskIndex,
  kFlashAttentionScoreInputAttnMaskIndex,
  kFlashAttentionScoreInputPrefixIndex,
  kFlashAttentionScoreInputActualSeqQlenIndex,
  kFlashAttentionScoreInputActualSeqKVlenIndex,
  kFlashAttentionScoreInputHeadNumIndex,
  kFlashAttentionScoreInputKeepProbIndex,
  kFlashAttentionScoreInputScaleValueIndex,
  kFlashAttentionScoreInputPreTokensIndex,
  kFlashAttentionScoreInputNextTokensIndex,
  kFlashAttentionScoreInputInnerPreciseIndex,
  kFlashAttentionScoreInputLayoutIndex,
  kFlashAttentionScoreInputSparseModeIndex,
  kFlashAttentionScoreInputsNum,
};
// Positional indices of the FlashAttentionScore outputs; kFlashAttentionScoreOutputsNum is the output count.
enum FlashAttentionScoreOutputIndex : size_t {
  kFlashAttentionScoreOutputSoftmaxMaxIndex = 0,
  kFlashAttentionScoreOutputSoftmaxSumIndex,
  kFlashAttentionScoreOutputSoftmaxOutIndex,
  kFlashAttentionScoreOutputAttentionOutIndex,
  kFlashAttentionScoreOutputsNum,
};
// Sparse attention mask modes accepted by the sparse_mode input.
enum FlashAttentionScoreSparseMode : int64_t {
  kSparseDefaultMask = 0,
  kSparseAllMask,
  kSparseLeftUpCausal,
  kSparseRightDownCausal,
  kSparseBand,
  kSparsePrefix,
  kSparseGlobal,
  kSparseDilated,
  kSparseBlockLocal,
};
// Shape and type inference implementation for the FlashAttentionScore primitive.
class MIND_API FlashAttentionScoreFuncImpl : public OpFuncImpl {
 public:
  BaseShapePtr InferShape(const PrimitivePtr &primitive,
                          const std::vector<AbstractBasePtr> &input_args) const override;
  TypePtr InferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) const override;
};
}  // namespace ops
}  // namespace mindspore
#endif  // MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_FLASH_ATTENTION_SCORE_H_
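
// Usage sketch (an illustrative assumption, not part of the declarations above): the index
// enums are intended for addressing the operator's flattened argument list, with the
// trailing *Num enumerators doubling as the expected argument/output counts, e.g.
//
//   if (input_args.size() != kFlashAttentionScoreInputsNum) { /* report a size mismatch */ }
//   const auto &query = input_args[kFlashAttentionScoreInputQueryIndex];
//   const auto &attn_mask = input_args[kFlashAttentionScoreInputAttnMaskIndex];
//
// The actual InferShape/InferType bodies are defined out of line in the corresponding
// .cc implementation file.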