/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CORE_OPS_INCRE_FLASH_ATTENTION_H_
#define MINDSPORE_CORE_OPS_INCRE_FLASH_ATTENTION_H_
#include <map>
#include <memory>
#include <vector>
#include "ops/base_operator.h"
#include "mindapi/base/types.h"
#include "ops/primitive_c.h"
#include "abstract/abstract_value.h"
#include "mindspore/core/ops/op_name.h"

namespace mindspore {
namespace ops {
// Registered primitive name for this operator; must match the name used by
// the Python-side operator registration.
constexpr auto kNameIncreFlashAttention = "IncreFlashAttention";

// Positional indices of the operator's inputs. The order here must stay in
// sync with the name list passed to InitIOName() in the constructor below.
// Trailing enumerators rely on implicit +1 sequencing, so new inputs may only
// be appended before kIncreFlashAttentionInputsNum.
enum IncreFlashAttentionInputIndex : size_t {
  kIncreFlashAttentionInputQueryIndex = 0,
  kIncreFlashAttentionInputKeyIndex,
  kIncreFlashAttentionInputValueIndex,
  kIncreFlashAttentionInputAttnMaskIndex,
  kIncreFlashAttentionInputActualSeqLengths,
  // NOTE(review): slot 5 is registered as "padding_mask" in InitIOName() below
  // but is named PseShift here — confirm which name the framework expects.
  kIncreFlashAttentionInputPseShiftIndex,
  kIncreFlashAttentionInputDequantScale1,
  kIncreFlashAttentionInputQuantScale1,
  kIncreFlashAttentionInputDequantScale2,
  kIncreFlashAttentionInputQuantScale2,
  kIncreFlashAttentionInputQuantOffset2,
  kIncreFlashAttentionInputAntiquantScale,
  kIncreFlashAttentionInputAntiquantOffset,
  kIncreFlashAttentionInputBlockTable,
  kIncreFlashAttentionInputsNum,  // sentinel: total number of inputs (14)
};

// Positional indices of the operator's outputs.
enum IncreFlashAttentionOutputIndex : size_t {
  kIncreFlashAttentionOutputAttentionOutIndex = 0,
  kIncreFlashAttentionOutputsNum,  // sentinel: total number of outputs (1)
};

/// \brief IncreFlashAttention.
/// Refer to Python API @ref mindspore.ops.IncreFlashAttention for more details.
class MIND_API IncreFlashAttention : public BaseOperator {
 public:
  MIND_API_BASE_MEMBER(IncreFlashAttention);
  /// \brief Constructor. Registers the input/output names with the base
  /// operator; the input name order must match IncreFlashAttentionInputIndex.
  IncreFlashAttention() : BaseOperator(kNameIncreFlashAttention) {
    InitIOName(
      {"query", "key", "value", "attn_mask", "actual_seq_lengths", "padding_mask", "dequant_scale1", "quant_scale1",
       "dequant_scale2", "quant_scale2", "quant_offset2", "antiquant_scale", "antiquant_offset", "block_table"},
      {"attention_out"});
  }
};

/// \brief Shape/type inference entry point for IncreFlashAttention; declared
/// here, defined in the corresponding implementation file.
AbstractBasePtr IncreFlashAttentionInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                         const std::vector<AbstractBasePtr> &input_args);
// Convenience alias for shared ownership of the operator.
using IncreFlashAttentionPtr = std::shared_ptr<IncreFlashAttention>;
}  // namespace ops
}  // namespace mindspore

#endif  // MINDSPORE_CORE_OPS_INCRE_FLASH_ATTENTION_H_