/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_TBE_ADAPTER_H
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_TBE_ADAPTER_H

#include <string>
#include <map>
#include <memory>
#include <vector>
#include <algorithm>
#include <unordered_set>
#include "nlohmann/json.hpp"
#include "base/base.h"
#include "backend/kernel_compiler/oplib/opinfo.h"
#include "backend/kernel_compiler/kernel_fusion.h"
#include "backend/session/anf_runtime_algorithm.h"
// Note: This file is mainly used to adapt differences between the ME front-end operator
// description and the TBE back-end operator implementation.
namespace mindspore {
namespace kernel {
enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
namespace tbe {
// Maps an ME front-end operator name to the operator type name used by the TBE back end.
const std::map<std::string, std::string> opTypeAdapter = {
  {"ReLUV2", "ReluV2"},       {"ReLU6", "Relu6"},
  {"ReLU6Grad", "Relu6Grad"}, {"ReLUGrad", "ReluGrad"},
  {"ReLU", "Relu"},           {"Pad", "PadD"},
  {"Gather", "GatherV2"},     {"SparseApplyFtrl", "SparseApplyFtrlD"},
  {"Concat", "ConcatD"},      {"DepthwiseConv2dNative", "DepthwiseConv2D"},
  {"FastGeLU", "FastGelu"},   {"FastGeLUGrad", "FastGeluGrad"},
  {"GeLU", "Gelu"},           {"GeLUGrad", "GeluGrad"},
  {"PReLU", "PRelu"},         {"PReLUGrad", "PReluGrad"},
  {"SeLU", "Selu"},           {"TransposeNOD", "Transpose"}};

enum FusionDataType { kFusionNormal = 0, kFusionAddN, kFusionReLUGradV2, kFusionAdd };
using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
                            nlohmann::json *attrs_json);
using FPreAttrsPass = void (*)(const AnfNodePtr &anf_node, std::vector<OpAttrPtr> *op_info_attrs,
                               nlohmann::json *attrs_json);
class TbeAdapter {
 public:
  TbeAdapter() = default;
  ~TbeAdapter() = default;

  // Reorders the input descriptions from ME order to the order expected by the TBE kernel.
  template <typename T>
  static void InputOrderPass(const std::shared_ptr<AnfNode> &anf_node, std::vector<T> const &inputs_list,
                             std::vector<T> *inputs_json) {
    MS_EXCEPTION_IF_NULL(inputs_json);
    if (DynamicInputAdjusted(anf_node, inputs_list, inputs_json)) {
      return;
    }
    auto op_name = AnfAlgo::GetCNodeName(anf_node);
    if (input_order_adjusted_ops_.find(op_name) == input_order_adjusted_ops_.end()) {
      (void)std::copy(inputs_list.begin(), inputs_list.end(), std::back_inserter((*inputs_json)));
    } else {
      if (op_name == kMinimumGradOpName || op_name == kMaximumGradOpName) {
        inputs_json->push_back(inputs_list[kIndex2]);
        inputs_json->push_back(inputs_list[kIndex0]);
        inputs_json->push_back(inputs_list[kIndex1]);
        for (size_t i = 3; i < inputs_list.size(); ++i) {
          inputs_json->push_back(inputs_list[i]);
        }
      } else if (op_name == kApplyCenteredRMSPropOpName) {
        // The parameter order of ApplyCenteredRMSProp's TBE implementation differs from the Python
        // API, so map each TBE parameter to the corresponding Python API parameter by hard-coded index.
        inputs_json->push_back(inputs_list[kIndex0]);
        inputs_json->push_back(inputs_list[kIndex1]);
        inputs_json->push_back(inputs_list[kIndex2]);
        inputs_json->push_back(inputs_list[kIndex3]);
        inputs_json->push_back(inputs_list[kIndex5]);
        inputs_json->push_back(inputs_list[kIndex6]);
        inputs_json->push_back(inputs_list[kIndex7]);
        inputs_json->push_back(inputs_list[kIndex8]);
        inputs_json->push_back(inputs_list[kIndex4]);
      } else {
        // Default adjustment: swap the first two inputs and keep the rest in order.
        inputs_json->push_back(inputs_list[kIndex1]);
        inputs_json->push_back(inputs_list[kIndex0]);
        for (size_t i = 2; i < inputs_list.size(); ++i) {
          inputs_json->push_back(inputs_list[i]);
        }
      }
    }
  }
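
  // A minimal usage sketch (comment only, not compiled). Assuming the ME inputs of a
  // MinimumGrad node arrive as (x, y, dout), InputOrderPass emits the TBE order (dout, x, y).
  // The descriptor and node names below are hypothetical, for illustration only:
  //
  //   std::vector<nlohmann::json> me_inputs = {x_desc, y_desc, dout_desc};
  //   std::vector<nlohmann::json> tbe_inputs;
  //   TbeAdapter::InputOrderPass(minimum_grad_node, me_inputs, &tbe_inputs);
  //   // tbe_inputs now holds {dout_desc, x_desc, y_desc}.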

  // Handles input reordering for dynamic-shape nodes; returns true if it performed the adjustment.
  template <typename T>
  static bool DynamicInputAdjusted(const std::shared_ptr<AnfNode> &anf_node, std::vector<T> const &inputs_list,
                                   std::vector<T> *inputs_json) {
    if (!AnfAlgo::IsNodeDynamicShape(anf_node) && !AnfAlgo::IsDynamicShape(anf_node)) {
      return false;
    }
    auto op_name = AnfAlgo::GetCNodeName(anf_node);
    if (op_name == kConv2DBackpropInputOpName) {
      // Process dynamic Conv2DBackpropInput: the TBE kernel input order is x, input_size and dout.
      inputs_json->push_back(inputs_list[kIndex2]);
      inputs_json->push_back(inputs_list[kIndex1]);
      inputs_json->push_back(inputs_list[kIndex0]);
      return true;
    }
    if (op_name == kConv2DBackpropFilterOpName) {
      // Process dynamic Conv2DBackpropFilter: the TBE kernel input order is filter_size, x and dout.
      inputs_json->push_back(inputs_list[kIndex1]);
      inputs_json->push_back(inputs_list[kIndex2]);
      inputs_json->push_back(inputs_list[kIndex0]);
      return true;
    }
    return false;
  }
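
  // A minimal sketch of the dynamic-shape path (comment only, not compiled). For a dynamic
  // Conv2DBackpropInput node the ME inputs (i0, i1, i2) are emitted in reverse as (i2, i1, i0),
  // which matches the TBE order (x, input_size, dout) noted above. The names below are
  // hypothetical, for illustration only:
  //
  //   std::vector<nlohmann::json> me_inputs = {i0_desc, i1_desc, i2_desc};
  //   std::vector<nlohmann::json> tbe_inputs;
  //   if (TbeAdapter::DynamicInputAdjusted(conv2d_backprop_input_node, me_inputs, &tbe_inputs)) {
  //     // tbe_inputs now holds {i2_desc, i1_desc, i0_desc}.
  //   }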

  // TODO(xxx): delete
  // FusionInputOrderPass/InputOrderPass/FusionDataOrderPass/GenTopKV2IndicesTensorInfo/GetNodeFusionType
  static void FusionInputOrderPass(const std::shared_ptr<AnfNode> &anf_node,
                                   const std::vector<nlohmann::json> &inputs_list,
                                   std::vector<nlohmann::json> *inputs_json);
  static void InputOrderPass(const std::shared_ptr<AnfNode> &anf_node,
                             std::vector<std::vector<nlohmann::json>> const &inputs_list, nlohmann::json *inputs_json);
  static bool DynamicInputAdjusted(const std::shared_ptr<AnfNode> &anf_node,
                                   std::vector<std::vector<nlohmann::json>> const &inputs_list,
                                   nlohmann::json *inputs_json);
  static void FusionDataOrderPass(const std::string &op_name, const std::vector<AnfNodePtr> &data_layer,
                                  std::vector<AnfNodePtr> *reorder_data_layer);
  static void GenTopKV2IndicesTensorInfo(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index,
                                         std::vector<nlohmann::json> *input_list, kCreaterType creater_type);
  static std::string GetNodeFusionType(const mindspore::CNodePtr &cnode);

  static bool RunAttrPass(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
                          nlohmann::json *attrs_json);
  static void FusionDescJsonPass(const AnfNodePtr &node, nlohmann::json *output_desc,
                                 const std::map<const AnfNodePtr, tbe::FusionDataType> &spec_data_input);
  static std::string GetRealOpType(const std::string &origin_type);
  static std::string FormatPass(const std::string &format, const size_t &origin_shape_size);
  static bool GetSpecDataInput(const FusionScopeInfo &fusion_scope_info,
                               std::map<const AnfNodePtr, tbe::FusionDataType> *spec_data_input);
  static bool IsPlaceHolderInput(const AnfNodePtr &node, const OpIOInfoPtr &input_ptr);
  static void CastAttrJsonPrePass(const AnfNodePtr &anf_node, std::vector<OpAttrPtr> *op_info_attrs,
                                  nlohmann::json *attrs_json);
  static void CastAttrJsonPost(const AnfNodePtr &anf_node, nlohmann::json *attrs_json);
  static void LayerNormAttrJsonPost(const AnfNodePtr &anf_node, nlohmann::json *attrs_json);

 private:
  // TODO(xxx): delete MaxiOrMinimumGradAttrJsonPass
  static void MaxiOrMinimumGradAttrJsonPass(const AnfNodePtr &anf_node,
                                            const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
                                            nlohmann::json *attrs_json);
  static void CastAttrJsonPass(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
                               nlohmann::json *attrs_json);

  static bool IsSpecialFusionComputeNode(const std::vector<mindspore::AnfNodePtr> &compute_nodes);
  static bool GetSpecInputLayers(const std::string &op_name, const std::vector<mindspore::AnfNodePtr> &reorder_layer,
                                 std::map<const AnfNodePtr, FusionDataType> *spec_data_input);

  static std::map<std::string, FAttrsPass> build_json_attr_pass_map_;
  static std::unordered_set<std::string> input_order_adjusted_ops_;
};
}  // namespace tbe
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_TBE_ADAPTER_H