/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_TBE_ADAPTER_H
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_TBE_ADAPTER_H

#include <string>
#include <map>
#include <memory>
#include <vector>
#include <algorithm>
#include <unordered_set>
#include "nlohmann/json.hpp"
#include "base/base.h"
#include "backend/kernel_compiler/oplib/opinfo.h"
#include "backend/kernel_compiler/kernel_fusion.h"
#include "backend/session/anf_runtime_algorithm.h"
// Note: This file mainly adapts for the differences between the ME front-end
//       operator descriptions and the TBE back-end operator implementations.
namespace mindspore {
namespace kernel {
enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
namespace tbe {
const std::map<std::string, std::string> opTypeAdapter = {
  {"ReLUV2", "ReluV2"},       {"ReLU6", "Relu6"},
  {"ReLU6Grad", "Relu6Grad"}, {"ReLUGrad", "ReluGrad"},
  {"ReLU", "Relu"},           {"Pad", "PadD"},
  {"Gather", "GatherV2"},     {"SparseApplyFtrl", "SparseApplyFtrlD"},
  {"Concat", "ConcatD"},      {"DepthwiseConv2dNative", "DepthwiseConv2D"},
  {"FastGeLU", "FastGelu"},   {"FastGeLUGrad", "FastGeluGrad"},
  {"GeLU", "Gelu"},           {"GeLUGrad", "GeluGrad"},
  {"PReLU", "PRelu"},         {"PReLUGrad", "PReluGrad"},
  {"SeLU", "Selu"},           {"TransposeNOD", "Transpose"}};

enum FusionDataType { kFusionNormal = 0, kFusionAddN, kFusionReLUGradV2, kFusionAdd };
using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
                            nlohmann::json *attrs_json);
using FPreAttrsPass = void (*)(const AnfNodePtr &anf_node, std::vector<OpAttrPtr> *op_info_attrs,
                               nlohmann::json *attrs_json);
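// Illustrative sketch (assumption): FAttrsPass entries are meant to populate
// build_json_attr_pass_map_ below, keying an op type to its attribute pass, e.g.
// something like {"Cast", CastAttrJsonPass} (CastAttrJsonPass matches the FAttrsPass
// signature); the actual registrations live in the corresponding .cc file.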
class TbeAdapter {
 public:
  TbeAdapter() = default;
  ~TbeAdapter() = default;
  template <typename T>
  static void InputOrderPass(const std::shared_ptr<AnfNode> &anf_node, std::vector<T> const &inputs_list,
                             std::vector<T> *inputs_json) {
    MS_EXCEPTION_IF_NULL(inputs_json);
    if (DynamicInputAdjusted(anf_node, inputs_list, inputs_json)) {
      return;
    }
    auto op_name = AnfAlgo::GetCNodeName(anf_node);
    if (input_order_adjusted_ops_.find(op_name) == input_order_adjusted_ops_.end()) {
      (void)std::copy(inputs_list.begin(), inputs_list.end(), std::back_inserter((*inputs_json)));
    } else {
      if (op_name == kMinimumGradOpName || op_name == kMaximumGradOpName) {
        inputs_json->push_back(inputs_list[kIndex2]);
        inputs_json->push_back(inputs_list[kIndex0]);
        inputs_json->push_back(inputs_list[kIndex1]);
        for (size_t i = 3; i < inputs_list.size(); ++i) {
          inputs_json->push_back(inputs_list[i]);
        }
      } else if (op_name == kApplyCenteredRMSPropOpName) {
        // The parameter order of ApplyCenteredRMSProp's TBE implementation differs from the Python API,
        // so map each TBE parameter to its corresponding Python API parameter by hard-coded index.
        inputs_json->push_back(inputs_list[kIndex0]);
        inputs_json->push_back(inputs_list[kIndex1]);
        inputs_json->push_back(inputs_list[kIndex2]);
        inputs_json->push_back(inputs_list[kIndex3]);
        inputs_json->push_back(inputs_list[kIndex5]);
        inputs_json->push_back(inputs_list[kIndex6]);
        inputs_json->push_back(inputs_list[kIndex7]);
        inputs_json->push_back(inputs_list[kIndex8]);
        inputs_json->push_back(inputs_list[kIndex4]);
      } else {
        inputs_json->push_back(inputs_list[kIndex1]);
        inputs_json->push_back(inputs_list[kIndex0]);
        for (size_t i = 2; i < inputs_list.size(); ++i) {
          inputs_json->push_back(inputs_list[i]);
        }
      }
    }
  }
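  // Example (derived from the branches above): MinimumGrad/MaximumGrad emit their inputs in
  // index order (2, 0, 1, 3, ...), ApplyCenteredRMSProp emits (0, 1, 2, 3, 5, 6, 7, 8, 4), and
  // every other op in input_order_adjusted_ops_ simply swaps its first two inputs.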

  template <typename T>
  static bool DynamicInputAdjusted(const std::shared_ptr<AnfNode> &anf_node, std::vector<T> const &inputs_list,
                                   std::vector<T> *inputs_json) {
    if (!AnfAlgo::IsNodeDynamicShape(anf_node) && !AnfAlgo::IsDynamicShape(anf_node)) {
      return false;
    }
    auto op_name = AnfAlgo::GetCNodeName(anf_node);
    if (op_name == kConv2DBackpropInputOpName) {
      // Process dynamic Conv2DBackpropInput: the TBE kernel expects inputs in the order x, input_size, dout.
      inputs_json->push_back(inputs_list[kIndex2]);
      inputs_json->push_back(inputs_list[kIndex1]);
      inputs_json->push_back(inputs_list[kIndex0]);
      return true;
    }
    if (op_name == kConv2DBackpropFilterOpName) {
      // Process dynamic Conv2DBackpropFilter: the TBE kernel expects inputs in the order filter_size, x, dout.
      inputs_json->push_back(inputs_list[kIndex1]);
      inputs_json->push_back(inputs_list[kIndex2]);
      inputs_json->push_back(inputs_list[kIndex0]);
      return true;
    }
    return false;
  }
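  // Example (derived from the code above): a dynamic-shape Conv2DBackpropInput has its inputs
  // reordered from index order (0, 1, 2) to (2, 1, 0), and Conv2DBackpropFilter to (1, 2, 0);
  // nodes that are not dynamic-shape fall through (return false) and are handled by InputOrderPass.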

  // TODO(xxx): delete
  //  FusionInputOrderPass/InputOrderPass/FusionDataOrderPass/GenTopKV2IndicesTensorInfo/GetNodeFusionType
  static void FusionInputOrderPass(const std::shared_ptr<AnfNode> &anf_node,
                                   const std::vector<nlohmann::json> &inputs_list,
                                   std::vector<nlohmann::json> *inputs_json);
  static void InputOrderPass(const std::shared_ptr<AnfNode> &anf_node,
                             std::vector<std::vector<nlohmann::json>> const &inputs_list, nlohmann::json *inputs_json);
  static bool DynamicInputAdjusted(const std::shared_ptr<AnfNode> &anf_node,
                                   std::vector<std::vector<nlohmann::json>> const &inputs_list,
                                   nlohmann::json *inputs_json);
  static void FusionDataOrderPass(const std::string &op_name, const std::vector<AnfNodePtr> &data_layer,
                                  std::vector<AnfNodePtr> *reorder_data_layer);
  static void GenTopKV2IndicesTensorInfo(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index,
                                         std::vector<nlohmann::json> *input_list, kCreaterType creater_type);
  static std::string GetNodeFusionType(const mindspore::CNodePtr &cnode);

  static bool RunAttrPass(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
                          nlohmann::json *attrs_json);
  static void FusionDescJsonPass(const AnfNodePtr &node, nlohmann::json *output_desc,
                                 const std::map<const AnfNodePtr, tbe::FusionDataType> &spec_data_input);
  static std::string GetRealOpType(const std::string &origin_type);
  static std::string FormatPass(const std::string &format, const size_t &origin_shape_size);
  static bool GetSpecDataInput(const FusionScopeInfo &fusion_scope_info,
                               std::map<const AnfNodePtr, tbe::FusionDataType> *spec_data_input);
  static bool IsPlaceHolderInput(const AnfNodePtr &node, const OpIOInfoPtr &input_ptr);
  static void CastAttrJsonPrePass(const AnfNodePtr &anf_node, std::vector<OpAttrPtr> *op_info_attrs,
                                  nlohmann::json *attrs_json);
  static void CastAttrJsonPost(const AnfNodePtr &anf_node, nlohmann::json *attrs_json);
  static void LayerNormAttrJsonPost(const AnfNodePtr &anf_node, nlohmann::json *attrs_json);

 private:
  // TODO(xxx): delete MaxiOrMinimumGradAttrJsonPass
  static void MaxiOrMinimumGradAttrJsonPass(const AnfNodePtr &anf_node,
                                            const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
                                            nlohmann::json *attrs_json);
  static void CastAttrJsonPass(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
                               nlohmann::json *attrs_json);

  static bool IsSpecialFusionComputeNode(const std::vector<mindspore::AnfNodePtr> &compute_nodes);
  static bool GetSpecInputLayers(const std::string &op_name, const std::vector<mindspore::AnfNodePtr> &reorder_layer,
                                 std::map<const AnfNodePtr, FusionDataType> *spec_data_input);

  static std::map<std::string, FAttrsPass> build_json_attr_pass_map_;
  static std::unordered_set<std::string> input_order_adjusted_ops_;
};
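// Usage sketch (illustrative; `anf_node` and `inputs_list` are assumed to come from the JSON
// creator that builds the per-input descriptions):
//   std::vector<nlohmann::json> ordered;
//   TbeAdapter::InputOrderPass<nlohmann::json>(anf_node, inputs_list, &ordered);
//   // `ordered` now holds the input descriptions in the order the TBE kernel expects.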
}  // namespace tbe
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_TBE_ADAPTER_H