/**
 * Copyright 2019-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_SESSION_ANF_RUNTIME_ALGORITHM_H
#define MINDSPORE_CCSRC_BACKEND_SESSION_ANF_RUNTIME_ALGORITHM_H
#include <iostream>
#include <string>
#include <vector>
#include <set>
#include <tuple>
#include <utility>
#include <memory>
#include <map>
#include <optional>
#include "ir/anf.h"
#include "ir/dtype.h"
#include "base/base.h"
#include "ir/primitive.h"
#include "ir/kernel_info_dev.h"
#include "kernel/kernel.h"
#include "kernel/kernel_build_info.h"
#include "utils/anf_utils.h"
#include "include/common/utils/contract.h"
#include "include/backend/device_address.h"
#include "include/backend/kernel_graph.h"
#include "include/backend/kernel_info.h"
#include "include/backend/visible.h"

namespace mindspore {
namespace session {
using DeviceAddress = device::DeviceAddress;
using DeviceAddressPtr = device::DeviceAddressPtr;
using Address = kernel::Address;
using AddressPtr = kernel::AddressPtr;
using kernel::KernelObjectType;
using kernel::KernelTensor;
using kernel::KernelTensorPtr;

class BACKEND_EXPORT AnfRuntimeAlgorithm {
 public:
  static AnfNodePtr MakeMonadValueNode(const KernelGraphPtr &kg);
  static void KeepOrder(const KernelGraphPtr &kg, const AnfNodePtr &former, const AnfNodePtr &latter);
  // Get the memory size of the output tensor of the node.
  static size_t GetOutputTensorMemSize(const AnfNodePtr &node, size_t output_index);
  static size_t GetOutputTensorMemSize(const AnfNodePtr &node, size_t output_index, const ShapeVector &shape);
  // Get all selected output formats of the anf node.
  static std::vector<std::string> GetAllOutputFormats(const AnfNodePtr &node);
  // Get all selected input formats of the anf node.
  static std::vector<std::string> GetAllInputFormats(const AnfNodePtr &node);
  // Get all selected input device types of the anf node.
  static std::vector<TypeId> GetAllInputDeviceTypes(const AnfNodePtr &node);
  // Get all selected output device types of the anf node.
  static std::vector<TypeId> GetAllOutputDeviceTypes(const AnfNodePtr &node);
  // Get the selected origin data format of the anf node.
  static std::string GetOriginDataFormat(const AnfNodePtr &node);
  // Get the selected output format of the anf node.
  static std::string GetOutputFormat(const AnfNodePtr &node, size_t output_idx);
  // Get the selected input format of the anf node.
  static std::string GetInputFormat(const AnfNodePtr &node, size_t input_idx);
  // Judge whether two formats are equivalent when converting between the default format and the real format.
  static bool IsEquivalentFormat(const std::string &src_format, const std::string &dst_format);
  // Get the output format from the prev node; input_idx is the input index of the current node
  // related to the prev node.
  static std::string GetPrevNodeOutputFormat(const AnfNodePtr &anf_node, size_t input_idx);
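  // Illustrative usage sketch (not part of the API; assumption for the example only: `node` and its producer are
  // kernel CNodes whose kernel build info has already been selected):
  //   const std::string expected_fmt = AnfAlgo::GetInputFormat(node, 0);
  //   const std::string actual_fmt = AnfAlgo::GetPrevNodeOutputFormat(node, 0);
  //   if (!AnfAlgo::IsEquivalentFormat(actual_fmt, expected_fmt)) {
  //     // A format transformation would typically be inserted between the two nodes here.
  //   }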
  // Get the reshape_type from the output of the input node.
  static std::string GetPrevNodeOutputReshapeType(const AnfNodePtr &node, size_t input_idx);
  // Get the output shapes which will be built and run on device.
  static std::vector<int64_t> GetOutputDeviceShape(const AnfNodePtr &node, size_t output_idx);
  // Get the output shapes which will be built and run on device in the dynamic shape scenario.
  static std::vector<int64_t> GetOutputDeviceShape(const AnfNodePtr &node, size_t output_idx, ShapeVector real_shape);
  // Get the input shapes which will be built and run on device.
  static std::vector<int64_t> GetInputDeviceShape(const AnfNodePtr &node, size_t input_idx);
  // Get the output shapes for tbe build.
  static std::vector<int64_t> GetOutputDeviceShapeForTbeBuild(const AnfNodePtr &node, size_t output_idx,
                                                              const std::string &format);
  // Get the input shapes for tbe build.
  static std::vector<int64_t> GetInputDeviceShapeForTbeBuild(const AnfNodePtr &node, size_t input_idx,
                                                             const std::string &format);
  // Get the input kernel object types.
  static std::vector<KernelObjectType> GetInputKernelObjectTypes(const AnfNodePtr &node);
  static KernelObjectType GetInputKernelObjectType(const AnfNodePtr &node, size_t input_idx);
  // Get the output kernel object types.
  static std::vector<KernelObjectType> GetOutputKernelObjectTypes(const AnfNodePtr &node);
  static KernelObjectType GetOutputKernelObjectType(const AnfNodePtr &node, size_t output_idx);
  // Get the output elements kernel object types.
  static std::vector<KernelObjectType> GetOutputElementsKernelObjectTypes(const AnfNodePtr &node);
  // Get the input padding axis.
  static std::string GetInputReshapeType(const AnfNodePtr &node, size_t input_idx);
  // Get the output padding axis.
  static std::string GetOutputReshapeType(const AnfNodePtr &node, size_t output_idx);
  // Get all input reshape types of the anf node.
  static std::vector<std::string> GetAllInputReshapeType(const AnfNodePtr &node);
  // Get all output reshape types of the anf node.
  static std::vector<std::string> GetAllOutputReshapeType(const AnfNodePtr &node);
  // Get the selected output data type of the anf node.
  static TypeId GetOutputDeviceDataType(const AnfNodePtr &node, size_t output_idx);
  // Get the selected input data type of the anf node.
  static TypeId GetInputDeviceDataType(const AnfNodePtr &node, size_t input_idx);
  // Get the selected output data type from the prev node; input_idx is the input index of the current node
  // related to the prev node.
  static TypeId GetPrevNodeOutputDeviceDataType(const AnfNodePtr &anf_node, size_t input_idx);
  // Get the output device address of the anf node.
  static const DeviceAddress *GetOutputAddr(const AnfNodePtr &node, size_t output_idx, bool skip_nop_node = true);
  // Get the mutable output device address of the anf node.
  static DeviceAddressPtr GetMutableOutputAddr(const AnfNodePtr &node, size_t output_idx, bool skip_nop_node = true);
  static DeviceAddressPtr GetMutableOutputAddr(const KernelWithIndex &node_output_index, bool skip_nop_node) {
    return GetMutableOutputAddr(node_output_index.first, node_output_index.second, skip_nop_node);
  }
  // Check whether the output address exists.
  static bool OutputAddrExist(const AnfNodePtr &node, size_t output_idx, bool skip_nop_node = false);
  // Check whether the workspace address exists.
  static bool WorkspaceAddrExist(const AnfNodePtr &node, size_t output_idx);
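  // Illustrative usage sketch (assumption for the example only: `kernel` is a kernel CNode whose output
  // addresses have already been allocated by the runtime):
  //   if (AnfAlgo::OutputAddrExist(kernel, 0)) {
  //     DeviceAddressPtr out_addr = AnfAlgo::GetMutableOutputAddr(kernel, 0);
  //     // out_addr can then be bound to the launch inputs of a consumer kernel.
  //   }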
  // Get the output address from the prev node; input_idx is the input index of the current node
  // related to the prev node.
  static const DeviceAddress *GetPrevNodeOutputAddr(const AnfNodePtr &anf_node, size_t input_idx,
                                                    bool skip_nop_node = true);
  static DeviceAddressPtr GetPrevNodeMutableOutputAddr(const AnfNodePtr &anf_node, size_t input_idx,
                                                       bool skip_nop_node = true);

  // Get shape, device type and value information.
  static std::tuple<abstract::BaseShapePtr, TypePtr, ValuePtr> GetAbstractInfo(const AnfNodePtr &node,
                                                                               size_t output_idx);

  static bool ExistOutputKernelTensor(const AnfNodePtr &node, size_t output_idx);

  // Get the output kernel tensor if it exists, otherwise throw an exception.
  static const KernelTensorPtr &GetOutputKernelTensor(const AnfNodePtr &node, size_t output_idx);
  // Get the output kernel tensor if it exists, otherwise create a new one and set it into the node.
  static const KernelTensorPtr &GetOrCreateOutputKernelTensor(const AnfNodePtr &node, size_t output_idx);

  // Get the input kernel tensor if it exists, otherwise throw an exception.
  static const KernelTensorPtr &GetPrevNodeOutputKernelTensor(const AnfNodePtr &node, size_t input_idx);
  // Get the input kernel tensor if it exists, otherwise create a new one and set it into the input node.
  static const KernelTensorPtr &GetOrCreatePrevNodeOutputKernelTensor(const AnfNodePtr &node, size_t input_idx);

  // Get all input kernel tensors if they exist, otherwise create new KernelTensors and set them into the input nodes.
  static std::vector<KernelTensor *> GetOrCreateAllInputKernelTensors(const AnfNodePtr &node);
  // Get all output kernel tensors if they exist, otherwise create new KernelTensors and set them into the node.
  static std::vector<KernelTensor *> GetOrCreateAllOutputKernelTensors(const AnfNodePtr &node);

  // Create the output kernel tensor for the node using the node's shape, type and value,
  // and set device information to the kernel tensor.
  static KernelTensorPtr CreateOutputKernelTensorWithDeviceInfo(const AnfWithOutIndex &node_with_index,
                                                                void *const device_ptr, size_t size,
                                                                const string &format, TypeId dtype_id,
                                                                const ShapeVector &host_shape,
                                                                const std::string &device_name, uint32_t device_id,
                                                                const UserDataPtr &user_data = nullptr);

  // Get all input memory sizes of the node.
  static std::vector<size_t> GetNodeInputSizeList(const AnfNodePtr &node);

  static size_t GetOutputAddressNum(const AnfNodePtr &node);
  // Set the output device address of the anf node.
  static void SetOutputAddr(const DeviceAddressPtr &addr, size_t output_idx, AnfNode *node);
  // Set the workspace device address of the anf node.
  static void SetWorkspaceAddr(const DeviceAddressPtr &addr, size_t output_idx, AnfNode *node);
  // Get the workspace device address of the anf node.
  static DeviceAddress *GetWorkspaceAddr(const AnfNodePtr &node, size_t output_idx);
  // Get the mutable workspace device address of the anf node.
  static DeviceAddressPtr GetMutableWorkspaceAddr(const AnfNodePtr &node, size_t index);
  // Get the op pattern of the node.
  static kernel::OpPattern GetOpPattern(const AnfNodePtr &node);
  // Get the KernelType of the node, such as ATT, RT, FWK and so on.
  static KernelType GetKernelType(const AnfNodePtr &node);
  // Get the processor type: AICORE, AICPU, ...
  static kernel::Processor GetProcessor(const AnfNodePtr &node);
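  // Illustrative usage sketch (assumption for the example only: `kernel` is a kernel CNode that is about to be
  // launched, so missing kernel tensors may be created on demand):
  //   std::vector<KernelTensor *> inputs = AnfAlgo::GetOrCreateAllInputKernelTensors(kernel);
  //   std::vector<KernelTensor *> outputs = AnfAlgo::GetOrCreateAllOutputKernelTensors(kernel);
  //   // inputs/outputs can then be handed to the kernel mod returned by AnfAlgo::GetKernelMod(kernel).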
  // Get/set the fusion type of the node.
  static std::string GetFusionType(const AnfNodePtr &node);
  static void SetFusionType(const AnfNodePtr &node, const std::string &type);
  // Get whether the KernelBuildInfo is valid.
  static bool GetValid(const AnfNodePtr &node);

  static void SetOutputDataDesc(const AnfNodePtr &node, const std::vector<nlohmann::json> &desc);
  static std::vector<nlohmann::json> GetOutputDataDesc(const AnfNodePtr &node);
  // Core type.
  static void SetCoreType(const AnfNodePtr &node, const std::string &core_type);
  static std::string GetCoreType(const AnfNodePtr &node);
  // Op type.
  static kernel::OpType GetOpType(const AnfNodePtr &node);
  // Set the selected kernel_build_info.
  static void SetSelectKernelBuildInfo(const kernel::KernelBuildInfoPtr &select_kernel_build_info, AnfNode *node);
  // Get the selected kernel_build_info.
  static kernel::KernelBuildInfoPtr GetSelectKernelBuildInfo(const AnfNodePtr &node);
  // Get the kernel mod.
  static kernel::KernelMod *GetKernelMod(const AnfNodePtr &node);
  // Set the kernel mod.
  static void SetKernelMod(const kernel::KernelModPtr &kernel_mod, AnfNode *node);
  // Set the stream id of the kernel, which is assigned in stream assign and used in stream generation.
  static void SetStreamId(uint32_t stream_id, AnfNode *node);
  // Get the stream id.
  static uint32_t GetStreamId(const AnfNodePtr &node);
  // Set the stream distinction label to distinguish different ops in different streams.
  static void SetStreamDistinctionLabel(uint32_t stream_label, AnfNode *node);
  // Get the stream distinction label.
  static uint32_t GetStreamDistinctionLabel(const AnfNode *node);
  // Set the graph id.
  static void SetGraphId(uint32_t graph_id, AnfNode *node);
  // Get the graph id.
  static uint32_t GetGraphId(const AnfNode *node);
  // Check whether the node's output is a feature map output.
  static bool IsFeatureMapOutput(const AnfNodePtr &node);
  // Check whether the node's input is from a feature map output.
  static bool IsFeatureMapInput(const AnfNodePtr &node, size_t input_index);
  // Get the input index in the graph for some tbe ops whose input order differs between the graph and the tbe kernel.
  static size_t GetInputGraphIdxByKernelIdx(const AnfNodePtr &anf_node, size_t input_index_in_kernel);
  // Get the input index in the kernel for some tbe ops whose input order differs between the graph and the tbe kernel.
  static size_t GetInputKernelIdxByGraphIdx(const AnfNodePtr &anf_node, size_t input_index_in_graph);
  static std::vector<KernelGraphPtr> GetCallSwitchKernelGraph(const CNodePtr &cnode);
  static KernelGraphPtr GetValueNodeKernelGraph(const AnfNodePtr &node);
  static bool IsIndependentNode(const CNodePtr &node);
  static void InferShape(const CNodePtr &node, std::map<uint32_t, tensor::TensorPtr> *depend_tensors = nullptr);
  static KernelGraphPtr FetchKernelGraph(const AnfNode *node);
  static AnfNodePtr FetchFrontNodeByBackendNode(const AnfNodePtr &backend_node, const KernelGraph &graph);
  static void InsertMakeTupleForOutput(const NotNull<KernelGraphPtr> &root_graph);

  static void UpdateGraphValidRefPair(const KernelGraphPtr &graph);
  static bool IsDynamicShapeSkipExecute(bool skip_mode, const ShapeVector &axes_shape);
  static bool IsDynamicShapeSkipExecute(const CNodePtr &cnode);
  // Return true if the output's shape and type need to be updated after launch.
  static bool IsNeedUpdateShapeAndTypeAfterLaunch(const AnfNodePtr &cnode);
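  // Illustrative usage sketch (assumption for the example only: `kernel` has already gone through kernel selection
  // and stream assignment):
  //   kernel::KernelMod *kernel_mod = AnfAlgo::GetKernelMod(kernel);
  //   uint32_t stream_id = AnfAlgo::GetStreamId(kernel);
  //   // The runtime would launch kernel_mod on the stream identified by stream_id.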
  // The size of the output address may change in the dynamic shape scenario; for example, the output shape of the
  // operator 'Unique' changes after launch, so the output address size should be updated.
  static void UpdateOutputAddrSize(device::KernelInfo const *kernel_info, const CNodePtr &kernel);
  static bool IsShapesDynamic(const std::vector<ShapeVector> &shapes);

  // Check whether the kernel has an input node which is a computed depend kernel.
  static bool HasComputedDependInputNode(const CNodePtr &kernel);

  static void AddOutInRefToGraph(const KernelGraphPtr &graph);
  static bool HasOriginFormat(const AnfNodePtr &anf_node);
  static std::string GetOriginFormat(const AnfNodePtr &anf_node);

  static bool NodeValueIsFuncGraph(const AnfNodePtr &node);

  // Whether the kernel is not supported by other devices and needs to be backed off to the CPU device.
  static bool IsNodeSupportKernelSelectBackoff(const AnfNodePtr &node, const KernelGraphPtr &graph);
  static bool IsKernelSelectBackoffOp(const AnfNodePtr &node);
  static void SetKernelSelectBackoffInfo(const CNodePtr &node,
                                         const std::pair<std::string, ExceptionType> &failure_info);
  static std::pair<std::string, ExceptionType> GetKernelSelectBackoffInfo(const AnfNodePtr &node);

  // The related interfaces of device target.
  static std::string FetchDeviceTarget(const AnfNodePtr &node, const KernelGraph *graph);
  // Set the device target for parameter affinity by the user nodes in the graph.
  static void SetParameterDeviceTarget(const KernelGraphPtr graph);

  // Get the real output num (which can be built and run on device).
  static size_t GetOutputTensorNum(const AnfNodePtr &node);
  // Get the real output num before kernel select.
  static size_t GetOutputNumWithoutKernelInfo(const AnfNodePtr &node);
  // Get the expanded output element num (the tuple is expanded when calculating the num).
  static size_t GetOutputElementNum(const AnfNodePtr &node);

  // Get the output abstract type of the anf node.
  static TypeId GetAbstractObjectType(const AbstractBasePtr &abstract);
  static TypeId GetOutputObjectType(const AnfNodePtr &node, size_t output_idx);
  static TypeId GetInputObjectType(const CNodePtr &node, size_t input_idx);
  static std::vector<TypeId> GetAllInputObjectType(const AnfNodePtr &node);
  static std::vector<TypeId> GetAllOutputObjectType(const AnfNodePtr &node);
  // Get all output infer data types.
  static std::vector<TypeId> GetAllOutputInferDataTypes(const AnfNodePtr &node);
  // Get the unfold input num.
  static size_t GetInputElementNum(const AnfNodePtr &node);
  static bool IsRealSquenceOutput(const AnfNodePtr &node);
  static void SetDynamicAttrToPrim(const PrimitivePtr &prim);

  // Get the output detail shape. These interfaces should take TUPLE output into consideration.
  static abstract::BaseShapePtr GetOutputDetailShape(const AnfNodePtr &node, size_t output_idx);
  static abstract::BaseShapePtr GetPrevNodeOutputDetailShape(const AnfNodePtr &node, size_t input_idx);

  // Check whether the input scalar needs to be converted to a tensor.
  static bool IsScalarConvertToTensor(const AnfNodePtr &input_node, const CNodePtr &node);
  // Check whether all elements of a node's output (tuple/list type) are scalars.
  static bool IsSequenceOutputOfScalar(const AnfNodePtr &node);
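  // Illustrative usage sketch (assumption for the example only: `node` may have a tuple output):
  //   size_t real_output_num = AnfAlgo::GetOutputTensorNum(node);      // outputs as built/run on device
  //   size_t expanded_output_num = AnfAlgo::GetOutputElementNum(node); // tuple outputs expanded element-wise
  //   // The two counts can differ when a tuple output is not expanded for the device kernel.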

  // The tensor related interfaces.
  static tensor::TensorPtr CreateMapTensor(const DeviceAddressPtr &output_device_address);
  static tensor::TensorPtr CreateMapTensor(const AnfNodePtr &output_node, size_t output_index);
  static tensor::TensorPtr SequenceToTensor(const ValuePtr &value);
  static void FlattenDynamicInputArg(const BaseRef &arg, const AnfNodePtr &node,
                                     std::vector<tensor::TensorPtr> *flatten_tensors);
  static void FlattenInputArg(const BaseRef &arg, const AnfNodePtr &node,
                              std::vector<tensor::TensorPtr> *flatten_tensors);

  // Check whether an AnfNode is a Summary node.
  static bool IsSummaryNode(const AnfNodePtr &node);
  static void UpdateValueNodeShape(const AnfNodePtr &node);
  static bool HasSelectKernelBuildInfo(const AnfNodePtr &node);
  static bool NeedEraseCache(const PrimitivePtr &prim);

  static abstract::AbstractBasePtr GetNodeAbstractByIndex(const AnfNodePtr &node, size_t index);

  // Create a value node in the kernel graph for the given value and register it to the graph.
  static inline ValueNodePtr ConvertValueToNode(const KernelGraphPtr &kernel_graph, const ValuePtr &value) {
    MS_EXCEPTION_IF_NULL(kernel_graph);
    MS_EXCEPTION_IF_NULL(value);
    auto value_node = kernel_graph->NewValueNode(value->ToAbstract(), value);
    kernel_graph->AddValueNodeToGraph(value_node);
    return value_node;
  }
  // Create a type id value node and add it to the graph.
  static ValueNodePtr CreateTypeIdValueNodeToKernelGraph(const FuncGraphPtr &func_graph, TypeId data_type);
  static ValueNodePtr CreateTypeIdValueNodeToFuncGraph(const FuncGraphPtr &func_graph, TypeId data_type);
};
}  // namespace session

using AnfAlgo = session::AnfRuntimeAlgorithm;
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_SESSION_ANF_RUNTIME_ALGORITHM_H