1 /** 2 * Copyright 2020 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_KERNEL_ADJUST_H_ 18 #define MINDSPORE_CCSRC_RUNTIME_DEVICE_KERNEL_ADJUST_H_ 19 20 #include <memory> 21 #include <map> 22 #include <string> 23 #include <vector> 24 #include <unordered_set> 25 #include "ir/anf.h" 26 #include "backend/session/kernel_graph.h" 27 #include "backend/kernel_compiler/kernel_build_info.h" 28 #include "backend/session/session_context.h" 29 #include "ir/tensor.h" 30 #include "runtime/device/kernel_info.h" 31 #include "runtime/device/kernel_runtime_manager.h" 32 33 #ifndef ENABLE_SECURITY 34 #include "runtime/device/ascend/profiling/profiling_utils.h" 35 using mindspore::device::ascend::ProfilingTraceInfo; 36 using mindspore::device::ascend::ProfilingUtils; 37 #endif 38 namespace mindspore { 39 constexpr auto kCurLoopCountParamName = "cur_loop_count"; 40 constexpr auto kNextLoopCountParamName = "next_loop_count"; 41 constexpr auto kIterLoopParamName = "iter_loop"; 42 constexpr auto kOneParamName = "one"; 43 constexpr auto kEpochParamName = "loop_epoch"; 44 constexpr auto kStreamNeedActivedFirst = "stream_need_active_first"; 45 constexpr uint32_t kSecondStreamSwitchLabel = 2; 46 enum StreamSwitchKind { 47 kFpBpStreamSwitch = 0, 48 kGetNextStreamSwitch = 1, 49 kEosStreamSwitch = 2, 50 kIndependentStreamSwitch = 3 51 }; 52 53 namespace device { 54 class KernelAdjust { 55 public: GetInstance()56 static KernelAdjust &GetInstance() { 57 static KernelAdjust instance; 58 return instance; 59 } 60 61 void InsertOverflowCheckOperations(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr); 62 void InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr); 63 bool StepLoadCtrlInputs(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr); 64 #ifndef ENABLE_SECURITY 65 void Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr); 66 #endif 67 static bool NeedInsertSwitch(); 68 CNodePtr CreateStreamActiveOp(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr); 69 70 private: 71 KernelAdjust() = default; 72 ~KernelAdjust() = default; 73 74 CNodePtr CreateNPUGetFloatStatus(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 75 const CNodePtr &npu_cnode); 76 CNodePtr CreateNPUClearStatus(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 77 const CNodePtr &npu_cnode); 78 CNodePtr CreateNPUAllocStatus(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr); 79 CNodePtr CreateAssignAdd(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, const CNodePtr &npu_get_cnode, 80 const AnfNodePtr &specify_para); 81 CNodePtr CreateAssign(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, const AnfNodePtr &specify_para); 82 void ReorderGetNext(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr); 83 CNodePtr CreateRecvApplyKernel(const std::shared_ptr<session::KernelGraph> &graph_ptr, uint32_t event_id); 84 CNodePtr CreateSendApplyKernel(const std::shared_ptr<session::KernelGraph> &graph_ptr, uint32_t event_id); 85 void CreateSwitchOpParameters(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 86 std::map<std::string, mindspore::ParameterPtr> *switch_loop_input); 87 CNodePtr CreateStreamSwitchOp(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 88 const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input, 89 StreamSwitchKind kind); 90 91 CNodePtr CreatTupleGetItemNode(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, const CNodePtr &node, 92 size_t output_idx); 93 CNodePtr CreateEndOfSequenceOP(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 94 const CNodePtr &getnext_cnode); 95 CNodePtr CreateStreamAssignAddnOP(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 96 const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input, 97 bool cur_loop); 98 kernel::KernelBuildInfo::KernelBuildInfoBuilder CreateMngKernelBuilder(const std::vector<std::string> &formats, 99 const std::vector<TypeId> &type_ids); 100 void LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs); 101 void InitCtrlInputs(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr); 102 #ifndef ENABLE_SECURITY 103 void InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info, 104 NotNull<session::KernelGraph *> kernel_graph_ptr); 105 #endif 106 bool ExistIndependent(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr); 107 bool ExistGetNext(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr); 108 109 void InsertSwitchLoopInput(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 110 const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input); 111 void InsertGetNextLoopStreamSwitch(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 112 std::vector<CNodePtr> *exec_order, uint32_t *getnext_switch_stream_id, 113 uint32_t *getnext_stream_id, 114 const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input); 115 void SetBeforeGetNextStreamID(std::vector<CNodePtr> *exec_order, const std::vector<CNodePtr> &orders, 116 size_t *order_index, CNodePtr getnext_cnode, uint32_t getnext_stream_id); 117 void InsertGetNextLoopFpBpStartSend(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 118 std::vector<CNodePtr> *exec_order, uint32_t *fpbp_start_event_id, 119 uint32_t getnext_stream_id); 120 void InsertGetNextLoopEosStartSend(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 121 std::vector<CNodePtr> *exec_order, uint32_t *eos_start_event_id, 122 uint32_t getnext_stream_id); 123 void InsertEosStreamSwitch(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 124 const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input, 125 std::vector<CNodePtr> *exec_order, uint32_t *eos_switch_stream_id, 126 uint32_t *eos_stream_id); 127 void InsertGetNextLoopEosStartRecv(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 128 std::vector<CNodePtr> *exec_order, uint32_t eos_start_event_id, 129 uint32_t eos_stream_id); 130 void InsertEosOp(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, std::vector<CNodePtr> *exec_order, 131 const CNodePtr &getnext_cnode, uint32_t eos_stream_id); 132 void InsertEosDoneSend(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 133 std::vector<CNodePtr> *exec_order, uint32_t *eos_done_event_id, uint32_t eos_stream_id); 134 void InsertIndepentParallel(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 135 const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input, 136 std::vector<CNodePtr> *exec_order); 137 void InsertFpBpLoopStreamSwitch(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 138 const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input, 139 std::vector<CNodePtr> *exec_order, uint32_t *fpbp_stream_id, 140 uint32_t *fpbp_switch_stream_id); 141 void InsertFpBpStartRecv(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 142 std::vector<CNodePtr> *exec_order, uint32_t fpbp_start_event_id, uint32_t fpbp_stream_id); 143 void InsertNextLoopAssignAdd(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 144 std::vector<CNodePtr> *exec_order, 145 const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input, 146 uint32_t fpbp_stream_id); 147 void CopyMemcpyList(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 148 const std::vector<CNodePtr> &orders, size_t order_index, std::vector<CNodePtr> *memcpy_list, 149 std::vector<CNodePtr> *other_list); 150 void InsertEosDoneRecv(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 151 std::vector<CNodePtr> *exec_order, uint32_t eos_done_event_id, uint32_t fpbp_stream_id); 152 void InsertGetNextLoopStreamActive(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 153 std::vector<CNodePtr> *exec_order, 154 const std::vector<uint32_t> &getnext_active_streams); 155 void InsertCurrentLoopAssignAdd(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 156 std::vector<CNodePtr> *exec_order, 157 const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input); 158 void InsertFpBpAndEosLoopStreamActive(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, 159 std::vector<CNodePtr> *exec_order, 160 const std::vector<uint32_t> &fpbp_active_streams); 161 }; 162 } // namespace device 163 } // namespace mindspore 164 #endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_KERNEL_ADJUST_H_ 165