/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_KERNEL_ADJUST_H_
18 #define MINDSPORE_CCSRC_RUNTIME_DEVICE_KERNEL_ADJUST_H_
19 
20 #include <memory>
21 #include <map>
22 #include <string>
23 #include <vector>
24 #include <unordered_set>
25 #include "ir/anf.h"
26 #include "backend/session/kernel_graph.h"
27 #include "backend/kernel_compiler/kernel_build_info.h"
28 #include "backend/session/session_context.h"
29 #include "ir/tensor.h"
30 #include "runtime/device/kernel_info.h"
31 #include "runtime/device/kernel_runtime_manager.h"
32 
33 #ifndef ENABLE_SECURITY
34 #include "runtime/device/ascend/profiling/profiling_utils.h"
35 using mindspore::device::ascend::ProfilingTraceInfo;
36 using mindspore::device::ascend::ProfilingUtils;
37 #endif
38 namespace mindspore {
// String identifiers for loop-control values.
// NOTE(review): presumably these are the keys of the `switch_loop_input` maps passed around by
// device::KernelAdjust -- confirm against the implementation file.
constexpr const char *kCurLoopCountParamName = "cur_loop_count";
constexpr const char *kNextLoopCountParamName = "next_loop_count";
constexpr const char *kIterLoopParamName = "iter_loop";
constexpr const char *kOneParamName = "one";
constexpr const char *kEpochParamName = "loop_epoch";
// NOTE(review): looks like an attribute/flag name rather than a parameter name -- confirm usage.
constexpr const char *kStreamNeedActivedFirst = "stream_need_active_first";
// Numeric label with value 2; its exact meaning is not visible from this header.
constexpr uint32_t kSecondStreamSwitchLabel{2};
// Discriminator passed to device::KernelAdjust::CreateStreamSwitchOp as its `kind` argument.
// The first enumerator is pinned to 0 and the rest count up by one, so the numeric values are
// 0, 1, 2, 3 in declaration order. Do not reorder without checking callers that rely on the values.
enum StreamSwitchKind {
  kFpBpStreamSwitch = 0,    // 0
  kGetNextStreamSwitch,     // 1
  kEosStreamSwitch,         // 2
  kIndependentStreamSwitch  // 3
};
52 
53 namespace device {
54 class KernelAdjust {
55  public:
GetInstance()56   static KernelAdjust &GetInstance() {
57     static KernelAdjust instance;
58     return instance;
59   }
60 
61   void InsertOverflowCheckOperations(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
62   void InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
63   bool StepLoadCtrlInputs(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
64 #ifndef ENABLE_SECURITY
65   void Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr);
66 #endif
67   static bool NeedInsertSwitch();
68   CNodePtr CreateStreamActiveOp(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
69 
70  private:
71   KernelAdjust() = default;
72   ~KernelAdjust() = default;
73 
74   CNodePtr CreateNPUGetFloatStatus(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
75                                    const CNodePtr &npu_cnode);
76   CNodePtr CreateNPUClearStatus(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
77                                 const CNodePtr &npu_cnode);
78   CNodePtr CreateNPUAllocStatus(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
79   CNodePtr CreateAssignAdd(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, const CNodePtr &npu_get_cnode,
80                            const AnfNodePtr &specify_para);
81   CNodePtr CreateAssign(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, const AnfNodePtr &specify_para);
82   void ReorderGetNext(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
83   CNodePtr CreateRecvApplyKernel(const std::shared_ptr<session::KernelGraph> &graph_ptr, uint32_t event_id);
84   CNodePtr CreateSendApplyKernel(const std::shared_ptr<session::KernelGraph> &graph_ptr, uint32_t event_id);
85   void CreateSwitchOpParameters(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
86                                 std::map<std::string, mindspore::ParameterPtr> *switch_loop_input);
87   CNodePtr CreateStreamSwitchOp(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
88                                 const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input,
89                                 StreamSwitchKind kind);
90 
91   CNodePtr CreatTupleGetItemNode(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, const CNodePtr &node,
92                                  size_t output_idx);
93   CNodePtr CreateEndOfSequenceOP(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
94                                  const CNodePtr &getnext_cnode);
95   CNodePtr CreateStreamAssignAddnOP(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
96                                     const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input,
97                                     bool cur_loop);
98   kernel::KernelBuildInfo::KernelBuildInfoBuilder CreateMngKernelBuilder(const std::vector<std::string> &formats,
99                                                                          const std::vector<TypeId> &type_ids);
100   void LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs);
101   void InitCtrlInputs(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
102 #ifndef ENABLE_SECURITY
103   void InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info,
104                              NotNull<session::KernelGraph *> kernel_graph_ptr);
105 #endif
106   bool ExistIndependent(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
107   bool ExistGetNext(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
108 
109   void InsertSwitchLoopInput(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
110                              const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input);
111   void InsertGetNextLoopStreamSwitch(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
112                                      std::vector<CNodePtr> *exec_order, uint32_t *getnext_switch_stream_id,
113                                      uint32_t *getnext_stream_id,
114                                      const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input);
115   void SetBeforeGetNextStreamID(std::vector<CNodePtr> *exec_order, const std::vector<CNodePtr> &orders,
116                                 size_t *order_index, CNodePtr getnext_cnode, uint32_t getnext_stream_id);
117   void InsertGetNextLoopFpBpStartSend(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
118                                       std::vector<CNodePtr> *exec_order, uint32_t *fpbp_start_event_id,
119                                       uint32_t getnext_stream_id);
120   void InsertGetNextLoopEosStartSend(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
121                                      std::vector<CNodePtr> *exec_order, uint32_t *eos_start_event_id,
122                                      uint32_t getnext_stream_id);
123   void InsertEosStreamSwitch(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
124                              const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input,
125                              std::vector<CNodePtr> *exec_order, uint32_t *eos_switch_stream_id,
126                              uint32_t *eos_stream_id);
127   void InsertGetNextLoopEosStartRecv(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
128                                      std::vector<CNodePtr> *exec_order, uint32_t eos_start_event_id,
129                                      uint32_t eos_stream_id);
130   void InsertEosOp(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, std::vector<CNodePtr> *exec_order,
131                    const CNodePtr &getnext_cnode, uint32_t eos_stream_id);
132   void InsertEosDoneSend(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
133                          std::vector<CNodePtr> *exec_order, uint32_t *eos_done_event_id, uint32_t eos_stream_id);
134   void InsertIndepentParallel(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
135                               const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input,
136                               std::vector<CNodePtr> *exec_order);
137   void InsertFpBpLoopStreamSwitch(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
138                                   const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input,
139                                   std::vector<CNodePtr> *exec_order, uint32_t *fpbp_stream_id,
140                                   uint32_t *fpbp_switch_stream_id);
141   void InsertFpBpStartRecv(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
142                            std::vector<CNodePtr> *exec_order, uint32_t fpbp_start_event_id, uint32_t fpbp_stream_id);
143   void InsertNextLoopAssignAdd(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
144                                std::vector<CNodePtr> *exec_order,
145                                const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input,
146                                uint32_t fpbp_stream_id);
147   void CopyMemcpyList(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
148                       const std::vector<CNodePtr> &orders, size_t order_index, std::vector<CNodePtr> *memcpy_list,
149                       std::vector<CNodePtr> *other_list);
150   void InsertEosDoneRecv(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
151                          std::vector<CNodePtr> *exec_order, uint32_t eos_done_event_id, uint32_t fpbp_stream_id);
152   void InsertGetNextLoopStreamActive(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
153                                      std::vector<CNodePtr> *exec_order,
154                                      const std::vector<uint32_t> &getnext_active_streams);
155   void InsertCurrentLoopAssignAdd(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
156                                   std::vector<CNodePtr> *exec_order,
157                                   const std::map<std::string, mindspore::ParameterPtr> &switch_loop_input);
158   void InsertFpBpAndEosLoopStreamActive(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
159                                         std::vector<CNodePtr> *exec_order,
160                                         const std::vector<uint32_t> &fpbp_active_streams);
161 };
162 }  // namespace device
163 }  // namespace mindspore
164 #endif  // MINDSPORE_CCSRC_RUNTIME_DEVICE_KERNEL_ADJUST_H_
165