• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_KERNEL_ACTOR_H_
18 #define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_KERNEL_ACTOR_H_
19 
20 #include <vector>
21 #include <string>
22 #include <memory>
23 #include <utility>
24 #include <unordered_map>
25 #include "runtime/framework/actor/actor_common.h"
26 #include "runtime/framework/actor/debug_aware_actor.h"
27 #include "runtime/hardware/device_context.h"
28 #include "runtime/framework/device_tensor_store.h"
29 #include "backend/kernel_compiler/kernel.h"
30 #include "ir/anf.h"
31 #include "ir/tensor.h"
32 
33 namespace mindspore {
34 namespace runtime {
35 using mindspore::device::DeviceContext;
36 using mindspore::device::KernelInfo;
37 using mindspore::kernel::Address;
38 using mindspore::kernel::KernelLaunchInfo;
39 using mindspore::tensor::TensorPtr;
40 
41 // The kernel actor is used to receive the device tensors and control info to launch the kernel.
42 // The processing flow is RunOpData/RunOpControl -> CheckRunningCondition -> SendMemoryAllocReq
43 // -> OnMemoryAllocFinish -> LaunchKernel -> SendMemoryFreeReq -> SendOutput.
44 class KernelActor : public DebugAwareActor {
45  public:
KernelActor(const std::string & name,const CNodePtr & kernel,const DeviceContext * device_context,const AID & memory_manager_aid,const AID * debug_aid,const AID * recorder_aid,GraphExecutionStrategy strategy)46   KernelActor(const std::string &name, const CNodePtr &kernel, const DeviceContext *device_context,
47               const AID &memory_manager_aid, const AID *debug_aid, const AID *recorder_aid,
48               GraphExecutionStrategy strategy)
49       : DebugAwareActor(name, KernelTransformType::kKernelActor, recorder_aid, memory_manager_aid, debug_aid),
50         kernel_(kernel),
51         kernel_info_(nullptr),
52         is_dynamic_shape_(false),
53         real_input_num_(0),
54         strategy_(strategy) {
55     (void)device_contexts_.emplace_back(device_context);
56   }
57   ~KernelActor() override = default;
58 
59   void Init() override;
60 
61   // The kernel actor run when receive the input data.
62   void RunOpData(OpData<DeviceTensor> *const input_data, OpContext<DeviceTensor> *const context) override;
63   // The kernel actor run when receive the input control.
64   void RunOpControl(AID *const input_control, OpContext<DeviceTensor> *const context) override;
65   // The kernel actor run when receive the input control and input tensors, used in step mode.
66   void RunOpControlWithInputTensor(AID *const input_control, OpContext<DeviceTensor> *const context,
67                                    const std::vector<TensorPtr> *input_tensors);
68 
69   // The memory related operation interface.
70   void SendMemoryAllocReq(OpContext<DeviceTensor> *const context) override;
71   void SendMemoryFreeReq(OpContext<DeviceTensor> *const context) override;
72   // The callback after memory alloc finished.
73   void OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) override;
74 
75   // The debug related operation interface.
76   void SendDebugReq(OpContext<DeviceTensor> *const context) override;
77   // The callback after debug finished.
78   void OnDebugFinish(OpContext<DeviceTensor> *const context) override;
79 
80  private:
81   friend class GraphScheduler;
82 
83   // Fetch the device tensor for launch.
84   void FetchInputDeviceTensor(OpContext<DeviceTensor> *const context);
85   void FetchOutputDeviceTensor();
86   void CopyInputDeviceTensor(const OpData<DeviceTensor> *input_data, OpContext<DeviceTensor> *const context);
87   // In step mode, push the input tensors which contain valid device address into input_device_tensors_ directly.
88   void PushInputDeviceTensor(const std::vector<TensorPtr> *input_tensors);
89 
90   // The processing before kernel launch: update the info of kernel launch.
91   void PreLaunchKernel(OpContext<DeviceTensor> *const context);
92   // The processing after kernel launch: 1.erase input, 2.free memory, 3.send output.
93   void PostLaunchKernel(OpContext<DeviceTensor> *const context);
94 
95   // Send output data and output controls when finish kernel launch.
96   void SendOutput(OpContext<DeviceTensor> *const context) const;
97 
98   // The info of kernel.
99   CNodePtr kernel_;
100   KernelInfo *kernel_info_;
101   bool is_dynamic_shape_;
102 
103   // The real input number of kernel launch.
104   size_t real_input_num_;
105 
106   // The execution strategy of kernel actor.
107   // In pipeline mode, kernel actor executes asynchronously.
108   // In step mode, kernel actor executes synchronously.
109   GraphExecutionStrategy strategy_{GraphExecutionStrategy::kPipeline};
110 
111   // The device tensors for launch.
112   std::vector<DeviceTensor *> input_device_tensors_;
113   std::vector<DeviceTensor *> output_device_tensors_;
114   std::vector<DeviceTensor *> workspace_device_tensors_;
115   // The received input device type may be different from the device context type in the control flow and host device
116   // scenarios, so it needs to be copied from the input device type to the device context type.
117   std::vector<DeviceTensorPtr> copy_input_device_tensors_;
118 
119   // The device tensors for memory alloc and free.
120   // output + workspace
121   std::vector<DeviceTensor *> memory_alloc_list_;
122   // input + output + workspace
123   std::vector<DeviceTensor *> memory_free_list_;
124   // The device tensor of external reference is not the real data of this kernel, but need add to the memory_free_list_.
125   std::vector<DeviceTensor *> external_reference_tensors_;
126 
127   // The kernel launch info is fetched by the device tensors.
128   KernelLaunchInfo launch_info_;
129 
130   // Cache unique output data by output index to modify the output data effectively.
131   std::vector<std::vector<OpDataUniquePtr<DeviceTensor>>> output_data_by_output_index_;
132   //  The output_data_ corresponds to the output_data_arrows_ one by one.
133   std::vector<OpData<DeviceTensor> *> output_data_;
134 };
135 
136 using KernelActorPtr = std::shared_ptr<KernelActor>;
137 }  // namespace runtime
138 }  // namespace mindspore
139 
140 #endif  // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_KERNEL_ACTOR_H_
141