1 /**
2 * Copyright 2019-2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "runtime/device/ascend/ge_runtime/runtime_model.h"
18 #include <set>
19 #include "runtime/kernel.h"
20 #include "runtime/rt_model.h"
21 #include "graphengine/inc/external/runtime/rt_error_codes.h"
22 #include "runtime/device/ascend/ge_runtime/model_context.h"
23 #include "runtime/device/ascend/ge_runtime/task/task.h"
24 #include "runtime/device/ascend/ge_runtime/task/task_factory.h"
25 #include "mindspore/core/utils/log_adapter.h"
26
27 namespace mindspore::ge::model_runner {
// Tear down every rt resource owned by this model. The order is deliberate:
// streams must be unbound from the rt model before anything is destroyed,
// tasks are released before their streams (hccl tasks hold stream handles),
// and the rt model handle itself goes last.
RuntimeModel::~RuntimeModel() {
  MS_LOG(INFO) << "RuntimeModel destructor start.";

  // Unbind rtModel from all task related streams
  RtModelUnbindStream();

  // Release tasks first: hccl tasks hold stream handles that RtStreamDestory frees.
  task_list_.clear();

  // Release all task related streams
  RtStreamDestory();

  // Release rtLabel resources
  RtLabelDestory();

  // Release rtEvent resources
  RtEventDestory();

  MS_LOG(INFO) << "Do RtModelDestroy";
  // Release the rt model handle itself.
  RtModelDestory();
}
50
InitStream(const std::shared_ptr<DavinciModel> & davinci_model)51 void RuntimeModel::InitStream(const std::shared_ptr<DavinciModel> &davinci_model) {
52 MS_EXCEPTION_IF_NULL(davinci_model);
53
54 std::set<int64_t> wait_active_streams;
55 std::set<int64_t> force_copy_streams;
56
57 for (const auto &stream_id : davinci_model->GetWaitActiveStreams()) {
58 MS_LOG(INFO) << "Stream id " << stream_id << " is wait active stream.";
59 (void)wait_active_streams.insert(stream_id);
60 }
61
62 for (const auto &stream_id : davinci_model->GetForceCopyStreams()) {
63 MS_LOG(INFO) << "Stream id " << stream_id << " is force copy stream.";
64 (void)force_copy_streams.insert(stream_id);
65 }
66
67 MS_LOG(INFO) << "Total stream num " << davinci_model->GetStreamNum();
68 for (uint32_t i = 0; i < davinci_model->GetStreamNum(); ++i) {
69 rtStream_t stream = nullptr;
70 uint32_t flag = (force_copy_streams.find(i) != force_copy_streams.end())
71 ? (RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY)
72 : (RT_STREAM_PERSISTENT);
73
74 rtError_t rt_ret = rtStreamCreateWithFlags(&stream, davinci_model->GetPriority(), flag);
75 if (rt_ret != RT_ERROR_NONE) {
76 MS_LOG(EXCEPTION) << "Call rt api rtStreamCreate failed, ret: " << rt_ret;
77 }
78
79 MS_LOG(INFO) << "rtStreamCreateWithFlags end.";
80 stream_list_.emplace_back(stream);
81
82 // Bind rt_model_handle_ to all task related streams
83 flag = (wait_active_streams.find(i) != wait_active_streams.end()) ? (static_cast<uint32_t>(RT_INVALID_FLAG))
84 : (static_cast<uint32_t>(RT_HEAD_STREAM));
85 rt_ret = rtModelBindStream(rt_model_handle_, stream, flag);
86 if (rt_ret != RT_ERROR_NONE) {
87 MS_LOG(EXCEPTION) << "Call rt api rtModelBindStream failed, ret: " << rt_ret;
88 }
89 MS_LOG(INFO) << "stream index: " << i << ", stream: " << stream;
90 }
91 }
92
InitEvent(uint32_t event_num)93 void RuntimeModel::InitEvent(uint32_t event_num) {
94 MS_LOG(INFO) << "Event number: " << event_num;
95 for (uint32_t i = 0; i < event_num; ++i) {
96 rtEvent_t rt_event;
97 rtError_t rt_ret = rtEventCreate(&rt_event);
98 if (rt_ret != RT_ERROR_NONE) {
99 MS_LOG(EXCEPTION) << "Call rt api rtEventCreate failed, ret: " << rt_ret;
100 }
101 event_list_.push_back(rt_event);
102 }
103 }
104
InitLabel(const std::shared_ptr<DavinciModel> & davinci_model)105 void RuntimeModel::InitLabel(const std::shared_ptr<DavinciModel> &davinci_model) {
106 MS_LOG(INFO) << "Label number: " << davinci_model->GetBatchNum();
107 label_list_.resize(davinci_model->GetBatchNum());
108 for (auto &task_info : davinci_model->GetTaskInfoList()) {
109 MS_EXCEPTION_IF_NULL(task_info);
110
111 if (task_info->type() != TaskInfoType::LABEL_SET) {
112 continue;
113 }
114 auto label_set_task_info = std::static_pointer_cast<LabelSetTaskInfo>(task_info);
115
116 if (label_set_task_info->stream_id() >= stream_list_.size()) {
117 MS_LOG(EXCEPTION) << "Invalid stream id " << label_set_task_info->stream_id() << " total stream num "
118 << stream_list_.size();
119 }
120
121 rtLabel_t rt_label = nullptr;
122 rtError_t rt_ret = rtLabelCreateEx(&rt_label, stream_list_[label_set_task_info->stream_id()]);
123 if (rt_ret != RT_ERROR_NONE) {
124 MS_LOG(EXCEPTION) << "Call rt api rtLabelCreate failed, ret: " << rt_ret;
125 }
126 label_list_[label_set_task_info->label_id()] = rt_label;
127 }
128 }
129
// Create the rt model handle, its dedicated execution stream, and all
// per-model streams, events, and labels described by davinci_model.
// Must run before GenerateTask(). Raises (MS_LOG(EXCEPTION)) on any rt failure.
void RuntimeModel::InitResource(const std::shared_ptr<DavinciModel> &davinci_model) {
  MS_LOG(INFO) << "InitResource start";
  MS_EXCEPTION_IF_NULL(davinci_model);

  rtError_t rt_ret = rtModelCreate(&rt_model_handle_, 0);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "Call rt api rtModelCreate failed, ret: " << rt_ret;
  }

  // Create the stream on which rtModelExecute is later launched (see Run()).
  rt_ret = rtStreamCreate(&rt_model_stream_, davinci_model->GetPriority());
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "Call rt api rtStreamCreate failed, ret: " << rt_ret;
  }
  MS_LOG(INFO) << "rtStreamCreate end";

  InitStream(davinci_model);
  InitEvent(davinci_model->GetEventNum());
  InitLabel(davinci_model);

  MS_LOG(INFO) << "InitResource success";
}
152
GenerateTask(uint32_t device_id,uint64_t session_id,const std::shared_ptr<DavinciModel> & davinci_model)153 void RuntimeModel::GenerateTask(uint32_t device_id, uint64_t session_id,
154 const std::shared_ptr<DavinciModel> &davinci_model) {
155 MS_LOG(INFO) << "GenerateTask start.";
156 MS_EXCEPTION_IF_NULL(davinci_model);
157 auto task_infos = davinci_model->GetTaskInfoList();
158 ModelContext model_context(device_id, session_id, davinci_model->GetPriority(), rt_model_handle_, rt_model_stream_,
159 stream_list_, label_list_, event_list_);
160 for (auto &task_info : task_infos) {
161 auto task = TaskFactory::GetInstance().Create(model_context, task_info);
162 task_list_.push_back(task);
163 }
164 MS_LOG(INFO) << "GenerateTask success.";
165 }
166
LoadComplete()167 void RuntimeModel::LoadComplete() {
168 uint32_t task_id = 0;
169 uint32_t stream_id = 0;
170 auto rt_ret = rtModelGetTaskId(rt_model_handle_, &task_id, &stream_id);
171 if (rt_ret != RT_ERROR_NONE) {
172 MS_LOG(EXCEPTION) << "Call rt api rtModelGetTaskId failed, ret: " << rt_ret;
173 }
174 task_id_list_.push_back(task_id);
175 stream_id_list_.push_back(stream_id);
176
177 rt_ret = rtModelLoadComplete(rt_model_handle_);
178 if (rt_ret != RT_ERROR_NONE) {
179 MS_LOG(EXCEPTION) << "Call rt api rtModelLoadComplete failed, ret: " << rt_ret;
180 }
181 }
182
// Full load path: create all rt resources for davinci_model, then build the
// task list from its task infos. DistributeTask()/LoadComplete() follow separately.
void RuntimeModel::Load(uint32_t device_id, uint64_t session_id, const std::shared_ptr<DavinciModel> &davinci_model) {
  InitResource(davinci_model);
  GenerateTask(device_id, session_id, davinci_model);
}
187
DistributeTask()188 void RuntimeModel::DistributeTask() {
189 MS_LOG(INFO) << "DistributeTask start.";
190 for (auto &task : task_list_) {
191 MS_EXCEPTION_IF_NULL(task);
192 task->set_model_handle(rt_model_handle_);
193 task->Distribute();
194
195 uint32_t task_id = 0;
196 uint32_t stream_id = 0;
197 rtError_t rt_ret = rtModelGetTaskId(rt_model_handle_, &task_id, &stream_id);
198 if (rt_ret != RT_ERROR_NONE) {
199 MS_LOG(EXCEPTION) << "Call rt api rtModelGetTaskId failed, ret: " << rt_ret;
200 }
201 task_id_list_.push_back(task_id);
202 stream_id_list_.push_back(stream_id);
203 if (task->Args() != nullptr) {
204 std::shared_ptr<RuntimeInfo> runtime_tuple = std::make_shared<RuntimeInfo>(task_id, stream_id, task->Args());
205 auto emplace_ret = runtime_info_map_.emplace(task->task_name(), runtime_tuple);
206 if (!emplace_ret.second) {
207 // The task_name is (fullname_with_scope + UniqueId). There should be no duplication.
208 MS_LOG(EXCEPTION) << "Task name exist: " << task->task_name();
209 }
210 }
211 }
212 if (task_list_.empty()) {
213 MS_LOG(EXCEPTION) << "Task list is empty";
214 }
215
216 MS_LOG(INFO) << "DistributeTask success.";
217 }
218
Run()219 void RuntimeModel::Run() {
220 MS_LOG(INFO) << "Davinci task run start.";
221 rtError_t ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0);
222 if (ret != RT_ERROR_NONE) {
223 MS_LOG(EXCEPTION) << "Call rt api rtModelLoadComplete failed, ret: " << ret;
224 }
225
226 MS_LOG(INFO) << "Run rtModelExecute success, start to rtStreamSynchronize.";
227 ret = rtStreamSynchronize(rt_model_stream_);
228 if (ret != RT_ERROR_NONE) {
229 if (ret == ACL_ERROR_RT_END_OF_SEQUENCE) {
230 MS_LOG(INFO) << "Model stream ACL_ERROR_RT_END_OF_SEQUENCE signal received.";
231 return;
232 }
233 MS_LOG(EXCEPTION) << "Call rt api rtStreamSynchronize failed, ret: " << ret;
234 }
235
236 MS_LOG(INFO) << "Davinci task run success.";
237 }
238
RtModelUnbindStream()239 void RuntimeModel::RtModelUnbindStream() noexcept {
240 for (size_t i = 0; i < stream_list_.size(); i++) {
241 if (rtModelUnbindStream(rt_model_handle_, stream_list_[i]) != RT_ERROR_NONE) {
242 MS_LOG(ERROR) << "Unbind stream from model failed! Index: " << i;
243 return;
244 }
245 }
246 }
247
RtStreamDestory()248 void RuntimeModel::RtStreamDestory() noexcept {
249 if (rtStreamDestroy(rt_model_stream_) != RT_ERROR_NONE) {
250 MS_LOG(ERROR) << "Destroy stream for rt_model failed!";
251 return;
252 }
253
254 for (size_t i = 0; i < stream_list_.size(); i++) {
255 if (rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE) {
256 MS_LOG(ERROR) << "Destroy stream failed! Index: " << i;
257 return;
258 }
259 }
260 }
261
RtLabelDestory()262 void RuntimeModel::RtLabelDestory() noexcept {
263 for (size_t i = 0; i < label_list_.size(); i++) {
264 if (label_list_[i] == nullptr) {
265 continue;
266 }
267 if (rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE) {
268 MS_LOG(ERROR) << "Destroy label failed! Index: " << i;
269 return;
270 }
271 }
272 }
273
RtModelDestory()274 void RuntimeModel::RtModelDestory() noexcept {
275 rtError_t ret = rtModelDestroy(rt_model_handle_);
276 if (ret != RT_ERROR_NONE) {
277 MS_LOG(ERROR) << "Call rt api rtModelDestroy failed, ret: " << ret;
278 return;
279 }
280 }
281
RtEventDestory()282 void RuntimeModel::RtEventDestory() noexcept {
283 for (size_t i = 0; i < event_list_.size(); i++) {
284 if (rtEventDestroy(event_list_[i]) != RT_ERROR_NONE) {
285 MS_LOG(ERROR) << "Destroy event failed! Index: " << i;
286 return;
287 }
288 }
289 }
290
// Task ids recorded by LoadComplete()/DistributeTask(), in distribution order.
const std::vector<uint32_t> &RuntimeModel::GetTaskIdList() const { return task_id_list_; }
292
// Stream ids recorded by LoadComplete()/DistributeTask(), parallel to GetTaskIdList().
const std::vector<uint32_t> &RuntimeModel::GetStreamIdList() const { return stream_id_list_; }
294 } // namespace mindspore::ge::model_runner
295