/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "runtime/device/ascend/ge_runtime/runtime_model.h"
#include <set>
#include "runtime/kernel.h"
#include "runtime/rt_model.h"
#include "graphengine/inc/external/runtime/rt_error_codes.h"
#include "runtime/device/ascend/ge_runtime/model_context.h"
#include "runtime/device/ascend/ge_runtime/task/task.h"
#include "runtime/device/ascend/ge_runtime/task/task_factory.h"
#include "mindspore/core/utils/log_adapter.h"

namespace mindspore::ge::model_runner {
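// Typical call sequence from the owning runner (a sketch only; the driving code lives outside this file):
//   auto model = std::make_shared<RuntimeModel>();
//   model->Load(device_id, session_id, davinci_model);  // InitResource + GenerateTask
//   model->DistributeTask();                            // submit tasks, record task/stream ids
//   model->LoadComplete();                              // finalize rt model loading
//   model->Run();                                       // execute and synchronize

// Tear down in dependency order: unbind and release tasks before their streams, and destroy
// streams, labels and events before the rt model itself.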
RuntimeModel::~RuntimeModel() {
  MS_LOG(INFO) << "RuntimeModel destructor start.";

  // Unbind rtModel from all task related streams
  RtModelUnbindStream();

  // Release tasks first; HCCL tasks hold streams
  task_list_.clear();

  // Release all task related streams
  RtStreamDestory();

  // Release rtLabel resources
  RtLabelDestory();

  // Release rtEvent resources
  RtEventDestory();

  MS_LOG(INFO) << "Do RtModelDestroy";
  // Release the rt model
  RtModelDestory();
}

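// Create one rtStream per model stream and bind each to rt_model_handle_. Force-copy streams are
// created with RT_STREAM_FORCE_COPY in addition to RT_STREAM_PERSISTENT; wait-active streams are
// bound with RT_INVALID_FLAG instead of RT_HEAD_STREAM.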
void RuntimeModel::InitStream(const std::shared_ptr<DavinciModel> &davinci_model) {
  MS_EXCEPTION_IF_NULL(davinci_model);

  std::set<int64_t> wait_active_streams;
  std::set<int64_t> force_copy_streams;

  for (const auto &stream_id : davinci_model->GetWaitActiveStreams()) {
    MS_LOG(INFO) << "Stream id " << stream_id << " is a wait-active stream.";
    (void)wait_active_streams.insert(stream_id);
  }

  for (const auto &stream_id : davinci_model->GetForceCopyStreams()) {
    MS_LOG(INFO) << "Stream id " << stream_id << " is a force-copy stream.";
    (void)force_copy_streams.insert(stream_id);
  }

  MS_LOG(INFO) << "Total stream num " << davinci_model->GetStreamNum();
  for (uint32_t i = 0; i < davinci_model->GetStreamNum(); ++i) {
    rtStream_t stream = nullptr;
    uint32_t flag = (force_copy_streams.find(i) != force_copy_streams.end())
                      ? (RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY)
                      : (RT_STREAM_PERSISTENT);

    rtError_t rt_ret = rtStreamCreateWithFlags(&stream, davinci_model->GetPriority(), flag);
    if (rt_ret != RT_ERROR_NONE) {
      MS_LOG(EXCEPTION) << "Call rt api rtStreamCreateWithFlags failed, ret: " << rt_ret;
    }

    MS_LOG(INFO) << "rtStreamCreateWithFlags end.";
    stream_list_.emplace_back(stream);

    // Bind rt_model_handle_ to all task related streams
    flag = (wait_active_streams.find(i) != wait_active_streams.end()) ? (static_cast<uint32_t>(RT_INVALID_FLAG))
                                                                      : (static_cast<uint32_t>(RT_HEAD_STREAM));
    rt_ret = rtModelBindStream(rt_model_handle_, stream, flag);
    if (rt_ret != RT_ERROR_NONE) {
      MS_LOG(EXCEPTION) << "Call rt api rtModelBindStream failed, ret: " << rt_ret;
    }
    MS_LOG(INFO) << "stream index: " << i << ", stream: " << stream;
  }
}

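// Create event_num rtEvent handles and keep them in event_list_ for use by the generated tasks.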
void RuntimeModel::InitEvent(uint32_t event_num) {
  MS_LOG(INFO) << "Event number: " << event_num;
  for (uint32_t i = 0; i < event_num; ++i) {
    rtEvent_t rt_event;
    rtError_t rt_ret = rtEventCreate(&rt_event);
    if (rt_ret != RT_ERROR_NONE) {
      MS_LOG(EXCEPTION) << "Call rt api rtEventCreate failed, ret: " << rt_ret;
    }
    event_list_.push_back(rt_event);
  }
}

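// Create one rtLabel per LABEL_SET task on that task's stream. label_list_ is sized to the batch
// number and indexed by label id, so entries without a LABEL_SET task stay nullptr.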
void RuntimeModel::InitLabel(const std::shared_ptr<DavinciModel> &davinci_model) {
  MS_LOG(INFO) << "Label number: " << davinci_model->GetBatchNum();
  label_list_.resize(davinci_model->GetBatchNum());
  for (auto &task_info : davinci_model->GetTaskInfoList()) {
    MS_EXCEPTION_IF_NULL(task_info);

    if (task_info->type() != TaskInfoType::LABEL_SET) {
      continue;
    }
    auto label_set_task_info = std::static_pointer_cast<LabelSetTaskInfo>(task_info);

    if (label_set_task_info->stream_id() >= stream_list_.size()) {
      MS_LOG(EXCEPTION) << "Invalid stream id " << label_set_task_info->stream_id() << " total stream num "
                        << stream_list_.size();
    }

    rtLabel_t rt_label = nullptr;
    rtError_t rt_ret = rtLabelCreateEx(&rt_label, stream_list_[label_set_task_info->stream_id()]);
    if (rt_ret != RT_ERROR_NONE) {
      MS_LOG(EXCEPTION) << "Call rt api rtLabelCreateEx failed, ret: " << rt_ret;
    }
    label_list_[label_set_task_info->label_id()] = rt_label;
  }
}

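// Create the rt model handle and its own execution stream, then set up the task related streams,
// events and labels.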
void RuntimeModel::InitResource(const std::shared_ptr<DavinciModel> &davinci_model) {
  MS_LOG(INFO) << "InitResource start";
  MS_EXCEPTION_IF_NULL(davinci_model);

  rtError_t rt_ret = rtModelCreate(&rt_model_handle_, 0);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "Call rt api rtModelCreate failed, ret: " << rt_ret;
  }

  // Create rtStream for rt_model_handle_
  rt_ret = rtStreamCreate(&rt_model_stream_, davinci_model->GetPriority());
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "Call rt api rtStreamCreate failed, ret: " << rt_ret;
  }
  MS_LOG(INFO) << "rtStreamCreate end";

  InitStream(davinci_model);
  InitEvent(davinci_model->GetEventNum());
  InitLabel(davinci_model);

  MS_LOG(INFO) << "InitResource success";
}

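// Build a Task object for every TaskInfo in the model. The ModelContext hands each task the shared
// runtime resources (model handle, streams, labels, events) created by InitResource.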
void RuntimeModel::GenerateTask(uint32_t device_id, uint64_t session_id,
                                const std::shared_ptr<DavinciModel> &davinci_model) {
  MS_LOG(INFO) << "GenerateTask start.";
  MS_EXCEPTION_IF_NULL(davinci_model);
  auto task_infos = davinci_model->GetTaskInfoList();
  ModelContext model_context(device_id, session_id, davinci_model->GetPriority(), rt_model_handle_, rt_model_stream_,
                             stream_list_, label_list_, event_list_);
  for (auto &task_info : task_infos) {
    auto task = TaskFactory::GetInstance().Create(model_context, task_info);
    task_list_.push_back(task);
  }
  MS_LOG(INFO) << "GenerateTask success.";
}

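// Record the current task id / stream id reported by the runtime, then finalize model loading with
// rtModelLoadComplete.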
void RuntimeModel::LoadComplete() {
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  auto rt_ret = rtModelGetTaskId(rt_model_handle_, &task_id, &stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "Call rt api rtModelGetTaskId failed, ret: " << rt_ret;
  }
  task_id_list_.push_back(task_id);
  stream_id_list_.push_back(stream_id);

  rt_ret = rtModelLoadComplete(rt_model_handle_);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "Call rt api rtModelLoadComplete failed, ret: " << rt_ret;
  }
}

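// Convenience entry point: set up runtime resources and generate the task list in one call.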
void RuntimeModel::Load(uint32_t device_id, uint64_t session_id, const std::shared_ptr<DavinciModel> &davinci_model) {
  InitResource(davinci_model);
  GenerateTask(device_id, session_id, davinci_model);
}

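// Send every generated task to the device. Each distributed task's id and stream id are recorded,
// and tasks that carry args are additionally tracked in runtime_info_map_ keyed by task name.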
void RuntimeModel::DistributeTask() {
  MS_LOG(INFO) << "DistributeTask start.";
  for (auto &task : task_list_) {
    MS_EXCEPTION_IF_NULL(task);
    task->set_model_handle(rt_model_handle_);
    task->Distribute();

    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    rtError_t rt_ret = rtModelGetTaskId(rt_model_handle_, &task_id, &stream_id);
    if (rt_ret != RT_ERROR_NONE) {
      MS_LOG(EXCEPTION) << "Call rt api rtModelGetTaskId failed, ret: " << rt_ret;
    }
    task_id_list_.push_back(task_id);
    stream_id_list_.push_back(stream_id);
    if (task->Args() != nullptr) {
      std::shared_ptr<RuntimeInfo> runtime_tuple = std::make_shared<RuntimeInfo>(task_id, stream_id, task->Args());
      auto emplace_ret = runtime_info_map_.emplace(task->task_name(), runtime_tuple);
      if (!emplace_ret.second) {
        // The task_name is (fullname_with_scope + UniqueId). There should be no duplication.
        MS_LOG(EXCEPTION) << "Task name exists: " << task->task_name();
      }
    }
  }
  if (task_list_.empty()) {
    MS_LOG(EXCEPTION) << "Task list is empty";
  }

  MS_LOG(INFO) << "DistributeTask success.";
}

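// Execute the loaded model on rt_model_stream_ and block until the stream finishes. An
// ACL_ERROR_RT_END_OF_SEQUENCE result from the synchronize call is treated as a normal stop.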
void RuntimeModel::Run() {
  MS_LOG(INFO) << "Davinci task run start.";
  rtError_t ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0);
  if (ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "Call rt api rtModelExecute failed, ret: " << ret;
  }

  MS_LOG(INFO) << "Run rtModelExecute success, start to rtStreamSynchronize.";
  ret = rtStreamSynchronize(rt_model_stream_);
  if (ret != RT_ERROR_NONE) {
    if (ret == ACL_ERROR_RT_END_OF_SEQUENCE) {
      MS_LOG(INFO) << "Model stream ACL_ERROR_RT_END_OF_SEQUENCE signal received.";
      return;
    }
    MS_LOG(EXCEPTION) << "Call rt api rtStreamSynchronize failed, ret: " << ret;
  }

  MS_LOG(INFO) << "Davinci task run success.";
}

void RuntimeModel::RtModelUnbindStream() noexcept {
  for (size_t i = 0; i < stream_list_.size(); i++) {
    if (rtModelUnbindStream(rt_model_handle_, stream_list_[i]) != RT_ERROR_NONE) {
      MS_LOG(ERROR) << "Unbind stream from model failed! Index: " << i;
      return;
    }
  }
}

void RuntimeModel::RtStreamDestory() noexcept {
  if (rtStreamDestroy(rt_model_stream_) != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Destroy stream for rt_model failed!";
    return;
  }

  for (size_t i = 0; i < stream_list_.size(); i++) {
    if (rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE) {
      MS_LOG(ERROR) << "Destroy stream failed! Index: " << i;
      return;
    }
  }
}

void RuntimeModel::RtLabelDestory() noexcept {
  for (size_t i = 0; i < label_list_.size(); i++) {
    if (label_list_[i] == nullptr) {
      continue;
    }
    if (rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE) {
      MS_LOG(ERROR) << "Destroy label failed! Index: " << i;
      return;
    }
  }
}

void RuntimeModel::RtModelDestory() noexcept {
  rtError_t ret = rtModelDestroy(rt_model_handle_);
  if (ret != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Call rt api rtModelDestroy failed, ret: " << ret;
    return;
  }
}

void RuntimeModel::RtEventDestory() noexcept {
  for (size_t i = 0; i < event_list_.size(); i++) {
    if (rtEventDestroy(event_list_[i]) != RT_ERROR_NONE) {
      MS_LOG(ERROR) << "Destroy event failed! Index: " << i;
      return;
    }
  }
}

const std::vector<uint32_t> &RuntimeModel::GetTaskIdList() const { return task_id_list_; }

const std::vector<uint32_t> &RuntimeModel::GetStreamIdList() const { return stream_id_list_; }
}  // namespace mindspore::ge::model_runner