/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/litert/kernel/opencl/opencl_subgraph.h"
#include <algorithm>
#include <functional>
#include <set>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include "src/litert/kernel/gpu/opencl/opencl_executor.h"
#include "src/litert/kernel/opencl/utils.h"
#include "src/litert/kernel/opencl/kernel/to_format.h"
#include "src/litert/kernel/opencl/kernel/gl_to_cl.h"
#include "include/errorcode.h"
#include "src/common/utils.h"

namespace mindspore::kernel {
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::lite::opencl::MemType;
using PrimType::PrimType_Inner_ToFormat;

OpenCLSubGraph::~OpenCLSubGraph() { UnInit(); }

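// Rewires the kernels adjacent to a newly inserted convert op: every kernel in
// `in_kernels` that referenced `in_tensor` is pointed at `new_tensor` instead,
// and its kernel links are updated so the convert op sits between it and the
// rest of the graph. For MemType::IMG the convert op feeds the kernels
// (input-side insertion); otherwise the kernels feed the convert op
// (output-side insertion).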
void OpenCLSubGraph::ReplaceOutTensorAndKernelToConvert(const lite::Tensor *in_tensor,
                                                        const std::vector<kernel::KernelExec *> &in_kernels,
                                                        lite::Tensor *new_tensor, kernel::KernelExec *in_convert_op,
                                                        MemType mem_type) {
  MS_ASSERT(in_convert_op);
  auto in_opencl_op = in_convert_op;
  for (auto &iv : in_kernels) {
    MS_ASSERT(iv);
    // Link the convert op into iv's neighbor-kernel list.
    auto kernels = (mem_type == MemType::IMG) ? iv->in_kernels() : iv->out_kernels();
    auto fk = std::find_if(kernels.begin(), kernels.end(), [&](kernel::KernelExec *kv) { return kv == iv; });
    if (fk != kernels.end()) {
      *fk = in_convert_op;
    } else {
      kernels.emplace_back(in_convert_op);
    }
    // Replace the reference to the original tensor with the new (converted) tensor.
    auto tensors = (mem_type == MemType::IMG) ? iv->in_tensors() : iv->out_tensors();
    auto ft = std::find_if(tensors.begin(), tensors.end(), [&](lite::Tensor *kv) { return kv == in_tensor; });
    if (ft != tensors.end()) {
      *ft = new_tensor;
    } else {
      tensors.emplace_back(new_tensor);
    }
    if (mem_type == MemType::IMG) {
      iv->set_in_kernels(kernels);
      iv->set_in_tensors(tensors);
      in_opencl_op->AddOutKernel(iv);
    } else {
      iv->set_out_kernels(kernels);
      iv->set_out_tensors(tensors);
      in_convert_op->AddInKernel(iv);
    }
  }
}

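// For each tensor in `in_tensors`, creates a ToFormat convert kernel plus a new
// intermediate tensor and splices the pair into the graph. MemType::IMG builds
// an input-side converter (buffer -> image); MemType::BUF builds an output-side
// converter (image -> buffer). The created tensors, parameters, and kernels are
// returned through the out-parameters so the subgraph can own and free them.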
int OpenCLSubGraph::GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors,
                                  const std::vector<std::vector<kernel::KernelExec *>> &in_kernels,
                                  std::vector<lite::Tensor *> *out_tensors,
                                  std::vector<OpenCLToFormatParameter *> *out_parameters,
                                  std::vector<KernelExec *> *out_convert_ops, MemType mem_type) {
  MS_ASSERT(out_tensors);
  MS_ASSERT(out_parameters);
  MS_ASSERT(out_convert_ops);
  out_tensors->clear();
  out_parameters->clear();
  out_convert_ops->clear();
  std::vector<std::vector<kernel::KernelExec *>> loop_kernels;
  if (mem_type == MemType::BUF) {
    GetKernelFromToTensor(in_tensors, nodes_, &loop_kernels, true);
  }

  for (size_t i = 0; i < in_tensors.size(); ++i) {
    auto *in_tensor = in_tensors.at(i);
    auto *new_tensor = new (std::nothrow)
      lite::Tensor(in_tensor->data_type(), in_tensor->shape(), in_tensor->format(), lite::Category::VAR);
    MS_ASSERT(new_tensor);
    if (new_tensor == nullptr) {
      MS_LOG(ERROR) << "OpenCLSubGraph new tensor failed!";
      return RET_ERROR;
    }
    for (const auto &param : in_tensor->quant_params()) {
      new_tensor->AddQuantParam(param);
    }

    out_tensors->emplace_back(new_tensor);
    KernelKey desc{kGPU, kNumberTypeFloat32, NHWC, PrimType_Inner_ToFormat};
    auto *parameter = static_cast<OpenCLToFormatParameter *>(malloc(sizeof(OpenCLToFormatParameter)));
    MS_ASSERT(parameter);
    if (parameter == nullptr) {
      MS_LOG(ERROR) << "OpenCLSubGraph new parameter failed!";
      delete new_tensor;
      new_tensor = nullptr;
      return RET_ERROR;
    }

    parameter->op_parameter.is_zero_shape_ = false;
    parameter->op_parameter.type_ = PrimType_Inner_ToFormat;
    parameter->out_mem_type = mem_type;
    out_parameters->emplace_back(parameter);
    LiteKernel *in_convert_op_inner = nullptr;
    // Input-side converters consume the original tensor; output-side converters produce it.
    if (mem_type == MemType::IMG) {
      in_convert_op_inner = OpenCLKernelCreator<ToFormatOpenCLKernel>(
        {in_tensor}, {new_tensor}, reinterpret_cast<OpParameter *>(parameter), this->Context(), desc);
    } else {
      in_convert_op_inner = OpenCLKernelCreator<ToFormatOpenCLKernel>(
        {new_tensor}, {in_tensor}, reinterpret_cast<OpParameter *>(parameter), this->Context(), desc);
    }
    MS_ASSERT(in_convert_op_inner);
    if (in_convert_op_inner == nullptr ||
        reinterpret_cast<ToFormatOpenCLKernel *>(in_convert_op_inner)->CheckSpecs() != RET_OK) {
      MS_LOG(ERROR) << "OpenCLSubGraph create op failed!";
      delete new_tensor;
      new_tensor = nullptr;
      free(parameter);
      parameter = nullptr;
      return RET_ERROR;
    }
    std::shared_ptr<kernel::Kernel> inner_convert_op(in_convert_op_inner);
    auto *in_convert_op = new (std::nothrow) kernel::KernelExec(inner_convert_op);
    if (in_convert_op == nullptr) {
      MS_LOG(ERROR) << "OpenCLSubGraph create op failed!";
      delete new_tensor;
      new_tensor = nullptr;
      free(parameter);
      parameter = nullptr;
      return RET_ERROR;
    }
    static int index = 0;
    in_convert_op->set_name("ToFormat_" + std::to_string(index++));

    ReplaceOutTensorAndKernelToConvert(in_tensor, in_kernels.at(i), new_tensor, in_convert_op, mem_type);

    // Redirect inner kernels that still read the original output tensor to the new tensor.
    if (mem_type == MemType::BUF) {
      for (auto &iv : loop_kernels[i]) {
        MS_ASSERT(iv);
        auto tensors = iv->in_tensors();
        auto jv = std::find(tensors.begin(), tensors.end(), in_tensors.at(i));
        if (jv != tensors.end()) {
          *jv = new_tensor;
          iv->set_in_tensors(tensors);
        }
      }
    }

    out_convert_ops->emplace_back(in_convert_op);
  }
  return RET_OK;
}

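// OpenGL interop variant of GenToFormatOp: wraps each boundary tensor in a
// GLToCL convert kernel so OpenGL textures can be imported into (or exported
// from) the OpenCL subgraph. The structure mirrors GenToFormatOp, with
// MemType::GLTexture marking the output-side (CL -> GL texture) direction.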
int OpenCLSubGraph::GenGLToCLOp(const std::vector<lite::Tensor *> &in_tensors,
                                const std::vector<std::vector<kernel::KernelExec *>> &in_kernels,
                                std::vector<lite::Tensor *> *out_tensors,
                                std::vector<OpenGLTexture2DToOpenCLParameter *> *out_parameters,
                                std::vector<KernelExec *> *out_convert_ops, MemType mem_type) {
  MS_ASSERT(out_tensors);
  MS_ASSERT(out_parameters);
  MS_ASSERT(out_convert_ops);
  out_tensors->clear();
  out_parameters->clear();
  out_convert_ops->clear();
  std::vector<std::vector<kernel::KernelExec *>> loop_kernels;
  if (mem_type == MemType::GLTexture) {
    GetKernelFromToTensor(in_tensors, nodes_, &loop_kernels, true);
  }

  for (size_t i = 0; i < in_tensors.size(); ++i) {
    auto *in_tensor = in_tensors.at(i);
    auto *new_tensor = new (std::nothrow)
      lite::Tensor(in_tensor->data_type(), in_tensor->shape(), in_tensor->format(), lite::Category::VAR);
    MS_ASSERT(new_tensor);
    if (new_tensor == nullptr) {
      MS_LOG(ERROR) << "OpenCLSubGraph new tensor failed!";
      return RET_ERROR;
    }
    for (const auto &param : in_tensor->quant_params()) {
      new_tensor->AddQuantParam(param);
    }

    out_tensors->emplace_back(new_tensor);
    KernelKey desc{kGPU, kNumberTypeGLUInt, NHWC, PrimType::PrimType_Inner_GltextureToOpencl};
    auto *parameter = static_cast<OpenGLTexture2DToOpenCLParameter *>(malloc(sizeof(OpenGLTexture2DToOpenCLParameter)));
    MS_ASSERT(parameter);
    if (parameter == nullptr) {
      MS_LOG(ERROR) << "OpenCLSubGraph new parameter failed!";
      delete new_tensor;
      new_tensor = nullptr;
      return RET_ERROR;
    }

    parameter->op_parameter.is_zero_shape_ = false;
    parameter->op_parameter.type_ = PrimType::PrimType_Inner_GltextureToOpencl;
    parameter->out_mem_type = mem_type;
    out_parameters->emplace_back(parameter);
    LiteKernel *in_convert_op_inner = nullptr;
    if (mem_type == MemType::IMG) {
      in_convert_op_inner = OpenCLKernelCreator<GLToCLOpenCLKernel>(
        {in_tensor}, {new_tensor}, reinterpret_cast<OpParameter *>(parameter), this->Context(), desc);
    } else {
      in_convert_op_inner = OpenCLKernelCreator<GLToCLOpenCLKernel>(
        {new_tensor}, {in_tensor}, reinterpret_cast<OpParameter *>(parameter), this->Context(), desc);
    }
    MS_ASSERT(in_convert_op_inner);
    if (in_convert_op_inner == nullptr ||
        reinterpret_cast<GLToCLOpenCLKernel *>(in_convert_op_inner)->CheckSpecs() != RET_OK) {
      MS_LOG(ERROR) << "OpenCLSubGraph create op failed!";
      delete new_tensor;
      new_tensor = nullptr;
      free(parameter);
      parameter = nullptr;
      return RET_ERROR;
    }
    std::shared_ptr<kernel::Kernel> inner_convert_op(in_convert_op_inner);
    auto *in_convert_op = new (std::nothrow) kernel::KernelExec(inner_convert_op);
    if (in_convert_op == nullptr) {
      MS_LOG(ERROR) << "OpenCLSubGraph create op failed!";
      delete new_tensor;
      new_tensor = nullptr;
      free(parameter);
      parameter = nullptr;
      return RET_ERROR;
    }
    static int index = 0;
    in_convert_op->set_name("GLToCL_" + std::to_string(index++));
    ReplaceOutTensorAndKernelToConvert(in_tensor, in_kernels.at(i), new_tensor, in_convert_op, mem_type);
    // Redirect inner kernels that still read the original output tensor to the new tensor.
    if (mem_type == MemType::GLTexture) {
      for (auto &iv : loop_kernels[i]) {
        MS_ASSERT(iv);
        auto tensors = iv->in_tensors();
        auto jv = std::find(tensors.begin(), tensors.end(), in_tensors.at(i));
        if (jv != tensors.end()) {
          *jv = new_tensor;
          iv->set_in_tensors(tensors);
        }
      }
    }

    out_convert_ops->emplace_back(in_convert_op);
  }
  return RET_OK;
}

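// Inserts the boundary convert ops: one converter in front of every graph
// input and one behind every graph output. Roughly:
//
//   input -> [ToFormat/GLToCL] -> inner kernels -> [ToFormat/GLToCL] -> output
//
// Inputs are converted to image memory (MemType::IMG); outputs are converted
// back to buffers, or to GL textures when OpenGL texture sharing is enabled.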
int OpenCLSubGraph::InsertOpsPass() {
  GetInOutNodes();

  std::vector<std::vector<kernel::KernelExec *>> from_kernels_;
  GetKernelFromToTensor(in_tensors(), in_nodes_, &from_kernels_, true);
  int ret = 0;

  if (this->GetOpenGLTextureEnable()) {
    ret = GenGLToCLOp(in_tensors(), from_kernels_, &in_convert_tensors_, &gl_in_parameters_, &in_convert_ops_,
                      MemType::IMG);
  } else {
    ret =
      GenToFormatOp(in_tensors(), from_kernels_, &in_convert_tensors_, &in_parameters_, &in_convert_ops_, MemType::IMG);
  }

  if (ret != RET_OK) {
    return ret;
  }
  nodes_.insert(nodes_.begin(), in_convert_ops_.begin(), in_convert_ops_.end());

  std::vector<std::vector<kernel::KernelExec *>> to_kernels_;
  GetKernelFromToTensor(out_tensors(), out_nodes_, &to_kernels_, false);

  if (this->GetOpenGLTextureEnable()) {
    ret = GenGLToCLOp(out_tensors(), to_kernels_, &out_convert_tensors_, &gl_out_parameters_, &out_convert_ops_,
                      MemType::GLTexture);
  } else {
    ret = GenToFormatOp(out_tensors(), to_kernels_, &out_convert_tensors_, &out_parameters_, &out_convert_ops_,
                        MemType::BUF);
  }

  if (ret != RET_OK) {
    return ret;
  }
  nodes_.insert(nodes_.end(), out_convert_ops_.begin(), out_convert_ops_.end());
  GetInOutNodes();
  return RET_OK;
}

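// Runs the subgraph-level optimization passes in a fixed order: operator
// fusion, boundary convert-op insertion, then tensor data-type propagation.
// Boundary tensors are bound to the OpenCL allocator first; a failure in any
// pass aborts the sequence.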
int OpenCLSubGraph::RunPass() {
  // The fp16 operator in heterogeneous scenes needs to be set to fp32
  // to prevent the framework from converting it to fp16 in advance.
  auto in_first_tensor = in_tensors().front();
  if (in_first_tensor->IsGraphInput() &&
      (in_first_tensor->data_type() == kNumberTypeFloat32 || in_first_tensor->data_type() == kNumberTypeFloat16)) {
    desc_.data_type = in_tensors()[0]->data_type();
  }
  allocator_ = ocl_runtime_->GetAllocator();
  MS_LOG(DEBUG) << "input num=" << in_tensors().size() << ", output num=" << out_tensors().size();
  for (const auto tensor : in_tensors()) {
    MS_ASSERT(tensor);
    tensor->set_allocator(allocator_);
  }
  for (const auto tensor : out_tensors()) {
    MS_ASSERT(tensor);
    tensor->set_allocator(allocator_);
  }
  std::vector<std::pair<std::string, std::function<int(void)>>> pass_manager{
    {"FusionPass", std::bind(&OpenCLSubGraph::FusionPass, this)},
    {"InsertOpsPass", std::bind(&OpenCLSubGraph::InsertOpsPass, this)},
    {"UpdateTensorDataTypePass", std::bind(&OpenCLSubGraph::UpdateTensorDataTypePass, this)},
  };
  for (auto iv : pass_manager) {
    auto ret = iv.second();
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Run Pass: " << iv.first << " failed.";
      return RET_ERROR;
    }
  }
  return RET_OK;
}

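// When fp16 inference is enabled, flips this subgraph's intermediate fp32
// tensors to fp16. Graph outputs keep their original type, as does the output
// of a Softmax that is the last kernel (detected here by the absence of a
// downstream ToFormat converter).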
int OpenCLSubGraph::UpdateTensorDataTypePass() {
  bool is_fp16 = ocl_runtime_->GetFp16Enable();
  if (is_fp16 && subgraph_type() == kGpuFp16SubGraph) {
    auto in_tensors = this->in_tensors();
    auto out_tensors = this->out_tensors();
    for (auto iv : nodes_) {
      MS_ASSERT(iv);
      auto cur_outs = iv->out_tensors();
      // if softmax is the last kernel, keep its output tensor fp32
      if (iv->type() == schema::PrimitiveType_Softmax) {
        bool last_kernel = true;
        for (auto k : iv->out_kernels()) {
          int type = k->op_parameter() == nullptr ? k->type() : k->op_parameter()->type_;
          if (type == PrimType::PrimType_Inner_ToFormat) {
            last_kernel = false;
            break;
          }
        }
        if (last_kernel) continue;
      }
      for (auto jv : cur_outs) {
        MS_ASSERT(jv);
        // if fp16 is enabled, only change fp32 to fp16; other data types are preserved
        if (jv->data_type() == kNumberTypeFloat32 && !jv->IsGraphOutput()) {
          jv->set_data_type(kNumberTypeFloat16);
        }
      }
    }
  }
  return RET_OK;
}

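// For every tensor in `in_tensors`, collects the kernels in `in_kernels` that
// touch it: consumers (is_from = true, matched against kernel inputs) or
// producers (is_from = false, matched against kernel outputs). Results are
// returned per tensor, in the same order as `in_tensors`.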
void OpenCLSubGraph::GetKernelFromToTensor(const std::vector<lite::Tensor *> &in_tensors,
                                           const std::vector<kernel::KernelExec *> &in_kernels,
                                           std::vector<std::vector<kernel::KernelExec *>> *out_kernels, bool is_from) {
  std::vector<std::set<lite::Tensor *>> ksets;
  for (auto jv : in_kernels) {
    MS_ASSERT(jv);
    auto tens = is_from ? jv->in_tensors() : jv->out_tensors();
    std::set<lite::Tensor *> kset;
    kset.insert(tens.begin(), tens.end());
    ksets.emplace_back(kset);
  }
  MS_ASSERT(out_kernels);
  for (auto in_tensor : in_tensors) {
    std::vector<kernel::KernelExec *> kvec;
    for (size_t j = 0; j < in_kernels.size(); ++j) {
      if (ksets[j].count(in_tensor)) {
        kvec.emplace_back(in_kernels[j]);
      }
    }
    out_kernels->emplace_back(kvec);
  }
}

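// Rebuilds in_nodes_/out_nodes_: a node is an input node if any of its input
// tensors is a subgraph input, and an output node if any of its output tensors
// is a subgraph output.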
void OpenCLSubGraph::GetInOutNodes() {
  this->in_nodes_.clear();
  this->out_nodes_.clear();
  auto in_tensors = this->in_tensors();
  auto out_tensors = this->out_tensors();
  for (auto *node : nodes_) {
    for (auto *tensor : node->in_tensors()) {
      if (std::find(in_tensors.begin(), in_tensors.end(), tensor) != in_tensors.end()) {
        in_nodes_.emplace_back(node);
        break;
      }
    }
    for (auto *tensor : node->out_tensors()) {
      if (std::find(out_tensors.begin(), out_tensors.end(), tensor) != out_tensors.end()) {
        out_nodes_.emplace_back(node);
        break;
      }
    }
  }
}

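// Prepares the subgraph for execution: configures fp16 mode, binds the OpenCL
// allocator to the boundary and intermediate tensors, creates the executor,
// pre-initializes weights for kernels that need them (MatMulFusion, BiasAdd),
// and prepares every node whose shape inference has completed. When all shapes
// are known, a tuning/allocation run is performed up front.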
int OpenCLSubGraph::Prepare() {
  ocl_runtime_->SetFp16Enable(subgraph_type() == kGpuFp16SubGraph);

  for (const auto tensor : in_tensors()) {
    MS_ASSERT(tensor);
    tensor->set_allocator(allocator_);
  }
  for (const auto tensor : out_tensors()) {
    MS_ASSERT(tensor);
    tensor->set_allocator(allocator_);
  }
  executor_ = new (std::nothrow) lite::opencl::OpenCLExecutor();
  if (executor_ == nullptr) {
    MS_LOG(ERROR) << "Create OpenCLExecutor failed";
    return RET_ERROR;
  }
  for (auto node : this->nodes_) {
    if (node == nullptr) {
      MS_LOG(ERROR) << "node in subgraph is nullptr";
      return mindspore::lite::RET_NULL_PTR;
    }
    for (const auto tensor : node->out_tensors()) {
      CHECK_NULL_RETURN(tensor);
      MS_CHECK_TRUE_RET(tensor->data() == nullptr, RET_ERROR);
      tensor->set_allocator(allocator_);
    }
    if (desc_.provider == kBuiltin) {
      auto opencl_kernel = reinterpret_cast<kernel::OpenCLKernel *>(node->kernel());
      std::set<int> pre_init_weight_list = {schema::PrimitiveType_MatMulFusion, schema::PrimitiveType_BiasAdd};
      if (pre_init_weight_list.find(opencl_kernel->type()) != pre_init_weight_list.end()) {
        auto ret = opencl_kernel->InitWeights();
        if (ret != RET_OK) {
          MS_LOG(ERROR) << "init weights " << node->name() << " failed";
          return ret;
        }
      }
    }
    if (node->InferShapeDone()) {
      auto ret = node->Prepare();
      if (ret != RET_OK) {
        MS_LOG(ERROR) << "prepare node " << node->name() << " failed";
        return ret;
      }
    }
  }
  if (all_kernels_infer_done_) {
    auto opencl_exec = reinterpret_cast<lite::opencl::OpenCLExecutor *>(executor_);
    // If tuning_mode is DEFAULT, this just mallocs memory for reuse.
    auto ret = opencl_exec->RunOrTune(in_tensors(), out_tensors(), nodes_, nullptr, nullptr, true);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Run opencl Tuning failed: " << ret;
      return ret;
    }
  }
  return RET_OK;
}

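// Releases everything the subgraph owns: the convert tensors created by
// InsertOpsPass, all node kernels (including the convert ops, which live in
// nodes_), and the executor.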
void OpenCLSubGraph::UnInit() {
  for (const auto &tensor : in_convert_tensors_) {
    delete tensor;
  }
  in_convert_tensors_.clear();
  for (const auto &tensor : out_convert_tensors_) {
    delete tensor;
  }
  out_convert_tensors_.clear();
  for (const auto &op : nodes_) {
    delete op;
  }
  nodes_.clear();
  in_convert_ops_.clear();
  out_convert_ops_.clear();
  delete this->executor_;
}

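// Resets every node's output tensors (freeing data and marking the shape
// unknown with {-1}), then re-runs ReSize on each node in order.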
int OpenCLSubGraph::ReSize() {
  for (auto kernel : nodes_) {
    if (kernel == nullptr) {
      MS_LOG(ERROR) << "input kernel is nullptr!";
      return RET_ERROR;
    }
    if (kernel->subgraph_type() != kernel::kNotSubGraph) {
      MS_LOG(ERROR) << "all nodes in the subgraph should be kernels, not subgraphs";
      return RET_ERROR;
    }
    std::vector<lite::Tensor *> outputs = kernel->out_tensors();
    for (auto &output : outputs) {
      output->FreeData();
      output->set_shape({-1});
    }
  }
  for (auto kernel : nodes_) {
    auto ret = kernel->ReSize();
    if (ret != RET_OK) {
      MS_LOG(WARNING) << "ReSize " << kernel->name() << " failed! ret: " << ret;
      return ret;
    }
  }
  return RET_OK;
}

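// Runs the subgraph. Input buffers must be unmapped before the GPU can use
// them, and the command queue is synchronized afterwards so outputs are ready
// for the host.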
int OpenCLSubGraph::Execute() {
  if (executor_ == nullptr) {
    MS_LOG(ERROR) << "executor is nullptr";
    return RET_ERROR;
  }
  int ret;
  for (auto &tensor : in_tensors()) {
    MS_ASSERT(tensor);
    if (tensor->data() == nullptr) {
      MS_LOG(ERROR) << "OpenCL subgraph input tensor data is null";
      return RET_ERROR;
    }
    ret = allocator_->UnmapBuffer(tensor->data());
    if (ret != RET_OK) {
      return ret;
    }
  }

  ret = executor_->Run(in_tensors(), out_tensors(), nodes_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Run opencl executor failed: " << ret;
    return ret;
  }
  if (!ocl_runtime_->SyncCommandQueue()) {
    MS_LOG(ERROR) << "SyncCommandQueue failed.";
    return RET_ERROR;
  }
  return RET_OK;
}

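// Same as Execute() above, but forwards the per-kernel before/after callbacks
// to the executor (e.g. for benchmarking or per-node inspection).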
int OpenCLSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) {
  if (executor_ == nullptr) {
    MS_LOG(ERROR) << "executor is nullptr";
    return RET_ERROR;
  }
  int ret;
  for (auto &tensor : in_tensors()) {
    MS_ASSERT(tensor);
    if (tensor->data() == nullptr) {
      MS_LOG(ERROR) << "OpenCL subgraph input tensor data is null";
      return RET_ERROR;
    }
    ret = allocator_->UnmapBuffer(tensor->data());
    if (ret != RET_OK) {
      return ret;
    }
  }

  ret = executor_->Run(in_tensors(), out_tensors(), nodes_, before, after);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Run opencl executor failed: " << ret;
    return ret;
  }
  if (!ocl_runtime_->SyncCommandQueue()) {
    MS_LOG(ERROR) << "SyncCommandQueue failed.";
    return RET_ERROR;
  }
  return RET_OK;
}
}  // namespace mindspore::kernel