• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/litert/kernel/opencl/opencl_subgraph.h"
18 #include <set>
19 #include <map>
20 #include <memory>
21 #include <string>
22 #include <utility>
23 #include "src/litert/kernel/gpu/opencl/opencl_executor.h"
24 #include "src/litert/kernel/opencl/utils.h"
25 #include "src/litert/kernel/opencl/kernel/to_format.h"
26 #include "src/litert/kernel/opencl/kernel/gl_to_cl.h"
27 #include "include/errorcode.h"
28 #include "src/common/utils.h"
29 
30 namespace mindspore::kernel {
31 using mindspore::lite::RET_ERROR;
32 using mindspore::lite::RET_OK;
33 using mindspore::lite::opencl::MemType;
34 using PrimType::PrimType_Inner_ToFormat;
35 
~OpenCLSubGraph()36 OpenCLSubGraph::~OpenCLSubGraph() { UnInit(); }
37 
// Splice a freshly created conversion kernel (`in_convert_op`) between `in_tensor`
// and the kernels in `in_kernels`.
//   mem_type == MemType::IMG : the convert op is inserted *before* the kernels —
//     each kernel's in_kernels/in_tensors are rewritten and the kernel is
//     registered as an output of the convert op.
//   otherwise (BUF/GLTexture): the convert op is inserted *after* the kernels —
//     their out_kernels/out_tensors are rewritten and each kernel becomes an
//     input of the convert op.
void OpenCLSubGraph::ReplaceOutTensorAndKernelToConvert(const lite::Tensor *in_tensor,
                                                        const std::vector<kernel::KernelExec *> &in_kernels,
                                                        lite::Tensor *new_tensor, kernel::KernelExec *in_convert_op,
                                                        MemType mem_type) {
  MS_ASSERT(in_convert_op);
  auto in_opencl_op = in_convert_op;
  for (auto &iv : in_kernels) {
    MS_ASSERT(iv);
    // Work on a copy of the neighbour-kernel list; it is written back below.
    auto kernels = (mem_type == MemType::IMG) ? iv->in_kernels() : iv->out_kernels();
    // NOTE(review): the predicate searches for `iv` itself inside its own
    // neighbour list, which normally never matches, so the convert op is
    // appended in the else branch — confirm this linking is intended.
    auto fk = std::find_if(kernels.begin(), kernels.end(), [&](kernel::KernelExec *kv) { return kv == iv; });
    if (fk != kernels.end()) {
      *fk = in_convert_op;
    } else {
      kernels.emplace_back(in_convert_op);
    }
    // Replace the original boundary tensor with the converted tensor.
    auto tensors = (mem_type == MemType::IMG) ? iv->in_tensors() : iv->out_tensors();
    auto ft = std::find_if(tensors.begin(), tensors.end(), [&](lite::Tensor *kv) { return kv == in_tensor; });
    if (ft != tensors.end()) {
      *ft = new_tensor;
    } else {
      tensors.emplace_back(new_tensor);
    }
    if (mem_type == MemType::IMG) {
      iv->set_in_kernels(kernels);
      iv->set_in_tensors(tensors);
      in_opencl_op->AddOutKernel(iv);
    } else {
      iv->set_out_kernels(kernels);
      iv->set_out_tensors(tensors);
      in_convert_op->AddInKernel(iv);
    }
  }
}
71 
GenToFormatOp(const std::vector<lite::Tensor * > & in_tensors,const std::vector<std::vector<kernel::KernelExec * >> & in_kernels,std::vector<lite::Tensor * > * out_tensors,std::vector<OpenCLToFormatParameter * > * out_parameters,std::vector<KernelExec * > * out_convert_ops,MemType mem_type)72 int OpenCLSubGraph::GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors,
73                                   const std::vector<std::vector<kernel::KernelExec *>> &in_kernels,
74                                   std::vector<lite::Tensor *> *out_tensors,
75                                   std::vector<OpenCLToFormatParameter *> *out_parameters,
76                                   std::vector<KernelExec *> *out_convert_ops, MemType mem_type) {
77   MS_ASSERT(out_tensors);
78   MS_ASSERT(out_parameters);
79   MS_ASSERT(out_convert_ops);
80   out_tensors->clear();
81   out_parameters->clear();
82   out_convert_ops->clear();
83   std::vector<std::vector<kernel::KernelExec *>> loop_kernels;
84   if (mem_type == MemType::BUF) {
85     GetKernelFromToTensor(in_tensors, nodes_, &loop_kernels, true);
86   }
87 
88   for (size_t i = 0; i < in_tensors.size(); ++i) {
89     auto *in_tensor = in_tensors.at(i);
90     auto *new_tensor = new (std::nothrow)
91       lite::Tensor(in_tensor->data_type(), in_tensor->shape(), in_tensor->format(), lite::Category::VAR);
92     MS_ASSERT(new_tensor);
93     if (new_tensor == nullptr) {
94       MS_LOG(ERROR) << "OpenCLSubGraph new tensor failed!";
95       return RET_ERROR;
96     }
97     for (const auto &param : in_tensor->quant_params()) {
98       new_tensor->AddQuantParam(param);
99     }
100 
101     out_tensors->emplace_back(new_tensor);
102     KernelKey desc{kGPU, kNumberTypeFloat32, NHWC, PrimType_Inner_ToFormat};
103     auto *parameter = static_cast<OpenCLToFormatParameter *>(malloc(sizeof(OpenCLToFormatParameter)));
104     MS_ASSERT(parameter);
105     if (parameter == nullptr) {
106       MS_LOG(ERROR) << "OpenCLSubGraph new parameter failed!";
107       delete new_tensor;
108       new_tensor = nullptr;
109       return RET_ERROR;
110     }
111 
112     parameter->op_parameter.is_zero_shape_ = false;
113     parameter->op_parameter.type_ = PrimType_Inner_ToFormat;
114     parameter->out_mem_type = mem_type;
115     out_parameters->emplace_back(parameter);
116     LiteKernel *in_convert_op_inner = nullptr;
117     if (mem_type == MemType::IMG) {
118       in_convert_op_inner = OpenCLKernelCreator<ToFormatOpenCLKernel>(
119         {in_tensor}, {new_tensor}, reinterpret_cast<OpParameter *>(parameter), this->Context(), desc);
120     } else {
121       in_convert_op_inner = OpenCLKernelCreator<ToFormatOpenCLKernel>(
122         {new_tensor}, {in_tensor}, reinterpret_cast<OpParameter *>(parameter), this->Context(), desc);
123     }
124     MS_ASSERT(in_convert_op_inner);
125     if (in_convert_op_inner == nullptr ||
126         reinterpret_cast<ToFormatOpenCLKernel *>(in_convert_op_inner)->CheckSpecs() != RET_OK) {
127       MS_LOG(ERROR) << "OpenCLSubGraph create op failed!";
128       delete new_tensor;
129       new_tensor = nullptr;
130       free(parameter);
131       parameter = nullptr;
132       return RET_ERROR;
133     }
134     std::shared_ptr<kernel::Kernel> inner_convert_op(in_convert_op_inner);
135     auto *in_convert_op = new (std::nothrow) kernel::KernelExec(inner_convert_op);
136     if (in_convert_op == nullptr) {
137       MS_LOG(ERROR) << "OpenCLSubGraph create op failed!";
138       delete new_tensor;
139       new_tensor = nullptr;
140       free(parameter);
141       parameter = nullptr;
142       return RET_ERROR;
143     }
144     static int index = 0;
145     in_convert_op->set_name("ToFormat_" + std::to_string(index++));
146 
147     ReplaceOutTensorAndKernelToConvert(in_tensor, in_kernels.at(i), new_tensor, in_convert_op, mem_type);
148 
149     // replace in_tensor of inner kernel which use out tensor
150     if (mem_type == MemType::BUF) {
151       for (auto &iv : loop_kernels[i]) {
152         MS_ASSERT(iv);
153         auto tensors = iv->in_tensors();
154         auto jv = std::find(tensors.begin(), tensors.end(), in_tensors.at(i));
155         if (jv != tensors.end()) {
156           *jv = new_tensor;
157           iv->set_in_tensors(tensors);
158         }
159       }
160     }
161 
162     out_convert_ops->emplace_back(in_convert_op);
163   }
164   return RET_OK;
165 }
166 
GenGLToCLOp(const std::vector<lite::Tensor * > & in_tensors,const std::vector<std::vector<kernel::KernelExec * >> & in_kernels,std::vector<lite::Tensor * > * out_tensors,std::vector<OpenGLTexture2DToOpenCLParameter * > * out_parameters,std::vector<KernelExec * > * out_convert_ops,MemType mem_type)167 int OpenCLSubGraph::GenGLToCLOp(const std::vector<lite::Tensor *> &in_tensors,
168                                 const std::vector<std::vector<kernel::KernelExec *>> &in_kernels,
169                                 std::vector<lite::Tensor *> *out_tensors,
170                                 std::vector<OpenGLTexture2DToOpenCLParameter *> *out_parameters,
171                                 std::vector<KernelExec *> *out_convert_ops, MemType mem_type) {
172   MS_ASSERT(out_tensors);
173   MS_ASSERT(out_parameters);
174   MS_ASSERT(out_convert_ops);
175   out_tensors->clear();
176   out_parameters->clear();
177   out_convert_ops->clear();
178   std::vector<std::vector<kernel::KernelExec *>> loop_kernels;
179   if (mem_type == MemType::GLTexture) {
180     GetKernelFromToTensor(in_tensors, nodes_, &loop_kernels, true);
181   }
182 
183   for (size_t i = 0; i < in_tensors.size(); ++i) {
184     auto *in_tensor = in_tensors.at(i);
185     auto *new_tensor = new (std::nothrow)
186       lite::Tensor(in_tensor->data_type(), in_tensor->shape(), in_tensor->format(), lite::Category::VAR);
187     MS_ASSERT(new_tensor);
188     if (new_tensor == nullptr) {
189       MS_LOG(ERROR) << "OpenCLSubGraph new tensor failed!";
190       return RET_ERROR;
191     }
192     for (const auto &param : in_tensor->quant_params()) {
193       new_tensor->AddQuantParam(param);
194     }
195 
196     out_tensors->emplace_back(new_tensor);
197     KernelKey desc{kGPU, kNumberTypeGLUInt, NHWC, PrimType::PrimType_Inner_GltextureToOpencl};
198     auto *parameter = static_cast<OpenGLTexture2DToOpenCLParameter *>(malloc(sizeof(OpenGLTexture2DToOpenCLParameter)));
199     MS_ASSERT(parameter);
200     if (parameter == nullptr) {
201       MS_LOG(ERROR) << "OpenCLSubGraph new parameter failed!";
202       delete new_tensor;
203       new_tensor = nullptr;
204       return RET_ERROR;
205     }
206 
207     parameter->op_parameter.is_zero_shape_ = false;
208     parameter->op_parameter.type_ = PrimType::PrimType_Inner_GltextureToOpencl;
209     parameter->out_mem_type = mem_type;
210     out_parameters->emplace_back(parameter);
211     LiteKernel *in_convert_op_inner = nullptr;
212     if (mem_type == MemType::IMG) {
213       in_convert_op_inner = OpenCLKernelCreator<GLToCLOpenCLKernel>(
214         {in_tensor}, {new_tensor}, reinterpret_cast<OpParameter *>(parameter), this->Context(), desc);
215     } else {
216       in_convert_op_inner = OpenCLKernelCreator<GLToCLOpenCLKernel>(
217         {new_tensor}, {in_tensor}, reinterpret_cast<OpParameter *>(parameter), this->Context(), desc);
218     }
219     MS_ASSERT(in_convert_op_inner);
220     if (in_convert_op_inner == nullptr ||
221         reinterpret_cast<GLToCLOpenCLKernel *>(in_convert_op_inner)->CheckSpecs() != RET_OK) {
222       MS_LOG(ERROR) << "OpenCLSubGraph create op failed!";
223       delete new_tensor;
224       new_tensor = nullptr;
225       free(parameter);
226       parameter = nullptr;
227       return RET_ERROR;
228     }
229     std::shared_ptr<kernel::Kernel> inner_convert_op(in_convert_op_inner);
230     auto *in_convert_op = new (std::nothrow) kernel::KernelExec(inner_convert_op);
231     if (in_convert_op == nullptr) {
232       MS_LOG(ERROR) << "OpenCLSubGraph create op failed!";
233       delete new_tensor;
234       new_tensor = nullptr;
235       free(parameter);
236       parameter = nullptr;
237       return RET_ERROR;
238     }
239     static int index = 0;
240     in_convert_op->set_name("GLToCL_" + std::to_string(index++));
241     ReplaceOutTensorAndKernelToConvert(in_tensor, in_kernels.at(i), new_tensor, in_convert_op, mem_type);
242     // replace in_tensor of inner kernel which use out tensor
243     if (mem_type == MemType::GLTexture) {
244       for (auto &iv : loop_kernels[i]) {
245         MS_ASSERT(iv);
246         auto tensors = iv->in_tensors();
247         auto jv = std::find(tensors.begin(), tensors.end(), in_tensors.at(i));
248         if (jv != tensors.end()) {
249           *jv = new_tensor;
250           iv->set_in_tensors(tensors);
251         }
252       }
253     }
254 
255     out_convert_ops->emplace_back(in_convert_op);
256   }
257   return RET_OK;
258 }
259 
InsertOpsPass()260 int OpenCLSubGraph::InsertOpsPass() {
261   GetInOutNodes();
262 
263   std::vector<std::vector<kernel::KernelExec *>> from_kernels_;
264   GetKernelFromToTensor(in_tensors(), in_nodes_, &from_kernels_, true);
265   int ret = 0;
266 
267   if (this->GetOpenGLTextureEnable() == true) {
268     ret = GenGLToCLOp(in_tensors(), from_kernels_, &in_convert_tensors_, &gl_in_parameters_, &in_convert_ops_,
269                       MemType::IMG);
270   } else {
271     ret =
272       GenToFormatOp(in_tensors(), from_kernels_, &in_convert_tensors_, &in_parameters_, &in_convert_ops_, MemType::IMG);
273   }
274 
275   if (ret != RET_OK) {
276     return ret;
277   }
278   nodes_.insert(nodes_.begin(), in_convert_ops_.begin(), in_convert_ops_.end());
279 
280   std::vector<std::vector<kernel::KernelExec *>> to_kernels_;
281   GetKernelFromToTensor(out_tensors(), out_nodes_, &to_kernels_, false);
282 
283   if (this->GetOpenGLTextureEnable()) {
284     ret = GenGLToCLOp(out_tensors(), to_kernels_, &out_convert_tensors_, &gl_out_parameters_, &out_convert_ops_,
285                       MemType::GLTexture);
286   } else {
287     ret = GenToFormatOp(out_tensors(), to_kernels_, &out_convert_tensors_, &out_parameters_, &out_convert_ops_,
288                         MemType::BUF);
289   }
290 
291   if (ret != RET_OK) {
292     return ret;
293   }
294   nodes_.insert(nodes_.end(), out_convert_ops_.begin(), out_convert_ops_.end());
295   GetInOutNodes();
296   return RET_OK;
297 }
298 
RunPass()299 int OpenCLSubGraph::RunPass() {
300   // The fp16 operator in heterogeneous scenes needs to be set to fp32
301   // to prevent the frame from being converted to fp16 in advance.
302   auto in_first_tensor = in_tensors().front();
303   if (in_first_tensor->IsGraphInput() &&
304       (in_first_tensor->data_type() == kNumberTypeFloat32 || in_first_tensor->data_type() == kNumberTypeFloat16)) {
305     desc_.data_type = in_tensors()[0]->data_type();
306   }
307   allocator_ = ocl_runtime_->GetAllocator();
308   MS_LOG(DEBUG) << "input num=" << in_tensors().size() << ", output num=" << out_tensors().size();
309   for (const auto tensor : in_tensors()) {
310     MS_ASSERT(tensor);
311     tensor->set_allocator(allocator_);
312   }
313   for (const auto tensor : out_tensors()) {
314     MS_ASSERT(tensor);
315     tensor->set_allocator(allocator_);
316   }
317   std::vector<std::pair<std::string, std::function<int(void)>>> pass_manager{
318     {"FusionPass", std::bind(&OpenCLSubGraph::FusionPass, this)},
319     {"InsertOpsPass", std::bind(&OpenCLSubGraph::InsertOpsPass, this)},
320     {"UpdateTensorDataTypePass", std::bind(&OpenCLSubGraph::UpdateTensorDataTypePass, this)},
321   };
322   for (auto iv : pass_manager) {
323     auto ret = iv.second();
324     if (ret != RET_OK) {
325       MS_LOG(ERROR) << "Run Pass: " << iv.first << " failed.";
326       return RET_ERROR;
327     }
328   }
329   return RET_OK;
330 }
331 
UpdateTensorDataTypePass()332 int OpenCLSubGraph::UpdateTensorDataTypePass() {
333   bool is_fp16 = ocl_runtime_->GetFp16Enable();
334   if (is_fp16 && subgraph_type() == kGpuFp16SubGraph) {
335     auto in_tensors = this->in_tensors();
336     auto out_tensors = this->out_tensors();
337     for (auto iv : nodes_) {
338       MS_ASSERT(iv);
339       auto cur_outs = iv->out_tensors();
340       // if softmax is last kernel, output fp32 tensor
341       if (iv->type() == schema::PrimitiveType_Softmax) {
342         bool last_kernel = true;
343         for (auto k : iv->out_kernels()) {
344           int type = k->op_parameter() == nullptr ? k->type() : k->op_parameter()->type_;
345           if (type == PrimType::PrimType_Inner_ToFormat) {
346             last_kernel = false;
347             break;
348           }
349         }
350         if (last_kernel) continue;
351       }
352       for (auto jv : cur_outs) {
353         MS_ASSERT(jv);
354         // if Fp16Enable, only change fp32 to fp16, other dtype is reserved
355         if (jv->data_type() == kNumberTypeFloat32 && !jv->IsGraphOutput()) {
356           jv->set_data_type(kNumberTypeFloat16);
357         }
358       }
359     }
360   }
361   return RET_OK;
362 }
363 
GetKernelFromToTensor(const std::vector<lite::Tensor * > & in_tensors,const std::vector<kernel::KernelExec * > & in_kernels,std::vector<std::vector<kernel::KernelExec * >> * out_kernels,bool is_from)364 void OpenCLSubGraph::GetKernelFromToTensor(const std::vector<lite::Tensor *> &in_tensors,
365                                            const std::vector<kernel::KernelExec *> &in_kernels,
366                                            std::vector<std::vector<kernel::KernelExec *>> *out_kernels, bool is_from) {
367   std::vector<std::set<lite::Tensor *>> ksets;
368   for (auto jv : in_kernels) {
369     MS_ASSERT(jv);
370     auto tens = is_from ? jv->in_tensors() : jv->out_tensors();
371     std::set<lite::Tensor *> kset;
372     kset.insert(tens.begin(), tens.end());
373     ksets.emplace_back(kset);
374   }
375   MS_ASSERT(out_kernels);
376   for (auto in_tensor : in_tensors) {
377     std::vector<kernel::KernelExec *> kvec;
378     for (size_t j = 0; j < in_kernels.size(); ++j) {
379       if (ksets[j].count(in_tensor)) {
380         kvec.emplace_back(in_kernels[j]);
381       }
382     }
383     out_kernels->emplace_back(kvec);
384   }
385 }
386 
GetInOutNodes()387 void OpenCLSubGraph::GetInOutNodes() {
388   this->in_nodes_.clear();
389   this->out_nodes_.clear();
390   auto in_tensors = this->in_tensors();
391   auto out_tensors = this->out_tensors();
392   for (auto *node : nodes_) {
393     for (auto *tensor : node->in_tensors()) {
394       if (std::find(in_tensors.begin(), in_tensors.end(), tensor) != in_tensors.end()) {
395         in_nodes_.emplace_back(node);
396         break;
397       }
398     }
399     for (auto *tensor : node->out_tensors()) {
400       if (std::find(out_tensors.begin(), out_tensors.end(), tensor) != out_tensors.end()) {
401         out_nodes_.emplace_back(node);
402         break;
403       }
404     }
405   }
406 }
407 
// One-time setup before execution: propagate the fp16 mode, bind the GPU
// allocator to boundary and node-output tensors, create the executor,
// pre-upload weights for selected builtin ops, prepare shape-inferred nodes,
// and — when all shapes are known — run a tuning/allocation pass.
// Returns RET_OK on success; RET_ERROR / RET_NULL_PTR / a node's error code
// on failure.
int OpenCLSubGraph::Prepare() {
  ocl_runtime_->SetFp16Enable(subgraph_type() == kGpuFp16SubGraph);

  for (const auto tensor : in_tensors()) {
    MS_ASSERT(tensor);
    tensor->set_allocator(allocator_);
  }
  for (const auto tensor : out_tensors()) {
    MS_ASSERT(tensor);
    tensor->set_allocator(allocator_);
  }
  // NOTE(review): if Prepare were ever called twice, the previous executor_
  // would leak here (UnInit deletes only the latest) — confirm single-call use.
  executor_ = new (std::nothrow) lite::opencl::OpenCLExecutor();
  if (executor_ == nullptr) {
    MS_LOG(ERROR) << "Create OpenCLExecutor fail";
    return RET_ERROR;
  }
  for (auto node : this->nodes_) {
    if (node == nullptr) {
      MS_LOG(ERROR) << "node in Subgraph is nullptr";
      return mindspore::lite::RET_NULL_PTR;
    }
    for (const auto tensor : node->out_tensors()) {
      CHECK_NULL_RETURN(tensor);
      // Node outputs must not hold data yet; the GPU allocator will provide it.
      MS_CHECK_TRUE_RET(tensor->data() == nullptr, RET_ERROR);
      tensor->set_allocator(allocator_);
    }
    if (desc_.provider == kBuiltin) {
      auto opencl_kernel = reinterpret_cast<kernel::OpenCLKernel *>(node->kernel());
      // These op types need their weights initialized before Prepare/tuning.
      std::set<int> pre_init_weight_list = {schema::PrimitiveType_MatMulFusion, schema::PrimitiveType_BiasAdd};
      if (pre_init_weight_list.find(opencl_kernel->type()) != pre_init_weight_list.end()) {
        auto ret = opencl_kernel->InitWeights();
        if (ret != RET_OK) {
          MS_LOG(ERROR) << "init weights " << node->name() << " failed";
          return ret;
        }
      }
    }
    // Only nodes whose shape inference already completed can be prepared now.
    if (node->InferShapeDone()) {
      auto ret = node->Prepare();
      if (ret != RET_OK) {
        MS_LOG(ERROR) << "prepare node " << node->name() << " failed";
        return ret;
      }
    }
  }
  if (all_kernels_infer_done_) {
    auto opencl_exec = reinterpret_cast<lite::opencl::OpenCLExecutor *>(executor_);
    // If tuning_mode is DEFAULT, just malloc memory for reuse.
    auto ret = opencl_exec->RunOrTune(in_tensors(), out_tensors(), nodes_, nullptr, nullptr, true);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Run opencl Tuning failed: " << ret;
      return ret;
    }
  }
  return RET_OK;
}
464 
UnInit()465 void OpenCLSubGraph::UnInit() {
466   for (const auto &tensor : in_convert_tensors_) {
467     delete tensor;
468   }
469   in_convert_tensors_.clear();
470   for (const auto &tensor : out_convert_tensors_) {
471     delete tensor;
472   }
473   out_convert_tensors_.clear();
474   for (const auto &op : nodes_) {
475     delete op;
476   }
477   nodes_.clear();
478   in_convert_ops_.clear();
479   out_convert_ops_.clear();
480   delete this->executor_;
481 }
482 
ReSize()483 int OpenCLSubGraph::ReSize() {
484   for (auto kernel : nodes_) {
485     if (kernel == nullptr) {
486       MS_LOG(ERROR) << "input kernel is nullptr!";
487       return RET_ERROR;
488     }
489     if (kernel->subgraph_type() != kernel::kNotSubGraph) {
490       MS_LOG(ERROR) << "all nodes in should be kernel";
491       return RET_ERROR;
492     }
493     std::vector<lite::Tensor *> outputs = kernel->out_tensors();
494     for (auto &output : outputs) {
495       output->FreeData();
496       output->set_shape({-1});
497     }
498   }
499   for (auto kernel : nodes_) {
500     auto ret = kernel->ReSize();
501     if (ret != RET_OK) {
502       MS_LOG(WARNING) << "ReSize " << kernel->name() << "failed!, ret:" << ret;
503       return ret;
504     }
505   }
506   return RET_OK;
507 }
508 
Execute()509 int OpenCLSubGraph::Execute() {
510   if (executor_ == nullptr) {
511     MS_LOG(ERROR) << "executor is nullptr";
512     return RET_ERROR;
513   }
514   int ret;
515   for (auto &tensor : in_tensors()) {
516     MS_ASSERT(tensor);
517     if (tensor->data() == nullptr) {
518       MS_LOG(ERROR) << "OpenCL subgraph input tensor data is null";
519       return RET_ERROR;
520     }
521     ret = allocator_->UnmapBuffer(tensor->data());
522     if (ret != RET_OK) {
523       return ret;
524     }
525   }
526 
527   ret = executor_->Run(in_tensors(), out_tensors(), nodes_);
528   if (ret != RET_OK) {
529     MS_LOG(ERROR) << "Run opencl executor failed: " << ret;
530     return ret;
531   }
532   if (!ocl_runtime_->SyncCommandQueue()) {
533     MS_LOG(ERROR) << "SyncCommandQueue failed.";
534     return RET_ERROR;
535   }
536   return RET_OK;
537 }
538 
Execute(const KernelCallBack & before,const KernelCallBack & after)539 int OpenCLSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) {
540   if (executor_ == nullptr) {
541     MS_LOG(ERROR) << "executor is nullptr";
542     return RET_ERROR;
543   }
544   int ret;
545   for (auto &tensor : in_tensors()) {
546     MS_ASSERT(tensor);
547     if (tensor->data() == nullptr) {
548       MS_LOG(ERROR) << "OpenCL subgraph input tensor data is null";
549       return RET_ERROR;
550     }
551     ret = allocator_->UnmapBuffer(tensor->data());
552     if (ret != RET_OK) {
553       return ret;
554     }
555   }
556 
557   ret = executor_->Run(in_tensors(), out_tensors(), nodes_, before, after);
558   if (ret != RET_OK) {
559     MS_LOG(ERROR) << "Run opencl executor failed: " << ret;
560     return ret;
561   }
562   if (!ocl_runtime_->SyncCommandQueue()) {
563     MS_LOG(ERROR) << "SyncCommandQueue failed.";
564     return RET_ERROR;
565   }
566   return RET_OK;
567 }
568 }  // namespace mindspore::kernel
569