• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_OPENCL_SUBGRAPH_H_
18 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_OPENCL_SUBGRAPH_H_
19 
#include <algorithm>
#include <atomic>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "src/litert/kernel/opencl/opencl_kernel.h"
#include "src/litert/kernel/gpu/opencl/opencl_allocator.h"
#include "src/litert/kernel/gpu/opencl/opencl_executor.h"
#include "src/executor/sub_graph_kernel.h"
27 
28 namespace mindspore::kernel {
29 class OpenCLSubGraph : public SubGraphKernel {
30  public:
OpenCLSubGraph(const std::vector<kernel::KernelExec * > & inKernels,const std::vector<kernel::KernelExec * > & outKernels,const std::vector<kernel::KernelExec * > & nodes,MSKernel * kernel)31   OpenCLSubGraph(const std::vector<kernel::KernelExec *> &inKernels,
32                  const std::vector<kernel::KernelExec *> &outKernels, const std::vector<kernel::KernelExec *> &nodes,
33                  MSKernel *kernel)
34       : SubGraphKernel(inKernels, outKernels, nodes, kernel) {
35     ocl_runtime_ = ocl_runtime_wrap_.GetInstance();
36     if (nodes.front()->desc().data_type == kNumberTypeFloat16) {
37       subgraph_type_ = kGpuFp16SubGraph;
38       desc_.data_type = kNumberTypeFloat16;
39     } else {
40       subgraph_type_ = kGpuFp32SubGraph;
41       desc_.data_type = kNumberTypeFloat32;
42     }
43     desc_.arch = kernel::KERNEL_ARCH::kGPU;
44     static std::atomic_int index = 0;
45     this->set_name("GpuSubGraph" + std::to_string(index++));
46     nodes_set_.insert(nodes.begin(), nodes.end());
47     all_kernels_infer_done_ = std::all_of(nodes_.begin(), nodes_.end(), [](const kernel::KernelExec *kernel) {
48       return kernel && kernel->InferShapeDone();
49     });
50   }
51   ~OpenCLSubGraph() override;
52 
53   int RunPass();
54   int Prepare() override;
55   int ReSize() override;
56   int Execute() override;
57   int Execute(const KernelCallBack &before, const KernelCallBack &after) override;
58 
59  private:
60   void UnInit();
61   int UpdateTensorDataTypePass();
62   void ReplaceOutTensorAndKernelToConvert(const lite::Tensor *in_tensor,
63                                           const std::vector<kernel::KernelExec *> &in_kernels, lite::Tensor *new_tensor,
64                                           kernel::KernelExec *in_convert_op, lite::opencl::MemType mem_type);
65   void GetInOutNodes();
66   int GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors,
67                     const std::vector<std::vector<kernel::KernelExec *>> &in_kernels,
68                     std::vector<lite::Tensor *> *out_tensors, std::vector<OpenCLToFormatParameter *> *out_parameters,
69                     std::vector<KernelExec *> *out_convert_ops, lite::opencl::MemType mem_type);
70   int GenGLToCLOp(const std::vector<lite::Tensor *> &in_tensors,
71                   const std::vector<std::vector<kernel::KernelExec *>> &in_kernels,
72                   std::vector<lite::Tensor *> *out_tensors,
73                   std::vector<OpenGLTexture2DToOpenCLParameter *> *out_parameters,
74                   std::vector<KernelExec *> *out_convert_ops, lite::opencl::MemType mem_type);
75   void GetKernelFromToTensor(const std::vector<lite::Tensor *> &in_tensors,
76                              const std::vector<kernel::KernelExec *> &in_kernels,
77                              std::vector<std::vector<kernel::KernelExec *>> *out_kernels, bool is_from);
78   int FusionPass();
79 
80   int InsertOpsPass();
81 
82  public:
83   using PassFunc = int (OpenCLSubGraph::*)(void);
84 
85  private:
86   std::shared_ptr<lite::opencl::OpenCLAllocator> allocator_{nullptr};
87   std::vector<lite::Tensor *> in_convert_tensors_;
88   std::vector<lite::Tensor *> out_convert_tensors_;
89   std::vector<OpenCLToFormatParameter *> in_parameters_;
90   std::vector<OpenCLToFormatParameter *> out_parameters_;
91   std::vector<OpenGLTexture2DToOpenCLParameter *> gl_in_parameters_;
92   std::vector<OpenGLTexture2DToOpenCLParameter *> gl_out_parameters_;
93   std::vector<KernelExec *> in_convert_ops_;
94   std::vector<KernelExec *> out_convert_ops_;
95   std::set<KernelExec *> nodes_set_;
96   lite::opencl::OpenCLRuntimeInnerWrapper ocl_runtime_wrap_;
97   lite::opencl::OpenCLRuntime *ocl_runtime_{nullptr};
98   bool all_kernels_infer_done_ = false;
99 };
100 }  // namespace mindspore::kernel
101 
102 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_OPENCL_SUBGRAPH_H_
103