/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/delegate/tensorrt/tensorrt_delegate.h"
#include <cuda_runtime.h>
#include <vector>
#include <fstream>
#include <string>
#include "src/delegate/delegate_utils.h"
#include "src/delegate/tensorrt/op/activation_tensorrt.h"
#include "src/delegate/tensorrt/op/shape_tensorrt.h"
#include "src/delegate/tensorrt/op/gather_tensorrt.h"
#include "src/delegate/tensorrt/op/shuffle_tensorrt.h"
#include "src/delegate/tensorrt/op/concate_tensorrt.h"
#include "src/delegate/tensorrt/op/convolution_tensorrt.h"
#include "src/delegate/tensorrt/op/deconvolution_tensorrt.h"
#include "src/delegate/tensorrt/op/elementwise_tensorrt.h"
#include "src/delegate/tensorrt/op/reduce_tensorrt.h"
#include "src/delegate/tensorrt/op/softmax_tensorrt.h"
#include "src/delegate/tensorrt/op/unary_tensorrt.h"
#include "src/delegate/tensorrt/op/matmul_tensorrt.h"
#include "src/delegate/tensorrt/op/scale_tensorrt.h"
#include "src/delegate/tensorrt/op/slice_tensorrt.h"
#include "src/delegate/tensorrt/op/pool_tensorrt.h"
#include "src/delegate/tensorrt/op/pad_tensorrt.h"
#include "src/delegate/tensorrt/op/resize_tensorrt.h"

namespace mindspore::lite {
TensorRTDelegate::~TensorRTDelegate() {
  if (runtime_ != nullptr) {
    delete runtime_;
  }
}
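// Checks whether a usable NVIDIA GPU driver is present by querying the CUDA driver version.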
bool IsHardwareSupport() {
  int driver_version = 0;
  int ret = cudaDriverGetVersion(&driver_version);
  if (ret != cudaSuccess || driver_version == 0) {
    MS_LOG(WARNING) << "No NVIDIA GPU driver found.";
    return false;
  }
  return true;
}

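// Init resolves the GPU device info from the user context, registers the op builders for every
// primitive type this delegate can convert, and creates the shared TensorRTRuntime.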
Status TensorRTDelegate::Init() {
  if (!IsHardwareSupport()) {
    return mindspore::kLiteNotSupport;
  }
  std::vector<std::shared_ptr<DeviceInfoContext>> device_list = context_->MutableDeviceInfo();
  auto iter = std::find_if(device_list.begin(), device_list.end(), [](std::shared_ptr<DeviceInfoContext> device) {
    return device->GetDeviceType() == DeviceType::kGPU;
  });
  if (iter == device_list.end()) {
    MS_LOG(ERROR) << "no gpu device info found for TensorRT.";
    return mindspore::kLiteError;
  }
  auto gpu_info = (*iter)->Cast<GPUDeviceInfo>();
  if (gpu_info == nullptr) {
    MS_LOG(ERROR) << "no gpu device info found for TensorRT.";
    return mindspore::kLiteError;
  }
  device_info_ = gpu_info;
  op_func_lists_.clear();
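  // Map each supported schema primitive type to the factory that builds its TensorRTOp wrapper.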
  op_func_lists_ = {
    {schema::PrimitiveType_Activation, GetTensorRTOp<ActivationTensorRT>},
    {schema::PrimitiveType_Concat, GetTensorRTOp<ConcateTensorRT>},
    {schema::PrimitiveType_Conv2DFusion, GetTensorRTOp<ConvolutionTensorRT>},
    {schema::PrimitiveType_Conv2dTransposeFusion, GetTensorRTOp<DeconvolutionTensorRT>},
    {schema::PrimitiveType_SubFusion, GetTensorRTOp<ElementWiseTensorRT>},
    {schema::PrimitiveType_DivFusion, GetTensorRTOp<ElementWiseTensorRT>},
    {schema::PrimitiveType_PowFusion, GetTensorRTOp<ElementWiseTensorRT>},
    {schema::PrimitiveType_AddFusion, GetTensorRTOp<ElementWiseTensorRT>},
    {schema::PrimitiveType_MulFusion, GetTensorRTOp<ElementWiseTensorRT>},
    {schema::PrimitiveType_Eltwise, GetTensorRTOp<ElementWiseTensorRT>},
    {schema::PrimitiveType_Gather, GetTensorRTOp<GatherTensorRT>},
    {schema::PrimitiveType_MatMulFusion, GetTensorRTOp<MatMulTensorRT>},
    {schema::PrimitiveType_FullConnection, GetTensorRTOp<MatMulTensorRT>},
    {schema::PrimitiveType_AvgPoolFusion, GetTensorRTOp<PoolTensorRT>},
    {schema::PrimitiveType_MaxPoolFusion, GetTensorRTOp<PoolTensorRT>},
    {schema::PrimitiveType_PadFusion, GetTensorRTOp<PadTensorRT>},
    {schema::PrimitiveType_ReduceFusion, GetTensorRTOp<ReduceTensorRT>},
    {schema::PrimitiveType_Resize, GetTensorRTOp<ResizeTensorRT>},
    {schema::PrimitiveType_ScaleFusion, GetTensorRTOp<ScaleTensorRT>},
    {schema::PrimitiveType_StridedSlice, GetTensorRTOp<SliceTensorRT>},
    {schema::PrimitiveType_Shape, GetTensorRTOp<ShapeTensorRT>},
    {schema::PrimitiveType_Unsqueeze, GetTensorRTOp<ShuffleTensorRT>},
    {schema::PrimitiveType_Squeeze, GetTensorRTOp<ShuffleTensorRT>},
    {schema::PrimitiveType_Reshape, GetTensorRTOp<ShuffleTensorRT>},
    {schema::PrimitiveType_Transpose, GetTensorRTOp<ShuffleTensorRT>},
    {schema::PrimitiveType_Flatten, GetTensorRTOp<ShuffleTensorRT>},
    {schema::PrimitiveType_Softmax, GetTensorRTOp<SoftMaxTensorRT>},
    {schema::PrimitiveType_Sqrt, GetTensorRTOp<UnaryTensorRT>},
  };
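  // Ops listed here are treated as incompatible with dynamic height/width resize; if the model
  // contains any of them, Build() turns support_hw_resize_ off for the generated subgraphs.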
  unsupport_hw_op_lists_ = {schema::PrimitiveType_Reshape};
  lite::SetCudaDevice(device_info_);
  if (runtime_ == nullptr) {
    runtime_ = new (std::nothrow) TensorRTRuntime();
  }
  if (runtime_ == nullptr) {
    MS_LOG(ERROR) << "create TensorRTRuntime failed.";
    return mindspore::kLiteError;
  }
  if (runtime_->Init() != RET_OK) {
    MS_LOG(ERROR) << "TensorRTRuntime init failed.";
    return mindspore::kLiteError;
  }
  return mindspore::kSuccess;
}

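// Build walks the model's kernels in order, collects maximal runs of consecutive kernels the
// delegate supports, and replaces each run with a single TensorRTSubGraph kernel.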
Status TensorRTDelegate::Build(DelegateModel<schema::Primitive> *model) {
  lite::SetCudaDevice(device_info_);
  KernelIter from, end;
  std::vector<TensorRTOp *> tensorrt_ops;
  for (KernelIter iter = model->BeginKernelIterator(); iter != model->EndKernelIterator(); iter++) {
    kernel::Kernel *kernel = *iter;
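    // Dynamic height/width resize stays enabled only while no kernel of an unsupported type is seen.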
    if (support_hw_resize_) {
      for (auto no_type : unsupport_hw_op_lists_) {
        if (model->GetPrimitive(kernel)->value_type() == no_type) {
          support_hw_resize_ = false;
          MS_LOG(INFO) << "network has op that does not support hw resize.";
          break;
        }
      }
    }
    auto tensorrt_op = FindTensorRTOp(kernel, model->GetPrimitive(kernel));
    if (tensorrt_op != nullptr) {
      // If tensorrt_op is not nullptr, this kernel can be supported by the delegate.
      if (tensorrt_ops.size() == 0) {
        from = iter;
      }
      tensorrt_ops.push_back(tensorrt_op);
      end = iter;
    } else {
      if (tensorrt_ops.size() > 0) {
        auto tensorrt_subgraph = CreateTensorRTGraph(tensorrt_ops, model, from, end);
        if (tensorrt_subgraph == nullptr) {
          MS_LOG(ERROR) << "Create TensorRT Graph failed.";
          return mindspore::kLiteNullptr;
        }
        iter = model->Replace(from, end + 1, tensorrt_subgraph);
        tensorrt_ops.clear();
      }
    }
  }
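  // Flush the last run of supported kernels if the model ends while one is still being collected.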
  if (tensorrt_ops.size() > 0) {
    auto tensorrt_subgraph = CreateTensorRTGraph(tensorrt_ops, model, from, end);
    if (tensorrt_subgraph == nullptr) {
      MS_LOG(ERROR) << "Create TensorRT Graph failed.";
      return mindspore::kLiteNullptr;
    }
    model->Replace(from, end + 1, tensorrt_subgraph);
    tensorrt_ops.clear();
  }
  return mindspore::kSuccess;
}

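// Look up the factory registered for the kernel's primitive type and delegate op creation to it;
// unsupported types return nullptr so the kernel stays on its original backend.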
TensorRTOp *TensorRTDelegate::FindTensorRTOp(kernel::Kernel *kernel, const schema::Primitive *primitive) {
  auto in_tensors = kernel->inputs();
  auto out_tensors = kernel->outputs();
  auto name = kernel->name();
  auto node_type = primitive->value_type();
  if (op_func_lists_.find(node_type) != op_func_lists_.end()) {
    return op_func_lists_[node_type](primitive, in_tensors, out_tensors, name);
  } else {
    MS_LOG(WARNING) << "Unsupported op type for TensorRT. kernel->name:" << kernel->name()
                    << " type:" << schema::EnumNamePrimitiveType(primitive->value_type());
    return nullptr;
  }
}

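// Wrap a run of supported ops [from, end] into a TensorRTSubGraph: compute the subgraph's external
// input/output tensors, link producer/consumer ops, then init and build the TensorRT model.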
TensorRTSubGraph *TensorRTDelegate::CreateTensorRTGraph(const std::vector<TensorRTOp *> &ops,
                                                        DelegateModel<schema::Primitive> *model, KernelIter from,
                                                        KernelIter end) {
  auto in_tensors = GraphInTensors<TensorRTOp>(ops, model, from, end);
  auto out_tensors = GraphOutTensors<TensorRTOp>(ops, model, from, end);
  auto *tensorrt_graph = new (std::nothrow)
    TensorRTSubGraph(ops, in_tensors, out_tensors, context_, device_info_, runtime_, support_hw_resize_);
  if (tensorrt_graph == nullptr) {
    MS_LOG(ERROR) << "new tensorrt_graph failed.";
    return nullptr;
  }
  // 1. For every op, find pre and next ops
  FindPreNextOps<TensorRTOp>(ops);

  // 2. Init TensorRT SubGraph.
  auto ret = tensorrt_graph->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "TensorRTGraph init failed.";
    delete tensorrt_graph;
    return nullptr;
  }

  // 3. Build TensorRT Model.
  ret = tensorrt_graph->BuildTensorRTGraph();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "TensorRTGraph build failed.";
    delete tensorrt_graph;
    return nullptr;
  }

  return tensorrt_graph;
}
}  // namespace mindspore::lite