/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/delegate/tensorrt/tensorrt_delegate.h"
#include <cuda_runtime.h>
#include <algorithm>
#include <memory>
#include <vector>
#include <fstream>
#include <string>
#include "src/delegate/delegate_utils.h"
#include "src/delegate/tensorrt/op/activation_tensorrt.h"
#include "src/delegate/tensorrt/op/shape_tensorrt.h"
#include "src/delegate/tensorrt/op/gather_tensorrt.h"
#include "src/delegate/tensorrt/op/shuffle_tensorrt.h"
#include "src/delegate/tensorrt/op/concate_tensorrt.h"
#include "src/delegate/tensorrt/op/convolution_tensorrt.h"
#include "src/delegate/tensorrt/op/deconvolution_tensorrt.h"
#include "src/delegate/tensorrt/op/elementwise_tensorrt.h"
#include "src/delegate/tensorrt/op/reduce_tensorrt.h"
#include "src/delegate/tensorrt/op/softmax_tensorrt.h"
#include "src/delegate/tensorrt/op/unary_tensorrt.h"
#include "src/delegate/tensorrt/op/matmul_tensorrt.h"
#include "src/delegate/tensorrt/op/scale_tensorrt.h"
#include "src/delegate/tensorrt/op/slice_tensorrt.h"
#include "src/delegate/tensorrt/op/pool_tensorrt.h"
#include "src/delegate/tensorrt/op/pad_tensorrt.h"
#include "src/delegate/tensorrt/op/resize_tensorrt.h"

namespace mindspore::lite {
TensorRTDelegate::~TensorRTDelegate() {
  if (runtime_ != nullptr) {
    delete runtime_;
  }
}
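// Checks whether an NVIDIA GPU driver is available by querying the CUDA
// driver version; without a driver the TensorRT delegate cannot run.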
bool IsHardwareSupport() {
  int driver_version = 0;
  int ret = cudaDriverGetVersion(&driver_version);
  if (ret != cudaSuccess || driver_version == 0) {
    MS_LOG(WARNING) << "No NVIDIA GPU driver found.";
    return false;
  }
  return true;
}

Status TensorRTDelegate::Init() {
  if (!IsHardwareSupport()) {
    return mindspore::kLiteNotSupport;
  }
  std::vector<std::shared_ptr<DeviceInfoContext>> device_list = context_->MutableDeviceInfo();
  auto iter = std::find_if(device_list.begin(), device_list.end(), [](const std::shared_ptr<DeviceInfoContext> &device) {
    return device->GetDeviceType() == DeviceType::kGPU;
  });
  if (iter == device_list.end()) {
    MS_LOG(ERROR) << "no gpu device info found for TensorRT.";
    return mindspore::kLiteError;
  }
  auto gpu_info = (*iter)->Cast<GPUDeviceInfo>();
  if (gpu_info == nullptr) {
    MS_LOG(ERROR) << "cast to GPUDeviceInfo failed for TensorRT.";
    return mindspore::kLiteError;
  }
  device_info_ = gpu_info;
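  // Registry mapping each supported schema primitive type to the factory
  // that creates the corresponding TensorRT op wrapper.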
  op_func_lists_.clear();
  op_func_lists_ = {
    {schema::PrimitiveType_Activation, GetTensorRTOp<ActivationTensorRT>},
    {schema::PrimitiveType_Concat, GetTensorRTOp<ConcateTensorRT>},
    {schema::PrimitiveType_Conv2DFusion, GetTensorRTOp<ConvolutionTensorRT>},
    {schema::PrimitiveType_Conv2dTransposeFusion, GetTensorRTOp<DeconvolutionTensorRT>},
    {schema::PrimitiveType_SubFusion, GetTensorRTOp<ElementWiseTensorRT>},
    {schema::PrimitiveType_DivFusion, GetTensorRTOp<ElementWiseTensorRT>},
    {schema::PrimitiveType_PowFusion, GetTensorRTOp<ElementWiseTensorRT>},
    {schema::PrimitiveType_AddFusion, GetTensorRTOp<ElementWiseTensorRT>},
    {schema::PrimitiveType_MulFusion, GetTensorRTOp<ElementWiseTensorRT>},
    {schema::PrimitiveType_Eltwise, GetTensorRTOp<ElementWiseTensorRT>},
    {schema::PrimitiveType_Gather, GetTensorRTOp<GatherTensorRT>},
    {schema::PrimitiveType_MatMulFusion, GetTensorRTOp<MatMulTensorRT>},
    {schema::PrimitiveType_FullConnection, GetTensorRTOp<MatMulTensorRT>},
    {schema::PrimitiveType_AvgPoolFusion, GetTensorRTOp<PoolTensorRT>},
    {schema::PrimitiveType_MaxPoolFusion, GetTensorRTOp<PoolTensorRT>},
    {schema::PrimitiveType_PadFusion, GetTensorRTOp<PadTensorRT>},
    {schema::PrimitiveType_ReduceFusion, GetTensorRTOp<ReduceTensorRT>},
    {schema::PrimitiveType_Resize, GetTensorRTOp<ResizeTensorRT>},
    {schema::PrimitiveType_ScaleFusion, GetTensorRTOp<ScaleTensorRT>},
    {schema::PrimitiveType_StridedSlice, GetTensorRTOp<SliceTensorRT>},
    {schema::PrimitiveType_Shape, GetTensorRTOp<ShapeTensorRT>},
    {schema::PrimitiveType_Unsqueeze, GetTensorRTOp<ShuffleTensorRT>},
    {schema::PrimitiveType_Squeeze, GetTensorRTOp<ShuffleTensorRT>},
    {schema::PrimitiveType_Reshape, GetTensorRTOp<ShuffleTensorRT>},
    {schema::PrimitiveType_Transpose, GetTensorRTOp<ShuffleTensorRT>},
    {schema::PrimitiveType_Flatten, GetTensorRTOp<ShuffleTensorRT>},
    {schema::PrimitiveType_Softmax, GetTensorRTOp<SoftMaxTensorRT>},
    {schema::PrimitiveType_Sqrt, GetTensorRTOp<UnaryTensorRT>},
  };
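  // Ops whose presence in the network disables hardware resize support (see Build()).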
  unsupport_hw_op_lists_ = {schema::PrimitiveType_Reshape};
  lite::SetCudaDevice(device_info_);
  if (runtime_ == nullptr) {
    runtime_ = new (std::nothrow) TensorRTRuntime();
  }
  if (runtime_ == nullptr) {
    MS_LOG(ERROR) << "create TensorRTRuntime failed.";
    return mindspore::kLiteError;
  }
  if (runtime_->Init() != RET_OK) {
    MS_LOG(ERROR) << "TensorRTRuntime init failed.";
    return mindspore::kLiteError;
  }
  return mindspore::kSuccess;
}

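// Walks the model's kernels in execution order, greedily grouping maximal runs
// of consecutive TensorRT-supported kernels, and replaces each run with a single
// TensorRT subgraph kernel; unsupported kernels are left for other backends.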
Status TensorRTDelegate::Build(DelegateModel<schema::Primitive> *model) {
  lite::SetCudaDevice(device_info_);
  KernelIter from, end;
  std::vector<TensorRTOp *> tensorrt_ops;
  for (KernelIter iter = model->BeginKernelIterator(); iter != model->EndKernelIterator(); iter++) {
    kernel::Kernel *kernel = *iter;
    if (support_hw_resize_) {
      for (auto no_type : unsupport_hw_op_lists_) {
        if (model->GetPrimitive(kernel)->value_type() == no_type) {
          support_hw_resize_ = false;
          MS_LOG(INFO) << "network has an op that does not support hw resize.";
          break;
        }
      }
    }
    auto tensorrt_op = FindTensorRTOp(kernel, model->GetPrimitive(kernel));
    if (tensorrt_op != nullptr) {
      // A non-null tensorrt_op means this kernel can be run by the delegate.
      if (tensorrt_ops.empty()) {
        from = iter;
      }
      tensorrt_ops.push_back(tensorrt_op);
      end = iter;
    } else {
      if (!tensorrt_ops.empty()) {
        auto tensorrt_subgraph = CreateTensorRTGraph(tensorrt_ops, model, from, end);
        if (tensorrt_subgraph == nullptr) {
          MS_LOG(ERROR) << "Create TensorRT Graph failed.";
          return mindspore::kLiteNullptr;
        }
        iter = model->Replace(from, end + 1, tensorrt_subgraph);
        tensorrt_ops.clear();
      }
    }
  }
  if (!tensorrt_ops.empty()) {
    auto tensorrt_subgraph = CreateTensorRTGraph(tensorrt_ops, model, from, end);
    if (tensorrt_subgraph == nullptr) {
      MS_LOG(ERROR) << "Create TensorRT Graph failed.";
      return mindspore::kLiteNullptr;
    }
    model->Replace(from, end + 1, tensorrt_subgraph);
    tensorrt_ops.clear();
  }
  return mindspore::kSuccess;
}

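// Looks up the factory registered for the kernel's primitive type and creates
// the matching TensorRTOp; returns nullptr when the type is unsupported.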
TensorRTOp *TensorRTDelegate::FindTensorRTOp(kernel::Kernel *kernel, const schema::Primitive *primitive) {
  auto in_tensors = kernel->inputs();
  auto out_tensors = kernel->outputs();
  auto name = kernel->name();
  auto node_type = primitive->value_type();
  if (op_func_lists_.find(node_type) != op_func_lists_.end()) {
    return op_func_lists_[node_type](primitive, in_tensors, out_tensors, name);
  } else {
    MS_LOG(WARNING) << "Unsupported op type for TensorRT. kernel->name:" << kernel->name()
                    << " type:" << schema::EnumNamePrimitiveType(primitive->value_type());
    return nullptr;
  }
}

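// Wraps a run of supported ops into a TensorRTSubGraph: collects the boundary
// input/output tensors, links every op to its predecessors and successors,
// then initializes the subgraph and builds the TensorRT network.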
TensorRTSubGraph *TensorRTDelegate::CreateTensorRTGraph(const std::vector<TensorRTOp *> &ops,
                                                        DelegateModel<schema::Primitive> *model, KernelIter from,
                                                        KernelIter end) {
  auto in_tensors = GraphInTensors<TensorRTOp>(ops, model, from, end);
  auto out_tensors = GraphOutTensors<TensorRTOp>(ops, model, from, end);
  auto *tensorrt_graph = new (std::nothrow)
    TensorRTSubGraph(ops, in_tensors, out_tensors, context_, device_info_, runtime_, support_hw_resize_);
  if (tensorrt_graph == nullptr) {
    MS_LOG(ERROR) << "new tensorrt_graph failed.";
    return nullptr;
  }
  // 1. For every op, find pre and next ops
  FindPreNextOps<TensorRTOp>(ops);

  // 2. Init TensorRT SubGraph.
  auto ret = tensorrt_graph->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "TensorRTGraph init failed.";
    delete tensorrt_graph;
    return nullptr;
  }

  // 3. Build TensorRT Model.
  ret = tensorrt_graph->BuildTensorRTGraph();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "TensorRTGraph build failed.";
    delete tensorrt_graph;
    return nullptr;
  }

  return tensorrt_graph;
}
}  // namespace mindspore::lite