/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/delegate/tensorrt/tensorrt_utils.h"
#include <cuda_runtime_api.h>
#include <map>

namespace mindspore::lite {
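// Builds an nvinfer1::Dims of rank `size` with every dimension set to `data`.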
nvinfer1::Dims ConvertCudaDims(int data, size_t size) {
  nvinfer1::Dims dims{};
  if (size > static_cast<size_t>(dims.MAX_DIMS)) {
    MS_LOG(ERROR) << "invalid shape size: " << size;
    return dims;
  }
  dims.nbDims = static_cast<int>(size);
  for (size_t i = 0; i < size; i++) {
    dims.d[i] = data;
  }
  return dims;
}

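// Builds an nvinfer1::Dims of rank `size` from `data`, which is assumed to point to at least `size` int values.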
nvinfer1::Dims ConvertCudaDims(const void *data, int64_t size) {
  nvinfer1::Dims dims{};
  if (size > static_cast<int64_t>(dims.MAX_DIMS)) {
    MS_LOG(ERROR) << "invalid shape size: " << size;
    return dims;
  }
  dims.nbDims = static_cast<int>(size);
  const int *dims_data = reinterpret_cast<const int *>(data);
  for (int i = 0; i < size; i++) {
    dims.d[i] = dims_data[i];
  }
  return dims;
}

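// Checks whether TensorRT dims match an MS shape; dynamic dims (-1) are treated as wildcards.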
bool SameDims(nvinfer1::Dims dims, const std::vector<int64_t> &shape) {
  if (dims.nbDims != static_cast<int>(shape.size())) {
    return false;
  }
  // skip dynamic dims (-1): only the known dims are compared
  for (int i = 0; i < dims.nbDims; i++) {
    if (dims.d[i] == -1) {
      continue;
    }
    if (dims.d[i] != shape[i]) {
      return false;
    }
  }
  return true;
}

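// Converts TensorRT dims to an MS shape vector.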
std::vector<int64_t> ConvertMSShape(const nvinfer1::Dims dims) {
  std::vector<int64_t> shape;
  for (int i = 0; i < dims.nbDims; i++) {
    shape.push_back(dims.d[i]);
  }
  return shape;
}

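// Adds a shuffle layer that applies `permutation` as a transpose on `input`.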
nvinfer1::IShuffleLayer *SetTranspose(nvinfer1::INetworkDefinition *network, const nvinfer1::ITensor &input,
                                      nvinfer1::Permutation permutation) {
  nvinfer1::IShuffleLayer *layer = network->addShuffle(const_cast<nvinfer1::ITensor &>(input));
  if (layer == nullptr) {
    MS_LOG(ERROR) << "failed to create ShuffleLayer when creating transpose op.";
    return nullptr;
  }
  layer->setFirstTranspose(permutation);
  return layer;
}

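// Maps an MS data type to its TensorRT counterpart; unmapped types fall back to kFLOAT.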
nvinfer1::DataType ConvertDataType(DataType type_id) {
  std::map<DataType, nvinfer1::DataType> data_type_map = {{DataType::kNumberTypeInt8, nvinfer1::DataType::kINT8},
                                                          {DataType::kNumberTypeInt32, nvinfer1::DataType::kINT32},
                                                          {DataType::kNumberTypeFloat32, nvinfer1::DataType::kFLOAT},
                                                          {DataType::kNumberTypeFloat16, nvinfer1::DataType::kHALF}};
  auto iter = data_type_map.find(type_id);
  nvinfer1::DataType data_type;
  if (iter != data_type_map.end()) {
    data_type = iter->second;
  } else {
    data_type = nvinfer1::DataType::kFLOAT;
    MS_LOG(WARNING) << "unsupported data type for TensorRT, falling back to kFLOAT";
  }
  return data_type;
}

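// Inserts a transpose from NHWC to NCHW layout.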
nvinfer1::IShuffleLayer *NHWC2NCHW(nvinfer1::INetworkDefinition *network, const nvinfer1::ITensor &input) {
  // NHWC(0,1,2,3) -> NCHW: output axis i takes source axis perm[i]
  nvinfer1::Permutation perm{{0, 3, 1, 2}};
  return SetTranspose(network, input, perm);
}

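// Inserts a transpose from NCHW to NHWC layout.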
nvinfer1::IShuffleLayer *NCHW2NHWC(nvinfer1::INetworkDefinition *network, const nvinfer1::ITensor &input) {
  // NCHW(0,1,2,3) -> NHWC: output axis i takes source axis perm[i]
  nvinfer1::Permutation perm{{0, 2, 3, 1}};
  return SetTranspose(network, input, perm);
}

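// Wraps a constant MSTensor as a TensorRT constant layer and returns its output tensor.
// Note: the Weights point at the tensor's buffer; per TensorRT's Weights contract that buffer
// should stay valid until the engine is built.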
nvinfer1::ITensor *ConvertConstantTensor(nvinfer1::INetworkDefinition *network, const mindspore::MSTensor &ms_tensor) {
  if (network == nullptr) {
    MS_LOG(ERROR) << "network is null for ConvertConstantTensor";
    return nullptr;
  }
  nvinfer1::Dims dims = ConvertCudaDims(ms_tensor.Shape());
  nvinfer1::DataType data_type = ConvertDataType(ms_tensor.DataType());
  if (ms_tensor.Data() == nullptr) {
    MS_LOG(ERROR) << "ConvertConstantTensor from a MSTensor with nullptr data: " << ms_tensor.Name();
    return nullptr;
  }
  nvinfer1::Weights weights{data_type, ms_tensor.Data().get(), ms_tensor.ElementNum()};
  nvinfer1::IConstantLayer *constant_tensor = network->addConstant(dims, weights);
  if (constant_tensor == nullptr) {
    MS_LOG(ERROR) << "create constant_tensor failed.";
    return nullptr;
  }
  auto name = ms_tensor.Name() + "_constant_layer";
  constant_tensor->setName(name.c_str());
  return constant_tensor->getOutput(0);
}

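// Creates a rank-`shape_size` constant of shape (1, ..., 1) holding a single scalar, suitable for broadcasting.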
nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, const void *value,
                                          const DataType data_type) {
  if (network == nullptr) {
    MS_LOG(ERROR) << "network is null for ConvertScalarToITensor";
    return nullptr;
  }
  nvinfer1::Dims dims = ConvertCudaDims(1, shape_size);
  nvinfer1::Weights weights{ConvertDataType(data_type), value, 1};
  nvinfer1::IConstantLayer *constant_tensor = network->addConstant(dims, weights);
  if (constant_tensor == nullptr) {
    MS_LOG(ERROR) << "create constant_tensor failed.";
    return nullptr;
  }
  return constant_tensor->getOutput(0);
}

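// Maps an MS activation type to TensorRT activation parameters (type plus optional alpha/beta);
// unmapped types fall back to kRELU with a warning.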
ActivationParams ConvertActivationType(schema::ActivationType activation_type) {
  std::map<schema::ActivationType, ActivationParams> action_map = {
    {schema::ActivationType_RELU, ActivationParams{nvinfer1::ActivationType::kRELU, false, 0, false, 0}},
    {schema::ActivationType_SIGMOID, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}},
    {schema::ActivationType_TANH, ActivationParams{nvinfer1::ActivationType::kTANH, false, 0, false, 0}},
    {schema::ActivationType_LEAKY_RELU, ActivationParams{nvinfer1::ActivationType::kLEAKY_RELU, true, 0, false, 0}},
    {schema::ActivationType_ELU, ActivationParams{nvinfer1::ActivationType::kELU, true, 0, false, 0}},
    {schema::ActivationType_SELU, ActivationParams{nvinfer1::ActivationType::kSELU, true, 0, true, 0}},
    {schema::ActivationType_SOFTSIGN, ActivationParams{nvinfer1::ActivationType::kSOFTSIGN, false, 0, false, 0}},
    {schema::ActivationType_SOFTPLUS, ActivationParams{nvinfer1::ActivationType::kSOFTPLUS, true, 0, true, 0}},
    {schema::ActivationType_THRESHOLDRELU,
     ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, true, 0, false, 0}},
    {schema::ActivationType_RELU6, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 6}},
    {schema::ActivationType_RELU1, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 1}}};
  auto iter = action_map.find(activation_type);
  ActivationParams action_param = ActivationParams{nvinfer1::ActivationType::kRELU, false, 0, false, 0};
  if (iter != action_map.end()) {
    action_param = iter->second;
  } else {
    MS_LOG(WARNING) << "Unsupported activation type for TensorRT: " << activation_type;
  }
  return action_param;
}

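// Like ConvertConstantTensor, but left-pads the tensor shape with leading 1s up to `expand_shape_size` dims.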
nvinfer1::ITensor *ConvertTensorWithExpandDims(nvinfer1::INetworkDefinition *network,
                                               const mindspore::MSTensor &ms_tensor, size_t expand_shape_size) {
  if (network == nullptr) {
    MS_LOG(ERROR) << "network is null for ConvertTensorWithExpandDims";
    return nullptr;
  }
  size_t shape_size = ms_tensor.Shape().size();
  if (expand_shape_size < shape_size) {
    MS_LOG(ERROR) << "expand_shape_size " << expand_shape_size << " is less than tensor rank " << shape_size;
    return nullptr;
  }
  std::vector<int64_t> shape(expand_shape_size);
  size_t expand_size = expand_shape_size - shape_size;
  for (size_t i = 0; i < expand_shape_size; ++i) {
    if (i < expand_size) {
      shape[i] = 1;
    } else {
      shape[i] = ms_tensor.Shape()[i - expand_size];
    }
  }
  nvinfer1::Dims dims = ConvertCudaDims(shape);
  nvinfer1::DataType data_type = ConvertDataType(ms_tensor.DataType());
  if (ms_tensor.Data() == nullptr) {
    MS_LOG(ERROR) << "ConvertTensorWithExpandDims from a MSTensor with nullptr data";
    return nullptr;
  }
  nvinfer1::Weights weights{data_type, ms_tensor.Data().get(), ms_tensor.ElementNum()};
  nvinfer1::IConstantLayer *constant_tensor = network->addConstant(dims, weights);
  if (constant_tensor == nullptr) {
    MS_LOG(ERROR) << "create constant_tensor failed.";
    return nullptr;
  }
  auto name = ms_tensor.Name() + "_constant_layer";
  constant_tensor->setName(name.c_str());
  return constant_tensor->getOutput(0);
}

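// Transposes conv weights from NHWC to NCHW, dispatching on data type. The packed buffer is
// malloc'ed here and returned through `pack_weight`; the caller owns it and must free it.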
nvinfer1::Weights TransposeWeight(const mindspore::MSTensor &ms_tensor, void **pack_weight) {
  nvinfer1::Weights weights{};
  MS_LOG(DEBUG) << "ms_tensor.DataType(): " << static_cast<int>(ms_tensor.DataType());
  if (ms_tensor.DataType() == DataType::kNumberTypeFloat16) {
    weights.type = nvinfer1::DataType::kHALF;
    weights.count = ms_tensor.ElementNum();
    void *pack_weight_tmp = malloc(ms_tensor.DataSize());
    if (pack_weight_tmp == nullptr) {
      MS_LOG(ERROR) << "Malloc buffer failed.";
      return weights;
    }
    MS_ASSERT(ms_tensor.Data());
    auto weight_shape = ms_tensor.Shape();
    PackNHWCToNCHWFp16(ms_tensor.Data().get(), pack_weight_tmp, weight_shape[0], weight_shape[1] * weight_shape[2],
                       weight_shape[3], 0, 0);
    *pack_weight = pack_weight_tmp;
    weights.values = pack_weight_tmp;
    return weights;
  } else {
    return TransposeWeightFP32(ms_tensor, pack_weight);
  }
}

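// FP32 path of the weight transpose; non-float32 data is reinterpreted as float32 with a warning.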
nvinfer1::Weights TransposeWeightFP32(const mindspore::MSTensor &ms_tensor, void **pack_weight) {
  // usage note: the malloc'ed address is stored in *pack_weight; the caller must keep that pointer
  // and free it on destruction
  nvinfer1::Weights weights{};
  weights.count = ms_tensor.ElementNum();
  if (lite::ConvertDataType(ms_tensor.DataType()) != nvinfer1::DataType::kFLOAT) {
    MS_LOG(WARNING) << "weights data type is not float32";
  }
  weights.type = nvinfer1::DataType::kFLOAT;
  auto weight_shape = ms_tensor.Shape();
  const void *src_ptr = ms_tensor.Data().get();
  if (src_ptr == nullptr) {
    MS_LOG(ERROR) << "TransposeWeight from a MSTensor with nullptr data";
    return weights;
  }

  float *pack_weight_tmp = reinterpret_cast<float *>(malloc(ms_tensor.ElementNum() * sizeof(float)));
  if (pack_weight_tmp == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return weights;
  }
  PackNHWCToNCHWFp32(src_ptr, pack_weight_tmp, weight_shape[0], weight_shape[1] * weight_shape[2], weight_shape[3], 0,
                     0);
  weights.values = pack_weight_tmp;
  *pack_weight = pack_weight_tmp;
  return weights;
}

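// Wraps an MSTensor's buffer as nvinfer1::Weights without copying or transposing.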
nvinfer1::Weights ConvertWeight(const mindspore::MSTensor &ms_tensor) {
  nvinfer1::Weights weights{};
  weights.type = ConvertDataType(ms_tensor.DataType());
  weights.values = ms_tensor.Data().get();
  weights.count = ms_tensor.ElementNum();
  if (weights.values == nullptr) {
    MS_LOG(ERROR) << "ConvertWeight from a MSTensor with nullptr data";
  }
  return weights;
}

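// Switches the calling thread to the device id from device_info_ when it differs from the current device.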
void SetCudaDevice(std::shared_ptr<GPUDeviceInfo> device_info_) {
  int device = 0;
  auto ret = cudaGetDevice(&device);
  if (ret != cudaSuccess) {
    MS_LOG(WARNING) << "cudaGetDevice failed, the device id is unreliable. error code: " << ret;
  }
  int set_device_id = static_cast<int>(device_info_->GetDeviceID());
  int deviceCnt = 0;

  ret = cudaGetDeviceCount(&deviceCnt);
  if (ret != cudaSuccess) {
    MS_LOG(ERROR) << "cudaGetDeviceCount failed.";
    return;
  }

  if (set_device_id > deviceCnt - 1) {
    MS_LOG(WARNING) << "invalid input device id " << set_device_id << " for current device count " << deviceCnt;
  } else if (device != set_device_id) {
    ret = cudaSetDevice(set_device_id);
    if (ret != cudaSuccess) {
      MS_LOG(WARNING) << "cudaSetDevice failed, error code: " << ret;
    }
  }
  if (cudaGetDevice(&device) != cudaSuccess) {
    MS_LOG(WARNING) << "cudaGetDevice failed, the device id is unreliable.";
  }
  MS_LOG(DEBUG) << "cuda is running on device: " << device;
}

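// Infers the output format produced by applying `perm` to a tensor in `input_format`.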
Format GetOutputFormat(Format input_format, nvinfer1::Permutation perm) {
  if (input_format == Format::NHWC) {
    // matches the permutation used by NHWC2NCHW
    if (perm.order[0] == 0 && perm.order[1] == 3 && perm.order[2] == 1 && perm.order[3] == 2) {
      return Format::NCHW;
    }
  } else if (input_format == Format::NCHW) {
    // matches the permutation used by NCHW2NHWC
    if (perm.order[0] == 0 && perm.order[1] == 2 && perm.order[2] == 3 && perm.order[3] == 1) {
      return Format::NHWC;
    }
  }
  MS_LOG(WARNING) << "unrecognized transpose permutation, keeping input format " << input_format;
  return input_format;
}

int ConvertAxisFromNHWC2NCHW(int nhwc_axis) {
  // NHWC: N=0, H=1, W=2, C=3 -> NCHW: N=0, C=1, H=2, W=3
  if (nhwc_axis > kNHWC_C) {
    return nhwc_axis;
  }
  switch (nhwc_axis) {
    case kNHWC_N:
      return kNCHW_N;
    case kNHWC_H:
      return kNCHW_H;
    case kNHWC_W:
      return kNCHW_W;
    case kNHWC_C:
      return kNCHW_C;
    default:
      MS_LOG(ERROR) << "invalid input axis for nhwc: " << nhwc_axis;
  }
  return nhwc_axis;
}

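// Transposes fp16 data (handled as uint16_t) from NHWC to NCHW using 8x8 tiles.
// `task_id` and `thread_count` slice the plane dimension for multithreaded packing; pass 0, 0 for
// single-threaded use.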
void PackNHWCToNCHWFp16(const void *src, void *dst, size_t batches, size_t plane, size_t channel, size_t task_id,
                        size_t thread_count) {
  size_t hw8 = plane / C8NUM;
  size_t task_start = 0;
  size_t task_end = plane;
  if (thread_count > 0) {
    // split the plane dimension across threads in multiples of C8NUM
    size_t offset_hw = UP_DIV(hw8, thread_count) * C8NUM;
    task_start = offset_hw * task_id;
    size_t count = plane - task_start;
    if (count == 0) {
      return;
    }
    task_end = (task_id + 1) == thread_count ? plane : MSMIN(plane, task_start + offset_hw);
    hw8 = task_start + ((task_end - task_start) >= offset_hw ? offset_hw : 0);
  } else {
    hw8 *= C8NUM;
  }
  size_t c8 = channel / C8NUM * C8NUM;
  size_t batch = plane * channel;
  for (size_t n = 0; n < batches; n++) {
    const uint16_t *src_batch = static_cast<const uint16_t *>(src) + n * batch;
    uint16_t *dst_batch = static_cast<uint16_t *>(dst) + n * batch;
    size_t hw = task_start;
    // transpose full 8x8 tiles
    for (; hw < hw8; hw += C8NUM) {
      size_t c = 0;
      for (; c < c8; c += C8NUM) {
        const uint16_t *src_ptr = src_batch + hw * channel + c;
        uint16_t *dst_ptr = dst_batch + c * plane + hw;
        for (size_t tr = 0; tr < C8NUM; tr++) {
          for (size_t tc = 0; tc < C8NUM; tc++) {
            dst_ptr[tc * plane + tr] = src_ptr[tr * channel + tc];
          }
        }
      }
      // leftover channels of the 8-row block
      for (; c < channel; c++) {
        const uint16_t *src_ptr = src_batch + hw * channel + c;
        uint16_t *dst_ptr = dst_batch + c * plane + hw;
        for (size_t i = 0; i < C8NUM; i++) {
          dst_ptr[i] = src_ptr[i * channel];
        }
      }
    }
    // leftover rows beyond the last full 8-row block
    for (; hw < task_end; hw++) {
      const uint16_t *src_ptr = src_batch + hw * channel;
      uint16_t *dst_ptr = dst_batch + hw;
      for (size_t i = 0; i < channel; i++) {
        dst_ptr[i * plane] = src_ptr[i];
      }
    }
  }
}

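// Formats a debug string describing the tensor's name, format, and dimensions.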
std::string GetTensorFormat(nvinfer1::ITensor *trt_tensor, mindspore::Format format) {
  nvinfer1::Dims dims = trt_tensor->getDimensions();
  std::string out_string = "tensor " + std::string(trt_tensor->getName()) + ": format (NHWC:1, NCHW:0) is " +
                           std::to_string(static_cast<int>(format)) + ", dims is ";
  std::string dim_string = "[";
  for (int i = 0; i < dims.nbDims; i++) {
    dim_string += std::to_string(dims.d[i]);
    if (i != dims.nbDims - 1) {
      dim_string += ", ";
    }
  }
  dim_string += "]";
  out_string += dim_string;
  return out_string;
}
}  // namespace mindspore::lite