• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/delegate/tensorrt/tensorrt_utils.h"
18 #include <cuda_runtime_api.h>
19 #include <map>
20 
21 namespace mindspore::lite {
ConvertCudaDims(int data,size_t size)22 nvinfer1::Dims ConvertCudaDims(int data, size_t size) {
23   nvinfer1::Dims dims{};
24   if (size > static_cast<size_t>(dims.MAX_DIMS)) {
25     MS_LOG(ERROR) << "invalid shape size: " << size;
26     return dims;
27   }
28   dims.nbDims = size;
29   for (size_t i = 0; i < size; i++) {
30     dims.d[i] = data;
31   }
32   return dims;
33 }
34 
ConvertCudaDims(const void * data,int64_t size)35 nvinfer1::Dims ConvertCudaDims(const void *data, int64_t size) {
36   nvinfer1::Dims dims{};
37   if (size > static_cast<int64_t>(dims.MAX_DIMS)) {
38     MS_LOG(ERROR) << "invalid shape size: " << size;
39     return dims;
40   }
41   dims.nbDims = size;
42   const int *dims_data = reinterpret_cast<const int *>(data);
43   for (int i = 0; i < size; i++) {
44     dims.d[i] = *(dims_data + i);
45   }
46   return dims;
47 }
48 
SameDims(nvinfer1::Dims dims,const std::vector<int64_t> & shape)49 bool SameDims(nvinfer1::Dims dims, const std::vector<int64_t> &shape) {
50   if (dims.nbDims != static_cast<int>(shape.size())) {
51     return false;
52   }
53   // dynamic dim, only channel dim know
54   for (int i = 0; i < dims.nbDims; i++) {
55     if (dims.d[i] == -1) {
56       continue;
57     }
58     if (dims.d[i] != shape[i]) {
59       return false;
60     }
61   }
62   return true;
63 }
64 
ConvertMSShape(const nvinfer1::Dims dims)65 std::vector<int64_t> ConvertMSShape(const nvinfer1::Dims dims) {
66   std::vector<int64_t> shape;
67   for (int i = 0; i < dims.nbDims; i++) {
68     shape.push_back(dims.d[i]);
69   }
70   return shape;
71 }
72 
SetTranspose(nvinfer1::INetworkDefinition * network,const nvinfer1::ITensor & input,nvinfer1::Permutation permutation)73 nvinfer1::IShuffleLayer *SetTranspose(nvinfer1::INetworkDefinition *network, const nvinfer1::ITensor &input,
74                                       nvinfer1::Permutation permutation) {
75   nvinfer1::IShuffleLayer *layer = network->addShuffle(const_cast<nvinfer1::ITensor &>(input));
76   if (layer == nullptr) {
77     MS_LOG(ERROR) << "failed to create ShuffleLayer when create transpose op.";
78     return nullptr;
79   }
80   layer->setFirstTranspose(permutation);
81   return layer;
82 }
83 
ConvertDataType(DataType type_id)84 nvinfer1::DataType ConvertDataType(DataType type_id) {
85   std::map<DataType, nvinfer1::DataType> data_type_map = {{DataType::kNumberTypeInt8, nvinfer1::DataType::kINT8},
86                                                           {DataType::kNumberTypeInt32, nvinfer1::DataType::kINT32},
87                                                           {DataType::kNumberTypeFloat32, nvinfer1::DataType::kFLOAT},
88                                                           {DataType::kNumberTypeFloat16, nvinfer1::DataType::kHALF}};
89   auto iter = data_type_map.find(type_id);
90   nvinfer1::DataType data_type;
91   if (iter != data_type_map.end()) {
92     data_type = iter->second;
93   } else {
94     data_type = nvinfer1::DataType::kFLOAT;
95     MS_LOG(WARNING) << "invalid data_type for TensorRT, need check";
96   }
97   return data_type;
98 }
99 
NHWC2NCHW(nvinfer1::INetworkDefinition * network,const nvinfer1::ITensor & input)100 nvinfer1::IShuffleLayer *NHWC2NCHW(nvinfer1::INetworkDefinition *network, const nvinfer1::ITensor &input) {
101   // NHWC 0123 NCHW 0312
102   nvinfer1::Permutation perm{{0, 3, 1, 2}};
103   return SetTranspose(network, input, perm);
104 }
105 
NCHW2NHWC(nvinfer1::INetworkDefinition * network,const nvinfer1::ITensor & input)106 nvinfer1::IShuffleLayer *NCHW2NHWC(nvinfer1::INetworkDefinition *network, const nvinfer1::ITensor &input) {
107   // NCHW 0123 NHWC 0231
108   nvinfer1::Permutation perm{{0, 2, 3, 1}};
109   return SetTranspose(network, input, perm);
110 }
111 
ConvertConstantTensor(nvinfer1::INetworkDefinition * network,const mindspore::MSTensor & ms_tensor)112 nvinfer1::ITensor *ConvertConstantTensor(nvinfer1::INetworkDefinition *network, const mindspore::MSTensor &ms_tensor) {
113   if (network == nullptr) {
114     MS_LOG(ERROR) << "network is null for ConvertConstantTensor";
115     return nullptr;
116   }
117   nvinfer1::Dims dims = ConvertCudaDims(ms_tensor.Shape());
118   nvinfer1::DataType data_type = ConvertDataType(ms_tensor.DataType());
119   if (ms_tensor.Data() == nullptr) {
120     MS_LOG(ERROR) << "ConvertConstantTensor from a MSTensor with nullptr data: " << ms_tensor.Name();
121     return nullptr;
122   }
123   nvinfer1::Weights weights{data_type, ms_tensor.Data().get(), ms_tensor.ElementNum()};
124   nvinfer1::IConstantLayer *constant_tensor = network->addConstant(dims, weights);
125   if (constant_tensor == nullptr) {
126     MS_LOG(ERROR) << "create constant_tensor failed.";
127     return nullptr;
128   }
129   auto name = ms_tensor.Name() + "_constant_layer";
130   constant_tensor->setName(name.c_str());
131   return constant_tensor->getOutput(0);
132 }
133 
ConvertScalarToITensor(nvinfer1::INetworkDefinition * network,size_t shape_size,const void * value,const DataType data_type)134 nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, const void *value,
135                                           const DataType data_type) {
136   nvinfer1::Dims dims = ConvertCudaDims(1, shape_size);
137   nvinfer1::Weights weights{ConvertDataType(data_type), value, 1};
138   nvinfer1::IConstantLayer *constant_tensor = network->addConstant(dims, weights);
139   if (constant_tensor == nullptr) {
140     MS_LOG(ERROR) << "create constant_tensor failed.";
141     return nullptr;
142   }
143   return constant_tensor->getOutput(0);
144 }
145 
ConvertActivationType(schema::ActivationType activation_type)146 ActivationParams ConvertActivationType(schema::ActivationType activation_type) {
147   std::map<schema::ActivationType, ActivationParams> action_map = {
148     {schema::ActivationType_RELU, ActivationParams{nvinfer1::ActivationType::kRELU, false, 0, false, 0}},
149     {schema::ActivationType_SIGMOID, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}},
150     {schema::ActivationType_TANH, ActivationParams{nvinfer1::ActivationType::kTANH, false, 0, false, 0}},
151     {schema::ActivationType_LEAKY_RELU, ActivationParams{nvinfer1::ActivationType::kLEAKY_RELU, true, 0, false, 0}},
152     {schema::ActivationType_ELU, ActivationParams{nvinfer1::ActivationType::kELU, true, 0, false, 0}},
153     {schema::ActivationType_SELU, ActivationParams{nvinfer1::ActivationType::kSELU, true, 0, true, 0}},
154     {schema::ActivationType_SOFTSIGN, ActivationParams{nvinfer1::ActivationType::kSOFTSIGN, false, 0, false, 0}},
155     {schema::ActivationType_SOFTPLUS, ActivationParams{nvinfer1::ActivationType::kSOFTPLUS, true, 0, true, 0}},
156     {schema::ActivationType_THRESHOLDRELU,
157      ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, true, 0, false, 0}},
158     {schema::ActivationType_RELU6, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 6}},
159     {schema::ActivationType_RELU1, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 1}}};
160   auto iter = action_map.find(activation_type);
161   ActivationParams action_param = ActivationParams{nvinfer1::ActivationType::kRELU, false, 0, false, 0};
162   if (iter != action_map.end()) {
163     action_param = iter->second;
164   } else {
165     MS_LOG(WARNING) << "Unsupported op action type for TensorRT: " << activation_type;
166   }
167   return action_param;
168 }
169 
ConvertTensorWithExpandDims(nvinfer1::INetworkDefinition * network,const mindspore::MSTensor & ms_tensor,size_t expand_shape_size)170 nvinfer1::ITensor *ConvertTensorWithExpandDims(nvinfer1::INetworkDefinition *network,
171                                                const mindspore::MSTensor &ms_tensor, size_t expand_shape_size) {
172   if (network == nullptr) {
173     MS_LOG(ERROR) << "network is null for ConvertConstantTensor";
174     return nullptr;
175   }
176   std::vector<int64_t> shape(expand_shape_size);
177   size_t shape_size = ms_tensor.Shape().size();
178   size_t expand_size = expand_shape_size - shape_size;
179   for (size_t i = 0; i < expand_shape_size; ++i) {
180     if (i < expand_size) {
181       shape[i] = 1;
182     } else {
183       shape[i] = ms_tensor.Shape()[i - expand_size];
184     }
185   }
186   nvinfer1::Dims dims = ConvertCudaDims(shape);
187   nvinfer1::DataType data_type = ConvertDataType(ms_tensor.DataType());
188   if (ms_tensor.Data() == nullptr) {
189     MS_LOG(ERROR) << "ConvertTensorWithExpandDims from a MSTensor with nullptr data";
190     return nullptr;
191   }
192   nvinfer1::Weights weights{data_type, ms_tensor.Data().get(), ms_tensor.ElementNum()};
193   nvinfer1::IConstantLayer *constant_tensor = network->addConstant(dims, weights);
194   if (constant_tensor == nullptr) {
195     MS_LOG(ERROR) << "create constant_tensor failed.";
196     return nullptr;
197   }
198   auto name = ms_tensor.Name() + "_constant_layer";
199   constant_tensor->setName(name.c_str());
200   return constant_tensor->getOutput(0);
201 }
202 
TransposeWeight(const mindspore::MSTensor & ms_tensor,void ** pack_weight)203 nvinfer1::Weights TransposeWeight(const mindspore::MSTensor &ms_tensor, void **pack_weight) {
204   nvinfer1::Weights weights{};
205   MS_LOG(DEBUG) << "ms_tensor.DataType(): " << static_cast<int>(ms_tensor.DataType());
206   if (ms_tensor.DataType() == DataType::kNumberTypeFloat16) {
207     weights.type = nvinfer1::DataType::kHALF;
208     weights.count = ms_tensor.ElementNum();
209     void *pack_weight_tmp = malloc(ms_tensor.DataSize());
210     if (pack_weight_tmp == nullptr) {
211       MS_LOG(ERROR) << "Malloc buffer failed.";
212       return weights;
213     }
214     MS_ASSERT(ms_tensor.Data());
215     auto weight_shape = ms_tensor.Shape();
216     PackNHWCToNCHWFp16(ms_tensor.Data().get(), pack_weight_tmp, weight_shape[0], weight_shape[1] * weight_shape[2],
217                        weight_shape[3], 0, 0);
218     *pack_weight = pack_weight_tmp;
219     weights.values = pack_weight_tmp;
220     return weights;
221   } else {
222     return TransposeWeightFP32(ms_tensor, pack_weight);
223   }
224 }
225 
TransposeWeightFP32(const mindspore::MSTensor & ms_tensor,void ** pack_weight)226 nvinfer1::Weights TransposeWeightFP32(const mindspore::MSTensor &ms_tensor, void **pack_weight) {
227   // usage notice: malloc addr saved to pack_weight, save pack_weight ptr and free it when deconstruct
228   nvinfer1::Weights weights{};
229   weights.count = ms_tensor.ElementNum();
230   if (lite::ConvertDataType(ms_tensor.DataType()) != nvinfer1::DataType::kFLOAT) {
231     MS_LOG(WARNING) << "weights data type is not float32";
232   }
233   weights.type = nvinfer1::DataType::kFLOAT;
234   auto weight_shape = ms_tensor.Shape();
235   const void *src_ptr = ms_tensor.Data().get();
236   if (src_ptr == nullptr) {
237     MS_LOG(ERROR) << "TransposeWeight from a MSTensor with nullptr data";
238     return weights;
239   }
240 
241   float *pack_weight_tmp = reinterpret_cast<float *>(malloc(ms_tensor.ElementNum() * sizeof(float)));
242   if (pack_weight_tmp == nullptr) {
243     MS_LOG(ERROR) << "Malloc buffer failed.";
244     return weights;
245   }
246   PackNHWCToNCHWFp32(src_ptr, pack_weight_tmp, weight_shape[0], weight_shape[1] * weight_shape[2], weight_shape[3], 0,
247                      0);
248   weights.values = pack_weight_tmp;
249   *pack_weight = pack_weight_tmp;
250   return weights;
251 }
252 
ConvertWeight(const mindspore::MSTensor & ms_tensor)253 nvinfer1::Weights ConvertWeight(const mindspore::MSTensor &ms_tensor) {
254   nvinfer1::Weights weights{};
255   weights.type = ConvertDataType(ms_tensor.DataType());
256   weights.values = ms_tensor.Data().get();
257   weights.count = ms_tensor.ElementNum();
258   if (weights.values == nullptr) {
259     MS_LOG(ERROR) << "ConvertWeight from a MSTensor with nullptr data";
260   }
261   return weights;
262 }
263 
SetCudaDevice(std::shared_ptr<GPUDeviceInfo> device_info_)264 void SetCudaDevice(std::shared_ptr<GPUDeviceInfo> device_info_) {
265   int device = 0;
266   auto ret = cudaGetDevice(&device);
267   if (ret != cudaSuccess) {
268     MS_LOG(WARNING) << "cudaGetDevice failed, device is untrustable. error code: " << ret;
269   }
270   int set_device_id = static_cast<int>(device_info_->GetDeviceID());
271   int deviceCnt = 0;
272 
273   ret = cudaGetDeviceCount(&deviceCnt);
274   if (ret != cudaSuccess) {
275     MS_LOG(ERROR) << "cudaGetDeviceCount failed.";
276     return;
277   }
278 
279   if (set_device_id > deviceCnt - 1) {
280     MS_LOG(WARNING) << "invalid input device id as " << set_device_id << " for current device count " << deviceCnt;
281   } else if (device != set_device_id) {
282     ret = cudaSetDevice(set_device_id);
283     if (ret != cudaSuccess) {
284       MS_LOG(WARNING) << "cudaSetDevice failed, error code: " << ret;
285     }
286   }
287   if (cudaGetDevice(&device) != cudaSuccess) {
288     MS_LOG(WARNING) << "cudaGetDevice failed, device is untrustable.";
289   }
290   MS_LOG(DEBUG) << "cuda is running on device: " << device;
291 }
GetOutputFormat(Format input_format,nvinfer1::Permutation perm)292 Format GetOutputFormat(Format input_format, nvinfer1::Permutation perm) {
293   if (input_format == Format::NHWC) {
294     if (perm.order[0] == 0 && perm.order[1] == 3 && perm.order[2] == 2 && perm.order[3] == 1) {
295       return Format::NCHW;
296     }
297   } else if (input_format == Format::NCHW) {
298     if (perm.order[0] == 0 && perm.order[1] == 2 && perm.order[2] == 3 && perm.order[3] == 1) {
299       return Format::NHWC;
300     }
301   }
302   MS_LOG(WARNING) << "transpose out format needs to check for " << input_format;
303   return input_format;
304 }
ConvertAxisFromNHWC2NCHW(int nhwc_axis)305 int ConvertAxisFromNHWC2NCHW(int nhwc_axis) {
306   // N0H1W2C3->N0C1H2W3
307   if (nhwc_axis > kNHWC_C) {
308     return nhwc_axis;
309   }
310   switch (nhwc_axis) {
311     case kNHWC_N:
312       return kNCHW_N;
313     case kNHWC_H:
314       return kNCHW_H;
315     case kNHWC_W:
316       return kNCHW_W;
317     case kNHWC_C:
318       return kNCHW_C;
319     default:
320       MS_LOG(ERROR) << "invalid input axis for nhwc: " << nhwc_axis;
321   }
322   return nhwc_axis;
323 }
324 
// Transpose 16-bit (fp16, handled opaquely as uint16_t) data from NHWC to
// NCHW layout, batch by batch. The H and W extents are fused into `plane`
// (plane = H * W). When thread_count > 0 the plane dimension is partitioned
// across workers and this call handles only the slice for `task_id`; with
// thread_count == 0 the whole plane is processed. The body tiles the
// plane x channel transpose in C8NUM x C8NUM blocks for cache locality, with
// scalar tail loops for the remainders.
// NOTE(review): src and dst must not alias, and dst must hold
// batches * plane * channel elements — not checked here; confirm at callers.
void PackNHWCToNCHWFp16(const void *src, void *dst, size_t batches, size_t plane, size_t channel, size_t task_id,
                        size_t thread_count) {
  size_t hw8 = plane / C8NUM;
  size_t task_start = 0;
  size_t task_end = plane;
  if (thread_count > 0) {
    // Split the C8NUM-aligned part of `plane` evenly across threads.
    size_t offset_hw = UP_DIV(hw8, thread_count) * C8NUM;
    task_start = offset_hw * task_id;
    size_t count = plane - task_start;
    if (count == 0) {
      // This task's slice starts past the end: nothing to do.
      return;
    }
    // The last task also sweeps the unaligned tail of the plane.
    task_end = (task_id + 1) == thread_count ? plane : MSMIN(plane, task_start + offset_hw);
    // hw8 becomes the end of this task's tiled (C8NUM-aligned) region; a
    // slice shorter than offset_hw has no tiled region at all.
    hw8 = task_start + ((task_end - task_start) >= offset_hw ? offset_hw : 0);
  } else {
    // Single-task mode: tile over the whole C8NUM-aligned prefix of plane.
    hw8 *= C8NUM;
  }
  size_t c8 = channel / C8NUM * C8NUM;  // C8NUM-aligned prefix of channel
  size_t batch = plane * channel;       // elements per batch
  for (size_t n = 0; n < batches; n++) {
    const uint16_t *src_batch = static_cast<const uint16_t *>(src) + n * batch;
    uint16_t *dst_batch = static_cast<uint16_t *>(dst) + n * batch;
    size_t hw = task_start;
    // Tiled region: C8NUM x C8NUM blocks.
    for (; hw < hw8; hw += C8NUM) {
      size_t c = 0;
      for (; c < c8; c += C8NUM) {
        // Transpose one C8NUM x C8NUM tile: src is hw-major, dst is c-major.
        const uint16_t *src_ptr = src_batch + hw * channel + c;
        uint16_t *dst_ptr = dst_batch + c * plane + hw;
        for (size_t tr = 0; tr < C8NUM; tr++) {
          for (size_t tc = 0; tc < C8NUM; tc++) {
            dst_ptr[tc * plane + tr] = src_ptr[tr * channel + tc];
          }
        }
      }
      // Channel tail: remaining channels handled one column at a time.
      for (; c < channel; c++) {
        const uint16_t *src_ptr = src_batch + hw * channel + c;
        uint16_t *dst_ptr = dst_batch + c * plane + hw;
        for (size_t i = 0; i < C8NUM; i++) {
          dst_ptr[i] = src_ptr[i * channel];
        }
      }
    }
    // Plane tail: rows beyond the tiled region, one row at a time.
    for (; hw < task_end; hw++) {
      const uint16_t *src_ptr = src_batch + hw * channel;
      uint16_t *dst_ptr = dst_batch + hw;
      for (size_t i = 0; i < channel; i++) {
        dst_ptr[i * plane] = src_ptr[i];
      }
    }
  }
}
GetTensorFormat(nvinfer1::ITensor * trt_tensor,mindspore::Format format)376 std::string GetTensorFormat(nvinfer1::ITensor *trt_tensor, mindspore::Format format) {
377   nvinfer1::Dims dims = trt_tensor->getDimensions();
378   std::string out_string = "tensor " + std::string(trt_tensor->getName()) + ": format (NHWC:1, NCHW:0) is " +
379                            std::to_string(static_cast<int>(format)) + ", dims is ";
380   std::string dim_string = "[";
381   for (int i = 0; i < dims.nbDims; i++) {
382     dim_string += std::to_string(dims.d[i]);
383     if (i != dims.nbDims - 1) {
384       dim_string += ", ";
385     }
386   }
387   dim_string += "]";
388   out_string += dim_string;
389   return out_string;
390 }
391 }  // namespace mindspore::lite
392