• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include <algorithm>
#include <cctype>
#include <string>
#include <variant>
#include <vector>
#include <NvInfer.h>
#include "backend/optimizer/trt_pass/trt_converter_context.h"
#include "backend/optimizer/trt_pass/trt_op_factory.h"
#include "backend/kernel_compiler/gpu/trt/trt_utils.h"
24 
25 namespace mindspore {
26 namespace opt {
27 namespace {
ToShape(LayerInput * input,const std::vector<size_t> & shape,std::shared_ptr<TrtConverterContext> context)28 nvinfer1::ITensor *ToShape(LayerInput *input, const std::vector<size_t> &shape,
29                            std::shared_ptr<TrtConverterContext> context) {
30   MS_EXCEPTION_IF_NULL(input);
31   MS_EXCEPTION_IF_NULL(context);
32 
33   if (!input->IsTensor()) {
34     MS_LOG(WARNING) << "Expect Tensor but got weight";
35     return nullptr;
36   }
37 
38   const nvinfer1::Dims &src_dim = input->tensor()->getDimensions();
39   const nvinfer1::Dims &dst_dim = TrtUtils::MsDimsToTrtDims(shape, false);
40   if (TrtUtils::IsSameShape(src_dim, dst_dim)) {
41     return input->tensor();
42   }
43 
44   auto *layer = context->network()->addShuffle(*input->tensor());
45   MS_EXCEPTION_IF_NULL(layer);
46   layer->setReshapeDimensions(dst_dim);
47 
48   return layer->getOutput(0);
49 }
50 
ToTensor(LayerInput * input,const std::vector<size_t> & shape,std::shared_ptr<TrtConverterContext> context)51 nvinfer1::ITensor *ToTensor(LayerInput *input, const std::vector<size_t> &shape,
52                             std::shared_ptr<TrtConverterContext> context) {
53   MS_EXCEPTION_IF_NULL(input);
54   MS_EXCEPTION_IF_NULL(context);
55   if (input->IsTensor()) {
56     return ToShape(input, shape, context);
57   }
58 
59   const nvinfer1::Dims &dim = TrtUtils::MsDimsToTrtDims(shape, false);
60   auto *const_layer = context->network()->addConstant(dim, *input->weight());
61   MS_EXCEPTION_IF_NULL(const_layer);
62   return const_layer->getOutput(0);
63 }
64 
AddReshapeLayer(AnfNodePtr node,std::shared_ptr<TrtConverterContext> context)65 ConvertResult AddReshapeLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context) {
66   std::vector<LayerInput> inputs;
67   bool ret = context->LoadLayerInput(node, &inputs);
68   if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
69     MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
70     return {false, {}};
71   }
72 
73   auto *layer = context->network()->addShuffle(*inputs[0].tensor());
74   MS_EXCEPTION_IF_NULL(layer);
75   const auto &output_shape = AnfAlgo::GetOutputInferShape(node, 0);
76   const nvinfer1::Dims &dims = TrtUtils::MsDimsToTrtDims(output_shape, false);
77   layer->setReshapeDimensions(dims);
78 
79   return {true, {layer->getOutput(0)}};
80 }
81 
AddElementLayer(AnfNodePtr node,std::shared_ptr<TrtConverterContext> context,nvinfer1::ElementWiseOperation op_type)82 ConvertResult AddElementLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context,
83                               nvinfer1::ElementWiseOperation op_type) {
84   std::vector<LayerInput> inputs;
85   bool ret = context->LoadLayerInput(node, &inputs);
86   if (!ret || inputs.size() != 2) {
87     MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
88     return {false, {}};
89   }
90 
91   const std::vector<size_t> &x1_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
92   const std::vector<size_t> &x2_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
93   const std::vector<size_t> &y_shape = AnfAlgo::GetOutputInferShape(node, 0);
94 
95   auto Broadcast = [&context, &y_shape](nvinfer1::ITensor *tensor, const std::vector<size_t> &x_shape) {
96     if (x_shape.size() == y_shape.size()) {
97       return tensor;
98     }
99 
100     // Copy x_shape to dim with tail align, and fill left axis with 1.
101     // For example:
102     //    x: [C, H, W]
103     //    y: [N, C, H, W]
104     //  dim: [1, C, H, W]
105     nvinfer1::Dims dim;
106     dim.nbDims = SizeToInt(y_shape.size());
107     std::fill(dim.d, dim.d + dim.nbDims, 1);
108     size_t offset = y_shape.size() - x_shape.size();
109     for (size_t i = 0; i < x_shape.size(); i++) {
110       dim.d[i + offset] = SizeToInt(x_shape[i]);
111     }
112 
113     auto *layer = context->network()->addShuffle(*tensor);
114     MS_EXCEPTION_IF_NULL(layer);
115     layer->setReshapeDimensions(dim);
116 
117     return layer->getOutput(0);
118   };
119 
120   auto *x1 = Broadcast(ToTensor(&inputs[0], x1_shape, context), x1_shape);
121   auto *x2 = Broadcast(ToTensor(&inputs[1], x2_shape, context), x2_shape);
122   auto *layer = context->network()->addElementWise(*x1, *x2, op_type);
123   MS_EXCEPTION_IF_NULL(layer);
124 
125   return {true, {layer->getOutput(0)}};
126 }
127 
AddPoolingLayer(AnfNodePtr node,std::shared_ptr<TrtConverterContext> context,nvinfer1::PoolingType pooling_type)128 ConvertResult AddPoolingLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context,
129                               nvinfer1::PoolingType pooling_type) {
130   std::vector<LayerInput> inputs;
131   bool ret = context->LoadLayerInput(node, &inputs);
132   if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
133     MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
134     return {false, {}};
135   }
136 
137   const auto &format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
138   if (format != "NCHW") {
139     MS_LOG(WARNING) << "The format: " << format << " not supported.";
140     return {false, {}};
141   }
142 
143   const auto &kernel_size = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "kernel_size");
144   auto *layer = context->network()->addPoolingNd(
145     *(inputs[0].tensor()), pooling_type, nvinfer1::DimsHW{LongToInt(kernel_size[2]), LongToInt(kernel_size[3])});
146   MS_EXCEPTION_IF_NULL(layer);
147 
148   const auto &strides = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "strides");
149   layer->setStride(nvinfer1::DimsHW{LongToInt(strides[2]), LongToInt(strides[3])});
150 
151   auto pad_mode = AnfAlgo::GetNodeAttr<std::string>(node, "pad_mode");
152   std::transform(pad_mode.begin(), pad_mode.end(), pad_mode.begin(), toupper);
153   if (pad_mode == "SAME") {
154     layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
155   }
156 
157   return {true, {layer->getOutput(0)}};
158 }
159 
AddActivationLayer(AnfNodePtr node,std::shared_ptr<TrtConverterContext> context,nvinfer1::ActivationType act_type)160 ConvertResult AddActivationLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context,
161                                  nvinfer1::ActivationType act_type) {
162   std::vector<LayerInput> inputs;
163   bool ret = context->LoadLayerInput(node, &inputs);
164   if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
165     MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
166     return {false, {}};
167   }
168 
169   auto *layer = context->network()->addActivation(*inputs[0].tensor(), act_type);
170   MS_EXCEPTION_IF_NULL(layer);
171 
172   return {true, {layer->getOutput(0)}};
173 }
174 
AddUnaryLayer(AnfNodePtr node,std::shared_ptr<TrtConverterContext> context,nvinfer1::UnaryOperation op_type)175 ConvertResult AddUnaryLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context,
176                             nvinfer1::UnaryOperation op_type) {
177   std::vector<LayerInput> inputs;
178   bool ret = context->LoadLayerInput(node, &inputs);
179   if (!ret || inputs.size() != 1) {
180     MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
181     return {false, {}};
182   }
183 
184   auto *layer = context->network()->addUnary(*inputs[0].tensor(), op_type);
185   MS_EXCEPTION_IF_NULL(layer);
186 
187   return {true, {layer->getOutput(0)}};
188 }
189 
AddReduceLayer(AnfNodePtr node,std::shared_ptr<TrtConverterContext> context,nvinfer1::ReduceOperation op_type)190 ConvertResult AddReduceLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context,
191                              nvinfer1::ReduceOperation op_type) {
192   std::vector<LayerInput> inputs;
193   bool ret = context->LoadLayerInput(node, &inputs);
194   if (!ret || inputs.size() != 1) {
195     MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
196     return {false, {}};
197   }
198 
199   // Calculate reduce axes bitmask
200   const std::vector<size_t> &input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
201   const ValuePtr &value = AnfAlgo::GetCNodePrimitive(node)->GetAttr("axis");
202   uint32_t reduce_axes = 0;
203   if (value->isa<ValueTuple>() || value->isa<ValueList>()) {
204     const auto &axis = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "axis");
205     for (size_t i = 0; i < axis.size(); i++) {
206       int offset = axis[i] >= 0 ? LongToInt(axis[i]) : LongToInt(axis[i] + input_shape.size());
207       reduce_axes |= 1UL << offset;
208     }
209   } else {
210     const auto &axis = AnfAlgo::GetNodeAttr<int64_t>(node, "axis");
211     int offset = axis >= 0 ? LongToInt(axis) : LongToInt(axis + input_shape.size());
212     reduce_axes = 1UL << offset;
213   }
214 
215   // Tensor-RT do not support reduce with no dimensions.
216   // Skip reduce operator if reduce_axes == 0
217   if (reduce_axes == 0) {
218     MS_LOG(WARNING) << "No dimension be be reduced. " << node->DebugString();
219     return {true, {inputs[0].tensor()}};
220   }
221 
222   bool keep_dims = AnfAlgo::GetNodeAttr<bool>(node, "keep_dims");
223   // Tensor-RT do not support reduce all dimensions with keep_dims == false.
224   // Reduce with keep_dims = true, add apply reshape latter.
225   bool post_reshape = false;
226   if (keep_dims == false && (reduce_axes == (1UL << input_shape.size()) - 1)) {
227     keep_dims = true;
228     post_reshape = true;
229   }
230 
231   nvinfer1::IReduceLayer *layer = context->network()->addReduce(*inputs[0].tensor(), op_type, reduce_axes, keep_dims);
232   MS_EXCEPTION_IF_NULL(layer);
233 
234   if (post_reshape) {
235     nvinfer1::IShuffleLayer *reshape_layer = context->network()->addShuffle(*layer->getOutput(0));
236     MS_EXCEPTION_IF_NULL(reshape_layer);
237 
238     nvinfer1::Dims dim;
239     dim.nbDims = 1;
240     dim.d[0] = 1;
241     reshape_layer->setReshapeDimensions(dim);
242 
243     return {true, {reshape_layer->getOutput(0)}};
244   }
245 
246   return {true, {layer->getOutput(0)}};
247 }
248 }  // namespace
249 
// Register operator converter from AnfNode to trt layer: `OPNAME` should keep the same as primitive definition.
// The macro expands to three pieces: (1) a forward declaration of the
// converter function, (2) a static TrtOpRegister object whose constructor
// records the converter in the op factory during static initialization, and
// (3) the function header, so the macro invocation is followed directly by
// the converter body.
#define MS_TRT_CONVERTER_FUNC_REG(OPNAME)                                                                 \
  ConvertResult Gpu##OPNAME##TrtConverter(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context); \
  static const TrtOpRegister(Gpu##OPNAME##ConverterRegister)(#OPNAME, Gpu##OPNAME##TrtConverter);         \
  ConvertResult Gpu##OPNAME##TrtConverter(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context)
255 
// Conv2D: maps to addConvolutionNd. Expects a tensor input and weight filter,
// NCHW layout only. `stride`/`pad_list` attributes are NCHW-ordered.
MS_TRT_CONVERTER_FUNC_REG(Conv2D) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 2 || !inputs[0].IsTensor() || !inputs[1].IsWeight()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
    return {false, {}};
  }

  const auto &data_format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
  if (data_format != "NCHW") {
    MS_LOG(WARNING) << "The format: " << data_format << " not supported.";
    return {false, {}};
  }

  // Convolution without bias: BiasAdd is a separate node in the graph.
  const auto &kernel_size = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "kernel_size");
  const auto &out_channel = AnfAlgo::GetNodeAttr<int64_t>(node, "out_channel");
  nvinfer1::Weights bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
  auto *layer = context->network()->addConvolutionNd(
    *(inputs[0].tensor()), LongToInt(out_channel),
    nvinfer1::DimsHW{LongToInt(kernel_size[0]), LongToInt(kernel_size[1])}, *(inputs[1].weight()), bias);
  MS_EXCEPTION_IF_NULL(layer);

  const auto &strides = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "stride");
  layer->setStride(nvinfer1::DimsHW{LongToInt(strides[2]), LongToInt(strides[3])});

  auto pad_mode = AnfAlgo::GetNodeAttr<std::string>(node, "pad_mode");
  // Cast through unsigned char: calling toupper with a plain (possibly
  // negative) char argument is undefined behavior.
  std::transform(pad_mode.begin(), pad_mode.end(), pad_mode.begin(),
                 [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
  if (pad_mode == "SAME") {
    layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
  }

  if (pad_mode == "PAD") {
    // pad_list order: [top, bottom, left, right].
    const auto &pad_list = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "pad_list");
    layer->setPrePadding(nvinfer1::DimsHW{LongToInt(pad_list[0]), LongToInt(pad_list[2])});
    layer->setPostPadding(nvinfer1::DimsHW{LongToInt(pad_list[1]), LongToInt(pad_list[3])});
  }

  // `group` is an int64_t attribute: use LongToInt (SizeToInt is for size_t).
  const auto &group = AnfAlgo::GetNodeAttr<int64_t>(node, "group");
  layer->setNbGroups(LongToInt(group));

  return {true, {layer->getOutput(0)}};
}
298 
// Binary broadcast operators: each maps 1:1 onto a TensorRT element-wise op
// via AddElementLayer, which aligns operand ranks before the operation.
MS_TRT_CONVERTER_FUNC_REG(Add) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kSUM); }
MS_TRT_CONVERTER_FUNC_REG(Sub) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kSUB); }
MS_TRT_CONVERTER_FUNC_REG(Mul) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kPROD); }
MS_TRT_CONVERTER_FUNC_REG(Div) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kDIV); }
// RealDiv and Div share the same TensorRT operation.
MS_TRT_CONVERTER_FUNC_REG(RealDiv) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kDIV); }
MS_TRT_CONVERTER_FUNC_REG(Pow) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kPOW); }
MS_TRT_CONVERTER_FUNC_REG(Maximum) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kMAX); }
MS_TRT_CONVERTER_FUNC_REG(Minimum) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kMIN); }
MS_TRT_CONVERTER_FUNC_REG(FloorDiv) {
  return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kFLOOR_DIV);
}
311 
// Unary operators: each maps 1:1 onto a TensorRT UnaryOperation via
// AddUnaryLayer.
MS_TRT_CONVERTER_FUNC_REG(Exp) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kEXP); }
MS_TRT_CONVERTER_FUNC_REG(Log) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kLOG); }
MS_TRT_CONVERTER_FUNC_REG(Sqrt) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kSQRT); }
MS_TRT_CONVERTER_FUNC_REG(Reciprocal) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kRECIP); }
MS_TRT_CONVERTER_FUNC_REG(Abs) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kABS); }
MS_TRT_CONVERTER_FUNC_REG(Neg) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kNEG); }
MS_TRT_CONVERTER_FUNC_REG(Sin) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kSIN); }
MS_TRT_CONVERTER_FUNC_REG(Cos) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kCOS); }
MS_TRT_CONVERTER_FUNC_REG(Tan) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kTAN); }
MS_TRT_CONVERTER_FUNC_REG(Sinh) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kSINH); }
MS_TRT_CONVERTER_FUNC_REG(Cosh) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kCOSH); }
MS_TRT_CONVERTER_FUNC_REG(Asin) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kASIN); }
MS_TRT_CONVERTER_FUNC_REG(Acos) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kACOS); }
MS_TRT_CONVERTER_FUNC_REG(Atan) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kATAN); }
MS_TRT_CONVERTER_FUNC_REG(Asinh) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kASINH); }
MS_TRT_CONVERTER_FUNC_REG(Acosh) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kACOSH); }
MS_TRT_CONVERTER_FUNC_REG(Ceil) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kCEIL); }
MS_TRT_CONVERTER_FUNC_REG(Floor) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kFLOOR); }
331 
// Reduce operators: each maps onto a TensorRT ReduceOperation via
// AddReduceLayer (note ReduceMean maps to kAVG).
MS_TRT_CONVERTER_FUNC_REG(ReduceSum) { return AddReduceLayer(node, context, nvinfer1::ReduceOperation::kSUM); }
MS_TRT_CONVERTER_FUNC_REG(ReduceMean) { return AddReduceLayer(node, context, nvinfer1::ReduceOperation::kAVG); }
MS_TRT_CONVERTER_FUNC_REG(ReduceMax) { return AddReduceLayer(node, context, nvinfer1::ReduceOperation::kMAX); }
MS_TRT_CONVERTER_FUNC_REG(ReduceMin) { return AddReduceLayer(node, context, nvinfer1::ReduceOperation::kMIN); }
MS_TRT_CONVERTER_FUNC_REG(ReduceProd) { return AddReduceLayer(node, context, nvinfer1::ReduceOperation::kPROD); }
338 
// Pooling operators: shared implementation in AddPoolingLayer (NCHW only).
MS_TRT_CONVERTER_FUNC_REG(AvgPool) { return AddPoolingLayer(node, context, nvinfer1::PoolingType::kAVERAGE); }
MS_TRT_CONVERTER_FUNC_REG(MaxPool) { return AddPoolingLayer(node, context, nvinfer1::PoolingType::kMAX); }
342 
// Activation operators with native TensorRT support: shared implementation
// in AddActivationLayer.
MS_TRT_CONVERTER_FUNC_REG(ReLU) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kRELU); }
MS_TRT_CONVERTER_FUNC_REG(Sigmoid) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kSIGMOID); }
MS_TRT_CONVERTER_FUNC_REG(Tanh) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kTANH); }
MS_TRT_CONVERTER_FUNC_REG(Elu) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kELU); }
MS_TRT_CONVERTER_FUNC_REG(Softsign) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kSOFTSIGN); }
349 
// ReLU6 has no native TensorRT activation; build it from element-wise
// min/max against broadcastable scalar constants.
MS_TRT_CONVERTER_FUNC_REG(ReLU6) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  // Also require a tensor input: inputs[0].tensor() is dereferenced below.
  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  // Constants get x's rank with every axis 1 so they broadcast against x.
  const std::vector<size_t> &x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  nvinfer1::Dims dim;
  dim.nbDims = SizeToInt(x_shape.size());
  std::fill(dim.d, dim.d + dim.nbDims, 1);

  // Build a broadcastable scalar constant holding `coeff`. The backing
  // buffer comes from CreateTempWeight — presumably kept alive by the
  // context until the engine is built; TODO(review): confirm its lifetime.
  auto AddConst = [&context, &dim](const float &coeff) -> nvinfer1::ITensor * {
    std::shared_ptr<tensor::Tensor> weight = context->CreateTempWeight(kNumberTypeFloat32, {1});
    auto value = static_cast<float *>(weight->data_c());
    value[0] = coeff;

    auto *layer = context->network()->addConstant(dim, nvinfer1::Weights{nvinfer1::DataType::kFLOAT, value, 1});
    MS_EXCEPTION_IF_NULL(layer);
    return layer->getOutput(0);
  };

  // y = max(0.0, min(6.0, x))
  auto *c0 = AddConst(0.0f);
  auto *c1 = AddConst(6.0f);
  auto *x = inputs[0].tensor();
  nvinfer1::ILayer *layer = context->network()->addElementWise(*x, *c1, nvinfer1::ElementWiseOperation::kMIN);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*layer->getOutput(0), *c0, nvinfer1::ElementWiseOperation::kMAX);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}
384 
// GeLU via its tanh approximation, built from element-wise layers since
// TensorRT has no native GeLU activation.
// NOTE(review): inputs[0] is not checked with IsTensor() before tensor() is
// called below — a weight input would slip past the size check; verify.
MS_TRT_CONVERTER_FUNC_REG(GeLU) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  // Constants get x's rank with every axis 1 so they broadcast against x.
  const std::vector<size_t> &x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  nvinfer1::Dims dim;
  dim.nbDims = SizeToInt(x_shape.size());
  std::fill(dim.d, dim.d + dim.nbDims, 1);

  // Build a broadcastable scalar constant holding `coeff`. The backing
  // buffer comes from CreateTempWeight — presumably kept alive by the
  // context until the engine is built; TODO(review): confirm its lifetime.
  auto AddConst = [&context, &dim](const float &coeff) -> nvinfer1::ITensor * {
    std::shared_ptr<tensor::Tensor> weight = context->CreateTempWeight(kNumberTypeFloat32, {1});
    auto value = static_cast<float *>(weight->data_c());
    value[0] = coeff;

    auto *layer = context->network()->addConstant(dim, nvinfer1::Weights{nvinfer1::DataType::kFLOAT, value, 1});
    MS_EXCEPTION_IF_NULL(layer);
    return layer->getOutput(0);
  };

  // y = 0.5 * x * (1 + tanh(0.7978846 * (x + 0.044715 * x^3)))
  auto *c1 = AddConst(0.5f);
  auto *c2 = AddConst(1.0f);
  auto *c3 = AddConst(0.7978846f);
  auto *c4 = AddConst(0.044715f);
  auto *c5 = AddConst(3.0f);

  auto *x = inputs[0].tensor();
  // Built inside-out: x^3, then 0.044715 * x^3, then x + ..., etc.
  nvinfer1::ILayer *layer = context->network()->addElementWise(*x, *c5, nvinfer1::ElementWiseOperation::kPOW);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*c4, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*x, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*c3, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addActivation(*layer->getOutput(0), nvinfer1::ActivationType::kTANH);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*c2, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*x, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*c1, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}
435 
// Hard sigmoid built from element-wise layers (no native TensorRT op).
// NOTE(review): inputs[0] is not checked with IsTensor() before tensor() is
// called below — a weight input would slip past the size check; verify.
MS_TRT_CONVERTER_FUNC_REG(HSigmoid) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  // Constants get x's rank with every axis 1 so they broadcast against x.
  const std::vector<size_t> &x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  nvinfer1::Dims dim;
  dim.nbDims = SizeToInt(x_shape.size());
  std::fill(dim.d, dim.d + dim.nbDims, 1);

  // Build a broadcastable scalar constant holding `coeff`. The backing
  // buffer comes from CreateTempWeight — presumably kept alive by the
  // context until the engine is built; TODO(review): confirm its lifetime.
  auto AddConst = [&context, &dim](const float &coeff) -> nvinfer1::ITensor * {
    std::shared_ptr<tensor::Tensor> weight = context->CreateTempWeight(kNumberTypeFloat32, {1});
    auto value = static_cast<float *>(weight->data_c());
    value[0] = coeff;

    auto *layer = context->network()->addConstant(dim, nvinfer1::Weights{nvinfer1::DataType::kFLOAT, value, 1});
    MS_EXCEPTION_IF_NULL(layer);
    return layer->getOutput(0);
  };

  // y = max(0, min(1.0, (x + 3.0)/6.0))
  auto *c0 = AddConst(0.0f);
  auto *c1 = AddConst(1.0f);
  auto *c2 = AddConst(3.0f);
  auto *c3 = AddConst(6.0f);
  auto *x = inputs[0].tensor();
  nvinfer1::ILayer *layer = context->network()->addElementWise(*x, *c2, nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*layer->getOutput(0), *c3, nvinfer1::ElementWiseOperation::kDIV);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*layer->getOutput(0), *c1, nvinfer1::ElementWiseOperation::kMIN);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*layer->getOutput(0), *c0, nvinfer1::ElementWiseOperation::kMAX);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}
476 
// Hard swish built from element-wise layers (no native TensorRT op).
// NOTE(review): inputs[0] is not checked with IsTensor() before tensor() is
// called below — a weight input would slip past the size check; verify.
MS_TRT_CONVERTER_FUNC_REG(HSwish) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  // Constants get x's rank with every axis 1 so they broadcast against x.
  const std::vector<size_t> &x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  nvinfer1::Dims dim;
  dim.nbDims = SizeToInt(x_shape.size());
  std::fill(dim.d, dim.d + dim.nbDims, 1);

  // Build a broadcastable scalar constant holding `coeff`. The backing
  // buffer comes from CreateTempWeight — presumably kept alive by the
  // context until the engine is built; TODO(review): confirm its lifetime.
  auto AddConst = [&context, &dim](const float &coeff) -> nvinfer1::ITensor * {
    std::shared_ptr<tensor::Tensor> weight = context->CreateTempWeight(kNumberTypeFloat32, {1});
    auto value = static_cast<float *>(weight->data_c());
    value[0] = coeff;

    auto *layer = context->network()->addConstant(dim, nvinfer1::Weights{nvinfer1::DataType::kFLOAT, value, 1});
    MS_EXCEPTION_IF_NULL(layer);
    return layer->getOutput(0);
  };

  // y = x * Relu6(x + 3.0) / 6.0
  // Relu6(x) = min(max(x, 0.0), 6.0)
  auto *c0 = AddConst(0.0f);
  auto *c1 = AddConst(3.0f);
  auto *c2 = AddConst(6.0f);
  auto *x = inputs[0].tensor();
  nvinfer1::ILayer *layer = context->network()->addElementWise(*x, *c1, nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*layer->getOutput(0), *c0, nvinfer1::ElementWiseOperation::kMAX);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*layer->getOutput(0), *c2, nvinfer1::ElementWiseOperation::kMIN);
  MS_EXCEPTION_IF_NULL(layer);
  // c2 (6.0) is reused here as the divisor.
  layer = context->network()->addElementWise(*layer->getOutput(0), *c2, nvinfer1::ElementWiseOperation::kDIV);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*x, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}
519 
// MatMul: uses the fully-connected fast path when it applies (tensor * weight
// with transpose_b), otherwise a general matrix-multiply layer.
MS_TRT_CONVERTER_FUNC_REG(MatMul) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 2) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
    return {false, {}};
  }

  const auto &transpose_a = AnfAlgo::GetNodeAttr<bool>(node, "transpose_a");
  const auto &transpose_b = AnfAlgo::GetNodeAttr<bool>(node, "transpose_b");
  if (inputs[0].IsTensor() && inputs[1].IsWeight() && transpose_a == false && transpose_b == true) {
    // Fast path: y = x * w^T is exactly what addFullyConnected computes.
    // Reshape x from (M, K) to (M, K, 1, 1) as required by the FC layer.
    nvinfer1::Dims unsqueeze_dims = inputs[0].tensor()->getDimensions();
    for (size_t i = 0; i < 2; i++) {
      unsqueeze_dims.d[unsqueeze_dims.nbDims++] = 1;
    }
    auto x_reshape = context->network()->addShuffle(*inputs[0].tensor());
    MS_EXCEPTION_IF_NULL(x_reshape);  // was missing: addShuffle can return nullptr
    x_reshape->setReshapeDimensions(unsqueeze_dims);

    // Apply addFullyConnected: y = x * w^T + b (bias unused here).
    nvinfer1::Weights bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
    const auto &w_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
    auto *layer = context->network()->addFullyConnected(*x_reshape->getOutput(0), SizeToInt(w_shape[0]),
                                                        *inputs[1].weight(), bias);
    MS_EXCEPTION_IF_NULL(layer);

    // Reshape y from (M, N, 1, 1) back to (M, N).
    const auto &y_shape = AnfAlgo::GetOutputInferShape(node, 0);
    const nvinfer1::Dims &y_dims = TrtUtils::MsDimsToTrtDims(y_shape, false);
    auto *squeeze_y = context->network()->addShuffle(*layer->getOutput(0));
    MS_EXCEPTION_IF_NULL(squeeze_y);  // was missing
    squeeze_y->setReshapeDimensions(y_dims);

    return {true, {squeeze_y->getOutput(0)}};
  } else {
    // General path: explicit matrix multiply with optional transposes.
    auto op1 = transpose_a ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;
    auto op2 = transpose_b ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;
    const std::vector<size_t> &x1_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
    const std::vector<size_t> &x2_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
    nvinfer1::ITensor *x1 = ToTensor(&inputs[0], x1_shape, context);
    nvinfer1::ITensor *x2 = ToTensor(&inputs[1], x2_shape, context);
    auto *layer = context->network()->addMatrixMultiply(*x1, op1, *x2, op2);
    MS_EXCEPTION_IF_NULL(layer);
    return {true, {layer->getOutput(0)}};
  }
}
565 
// BatchMatMul: both operands are materialized as ITensors and the optional
// transposes from the node attributes are applied by the matmul layer itself.
MS_TRT_CONVERTER_FUNC_REG(BatchMatMul) {
  std::vector<LayerInput> inputs;
  if (!context->LoadLayerInput(node, &inputs) || inputs.size() != 2) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
    return {false, {}};
  }

  const bool transpose_a = AnfAlgo::GetNodeAttr<bool>(node, "transpose_a");
  const bool transpose_b = AnfAlgo::GetNodeAttr<bool>(node, "transpose_b");
  const auto op_a = transpose_a ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;
  const auto op_b = transpose_b ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;

  const std::vector<size_t> &a_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  const std::vector<size_t> &b_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
  nvinfer1::ITensor *operand_a = ToTensor(&inputs[0], a_shape, context);
  nvinfer1::ITensor *operand_b = ToTensor(&inputs[1], b_shape, context);
  auto *matmul = context->network()->addMatrixMultiply(*operand_a, op_a, *operand_b, op_b);
  MS_EXCEPTION_IF_NULL(matmul);

  return {true, {matmul->getOutput(0)}};
}
588 
// BiasAdd: broadcast-add a 1-D bias along the channel axis of x. The channel
// axis position is derived from the "format" attribute (index of 'C').
MS_TRT_CONVERTER_FUNC_REG(BiasAdd) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  // inputs[0] must be a tensor: its tensor() is dereferenced below.
  if (!ret || inputs.size() != 2 || !inputs[0].IsTensor()) {
    // Fixed message: this is a two-input operator ("1 expected" was wrong).
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
    return {false, {}};
  }

  const auto &x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  const auto &bias_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
  const auto &format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
  const string::size_type &pos = format.find("C");
  if (pos == std::string::npos || pos >= x_shape.size()) {
    // Fixed mismatched quoting in the message.
    MS_LOG(WARNING) << "The format '" << format << "' invalid";
    return {false, {}};
  }

  // Convert bias to an ITensor with the same rank as x: all-1 axes except
  // the channel axis, so the element-wise add broadcasts correctly.
  std::vector<size_t> unsqueeze_bias_dims(x_shape.size(), 1);
  unsqueeze_bias_dims[pos] = bias_shape[0];
  nvinfer1::ITensor *bias = ToTensor(&inputs[1], unsqueeze_bias_dims, context);

  // Create broadcast Add layer.
  auto *layer = context->network()->addElementWise(*inputs[0].tensor(), *bias, nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}
617 
618 // NoOp
// Reshape keeps the underlying data and only changes the tensor dimensions.
MS_TRT_CONVERTER_FUNC_REG(Reshape) { return AddReshapeLayer(node, context); }
// ExpandDims inserts size-1 axes; implemented as a plain reshape.
MS_TRT_CONVERTER_FUNC_REG(ExpandDims) { return AddReshapeLayer(node, context); }
// Squeeze removes size-1 axes; implemented as a plain reshape.
MS_TRT_CONVERTER_FUNC_REG(Squeeze) { return AddReshapeLayer(node, context); }
// Flatten collapses trailing axes; implemented as a plain reshape.
MS_TRT_CONVERTER_FUNC_REG(Flatten) { return AddReshapeLayer(node, context); }
623 
// Convert inference-mode BatchNorm (NCHW only) to a TensorRT per-channel
// Scale layer by folding the normalization into a linear transform:
//   scale = gamma / sqrt(var + epsilon)
//   y = (x - mean) * scale + beta = x * coeff + bias
MS_TRT_CONVERTER_FUNC_REG(BatchNorm) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 5 || !inputs[0].IsTensor() || !inputs[1].IsWeight() || !inputs[2].IsWeight() ||
      !inputs[3].IsWeight() || !inputs[4].IsWeight()) {
    // Fix: the warning used to claim "1 expected" although BatchNorm takes 5
    // inputs (x, gamma, beta, mean, var).
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 5 expected.";
    return {false, {}};
  }

  auto primitive = GetCNodePrimitive(node);
  MS_EXCEPTION_IF_NULL(primitive);
  // Only inference mode can be folded into a static scale/shift.
  auto is_training = AnfAlgo::GetNodeAttr<bool>(node, "is_training");
  if (is_training) {
    MS_LOG(WARNING) << "Operation not support, is_training: " << is_training;
    return {false, {}};
  }

  const auto &format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
  if (format != "NCHW") {
    MS_LOG(WARNING) << "The format " << format << "' invalid";
    return {false, {}};
  }

  // gamma/beta/mean/var are constant weights; fold them into coeff/bias.
  auto gamma = static_cast<const float *>(inputs[1].weight()->values);
  auto beta = static_cast<const float *>(inputs[2].weight()->values);
  auto mean = static_cast<const float *>(inputs[3].weight()->values);
  auto var = static_cast<const float *>(inputs[4].weight()->values);
  auto epsilon = AnfAlgo::GetNodeAttr<float>(node, "epsilon");

  // The temp weights are owned by the context so the raw buffers handed to
  // TensorRT stay alive until the engine is built.
  const TypeId &type = AnfAlgo::GetPrevNodeOutputInferDataType(node, 1);
  const std::vector<size_t> &shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
  int64_t channel_num = SizeToLong(shape[0]);
  auto coeff = context->CreateTempWeight(type, shape);
  auto bias = context->CreateTempWeight(type, shape);
  auto coeff_value = static_cast<float *>(coeff->data_c());
  auto bias_value = static_cast<float *>(bias->data_c());
  for (int64_t i = 0; i < channel_num; i++) {
    float scale = gamma[i] / sqrtf(var[i] + epsilon);
    coeff_value[i] = scale;
    bias_value[i] = beta[i] - mean[i] * scale;
  }

  // addScale computes (x * scale + shift) ^ power per channel; power is
  // left empty (identity).
  const nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, coeff_value, channel_num};
  const nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, bias_value, channel_num};
  const nvinfer1::Weights pow{nvinfer1::DataType::kFLOAT, nullptr, 0};
  auto *layer = context->network()->addScale(*inputs[0].tensor(), nvinfer1::ScaleMode::kCHANNEL, shift, scale, pow);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}
678 
// Convert Concat to a TensorRT concatenation layer along the "axis"
// attribute; a negative axis is normalized against the first input's rank.
MS_TRT_CONVERTER_FUNC_REG(Concat) {
  std::vector<LayerInput> inputs;
  if (!context->LoadLayerInput(node, &inputs) || inputs.empty()) {
    MS_LOG(WARNING) << "Get inputs failed. Input num: " << inputs.size();
    return {false, {}};
  }

  // Every operand must already be a tensor; constant (weight) operands are
  // not supported by this converter.
  std::vector<nvinfer1::ITensor *> operands;
  for (const auto &input : inputs) {
    if (input.IsWeight()) {
      MS_LOG(WARNING) << "Concat input do not support weight.";
      return {false, {}};
    }
    operands.push_back(input.tensor());
  }

  auto *layer = context->network()->addConcatenation(operands.data(), operands.size());
  MS_EXCEPTION_IF_NULL(layer);

  int axis = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(node, "axis"));
  if (axis < 0) {
    const auto &input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
    axis += SizeToInt(input_shape.size());
  }
  layer->setAxis(axis);

  return {true, {layer->getOutput(0)}};
}
708 
// Convert Conv2DBackpropInput (the gradient of Conv2D w.r.t. its input) to a
// TensorRT deconvolution layer. Only NCHW is supported; padding follows the
// SAME / PAD / (default VALID) pad_mode attribute.
MS_TRT_CONVERTER_FUNC_REG(Conv2DBackpropInput) {
  std::vector<LayerInput> inputs;
  if (!context->LoadLayerInput(node, &inputs) || inputs.size() != 2 || !inputs[0].IsTensor() ||
      !inputs[1].IsWeight()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
    return {false, {}};
  }

  const auto &format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
  if (format != "NCHW") {
    MS_LOG(WARNING) << "The format: " << format << " not supported.";
    return {false, {}};
  }

  // Output channel count comes from this node's inferred (N, C, H, W) shape.
  const auto &kernel_size = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "kernel_size");
  const auto &output_shape = AnfAlgo::GetOutputInferShape(node, 0);
  const nvinfer1::DimsHW kernel_hw{LongToInt(kernel_size[0]), LongToInt(kernel_size[1])};
  const nvinfer1::Weights no_bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
  auto *layer = context->network()->addDeconvolutionNd(*(inputs[0].tensor()), SizeToInt(output_shape[1]), kernel_hw,
                                                       *(inputs[1].weight()), no_bias);
  MS_EXCEPTION_IF_NULL(layer);

  // MindSpore stride attribute layout is (N, C, H, W); only H/W apply.
  const auto &strides = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "stride");
  layer->setStride(nvinfer1::DimsHW{LongToInt(strides[2]), LongToInt(strides[3])});

  auto pad_mode = AnfAlgo::GetNodeAttr<std::string>(node, "pad_mode");
  std::transform(pad_mode.begin(), pad_mode.end(), pad_mode.begin(), toupper);
  if (pad_mode == "SAME") {
    layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
  } else if (pad_mode == "PAD") {
    // pad_list layout: [top, bottom, left, right].
    const auto &pad_list = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "pad_list");
    layer->setPaddingMode(nvinfer1::PaddingMode::kEXPLICIT_ROUND_DOWN);
    layer->setPrePadding(nvinfer1::DimsHW{LongToInt(pad_list[0]), LongToInt(pad_list[2])});
    layer->setPostPadding(nvinfer1::DimsHW{LongToInt(pad_list[1]), LongToInt(pad_list[3])});
  }

  return {true, {layer->getOutput(0)}};
}
749 
// Convert Slice to a TensorRT slice layer with unit stride on every axis.
MS_TRT_CONVERTER_FUNC_REG(Slice) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  const auto &begin = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "begin");
  const auto &size = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "size");

  nvinfer1::Dims trt_start = TrtUtils::MsDimsToTrtDims(begin, false);
  nvinfer1::Dims trt_size = TrtUtils::MsDimsToTrtDims(size, false);
  // Build an all-ones stride with the same rank as `begin`.
  // Fix: nvinfer1::Dims performs no default initialization, so nbDims was
  // previously read (and incremented) while holding garbage.
  nvinfer1::Dims trt_stride;
  trt_stride.nbDims = 0;
  for (int32_t i = 0; i < trt_start.nbDims; i++) {
    trt_stride.d[trt_stride.nbDims++] = 1;
  }

  auto *layer = context->network()->addSlice(*inputs[0].tensor(), trt_start, trt_size, trt_stride);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}
773 
// Convert Transpose to a TensorRT shuffle layer whose first-transpose
// permutation is taken from the "perm" attribute.
MS_TRT_CONVERTER_FUNC_REG(Transpose) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  const auto &perm = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "perm");
  // Fix: Permutation::order is a fixed array of Dims::MAX_DIMS entries;
  // writing past it was a buffer overflow for high-rank inputs.
  if (perm.size() > static_cast<size_t>(nvinfer1::Dims::MAX_DIMS)) {
    MS_LOG(WARNING) << "Transpose rank " << perm.size() << " exceeds TensorRT limit " << nvinfer1::Dims::MAX_DIMS;
    return {false, {}};
  }
  nvinfer1::Permutation trt_perm;
  for (size_t i = 0; i < perm.size(); i++) {
    trt_perm.order[i] = LongToInt(perm[i]);
  }

  auto *layer = context->network()->addShuffle(*inputs[0].tensor());
  MS_EXCEPTION_IF_NULL(layer);
  layer->setFirstTranspose(trt_perm);

  return {true, {layer->getOutput(0)}};
}
794 
// Convert Softmax to a TensorRT softmax layer. The "axis" attribute may be a
// scalar or a one-element tuple/list; it is normalized (negative axes wrap)
// and encoded as the single-bit reduce-axes mask TensorRT expects.
MS_TRT_CONVERTER_FUNC_REG(Softmax) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  const std::vector<size_t> &input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  // Fix: the primitive and its "axis" attribute were dereferenced without
  // null checks.
  const auto &primitive = AnfAlgo::GetCNodePrimitive(node);
  MS_EXCEPTION_IF_NULL(primitive);
  const ValuePtr &value = primitive->GetAttr("axis");
  MS_EXCEPTION_IF_NULL(value);
  uint32_t reduce_axes = 0;
  if (value->isa<ValueTuple>() || value->isa<ValueList>()) {
    // Tuple/list form: ISoftMaxLayer supports exactly one axis.
    const auto &axis = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "axis");
    if (axis.size() != 1) {
      MS_LOG(WARNING) << "Only one axis can be set. Axis size" << axis.size();
      return {false, {}};
    }
    int offset = axis[0] >= 0 ? LongToInt(axis[0]) : LongToInt(axis[0] + input_shape.size());
    reduce_axes = 1U << offset;
  } else {
    const auto &axis = AnfAlgo::GetNodeAttr<int64_t>(node, "axis");
    int offset = axis >= 0 ? LongToInt(axis) : LongToInt(axis + input_shape.size());
    // Fix: use the same 1U shift as the tuple branch (mask is uint32_t).
    reduce_axes = 1U << offset;
  }

  auto *layer = context->network()->addSoftMax(*inputs[0].tensor());
  MS_EXCEPTION_IF_NULL(layer);
  layer->setAxes(reduce_axes);
  return {true, {layer->getOutput(0)}};
}
825 
// Convert LogSoftmax as softmax followed by a pointwise natural logarithm.
MS_TRT_CONVERTER_FUNC_REG(LogSoftmax) {
  std::vector<LayerInput> inputs;
  if (!context->LoadLayerInput(node, &inputs) || inputs.size() != 1 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  // Normalize a possibly-negative axis, then encode it as the single-bit
  // reduce-axes mask TensorRT expects.
  const std::vector<size_t> &input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  const auto &axis = AnfAlgo::GetNodeAttr<int64_t>(node, "axis");
  const int offset = axis >= 0 ? LongToInt(axis) : LongToInt(axis + input_shape.size());
  const uint32_t reduce_axes = 1UL << offset;

  auto *softmax = context->network()->addSoftMax(*inputs[0].tensor());
  MS_EXCEPTION_IF_NULL(softmax);
  softmax->setAxes(reduce_axes);

  auto *log_op = context->network()->addUnary(*softmax->getOutput(0), nvinfer1::UnaryOperation::kLOG);
  MS_EXCEPTION_IF_NULL(log_op);

  return {true, {log_op->getOutput(0)}};
}
848 
// Convert Gather to a TensorRT gather layer; a negative "axis" attribute is
// normalized against the rank of the data input.
MS_TRT_CONVERTER_FUNC_REG(Gather) {
  std::vector<LayerInput> inputs;
  if (!context->LoadLayerInput(node, &inputs) || inputs.size() != 2) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
    return {false, {}};
  }

  const std::vector<size_t> &data_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  const std::vector<size_t> &indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
  auto axis = AnfAlgo::GetNodeAttr<int64_t>(node, "axis");
  if (axis < 0) {
    axis = axis + data_shape.size();
  }

  // Both operands are materialized as ITensor (weights become constants).
  nvinfer1::ITensor *data = ToTensor(&inputs[0], data_shape, context);
  nvinfer1::ITensor *indices = ToTensor(&inputs[1], indices_shape, context);
  auto *layer = context->network()->addGather(*data, *indices, LongToInt(axis));
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}
870 
// Convert Cast to a TensorRT identity layer with an explicit output type.
// FP16 destinations are rejected: mixed-precision models should be exported
// as FP32 and run with the builder's FP16 precision flag instead.
MS_TRT_CONVERTER_FUNC_REG(Cast) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1) {
    MS_LOG(WARNING) << "Get inputs failed. Input num: " << inputs.size();
    return {false, {}};
  }

  const std::vector<size_t> &input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  nvinfer1::ITensor *input = ToTensor(&inputs[0], input_shape, context);

  const TypeId &dst_type = AnfAlgo::GetOutputInferDataType(node, 0);
  std::variant<bool, nvinfer1::DataType> type = TrtUtils::MsDtypeToTrtDtype(dst_type);
  if (type.index() != 1) {
    // Destination type has no TensorRT equivalent: fall back to native kernel.
    return {false, {}};
  }
  auto trt_type = std::get<nvinfer1::DataType>(type);

  // Fix: reject kHALF before creating the layer, so the bail-out path no
  // longer leaves a dead identity layer in the network.
  if (trt_type == nvinfer1::DataType::kHALF) {
    MS_LOG(WARNING) << "The model is exported with auto-mixed-precision or manual precision mode. "
                    << "Retreat inference with native backend. It is recommended that export FP32 model "
                    << "and then inference with FP16 precision mode configuration.";
    return {false, {}};
  }

  auto *layer = context->network()->addIdentity(*input);
  // Fix: the layer pointer was previously dereferenced without a null check.
  MS_EXCEPTION_IF_NULL(layer);
  layer->setOutputType(0, trt_type);
  return {true, {layer->getOutput(0)}};
}
899 
// Convert LayerNorm by composing primitive TensorRT layers:
//   y = (x - mean) / sqrt(var + epsilon) * gamma + beta
// mean/var are reduced over the axes [begin_norm_axis, rank); gamma and beta
// (inputs 1 and 2) are reshaped according to begin_params_axis so that the
// element-wise layers broadcast them over the leading dims.
MS_TRT_CONVERTER_FUNC_REG(LayerNorm) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 3 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Get inputs failed. Input num: " << inputs.size();
    return {false, {}};
  }

  // Calculate reduce axes: one mask bit per normalized dimension,
  // from begin_norm_axis (negative values wrap) through the last dim.
  const std::vector<size_t> &input_shape = AnfAlgo::GetOutputInferShape(node, 0);
  auto begin_norm_axis = AnfAlgo::GetNodeAttr<int64_t>(node, "begin_norm_axis");
  begin_norm_axis = begin_norm_axis >= 0 ? begin_norm_axis : begin_norm_axis + input_shape.size();
  uint32_t reduce_axes = 0;
  for (size_t i = LongToSize(begin_norm_axis); i < input_shape.size(); i++) {
    reduce_axes |= 1UL << i;
  }

  // Reshape gamma and beta for broadcast: leading dims (before
  // begin_params_axis, negative values wrap) collapse to 1.
  auto begin_params_axis = AnfAlgo::GetNodeAttr<int64_t>(node, "begin_params_axis");
  begin_params_axis = begin_params_axis >= 0 ? begin_params_axis : begin_params_axis + input_shape.size();
  std::vector<size_t> param_shape = input_shape;
  for (size_t j = 0; j < LongToSize(begin_params_axis); j++) {
    param_shape[j] = 1;
  }

  // Materialize epsilon as a rank-matched all-ones-shape constant so it can
  // be broadcast-added to the variance. The backing buffer is owned by the
  // context (CreateTempWeight) and outlives engine building.
  auto epsilon = AnfAlgo::GetNodeAttr<float>(node, "epsilon");
  std::shared_ptr<tensor::Tensor> weight = context->CreateTempWeight(kNumberTypeFloat32, {1});
  auto value = static_cast<float *>(weight->data_c());
  value[0] = epsilon;
  nvinfer1::Dims dim;
  dim.nbDims = SizeToInt(input_shape.size());
  for (size_t i = 0; i < input_shape.size(); i++) {
    dim.d[i] = 1;
  }
  auto *epsilon_layer = context->network()->addConstant(dim, nvinfer1::Weights{nvinfer1::DataType::kFLOAT, value, 1});
  MS_EXCEPTION_IF_NULL(epsilon_layer);

  // y = (x - mean) / sqrt(var) * gamma + beta
  // (var here is E[(x - mean)^2]; epsilon is added before the sqrt.)
  auto *mean = context->network()->addReduce(*inputs[0].tensor(), nvinfer1::ReduceOperation::kAVG, reduce_axes, true);
  MS_EXCEPTION_IF_NULL(mean);
  auto *sub =
    context->network()->addElementWise(*inputs[0].tensor(), *mean->getOutput(0), nvinfer1::ElementWiseOperation::kSUB);
  MS_EXCEPTION_IF_NULL(sub);
  // Square the centered values: (x - mean) * (x - mean).
  auto *pow =
    context->network()->addElementWise(*sub->getOutput(0), *sub->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(pow);
  auto *var = context->network()->addReduce(*pow->getOutput(0), nvinfer1::ReduceOperation::kAVG, reduce_axes, true);
  MS_EXCEPTION_IF_NULL(var);
  auto *var_epsilon = context->network()->addElementWise(*var->getOutput(0), *epsilon_layer->getOutput(0),
                                                         nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(var_epsilon);
  auto *std = context->network()->addUnary(*var_epsilon->getOutput(0), nvinfer1::UnaryOperation::kSQRT);
  MS_EXCEPTION_IF_NULL(std);
  auto *div =
    context->network()->addElementWise(*sub->getOutput(0), *std->getOutput(0), nvinfer1::ElementWiseOperation::kDIV);
  MS_EXCEPTION_IF_NULL(div);
  // Affine transform: scale by gamma then shift by beta (both broadcast).
  auto *mul = context->network()->addElementWise(*div->getOutput(0), *ToTensor(&inputs[1], param_shape, context),
                                                 nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(mul);
  auto *add = context->network()->addElementWise(*mul->getOutput(0), *ToTensor(&inputs[2], param_shape, context),
                                                 nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(add);

  return {true, {add->getOutput(0)}};
}
965 
// Mark every input of the graph's Return node as a network output. Weight
// inputs are first materialized as constant tensors so they can be marked.
MS_TRT_CONVERTER_FUNC_REG(Return) {
  std::vector<LayerInput> inputs;
  if (!context->LoadLayerInput(node, &inputs)) {
    return {false, {}};
  }

  for (size_t i = 0; i < inputs.size(); ++i) {
    nvinfer1::ITensor *output = nullptr;
    if (inputs[i].IsTensor()) {
      output = inputs[i].tensor();
    } else {
      // Weights carry an int64 shape; convert it to size_t for ToTensor.
      std::vector<size_t> shape;
      for (const int64_t d : inputs[i].shape()) {
        shape.push_back(LongToSize(d));
      }
      output = ToTensor(&inputs[i], shape, context);
    }

    // Outputs are addressed by position: return_output_0, return_output_1, ...
    const std::string &name = "return_output_" + std::to_string(i);
    output->setName(name.c_str());
    context->network()->markOutput(*output);
  }

  return {true, {}};
}
991 }  // namespace opt
992 }  // namespace mindspore
993