/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <variant>
#include <vector>
#include <NvInfer.h>
#include "backend/optimizer/trt_pass/trt_converter_context.h"
#include "backend/optimizer/trt_pass/trt_op_factory.h"
#include "backend/kernel_compiler/gpu/trt/trt_utils.h"

namespace mindspore {
namespace opt {
namespace {
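// Reshape the tensor held in `input` to `shape` with an IShuffleLayer; return the tensor unchanged if the shapes
// already match.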
nvinfer1::ITensor *ToShape(LayerInput *input, const std::vector<size_t> &shape,
                           std::shared_ptr<TrtConverterContext> context) {
  MS_EXCEPTION_IF_NULL(input);
  MS_EXCEPTION_IF_NULL(context);

  if (!input->IsTensor()) {
    MS_LOG(WARNING) << "Expected a tensor but got a weight.";
    return nullptr;
  }

  const nvinfer1::Dims &src_dim = input->tensor()->getDimensions();
  const nvinfer1::Dims &dst_dim = TrtUtils::MsDimsToTrtDims(shape, false);
  if (TrtUtils::IsSameShape(src_dim, dst_dim)) {
    return input->tensor();
  }

  auto *layer = context->network()->addShuffle(*input->tensor());
  MS_EXCEPTION_IF_NULL(layer);
  layer->setReshapeDimensions(dst_dim);

  return layer->getOutput(0);
}

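// Materialize `input` as an ITensor with the given shape: tensors are reshaped, weights are wrapped in a constant
// layer.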
nvinfer1::ITensor *ToTensor(LayerInput *input, const std::vector<size_t> &shape,
                            std::shared_ptr<TrtConverterContext> context) {
  MS_EXCEPTION_IF_NULL(input);
  MS_EXCEPTION_IF_NULL(context);
  if (input->IsTensor()) {
    return ToShape(input, shape, context);
  }

  const nvinfer1::Dims &dim = TrtUtils::MsDimsToTrtDims(shape, false);
  auto *const_layer = context->network()->addConstant(dim, *input->weight());
  MS_EXCEPTION_IF_NULL(const_layer);
  return const_layer->getOutput(0);
}

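// Lower a shape-only operator to an IShuffleLayer that reshapes the input to the node's inferred output shape.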
ConvertResult AddReshapeLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  auto *layer = context->network()->addShuffle(*inputs[0].tensor());
  MS_EXCEPTION_IF_NULL(layer);
  const auto &output_shape = AnfAlgo::GetOutputInferShape(node, 0);
  const nvinfer1::Dims &dims = TrtUtils::MsDimsToTrtDims(output_shape, false);
  layer->setReshapeDimensions(dims);

  return {true, {layer->getOutput(0)}};
}

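// Lower a binary broadcast operator to an IElementWiseLayer, unsqueezing the lower-rank operand with leading 1s
// so both inputs have the same rank.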
ConvertResult AddElementLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context,
                              nvinfer1::ElementWiseOperation op_type) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 2) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
    return {false, {}};
  }

  const std::vector<size_t> &x1_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  const std::vector<size_t> &x2_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
  const std::vector<size_t> &y_shape = AnfAlgo::GetOutputInferShape(node, 0);

  auto Broadcast = [&context, &y_shape](nvinfer1::ITensor *tensor, const std::vector<size_t> &x_shape) {
    if (x_shape.size() == y_shape.size()) {
      return tensor;
    }

    // Copy x_shape into dim with the tail aligned, and fill the leading axes with 1.
    // For example:
    // x: [C, H, W]
    // y: [N, C, H, W]
    // dim: [1, C, H, W]
    nvinfer1::Dims dim;
    dim.nbDims = SizeToInt(y_shape.size());
    std::fill(dim.d, dim.d + dim.nbDims, 1);
    size_t offset = y_shape.size() - x_shape.size();
    for (size_t i = 0; i < x_shape.size(); i++) {
      dim.d[i + offset] = SizeToInt(x_shape[i]);
    }

    auto *layer = context->network()->addShuffle(*tensor);
    MS_EXCEPTION_IF_NULL(layer);
    layer->setReshapeDimensions(dim);

    return layer->getOutput(0);
  };

  auto *x1 = Broadcast(ToTensor(&inputs[0], x1_shape, context), x1_shape);
  auto *x2 = Broadcast(ToTensor(&inputs[1], x2_shape, context), x2_shape);
  auto *layer = context->network()->addElementWise(*x1, *x2, op_type);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}

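// Lower AvgPool/MaxPool (NCHW only) to an IPoolingLayer using the node's kernel_size, strides and pad_mode attributes.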
ConvertResult AddPoolingLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context,
                              nvinfer1::PoolingType pooling_type) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  const auto &format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
  if (format != "NCHW") {
    MS_LOG(WARNING) << "The format: " << format << " not supported.";
    return {false, {}};
  }

  const auto &kernel_size = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "kernel_size");
  auto *layer = context->network()->addPoolingNd(
    *(inputs[0].tensor()), pooling_type, nvinfer1::DimsHW{LongToInt(kernel_size[2]), LongToInt(kernel_size[3])});
  MS_EXCEPTION_IF_NULL(layer);

  const auto &strides = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "strides");
  layer->setStride(nvinfer1::DimsHW{LongToInt(strides[2]), LongToInt(strides[3])});

  auto pad_mode = AnfAlgo::GetNodeAttr<std::string>(node, "pad_mode");
  std::transform(pad_mode.begin(), pad_mode.end(), pad_mode.begin(), toupper);
  if (pad_mode == "SAME") {
    layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
  }

  return {true, {layer->getOutput(0)}};
}

ConvertResult AddActivationLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context,
                                 nvinfer1::ActivationType act_type) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  auto *layer = context->network()->addActivation(*inputs[0].tensor(), act_type);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}

ConvertResult AddUnaryLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context,
                            nvinfer1::UnaryOperation op_type) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  auto *layer = context->network()->addUnary(*inputs[0].tensor(), op_type);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}

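// Lower a reduce operator to an IReduceLayer. The axis attribute is converted to a bitmask; reducing all dimensions
// with keep_dims == false is emulated with keep_dims == true followed by a reshape to a 1-D tensor.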
ConvertResult AddReduceLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context,
                             nvinfer1::ReduceOperation op_type) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  // Calculate the reduce axes bitmask.
  const std::vector<size_t> &input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  const ValuePtr &value = AnfAlgo::GetCNodePrimitive(node)->GetAttr("axis");
  uint32_t reduce_axes = 0;
  if (value->isa<ValueTuple>() || value->isa<ValueList>()) {
    const auto &axis = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "axis");
    for (size_t i = 0; i < axis.size(); i++) {
      int offset = axis[i] >= 0 ? LongToInt(axis[i]) : LongToInt(axis[i] + input_shape.size());
      reduce_axes |= 1UL << offset;
    }
  } else {
    const auto &axis = AnfAlgo::GetNodeAttr<int64_t>(node, "axis");
    int offset = axis >= 0 ? LongToInt(axis) : LongToInt(axis + input_shape.size());
    reduce_axes = 1UL << offset;
  }

  // TensorRT does not support a reduce over an empty axis set.
  // Skip the reduce operator if reduce_axes == 0.
  if (reduce_axes == 0) {
    MS_LOG(WARNING) << "No dimension to be reduced. " << node->DebugString();
    return {true, {inputs[0].tensor()}};
  }

  bool keep_dims = AnfAlgo::GetNodeAttr<bool>(node, "keep_dims");
  // TensorRT does not support reducing all dimensions with keep_dims == false.
  // Reduce with keep_dims == true instead, then apply a reshape afterwards.
  bool post_reshape = false;
  if (keep_dims == false && (reduce_axes == (1UL << input_shape.size()) - 1)) {
    keep_dims = true;
    post_reshape = true;
  }

  nvinfer1::IReduceLayer *layer = context->network()->addReduce(*inputs[0].tensor(), op_type, reduce_axes, keep_dims);
  MS_EXCEPTION_IF_NULL(layer);

  if (post_reshape) {
    nvinfer1::IShuffleLayer *reshape_layer = context->network()->addShuffle(*layer->getOutput(0));
    MS_EXCEPTION_IF_NULL(reshape_layer);

    nvinfer1::Dims dim;
    dim.nbDims = 1;
    dim.d[0] = 1;
    reshape_layer->setReshapeDimensions(dim);

    return {true, {reshape_layer->getOutput(0)}};
  }

  return {true, {layer->getOutput(0)}};
}
}  // namespace

// Register an operator converter from AnfNode to TRT layer: `OPNAME` must keep the same name as the primitive
// definition.
#define MS_TRT_CONVERTER_FUNC_REG(OPNAME)                                                                   \
  ConvertResult Gpu##OPNAME##TrtConverter(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context);  \
  static const TrtOpRegister(Gpu##OPNAME##ConverterRegister)(#OPNAME, Gpu##OPNAME##TrtConverter);          \
  ConvertResult Gpu##OPNAME##TrtConverter(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context)

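// Convolution: NCHW only, lowered to IConvolutionLayer with stride, pad mode and group taken from node attributes.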
MS_TRT_CONVERTER_FUNC_REG(Conv2D) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 2 || !inputs[0].IsTensor() || !inputs[1].IsWeight()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
    return {false, {}};
  }

  const auto &data_format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
  if (data_format != "NCHW") {
    MS_LOG(WARNING) << "The format: " << data_format << " not supported.";
    return {false, {}};
  }

  const auto &kernel_size = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "kernel_size");
  const auto &out_channel = AnfAlgo::GetNodeAttr<int64_t>(node, "out_channel");
  nvinfer1::Weights bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
  auto *layer = context->network()->addConvolutionNd(
    *(inputs[0].tensor()), LongToInt(out_channel),
    nvinfer1::DimsHW{LongToInt(kernel_size[0]), LongToInt(kernel_size[1])}, *(inputs[1].weight()), bias);
  MS_EXCEPTION_IF_NULL(layer);

  const auto &strides = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "stride");
  layer->setStride(nvinfer1::DimsHW{LongToInt(strides[2]), LongToInt(strides[3])});

  auto pad_mode = AnfAlgo::GetNodeAttr<std::string>(node, "pad_mode");
  std::transform(pad_mode.begin(), pad_mode.end(), pad_mode.begin(), toupper);
  if (pad_mode == "SAME") {
    layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
  }

  if (pad_mode == "PAD") {
    const auto &pad_list = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "pad_list");
    layer->setPrePadding(nvinfer1::DimsHW{LongToInt(pad_list[0]), LongToInt(pad_list[2])});
    layer->setPostPadding(nvinfer1::DimsHW{LongToInt(pad_list[1]), LongToInt(pad_list[3])});
  }

  const auto &group = AnfAlgo::GetNodeAttr<int64_t>(node, "group");
  layer->setNbGroups(SizeToInt(group));

  return {true, {layer->getOutput(0)}};
}

// Binary broadcast operators.
MS_TRT_CONVERTER_FUNC_REG(Add) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kSUM); }
MS_TRT_CONVERTER_FUNC_REG(Sub) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kSUB); }
MS_TRT_CONVERTER_FUNC_REG(Mul) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kPROD); }
MS_TRT_CONVERTER_FUNC_REG(Div) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kDIV); }
MS_TRT_CONVERTER_FUNC_REG(RealDiv) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kDIV); }
MS_TRT_CONVERTER_FUNC_REG(Pow) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kPOW); }
MS_TRT_CONVERTER_FUNC_REG(Maximum) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kMAX); }
MS_TRT_CONVERTER_FUNC_REG(Minimum) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kMIN); }
MS_TRT_CONVERTER_FUNC_REG(FloorDiv) {
  return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kFLOOR_DIV);
}

// Unary operators
MS_TRT_CONVERTER_FUNC_REG(Exp) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kEXP); }
MS_TRT_CONVERTER_FUNC_REG(Log) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kLOG); }
MS_TRT_CONVERTER_FUNC_REG(Sqrt) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kSQRT); }
MS_TRT_CONVERTER_FUNC_REG(Reciprocal) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kRECIP); }
MS_TRT_CONVERTER_FUNC_REG(Abs) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kABS); }
MS_TRT_CONVERTER_FUNC_REG(Neg) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kNEG); }
MS_TRT_CONVERTER_FUNC_REG(Sin) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kSIN); }
MS_TRT_CONVERTER_FUNC_REG(Cos) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kCOS); }
MS_TRT_CONVERTER_FUNC_REG(Tan) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kTAN); }
MS_TRT_CONVERTER_FUNC_REG(Sinh) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kSINH); }
MS_TRT_CONVERTER_FUNC_REG(Cosh) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kCOSH); }
MS_TRT_CONVERTER_FUNC_REG(Asin) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kASIN); }
MS_TRT_CONVERTER_FUNC_REG(Acos) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kACOS); }
MS_TRT_CONVERTER_FUNC_REG(Atan) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kATAN); }
MS_TRT_CONVERTER_FUNC_REG(Asinh) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kASINH); }
MS_TRT_CONVERTER_FUNC_REG(Acosh) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kACOSH); }
MS_TRT_CONVERTER_FUNC_REG(Ceil) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kCEIL); }
MS_TRT_CONVERTER_FUNC_REG(Floor) { return AddUnaryLayer(node, context, nvinfer1::UnaryOperation::kFLOOR); }

// Reduce operators
MS_TRT_CONVERTER_FUNC_REG(ReduceSum) { return AddReduceLayer(node, context, nvinfer1::ReduceOperation::kSUM); }
MS_TRT_CONVERTER_FUNC_REG(ReduceMean) { return AddReduceLayer(node, context, nvinfer1::ReduceOperation::kAVG); }
MS_TRT_CONVERTER_FUNC_REG(ReduceMax) { return AddReduceLayer(node, context, nvinfer1::ReduceOperation::kMAX); }
MS_TRT_CONVERTER_FUNC_REG(ReduceMin) { return AddReduceLayer(node, context, nvinfer1::ReduceOperation::kMIN); }
MS_TRT_CONVERTER_FUNC_REG(ReduceProd) { return AddReduceLayer(node, context, nvinfer1::ReduceOperation::kPROD); }

// Pooling operators.
MS_TRT_CONVERTER_FUNC_REG(AvgPool) { return AddPoolingLayer(node, context, nvinfer1::PoolingType::kAVERAGE); }
MS_TRT_CONVERTER_FUNC_REG(MaxPool) { return AddPoolingLayer(node, context, nvinfer1::PoolingType::kMAX); }

// Activation operators.
MS_TRT_CONVERTER_FUNC_REG(ReLU) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kRELU); }
MS_TRT_CONVERTER_FUNC_REG(Sigmoid) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kSIGMOID); }
MS_TRT_CONVERTER_FUNC_REG(Tanh) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kTANH); }
MS_TRT_CONVERTER_FUNC_REG(Elu) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kELU); }
MS_TRT_CONVERTER_FUNC_REG(Softsign) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kSOFTSIGN); }

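// ReLU6 is composed from constant and element-wise layers as y = max(0.0, min(6.0, x)).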
MS_TRT_CONVERTER_FUNC_REG(ReLU6) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  const std::vector<size_t> &x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  nvinfer1::Dims dim;
  dim.nbDims = SizeToInt(x_shape.size());
  std::fill(dim.d, dim.d + dim.nbDims, 1);

  auto AddConst = [&context, &dim](const float &coeff) -> nvinfer1::ITensor * {
    std::shared_ptr<tensor::Tensor> weight = context->CreateTempWeight(kNumberTypeFloat32, {1});
    auto value = static_cast<float *>(weight->data_c());
    value[0] = coeff;

    auto *layer = context->network()->addConstant(dim, nvinfer1::Weights{nvinfer1::DataType::kFLOAT, value, 1});
    MS_EXCEPTION_IF_NULL(layer);
    return layer->getOutput(0);
  };

  // y = max(0.0, min(6.0, x))
  auto *c0 = AddConst(0.0f);
  auto *c1 = AddConst(6.0f);
  auto *x = inputs[0].tensor();
  nvinfer1::ILayer *layer = context->network()->addElementWise(*x, *c1, nvinfer1::ElementWiseOperation::kMIN);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*layer->getOutput(0), *c0, nvinfer1::ElementWiseOperation::kMAX);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}

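// GeLU is composed from constant, element-wise and tanh layers using the tanh approximation below.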
MS_TRT_CONVERTER_FUNC_REG(GeLU) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  const std::vector<size_t> &x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  nvinfer1::Dims dim;
  dim.nbDims = SizeToInt(x_shape.size());
  std::fill(dim.d, dim.d + dim.nbDims, 1);

  auto AddConst = [&context, &dim](const float &coeff) -> nvinfer1::ITensor * {
    std::shared_ptr<tensor::Tensor> weight = context->CreateTempWeight(kNumberTypeFloat32, {1});
    auto value = static_cast<float *>(weight->data_c());
    value[0] = coeff;

    auto *layer = context->network()->addConstant(dim, nvinfer1::Weights{nvinfer1::DataType::kFLOAT, value, 1});
    MS_EXCEPTION_IF_NULL(layer);
    return layer->getOutput(0);
  };

  // y = 0.5 * x * (1 + tanh(0.7978846 * (x + 0.044715 * x^3)))
  auto *c1 = AddConst(0.5f);
  auto *c2 = AddConst(1.0f);
  auto *c3 = AddConst(0.7978846f);
  auto *c4 = AddConst(0.044715f);
  auto *c5 = AddConst(3.0f);

  auto *x = inputs[0].tensor();
  nvinfer1::ILayer *layer = context->network()->addElementWise(*x, *c5, nvinfer1::ElementWiseOperation::kPOW);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*c4, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*x, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*c3, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addActivation(*layer->getOutput(0), nvinfer1::ActivationType::kTANH);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*c2, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*x, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*c1, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}

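// HSigmoid is composed from constant and element-wise layers, see the formula below.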
MS_TRT_CONVERTER_FUNC_REG(HSigmoid) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  const std::vector<size_t> &x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  nvinfer1::Dims dim;
  dim.nbDims = SizeToInt(x_shape.size());
  std::fill(dim.d, dim.d + dim.nbDims, 1);

  auto AddConst = [&context, &dim](const float &coeff) -> nvinfer1::ITensor * {
    std::shared_ptr<tensor::Tensor> weight = context->CreateTempWeight(kNumberTypeFloat32, {1});
    auto value = static_cast<float *>(weight->data_c());
    value[0] = coeff;

    auto *layer = context->network()->addConstant(dim, nvinfer1::Weights{nvinfer1::DataType::kFLOAT, value, 1});
    MS_EXCEPTION_IF_NULL(layer);
    return layer->getOutput(0);
  };

  // y = max(0, min(1.0, (x + 3.0) / 6.0))
  auto *c0 = AddConst(0.0f);
  auto *c1 = AddConst(1.0f);
  auto *c2 = AddConst(3.0f);
  auto *c3 = AddConst(6.0f);
  auto *x = inputs[0].tensor();
  nvinfer1::ILayer *layer = context->network()->addElementWise(*x, *c2, nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*layer->getOutput(0), *c3, nvinfer1::ElementWiseOperation::kDIV);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*layer->getOutput(0), *c1, nvinfer1::ElementWiseOperation::kMIN);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*layer->getOutput(0), *c0, nvinfer1::ElementWiseOperation::kMAX);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}

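// HSwish is composed from constant and element-wise layers, see the formula below.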
MS_TRT_CONVERTER_FUNC_REG(HSwish) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  const std::vector<size_t> &x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  nvinfer1::Dims dim;
  dim.nbDims = SizeToInt(x_shape.size());
  std::fill(dim.d, dim.d + dim.nbDims, 1);

  auto AddConst = [&context, &dim](const float &coeff) -> nvinfer1::ITensor * {
    std::shared_ptr<tensor::Tensor> weight = context->CreateTempWeight(kNumberTypeFloat32, {1});
    auto value = static_cast<float *>(weight->data_c());
    value[0] = coeff;

    auto *layer = context->network()->addConstant(dim, nvinfer1::Weights{nvinfer1::DataType::kFLOAT, value, 1});
    MS_EXCEPTION_IF_NULL(layer);
    return layer->getOutput(0);
  };

  // y = x * Relu6(x + 3.0) / 6.0
  // Relu6(x) = min(max(x, 0.0), 6.0)
  auto *c0 = AddConst(0.0f);
  auto *c1 = AddConst(3.0f);
  auto *c2 = AddConst(6.0f);
  auto *x = inputs[0].tensor();
  nvinfer1::ILayer *layer = context->network()->addElementWise(*x, *c1, nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*layer->getOutput(0), *c0, nvinfer1::ElementWiseOperation::kMAX);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*layer->getOutput(0), *c2, nvinfer1::ElementWiseOperation::kMIN);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*layer->getOutput(0), *c2, nvinfer1::ElementWiseOperation::kDIV);
  MS_EXCEPTION_IF_NULL(layer);
  layer = context->network()->addElementWise(*x, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}

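// MatMul: the tensor-times-constant-weight case with transpose_a == false and transpose_b == true is lowered to a
// fully connected layer; all other cases use IMatrixMultiplyLayer.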
MS_TRT_CONVERTER_FUNC_REG(MatMul) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 2) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
    return {false, {}};
  }

  const auto &transpose_a = AnfAlgo::GetNodeAttr<bool>(node, "transpose_a");
  const auto &transpose_b = AnfAlgo::GetNodeAttr<bool>(node, "transpose_b");
  if (inputs[0].IsTensor() && inputs[1].IsWeight() && transpose_a == false && transpose_b == true) {
    // Reshape x from (M, K) to (M, K, 1, 1).
    nvinfer1::Dims unsqueeze_dims = inputs[0].tensor()->getDimensions();
    for (size_t i = 0; i < 2; i++) {
      unsqueeze_dims.d[unsqueeze_dims.nbDims++] = 1;
    }
    auto x_reshape = context->network()->addShuffle(*inputs[0].tensor());
    MS_EXCEPTION_IF_NULL(x_reshape);
    x_reshape->setReshapeDimensions(unsqueeze_dims);

    // Apply addFullyConnected: y = x * w^T + b
    nvinfer1::Weights bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
    const auto &w_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
    auto *layer =
      context->network()->addFullyConnected(*x_reshape->getOutput(0), w_shape[0], *inputs[1].weight(), bias);
    MS_EXCEPTION_IF_NULL(layer);

    // Reshape y from (M, N, 1, 1) back to (M, N).
    const auto &y_shape = AnfAlgo::GetOutputInferShape(node, 0);
    const nvinfer1::Dims &y_dims = TrtUtils::MsDimsToTrtDims(y_shape, false);
    auto *squeeze_y = context->network()->addShuffle(*layer->getOutput(0));
    MS_EXCEPTION_IF_NULL(squeeze_y);
    squeeze_y->setReshapeDimensions(y_dims);

    return {true, {squeeze_y->getOutput(0)}};
  } else {
    auto op1 = transpose_a ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;
    auto op2 = transpose_b ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;
    const std::vector<size_t> &x1_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
    const std::vector<size_t> &x2_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
    nvinfer1::ITensor *x1 = ToTensor(&inputs[0], x1_shape, context);
    nvinfer1::ITensor *x2 = ToTensor(&inputs[1], x2_shape, context);
    auto *layer = context->network()->addMatrixMultiply(*x1, op1, *x2, op2);
    MS_EXCEPTION_IF_NULL(layer);
    return {true, {layer->getOutput(0)}};
  }
}

MS_TRT_CONVERTER_FUNC_REG(BatchMatMul) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 2) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
    return {false, {}};
  }

  const auto &transpose_a = AnfAlgo::GetNodeAttr<bool>(node, "transpose_a");
  const auto &transpose_b = AnfAlgo::GetNodeAttr<bool>(node, "transpose_b");
  const auto &trt_transpose1 = transpose_a ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;
  const auto &trt_transpose2 = transpose_b ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;

  std::vector<size_t> shape1 = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  std::vector<size_t> shape2 = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
  nvinfer1::ITensor *tensor1 = ToTensor(&inputs[0], shape1, context);
  nvinfer1::ITensor *tensor2 = ToTensor(&inputs[1], shape2, context);
  auto *layer = context->network()->addMatrixMultiply(*tensor1, trt_transpose1, *tensor2, trt_transpose2);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}

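// BiasAdd: unsqueeze the 1-D bias to the rank of x along the channel axis of `format`, then add with broadcast.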
MS_TRT_CONVERTER_FUNC_REG(BiasAdd) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 2) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
    return {false, {}};
  }

  const auto &x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  const auto &bias_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
  const auto &format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
  const string::size_type &pos = format.find("C");
  if (pos == std::string::npos || pos >= x_shape.size()) {
    MS_LOG(WARNING) << "The format: " << format << " is invalid.";
    return {false, {}};
  }

  // Convert the bias to an ITensor with the same rank as x.
  std::vector<size_t> unsqueeze_bias_dims(x_shape.size(), 1);
  unsqueeze_bias_dims[pos] = SizeToInt(bias_shape[0]);
  nvinfer1::ITensor *bias = ToTensor(&inputs[1], unsqueeze_bias_dims, context);

  // Create a broadcast Add layer.
  auto *layer = context->network()->addElementWise(*inputs[0].tensor(), *bias, nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}

// Shape-only operators, lowered to reshape.
MS_TRT_CONVERTER_FUNC_REG(Reshape) { return AddReshapeLayer(node, context); }
MS_TRT_CONVERTER_FUNC_REG(ExpandDims) { return AddReshapeLayer(node, context); }
MS_TRT_CONVERTER_FUNC_REG(Squeeze) { return AddReshapeLayer(node, context); }
MS_TRT_CONVERTER_FUNC_REG(Flatten) { return AddReshapeLayer(node, context); }

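// BatchNorm (inference only): fold gamma/beta/mean/var into per-channel scale and shift weights applied by an
// IScaleLayer.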
MS_TRT_CONVERTER_FUNC_REG(BatchNorm) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 5 || !inputs[0].IsTensor() || !inputs[1].IsWeight() || !inputs[2].IsWeight() ||
      !inputs[3].IsWeight() || !inputs[4].IsWeight()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 5 expected.";
    return {false, {}};
  }

  auto primitive = GetCNodePrimitive(node);
  MS_EXCEPTION_IF_NULL(primitive);
  auto is_training = AnfAlgo::GetNodeAttr<bool>(node, "is_training");
  if (is_training != false) {
    MS_LOG(WARNING) << "Operation not supported, is_training: " << is_training;
    return {false, {}};
  }

  const auto &format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
  if (format != "NCHW") {
    MS_LOG(WARNING) << "The format: " << format << " not supported.";
    return {false, {}};
  }

  // scale = gamma / sqrt(var + epsilon)
  // y = (x - mean) * scale + beta
  //   = x * scale - mean * scale + beta
  //   = x * coeff + bias
  auto gamma = static_cast<const float *>(inputs[1].weight()->values);
  auto beta = static_cast<const float *>(inputs[2].weight()->values);
  auto mean = static_cast<const float *>(inputs[3].weight()->values);
  auto var = static_cast<const float *>(inputs[4].weight()->values);
  auto epsilon = AnfAlgo::GetNodeAttr<float>(node, "epsilon");

  const TypeId &type = AnfAlgo::GetPrevNodeOutputInferDataType(node, 1);
  const std::vector<size_t> &shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
  int64_t channel_num = SizeToLong(shape[0]);
  auto coeff = context->CreateTempWeight(type, shape);
  auto bias = context->CreateTempWeight(type, shape);
  auto coeff_value = static_cast<float *>(coeff->data_c());
  auto bias_value = static_cast<float *>(bias->data_c());
  for (int64_t i = 0; i < channel_num; i++) {
    float scale = gamma[i] / sqrtf(var[i] + epsilon);
    coeff_value[i] = scale;
    bias_value[i] = beta[i] - mean[i] * scale;
  }

  const nvinfer1::Weights &scale{nvinfer1::DataType::kFLOAT, coeff_value, channel_num};
  const nvinfer1::Weights &shift{nvinfer1::DataType::kFLOAT, bias_value, channel_num};
  const nvinfer1::Weights &pow{nvinfer1::DataType::kFLOAT, nullptr, 0};
  auto *layer = context->network()->addScale(*inputs[0].tensor(), nvinfer1::ScaleMode::kCHANNEL, shift, scale, pow);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}

MS_TRT_CONVERTER_FUNC_REG(Concat) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() == 0) {
    MS_LOG(WARNING) << "Get inputs failed. Input num: " << inputs.size();
    return {false, {}};
  }

  std::vector<nvinfer1::ITensor *> tensors;
  for (const auto &input : inputs) {
    if (input.IsWeight()) {
      MS_LOG(WARNING) << "Concat does not support weight inputs.";
      return {false, {}};
    }
    tensors.push_back(input.tensor());
  }

  auto *layer = context->network()->addConcatenation(tensors.data(), tensors.size());
  MS_EXCEPTION_IF_NULL(layer);

  auto axis = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(node, "axis"));
  if (axis < 0) {
    auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
    axis += SizeToInt(input_shape.size());
  }
  layer->setAxis(axis);

  return {true, {layer->getOutput(0)}};
}

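// Conv2DBackpropInput: NCHW only, lowered to IDeconvolutionLayer with stride and padding taken from node attributes.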
MS_TRT_CONVERTER_FUNC_REG(Conv2DBackpropInput) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 2 || !inputs[0].IsTensor() || !inputs[1].IsWeight()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
    return {false, {}};
  }

  const auto &format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
  if (format != "NCHW") {
    MS_LOG(WARNING) << "The format: " << format << " not supported.";
    return {false, {}};
  }

  const auto &kernel_size = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "kernel_size");
  const auto &output_shape = AnfAlgo::GetOutputInferShape(node, 0);
  const nvinfer1::Weights &bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
  auto *layer = context->network()->addDeconvolutionNd(
    *(inputs[0].tensor()), SizeToInt(output_shape[1]),
    nvinfer1::DimsHW{LongToInt(kernel_size[0]), LongToInt(kernel_size[1])}, *(inputs[1].weight()), bias);
  MS_EXCEPTION_IF_NULL(layer);

  const auto &strides = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "stride");
  layer->setStride(nvinfer1::DimsHW{LongToInt(strides[2]), LongToInt(strides[3])});

  auto pad_mode = AnfAlgo::GetNodeAttr<std::string>(node, "pad_mode");
  std::transform(pad_mode.begin(), pad_mode.end(), pad_mode.begin(), toupper);
  if (pad_mode == "SAME") {
    layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
  }

  if (pad_mode == "PAD") {
    const auto &pad_list = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "pad_list");
    layer->setPaddingMode(nvinfer1::PaddingMode::kEXPLICIT_ROUND_DOWN);
    layer->setPrePadding(nvinfer1::DimsHW{LongToInt(pad_list[0]), LongToInt(pad_list[2])});
    layer->setPostPadding(nvinfer1::DimsHW{LongToInt(pad_list[1]), LongToInt(pad_list[3])});
  }

  return {true, {layer->getOutput(0)}};
}

MS_TRT_CONVERTER_FUNC_REG(Slice) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  const auto &begin = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "begin");
  const auto &size = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "size");

  nvinfer1::Dims trt_start = TrtUtils::MsDimsToTrtDims(begin, false);
  nvinfer1::Dims trt_size = TrtUtils::MsDimsToTrtDims(size, false);
  // Build an all-ones stride with the same rank as the start dims.
  nvinfer1::Dims trt_stride;
  trt_stride.nbDims = 0;
  for (int32_t i = 0; i < trt_start.nbDims; i++) {
    trt_stride.d[trt_stride.nbDims++] = 1;
  }

  auto *layer = context->network()->addSlice(*inputs[0].tensor(), trt_start, trt_size, trt_stride);
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}

MS_TRT_CONVERTER_FUNC_REG(Transpose) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  const auto &perm = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "perm");
  nvinfer1::Permutation trt_perm;
  for (size_t i = 0; i < perm.size(); i++) {
    trt_perm.order[i] = LongToInt(perm[i]);
  }

  auto *layer = context->network()->addShuffle(*inputs[0].tensor());
  MS_EXCEPTION_IF_NULL(layer);
  layer->setFirstTranspose(trt_perm);

  return {true, {layer->getOutput(0)}};
}

MS_TRT_CONVERTER_FUNC_REG(Softmax) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  const std::vector<size_t> &input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  const ValuePtr &value = AnfAlgo::GetCNodePrimitive(node)->GetAttr("axis");
  uint32_t reduce_axes = 0;
  if (value->isa<ValueTuple>() || value->isa<ValueList>()) {
    const auto &axis = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "axis");
    if (axis.size() != 1) {
      MS_LOG(WARNING) << "Only one axis can be set. Axis size: " << axis.size();
      return {false, {}};
    }
    int offset = axis[0] >= 0 ? LongToInt(axis[0]) : LongToInt(axis[0] + input_shape.size());
    reduce_axes = 1U << offset;
  } else {
    const auto &axis = AnfAlgo::GetNodeAttr<int64_t>(node, "axis");
    int offset = axis >= 0 ? LongToInt(axis) : LongToInt(axis + input_shape.size());
    reduce_axes = 1UL << offset;
  }

  auto *layer = context->network()->addSoftMax(*inputs[0].tensor());
  MS_EXCEPTION_IF_NULL(layer);
  layer->setAxes(reduce_axes);
  return {true, {layer->getOutput(0)}};
}

MS_TRT_CONVERTER_FUNC_REG(LogSoftmax) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 1 expected.";
    return {false, {}};
  }

  const std::vector<size_t> &input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  const auto &axis = AnfAlgo::GetNodeAttr<int64_t>(node, "axis");
  int offset = axis >= 0 ? LongToInt(axis) : LongToInt(axis + input_shape.size());
  uint32_t reduce_axes = 1UL << offset;

  auto *softmax_layer = context->network()->addSoftMax(*inputs[0].tensor());
  MS_EXCEPTION_IF_NULL(softmax_layer);
  softmax_layer->setAxes(reduce_axes);

  auto *log_layer = context->network()->addUnary(*softmax_layer->getOutput(0), nvinfer1::UnaryOperation::kLOG);
  MS_EXCEPTION_IF_NULL(log_layer);

  return {true, {log_layer->getOutput(0)}};
}

MS_TRT_CONVERTER_FUNC_REG(Gather) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 2) {
    MS_LOG(WARNING) << "Input num not match: " << inputs.size() << ", with 2 expected.";
    return {false, {}};
  }

  const std::vector<size_t> &input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  auto axis = AnfAlgo::GetNodeAttr<int64_t>(node, "axis");
  axis = axis >= 0 ? axis : axis + input_shape.size();

  nvinfer1::ITensor *input = ToTensor(&inputs[0], input_shape, context);
  const std::vector<size_t> &indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
  nvinfer1::ITensor *indices = ToTensor(&inputs[1], indices_shape, context);

  auto *layer = context->network()->addGather(*input, *indices, LongToInt(axis));
  MS_EXCEPTION_IF_NULL(layer);

  return {true, {layer->getOutput(0)}};
}

MS_TRT_CONVERTER_FUNC_REG(Cast) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 1) {
    MS_LOG(WARNING) << "Get inputs failed. Input num: " << inputs.size();
    return {false, {}};
  }

  const std::vector<size_t> &input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
  nvinfer1::ITensor *input = ToTensor(&inputs[0], input_shape, context);

  const TypeId &dst_type = AnfAlgo::GetOutputInferDataType(node, 0);
  std::variant<bool, nvinfer1::DataType> type = TrtUtils::MsDtypeToTrtDtype(dst_type);
  if (type.index() != 1) {
    return {false, {}};
  }
  auto trt_type = std::get<nvinfer1::DataType>(type);
  auto *layer = context->network()->addIdentity(*input);
  MS_EXCEPTION_IF_NULL(layer);
  layer->setOutputType(0, trt_type);

  if (trt_type == nvinfer1::DataType::kHALF) {
    MS_LOG(WARNING) << "The model was exported with auto-mixed-precision or manual precision mode. "
                    << "Falling back to inference with the native backend. It is recommended to export an FP32 model "
                    << "and then run inference with the FP16 precision mode configuration.";
    return {false, {}};
  }
  return {true, {layer->getOutput(0)}};
}

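// LayerNorm: decomposed into reduce, element-wise and unary layers over the axes starting at begin_norm_axis.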
MS_TRT_CONVERTER_FUNC_REG(LayerNorm) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret || inputs.size() != 3 || !inputs[0].IsTensor()) {
    MS_LOG(WARNING) << "Get inputs failed. Input num: " << inputs.size();
    return {false, {}};
  }

  // Calculate the reduce axes.
  const std::vector<size_t> &input_shape = AnfAlgo::GetOutputInferShape(node, 0);
  auto begin_norm_axis = AnfAlgo::GetNodeAttr<int64_t>(node, "begin_norm_axis");
  begin_norm_axis = begin_norm_axis >= 0 ? begin_norm_axis : begin_norm_axis + input_shape.size();
  uint32_t reduce_axes = 0;
  for (size_t i = LongToSize(begin_norm_axis); i < input_shape.size(); i++) {
    reduce_axes |= 1UL << i;
  }

  // Reshape gamma and beta for broadcast.
  auto begin_params_axis = AnfAlgo::GetNodeAttr<int64_t>(node, "begin_params_axis");
  begin_params_axis = begin_params_axis >= 0 ? begin_params_axis : begin_params_axis + input_shape.size();
  std::vector<size_t> param_shape = input_shape;
  for (size_t j = 0; j < LongToSize(begin_params_axis); j++) {
    param_shape[j] = 1;
  }

  auto epsilon = AnfAlgo::GetNodeAttr<float>(node, "epsilon");
  std::shared_ptr<tensor::Tensor> weight = context->CreateTempWeight(kNumberTypeFloat32, {1});
  auto value = static_cast<float *>(weight->data_c());
  value[0] = epsilon;
  nvinfer1::Dims dim;
  dim.nbDims = SizeToInt(input_shape.size());
  for (size_t i = 0; i < input_shape.size(); i++) {
    dim.d[i] = 1;
  }
  auto *epsilon_layer = context->network()->addConstant(dim, nvinfer1::Weights{nvinfer1::DataType::kFLOAT, value, 1});
  MS_EXCEPTION_IF_NULL(epsilon_layer);

  // y = (x - mean) / sqrt(var + epsilon) * gamma + beta
  auto *mean = context->network()->addReduce(*inputs[0].tensor(), nvinfer1::ReduceOperation::kAVG, reduce_axes, true);
  MS_EXCEPTION_IF_NULL(mean);
  auto *sub =
    context->network()->addElementWise(*inputs[0].tensor(), *mean->getOutput(0), nvinfer1::ElementWiseOperation::kSUB);
  MS_EXCEPTION_IF_NULL(sub);
  auto *pow =
    context->network()->addElementWise(*sub->getOutput(0), *sub->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(pow);
  auto *var = context->network()->addReduce(*pow->getOutput(0), nvinfer1::ReduceOperation::kAVG, reduce_axes, true);
  MS_EXCEPTION_IF_NULL(var);
  auto *var_epsilon = context->network()->addElementWise(*var->getOutput(0), *epsilon_layer->getOutput(0),
                                                         nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(var_epsilon);
  auto *std = context->network()->addUnary(*var_epsilon->getOutput(0), nvinfer1::UnaryOperation::kSQRT);
  MS_EXCEPTION_IF_NULL(std);
  auto *div =
    context->network()->addElementWise(*sub->getOutput(0), *std->getOutput(0), nvinfer1::ElementWiseOperation::kDIV);
  MS_EXCEPTION_IF_NULL(div);
  auto *mul = context->network()->addElementWise(*div->getOutput(0), *ToTensor(&inputs[1], param_shape, context),
                                                 nvinfer1::ElementWiseOperation::kPROD);
  MS_EXCEPTION_IF_NULL(mul);
  auto *add = context->network()->addElementWise(*mul->getOutput(0), *ToTensor(&inputs[2], param_shape, context),
                                                 nvinfer1::ElementWiseOperation::kSUM);
  MS_EXCEPTION_IF_NULL(add);

  return {true, {add->getOutput(0)}};
}

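// Return: mark each graph output as a network output tensor named return_output_<i>.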
MS_TRT_CONVERTER_FUNC_REG(Return) {
  std::vector<LayerInput> inputs;
  bool ret = context->LoadLayerInput(node, &inputs);
  if (!ret) {
    return {false, {}};
  }

  for (size_t i = 0; i < inputs.size(); ++i) {
    nvinfer1::ITensor *input = nullptr;
    if (inputs[i].IsTensor()) {
      input = inputs[i].tensor();
    } else {
      std::vector<size_t> shape;
      std::transform(inputs[i].shape().begin(), inputs[i].shape().end(), std::back_inserter(shape),
                     [](int64_t d) { return LongToSize(d); });
      input = ToTensor(&inputs[i], shape, context);
    }

    const std::string &name = "return_output_" + std::to_string(i);
    input->setName(name.c_str());
    context->network()->markOutput(*input);
  }

  return {true, {}};
}
}  // namespace opt
}  // namespace mindspore