/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h"

#include <algorithm>
#include <cmath>
#include <cstring>
#include <map>
#include <memory>
#include <set>
#include <unordered_map>
#include <utility>
#include <vector>

#include "absl/memory/memory.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
#include "tensorflow/core/framework/node_def.pb.h"  // NOLINT
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/tensor.pb.h"  // NOLINT
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"  // NOLINT
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/graph/algorithm.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/graph_constructor.h"
#include "tensorflow/core/grappler/op_types.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/platform/tensor_coding.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/version.h"
#include "tensorflow/core/util/strided_slice_op.h"

#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
#include "third_party/tensorrt/NvInfer.h"
#include "third_party/tensorrt/NvInferPlugin.h"

// Check if the types are equal. Cast to int first so that the failure log
// message works.
#define TFTRT_CHECK_EQ_TYPE(val1, val2) CHECK_EQ((int)val1, (int)val2)

#define TFTRT_INTERNAL_ERROR_AT_NODE(node)                           \
  do {                                                               \
    return errors::Internal("TFTRT::", __FUNCTION__, ":", __LINE__,  \
                            " failed to add TRT layer, at: ", node); \
  } while (0)

#define TFTRT_RETURN_ERROR_IF_NULLPTR(ptr, node) \
  do {                                           \
    if (ptr == nullptr) {                        \
      TFTRT_INTERNAL_ERROR_AT_NODE(node);        \
    }                                            \
  } while (0)

namespace tensorflow {
namespace tensorrt {
namespace convert {

bool IsEngineInput(absl::string_view name) {
  return absl::StartsWith(name, IONamePrefixes::kInputPHName);
}
bool IsEngineOutput(absl::string_view name) {
  return absl::StartsWith(name, IONamePrefixes::kOutputPHName);
}

using absl::StrAppend;
using absl::StrCat;

inline Status TfDataTypeToTrt(DataType tf_dtype,
                              nvinfer1::DataType* trt_dtype) {
  switch (tf_dtype) {
    case DataType::DT_FLOAT:
      *trt_dtype = nvinfer1::DataType::kFLOAT;
      break;
    case DataType::DT_HALF:
      *trt_dtype = nvinfer1::DataType::kHALF;
      break;
    case DataType::DT_INT32:
      *trt_dtype = nvinfer1::DataType::kINT32;
      break;
    default:
      return errors::InvalidArgument("Unsupported data type ",
                                     DataTypeString(tf_dtype));
  }
  return Status::OK();
}

inline Status TrtDataTypeToTf(nvinfer1::DataType trt_dtype,
                              DataType* tf_dtype) {
  switch (trt_dtype) {
    case nvinfer1::DataType::kFLOAT:
      *tf_dtype = DataType::DT_FLOAT;
      break;
    case nvinfer1::DataType::kHALF:
      *tf_dtype = DataType::DT_HALF;
      break;
    case nvinfer1::DataType::kINT32:
      *tf_dtype = DataType::DT_INT32;
      break;
    default:
      return errors::InvalidArgument("Unsupported data type ",
                                     DebugString(trt_dtype));
  }
  return Status::OK();
}

class TFAttrs {
 public:
  explicit TFAttrs(const NodeDef& tf_node) {
    for (const auto& attr : tf_node.attr()) {
      attrs_.insert({attr.first, &attr.second});
    }
  }

  bool count(const string& key) const { return attrs_.count(key); }

  AttrValue const* at(const string& key) const {
    if (!attrs_.count(key)) {
      LOG(FATAL) << "Attribute not found: " << key;
    }
    return attrs_.at(key);
  }

  template <typename T>
  T get(const string& key) const;

  template <typename T>
  T get(const string& key, const T& default_value) const {
    return attrs_.count(key) ? this->get<T>(key) : default_value;
  }

 private:
  std::map<string, AttrValue const*> attrs_;
};

template <>
string TFAttrs::get<string>(const string& key) const {
  return this->at(key)->s();
}

template <>
std::vector<int64> TFAttrs::get<std::vector<int64>>(const string& key) const {
  auto attr = this->at(key)->list().i();
  return std::vector<int64>(attr.begin(), attr.end());
}

template <>
std::vector<float> TFAttrs::get<std::vector<float>>(const string& key) const {
  auto attr = this->at(key)->list().f();
  return std::vector<float>(attr.begin(), attr.end());
}

template <>
nvinfer1::DataType TFAttrs::get<nvinfer1::DataType>(const string& key) const {
  nvinfer1::DataType trt_dtype(nvinfer1::DataType::kFLOAT);
  TF_CHECK_OK(TfDataTypeToTrt(this->at(key)->type(), &trt_dtype));
  return trt_dtype;
}

template <>
DataType TFAttrs::get<DataType>(const string& key) const {
  return this->at(key)->type();
}

template <>
float TFAttrs::get<float>(const string& key) const {
  return this->at(key)->f();
}

template <>
bool TFAttrs::get<bool>(const string& key) const {
  return this->at(key)->b();
}

template <>
int64 TFAttrs::get<int64>(const string& key) const {
  return this->at(key)->i();
}
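
// A minimal usage sketch for TFAttrs (the attribute names below are
// hypothetical, not taken from this file): given a NodeDef `conv`, attributes
// can be read through the typed accessors, optionally with a fallback when an
// attribute is absent, e.g.
//
//   TFAttrs attrs(conv);
//   auto strides = attrs.get<std::vector<int64>>("strides");
//   bool use_bias = attrs.get<bool>("use_bias", /*default_value=*/false);
//
// Note that at() (and therefore get() without a default) LOG(FATAL)s on a
// missing key, so callers should check count() first for optional attributes.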

template <typename Container>
Status TensorShapeArrayToTrtDims(const Container& shape, nvinfer1::Dims* out,
                                 bool ignore_first_dim = false) {
  PartialTensorShape tensor_shape;
  TF_RETURN_IF_ERROR(TensorShapeUtils::MakeShape(shape, &tensor_shape));
  *out = TensorShapeToTrtDims(tensor_shape, ignore_first_dim);
  return Status::OK();
}

// TODO(laigd): use this utility function in more places.
Status RemoveBatchDimension(nvinfer1::Dims* dims) {
  if (dims->nbDims < 2) {
    return errors::InvalidArgument(
        "Dropping batch dimension requires dims with rank>=2.");
  }
  std::copy(dims->d + 1, dims->d + dims->nbDims, dims->d);
  dims->nbDims--;
  return Status::OK();
}
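
// For example, RemoveBatchDimension turns dims {4, 3, 224, 224} (NCHW with an
// explicit batch of 4) into {3, 224, 224}, while a rank-1 input such as {4} is
// rejected because nothing would remain after dropping the batch dimension.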

void GetOutputProperties(const grappler::GraphProperties& graph_properties,
                         const Node* node, const int out_port,
                         PartialTensorShape* shape, DataType* dtype) {
  if (graph_properties.HasOutputProperties(node->name())) {
    auto output_params = graph_properties.GetOutputProperties(node->name());
    auto out_shape = output_params.at(out_port);
    *dtype = out_shape.dtype();
    *shape = out_shape.shape();
  } else {
    LOG(INFO) << "Unknown output shape " << node->name();
    *dtype = node->output_type(out_port);
  }
}

void GetInputProperties(const grappler::GraphProperties& graph_properties,
                        const Node* node, const int in_port,
                        PartialTensorShape* shape, DataType* dtype) {
  if (graph_properties.HasInputProperties(node->name())) {
    auto input_params = graph_properties.GetInputProperties(node->name());
    auto in_shape = input_params.at(in_port);
    *dtype = in_shape.dtype();
    *shape = in_shape.shape();
  } else {
    *dtype = node->input_type(in_port);
  }
}

Status ValidateTensorProperties(const string& producer_node_type,
                                const DataType dtype,
                                const PartialTensorShape& shape,
                                const bool use_implicit_batch,
                                bool validation_only,
                                nvinfer1::DataType* trt_dtype,
                                nvinfer1::Dims* trt_dims, int* batch_size) {
  // Convert data type.
  TF_RETURN_IF_ERROR(TfDataTypeToTrt(dtype, trt_dtype));

  // Convert shape.
  if (shape.dims() < 0) {
    return errors::InvalidArgument("Input tensor rank is unknown.");
  }
  // Add 1 to the maximum rank for the implicit batch dim.
  const int max_rank = nvinfer1::Dims::MAX_DIMS + (use_implicit_batch ? 1 : 0);
  if (shape.dims() > max_rank) {
    return errors::OutOfRange("Input tensor rank is greater than ", max_rank);
  }
  if (use_implicit_batch && (producer_node_type != "Const") &&
      (shape.dims() < 1)) {
    return errors::InvalidArgument(
        "Scalar input tensor is not supported since the first dimension "
        "is treated as batch dimension by TRT");
  }
  *trt_dims = TensorShapeToTrtDims(shape,
                                   /*ignore_first_dim=*/use_implicit_batch);
  // Get the batch size for the tensor if it will not be included in the shape.
  if (use_implicit_batch) {
    *batch_size = shape.dim_size(0);
  }

  // Don't convert empty tensors (dim value of 0).
  const int first_trt_dim = use_implicit_batch ? 1 : 0;
  for (int d = first_trt_dim; d < shape.dims(); ++d) {
    if (shape.dim_size(d) == 0) {
      return errors::Unimplemented(
          "Input tensor with shape ", shape.DebugString(),
          " is an empty tensor, which is not supported by TRT");
    }
  }

  if (validation_only) return Status::OK();
  // The following are validations at runtime.

  for (int d = first_trt_dim; d < shape.dims(); ++d) {
    if (shape.dim_size(d) < 0) {
      return errors::InvalidArgument(
          "Input tensor with shape ", shape.DebugString(),
          " has an unknown non-batch dimension at dim ", d);
    }
  }
  return Status::OK();
}

Status GetTrtBroadcastShape(const TRT_TensorOrWeights& operand_l,
                            const TRT_TensorOrWeights& operand_r,
                            const bool check_feasibility,
                            const bool use_implicit_batch,
                            nvinfer1::Dims* operand_l_new_dims,
                            nvinfer1::Dims* operand_r_new_dims) {
  // The TensorRT elementwise op supports broadcast but requires both tensors
  // to be of identical rank.
  //
  // We consider the cases of:
  // 1. operand_l is a tensor and operand_r is a constant;
  // 2. operand_l is a tensor and operand_r is a tensor;
  // Note: const op const (constant folding) should fall back to TensorFlow.
  //
  // Broadcast scheme:
  //   T: 1 3 5    (a tensor does not carry the batch dimension)
  //   W: 1 1 3 1  (weights have all explicit dimensions)
  // i. fill in explicit dimensions
  //   -> T: -1 1 3 5  (we put a -1 for the batch dimension)
  //   -> W:  1 1 3 1
  // ii. compare broadcast feasibility
  //
  // We cannot support the following case, since TensorRT does not allow
  // manipulation of the batch dimension and we cannot generate an output with
  // the proper shape:
  //   T: 3 5 1
  //   W: 1 1 1 1 3 5 1
  //   -> T: 1 1 1 -1 3 5 1
  //   -> W: 1 1 1  1 3 5 1
  // ***************************************************************************
  if (!operand_l.is_tensor() && !operand_r.is_tensor()) {
    return errors::InvalidArgument(
        "Broadcasting requires at least one of the operands be tensors");
  }

  const int max_nb_dims = nvinfer1::Dims::MAX_DIMS + 1;
  auto compute_output_dims = [use_implicit_batch](
                                 const TRT_TensorOrWeights& input,
                                 int broadcast_num_dims, int* output_dims_array,
                                 nvinfer1::Dims* output_dims) {
    const nvinfer1::Dims input_dims = input.GetTrtDims();
    std::fill(output_dims_array, output_dims_array + max_nb_dims, 1);
    std::copy(input_dims.d, input_dims.d + input_dims.nbDims,
              output_dims_array + broadcast_num_dims - input_dims.nbDims);
    if (use_implicit_batch && input.is_tensor()) {
      const int true_input_dims = input_dims.nbDims + 1;
      if (true_input_dims < broadcast_num_dims) {
        return errors::InvalidArgument(
            "Broadcasting beyond batch dimension is not supported ",
            "(tensor #dims ", true_input_dims, " vs broadcast #dims ",
            broadcast_num_dims, ")");
      }
      // Set the batch dimension to -1, since the batch size is not supposed
      // to be broadcasted.
      output_dims_array[0] = -1;
    }
    // Copy to output dimensions.
    if (use_implicit_batch) {
      // Strip the batch dimension while copying.
      output_dims->nbDims = broadcast_num_dims - 1;
      std::copy(output_dims_array + 1, output_dims_array + broadcast_num_dims,
                output_dims->d);
    } else {
      output_dims->nbDims = broadcast_num_dims;
      std::copy(output_dims_array, output_dims_array + broadcast_num_dims,
                output_dims->d);
    }

    return Status::OK();
  };

  // Compute the output dimensions.
  const int broadcast_num_dims =
      std::max(operand_l.GetTrtDims().nbDims +
                   (use_implicit_batch && operand_l.is_tensor()),
               operand_r.GetTrtDims().nbDims +
                   (use_implicit_batch && operand_r.is_tensor()));
  int output_l[max_nb_dims], output_r[max_nb_dims];
  TF_RETURN_IF_ERROR(compute_output_dims(operand_l, broadcast_num_dims,
                                         output_l, operand_l_new_dims));
  TF_RETURN_IF_ERROR(compute_output_dims(operand_r, broadcast_num_dims,
                                         output_r, operand_r_new_dims));

  // Compare broadcast feasibility.
  if (check_feasibility) {
    for (int i = 0; i < broadcast_num_dims; ++i) {
      if ((output_l[i] != output_r[i]) && (output_l[i] != 1) &&
          (output_r[i] != 1)) {
        return errors::InvalidArgument("Infeasible broadcast scheme (",
                                       "batch_dim: ", output_l[0], ", ",
                                       DebugString(*operand_l_new_dims), " vs ",
                                       "batch_dim: ", output_r[0], ", ",
                                       DebugString(*operand_r_new_dims), ")");
      }
    }
  }
  return Status::OK();
}
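
// A worked example of GetTrtBroadcastShape under implicit batch mode: a tensor
// with TRT dims {3, 5} (per-example shape, batch dim excluded) broadcast
// against weights with dims {1, 3, 1} gives broadcast_num_dims = 3 and padded
// arrays {-1, 3, 5} and {1, 3, 1}, which pass the feasibility check because
// every mismatching dimension pair contains a 1. The returned dims are
// operand_l_new_dims = {3, 5} and operand_r_new_dims = {3, 1}, with the batch
// dimension stripped again.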

nvinfer1::ITensor* Converter::CreateConstantLayer(
    const TRT_ShapedWeights& weights, const nvinfer1::Dims& dims) {
  nvinfer1::Weights trt_weights = weights.GetTrtWeights();
  nvinfer1::IConstantLayer* layer = network()->addConstant(dims, trt_weights);
  if (!layer) return nullptr;
  nvinfer1::ITensor* trt_tensor = layer->getOutput(0);
#if !IS_TRT_VERSION_GE(5, 1, 3, 0)
  // TODO(laigd): there is a bug in the TensorRT 5.0 library: if we don't set
  // the data type below, it will always be kFLOAT regardless of what the data
  // type of the weights is. Once NVIDIA fixes this bug, we should remove the
  // data type setting logic below and the test should still pass.
  trt_tensor->setType(trt_weights.type);
#endif
  return trt_tensor;
}

Status CreateBroadcastableScalarConstant(OpConverterParams* params, float value,
                                         const nvinfer1::Dims& dims,
                                         nvinfer1::ITensor** tensor,
                                         const char* dtype_attr_name = "T") {
  nvinfer1::DataType trt_dtype =
      nvinfer1::DataType::kFLOAT;  // Default to FP32.
  TFAttrs attrs(params->node_def);
  if (attrs.count(dtype_attr_name)) {
    DataType dtype = attrs.get<DataType>(dtype_attr_name);
    TF_RETURN_IF_ERROR(TfDataTypeToTrt(dtype, &trt_dtype));
  }

  // In order to be broadcastable, the number of dims has to match.
  nvinfer1::Dims broadcastable_dims(dims);
  for (int i = 0; i < broadcastable_dims.nbDims; i++) {
    broadcastable_dims.d[i] = 1;
  }
  TRT_ShapedWeights weights =
      params->weight_store->GetTempWeights(trt_dtype, broadcastable_dims);
  void* raw_ptr = weights.GetValues();
  switch (trt_dtype) {
    case nvinfer1::DataType::kFLOAT:
      static_cast<float*>(raw_ptr)[0] = value;
      break;
    case nvinfer1::DataType::kHALF:
      static_cast<Eigen::half*>(raw_ptr)[0] = Eigen::half(value);
      break;
    default:
      return errors::InvalidArgument("Unsupported data type ",
                                     DebugString(trt_dtype));
  }
  *tensor = params->converter->CreateConstantLayer(weights, broadcastable_dims);
  TFTRT_RETURN_ERROR_IF_NULLPTR(*tensor, params->node_def.name());
  params->converter->ProvideQuantizationRange(*tensor, value, value);
  return Status::OK();
}

// Converts an axis from TF format to TRT format while validating. TF format
// includes the batch dimension, while TRT does not if implicit batching is
// used (i.e. for tensors). TF can also use negative indices.
Status ConvertAxis(int tf_axis, int trt_nb_dims, absl::string_view node_name,
                   bool use_implicit_batch, int* trt_axis) {
  const int tf_nb_dims = trt_nb_dims + (use_implicit_batch ? 1 : 0);
  // Check bounds.
  if (tf_axis < -tf_nb_dims || tf_axis >= tf_nb_dims) {
    return errors::InvalidArgument(
        "Axis value of ", tf_axis, " is out of bounds, must be in range [",
        -tf_nb_dims, ", ", tf_nb_dims, "), at ", node_name);
  }
  // Make a negative axis positive.
  if (tf_axis < 0) tf_axis += tf_nb_dims;
  // Don't allow the axis to be the batch dimension.
  if (use_implicit_batch && tf_axis == 0) {
    return errors::Unimplemented(
        "TensorRT does not allow manipulation of the batch dimension, at ",
        node_name);
  }
  // Remove the batch dimension if it is implicit.
  *trt_axis = use_implicit_batch ? tf_axis - 1 : tf_axis;
  return Status::OK();
}
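
// For example, ConvertAxis with use_implicit_batch = true and a TRT tensor of
// rank 3 (TF rank 4) maps tf_axis = -1 to trt_axis = 2 and tf_axis = 1 to
// trt_axis = 0, while tf_axis = 0 is rejected because it refers to the batch
// dimension.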

inline bool DimsEqual(const nvinfer1::Dims& dim_l,
                      const nvinfer1::Dims& dim_r) {
  if (dim_l.nbDims != dim_r.nbDims) {
    return false;
  }
  for (int i = 0; i < dim_l.nbDims; i++) {
    if (dim_l.d[i] != dim_r.d[i]) {
      return false;
    }
  }
  return true;
}

bool AllLengthsEqual(const std::vector<std::vector<int>>& inputs) {
  if (inputs.size() == 0) return true;
  int length = inputs.at(0).size();
  for (int i = 1; i < inputs.size(); i++) {
    if (inputs.at(i).size() != length) return false;
  }
  return true;
}

inline nvinfer1::Dims GetTrtDimsForTensor(const Tensor& tensor) {
  nvinfer1::Dims dims;
  dims.nbDims = tensor.dims();
  for (int i = 0; i < dims.nbDims; i++) {
    dims.d[i] = tensor.dim_size(i);
  }
  return dims;
}

int64_t Prod(const nvinfer1::Dims& dims) {
  int64_t count = 1;
  for (int d = 0; d < dims.nbDims; ++d) {
    count *= dims.d[d];
  }
  return count;
}

// Returns the total number of elements in a TensorRT weights dimensions.
// Returning 0 means either some dim is 0 or the number of dims is 0 (TensorRT
// doesn't allow scalar weights).
// Note that for a TF scalar constant, we always convert it to dims [1].
int64_t TrtWeightDimsNumElements(const nvinfer1::Dims& dims) {
  if (dims.nbDims == 0) return 0;
  return Prod(dims);
}

// Returns the total number of elements in an ITensor's dimensions.
// Returns 1 if the number of dims is 0 (the total number is then fully
// determined by the batch size).
// Returns -1 if any dimension is unknown.
int64_t TrtTensorDimsNumElements(const nvinfer1::Dims& dims) {
  if (!HasStaticShape(dims)) return -1;
  return Prod(dims);
}
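
// For example, TrtTensorDimsNumElements({3, -1, 5}) returns -1 because the
// middle dimension is dynamic, TrtTensorDimsNumElements({3, 4, 5}) returns 60,
// and TrtWeightDimsNumElements of a 0-d Dims returns 0.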

bool DimsHaveSameSize(const nvinfer1::Dims& lhs, const nvinfer1::Dims& rhs,
                      bool is_tensor) {
  if (is_tensor) {
    return TrtTensorDimsNumElements(lhs) == TrtTensorDimsNumElements(rhs);
  }
  return TrtWeightDimsNumElements(lhs) == TrtWeightDimsNumElements(rhs);
}

// Returns whether both dimensions are fully specified and their total numbers
// of elements are equal.
bool AreDimsStaticWithSameSize(const nvinfer1::Dims& lhs,
                               const nvinfer1::Dims& rhs, bool is_tensor) {
  if (!HasStaticShape(lhs) || !HasStaticShape(rhs)) return false;
  return DimsHaveSameSize(lhs, rhs, is_tensor);
}

bool AreDimsStaticWithDifferentSize(const nvinfer1::Dims& lhs,
                                    const nvinfer1::Dims& rhs, bool is_tensor) {
  if (!HasStaticShape(lhs) || !HasStaticShape(rhs)) return false;
  return !DimsHaveSameSize(lhs, rhs, is_tensor);
}

static std::vector<std::pair<int, int>> CreateSamePadding(
    const nvinfer1::Dims& stride, const nvinfer1::Dims& kernel,
    const std::vector<int64_t>& input_dims) {
  std::vector<std::pair<int, int>> padding(input_dims.size());
  CHECK_EQ(stride.nbDims, input_dims.size());  // TODO(jie): N+C? NC+?

  for (size_t i = 0; i < input_dims.size(); ++i) {
    // Formula to calculate the padding.
    int p = ((input_dims[i] - 1) / stride.d[i]) * stride.d[i] + kernel.d[i] -
            input_dims[i];
    p = (p > 0) ? p : 0;

    // Right-precedence padding, like in TensorFlow.
    int left = p / 2;
    int right = p - left;

    VLOG(2) << "PADDING_" << i << " pre: " << left << ", post: " << right
            << ", params: " << input_dims[i] << ", " << stride.d[i]
            << ", kernel: " << kernel.d[i];
    padding[i] = {left, right};
  }
  return padding;
}
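
// A worked example of the CreateSamePadding formula: with input_dims[i] = 7,
// stride = 2 and kernel = 3 it gives p = ((7 - 1) / 2) * 2 + 3 - 7 = 2, split
// into {left = 1, right = 1}; with input 7, stride 1 and kernel 2 it gives
// p = 1, split as {left = 0, right = 1} (the extra element goes on the right,
// matching TensorFlow's SAME padding).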

string GetCommonNameScope(const string& op_name_a, const string& op_name_b) {
  size_t last_scope_separator = 0;
  const size_t min_size = std::min(op_name_a.size(), op_name_b.size());
  for (size_t i = 0; i < min_size; ++i) {
    if (op_name_a[i] != op_name_b[i]) break;
    if (op_name_a[i] == '/') last_scope_separator = i + 1;
  }
  return op_name_a.substr(0, last_scope_separator);
}
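
// For example, GetCommonNameScope("model/block1/conv/kernel",
// "model/block1/bias") returns "model/block1/", and two names with no shared
// prefix up to a '/' return "" (the node names here are hypothetical).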

// Verifies that shapes of the given inputs match after masking the specified
// dimension.
Status VerifyShapesMatch(absl::Span<const TRT_TensorOrWeights> inputs,
                         int masked_dim, absl::string_view node_name) {
  size_t num_inputs = inputs.size();
  if (num_inputs <= 1) return Status::OK();

  const nvinfer1::Dims dims_0 = inputs.at(0).GetTrtDims();
  for (size_t i = 1; i < num_inputs; ++i) {
    const nvinfer1::Dims dim_i = inputs.at(i).GetTrtDims();
    if (dim_i.nbDims != dims_0.nbDims) {
      return errors::InvalidArgument(
          "Received inputs with inconsistent rank, at ", node_name);
    }
    for (size_t j = 0; j < dims_0.nbDims; ++j) {
      if (dim_i.d[j] != dims_0.d[j] && j != masked_dim) {
        return errors::InvalidArgument(
            "Received inputs with inconsistent shape, at ", node_name);
      }
    }
  }
  return Status::OK();
}

TRT_ShapedWeights::TRT_ShapedWeights(nvinfer1::DataType type) : type_(type) {
  shape_.nbDims = 0;
}

TRT_ShapedWeights::TRT_ShapedWeights(nvinfer1::DataType type,
                                     nvinfer1::Dims dims, Tensor tensor)
    : shape_(dims), type_(type), tensor_(tensor) {}

TRT_ShapedWeights::TRT_ShapedWeights(const TRT_ShapedWeights& rhs)
    : shape_(rhs.shape_), type_(rhs.type_), tensor_(rhs.tensor_) {}

int64_t TRT_ShapedWeights::count() const {
  return TrtWeightDimsNumElements(shape_);
}

nvinfer1::Weights TRT_ShapedWeights::GetTrtWeights() const {
  return nvinfer1::Weights{type_, GetValues(), count()};
}

size_t TRT_ShapedWeights::size_bytes() const {
  size_t data_type_size = -1;
  switch (type_) {
    case nvinfer1::DataType::kFLOAT:
    case nvinfer1::DataType::kINT32:
      data_type_size = 4;
      break;
    case nvinfer1::DataType::kHALF:
      data_type_size = 2;
      break;
    case nvinfer1::DataType::kINT8:
      data_type_size = 1;
      break;
  }
  return this->count() * data_type_size;
}

string TRT_ShapedWeights::DebugString() const {
  return StrCat(
      "TRT_ShapedWeights(shape=", tensorflow::tensorrt::DebugString(shape_),
      ", type=", tensorflow::tensorrt::DebugString(type_),
      ", values=", reinterpret_cast<uintptr_t>(GetValues()), ")");
}

// A fake ITensor implementation used to check whether the TF-TRT converter can
// handle a specific node. We only need shape and type information, and the
// converter won't (and shouldn't) use this to build the TRT network.
class TRT_TensorOrWeights::SimpleITensor : public nvinfer1::ITensor {
 public:
  SimpleITensor(nvinfer1::DataType trt_dtype, const nvinfer1::Dims& trt_dims)
      : trt_dtype_(trt_dtype), trt_dims_(trt_dims) {}

  void setName(const char* name) override {}

  const char* getName() const override { return ""; }

  void setDimensions(nvinfer1::Dims dimensions) override {
    trt_dims_ = dimensions;
  }

  nvinfer1::Dims getDimensions() const override { return trt_dims_; }

  void setType(nvinfer1::DataType trt_dtype) override {
    trt_dtype_ = trt_dtype;
  }

  nvinfer1::DataType getType() const override { return trt_dtype_; }

  bool isNetworkInput() const override { return false; }

  bool isNetworkOutput() const override { return false; }

  void setBroadcastAcrossBatch(bool broadcastAcrossBatch) override {}

  bool getBroadcastAcrossBatch() const override { return false; }

  nvinfer1::TensorLocation getLocation() const override {
    // This is arbitrary, since we don't use it.
    return nvinfer1::TensorLocation::kDEVICE;
  }

  void setLocation(nvinfer1::TensorLocation location) override {}

#if IS_TRT_VERSION_GE(5, 0, 0, 0)
  bool setDynamicRange(float min, float max) override { return true; }

  float getDynamicRange() const override { return 0; }
#endif

#if IS_TRT_VERSION_GE(5, 1, 0, 0)
  bool dynamicRangeIsSet() const override { return true; }

  void resetDynamicRange() override {}

  float getDynamicRangeMin() const override { return 0.f; }

  float getDynamicRangeMax() const override { return 0.f; }
#endif

#if IS_TRT_VERSION_GE(6, 0, 0, 0)
  void setAllowedFormats(nvinfer1::TensorFormats formats) override {}

  nvinfer1::TensorFormats getAllowedFormats() const override { return 1; }

  bool isShapeTensor() const override { return false; }

  bool isExecutionTensor() const override { return true; }
#endif

 private:
  nvinfer1::DataType trt_dtype_;
  nvinfer1::Dims trt_dims_;
};

TRT_TensorOrWeights::TRT_TensorOrWeights(nvinfer1::ITensor* tensor,
                                         int batch_size)
    : tensor_(tensor),
      batch_size_(batch_size),
      initialized_(true),
      is_tensor_(true) {}

TRT_TensorOrWeights::TRT_TensorOrWeights(nvinfer1::DataType trt_dtype,
                                         const nvinfer1::Dims& trt_dims,
                                         int batch_size)
    : simple_itensor_(new SimpleITensor(trt_dtype, trt_dims)),
      batch_size_(batch_size),
      initialized_(true),
      is_tensor_(true) {}

TRT_TensorOrWeights::TRT_TensorOrWeights(const TRT_ShapedWeights& weights)
    : weights_(weights), initialized_(true), is_tensor_(false) {}

TRT_TensorOrWeights::TRT_TensorOrWeights(const TRT_TensorOrWeights& rhs)
    : tensor_(rhs.tensor_),
      simple_itensor_(rhs.simple_itensor_),
      batch_size_(rhs.batch_size_),
      weights_(rhs.weights_),
      initialized_(rhs.initialized_),
      is_tensor_(rhs.is_tensor_) {}

void TRT_TensorOrWeights::operator=(const TRT_TensorOrWeights& rhs) {
  tensor_ = rhs.tensor_;
  simple_itensor_ = rhs.simple_itensor_;
  batch_size_ = rhs.batch_size_;
  weights_ = rhs.weights_;
  initialized_ = rhs.initialized_;
  is_tensor_ = rhs.is_tensor_;
}

nvinfer1::ITensor* TRT_TensorOrWeights::tensor() const {
  CHECK(is_tensor());
  return tensor_ == nullptr ? simple_itensor_.get() : tensor_;
}

nvinfer1::Dims TRT_TensorOrWeights::GetTrtDims() const {
  if (is_tensor()) {
    return tensor()->getDimensions();
  } else {
    return weights().shape_;
  }
}

string TRT_TensorOrWeights::DebugString() const {
  string output = "TRT_TensorOrWeights(type=";
  if (is_tensor()) {
    StrAppend(&output, "tensor=", tensorflow::tensorrt::DebugString(*tensor()),
              ", batch_size=", batch_size_);
  } else {
    StrAppend(&output, "weights=", weights_.DebugString());
  }
  StrAppend(&output, ")");
  return output;
}

// Performs a 5-dimensional reorder of data on the CPU. This is done once at
// convert time and does not affect GPU inference perf.
// Example: reorder NDHWC (TensorFlow) -> NCDHW (TensorRT).
template <typename T>
void Reorder5(const nvinfer1::Dims& shape, const T* idata,
              const nvinfer1::Dims& istrides, T* odata,
              const nvinfer1::Dims& ostrides) {
  for (int k = 0; k < shape.d[0]; ++k) {
    for (int c = 0; c < shape.d[1]; ++c) {
      for (int d = 0; d < shape.d[2]; ++d) {
        for (int r = 0; r < shape.d[3]; ++r) {
          for (int s = 0; s < shape.d[4]; ++s) {
            odata[k * ostrides.d[0] + c * ostrides.d[1] + d * ostrides.d[2] +
                  r * ostrides.d[3] + s * ostrides.d[4]] =
                idata[k * istrides.d[0] + c * istrides.d[1] +
                      d * istrides.d[2] + r * istrides.d[3] +
                      s * istrides.d[4]];
          }
        }
      }
    }
  }
}

// TODO(jie): reorder4 & reorder2 should be merged?
// TODO(aaroey): fix the order of parameters.
template <typename T>
void Reorder4(const nvinfer1::DimsNCHW& shape, const T* idata,
              const nvinfer1::DimsNCHW& istrides, T* odata,
              const nvinfer1::DimsNCHW& ostrides) {
  for (int n = 0; n < shape.n(); ++n) {
    for (int c = 0; c < shape.c(); ++c) {
      for (int h = 0; h < shape.h(); ++h) {
        for (int w = 0; w < shape.w(); ++w) {
          odata[n * ostrides.n() + c * ostrides.c() + h * ostrides.h() +
                w * ostrides.w()] = idata[n * istrides.n() + c * istrides.c() +
                                          h * istrides.h() + w * istrides.w()];
        }
      }
    }
  }
}

template <typename T>
void Reorder2(const nvinfer1::DimsHW& shape, const T* idata,
              const nvinfer1::DimsHW& istrides, T* odata,
              const nvinfer1::DimsHW& ostrides) {
  for (int h = 0; h < shape.h(); ++h) {
    for (int w = 0; w < shape.w(); ++w) {
      odata[h * ostrides.h() + w * ostrides.w()] =
          idata[h * istrides.h() + w * istrides.w()];
    }
  }
}

// TODO(jie): fall back to tensorflow!!
void ReorderCKtoKC(const TRT_ShapedWeights& iweights,
                   TRT_ShapedWeights* oweights) {
  const int c = iweights.shape_.d[0];
  const int k = iweights.shape_.d[1];
  oweights->shape_.d[0] = k;
  oweights->shape_.d[1] = c;
  const nvinfer1::DimsHW istrides = {1, k};
  const nvinfer1::DimsHW ostrides = {c, 1};
  switch (iweights.TrtDType()) {
    case nvinfer1::DataType::kFLOAT: {
      Reorder2({k, c}, static_cast<float const*>(iweights.GetValues()),
               istrides, static_cast<float*>(oweights->GetValues()), ostrides);
      break;
    }
    case nvinfer1::DataType::kHALF: {
      Reorder2({k, c}, static_cast<Eigen::half const*>(iweights.GetValues()),
               istrides, static_cast<Eigen::half*>(oweights->GetValues()),
               ostrides);
      break;
    }
    default:
      LOG(FATAL) << "Unsupported type in reorder, expected fp32 or fp16 but "
                    "got "
                 << DebugString(iweights.TrtDType());
  }
}
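
// A small worked example of ReorderCKtoKC: for C = 2, K = 3 the input CK
// layout stores element (c, k) at index c * 3 + k and the output KC layout
// stores it at index k * 2 + c, which is exactly what the {1, k} / {c, 1}
// stride pairs passed to Reorder2 compute.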

void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights,
                       TRT_ShapedWeights* oweights, const int num_groups) {
  CHECK(iweights.TrtDType() == oweights->TrtDType());
  CHECK_EQ(iweights.size_bytes(), oweights->size_bytes());
  // K indexes over output channels, C over input channels, and R and S over
  // the height and width of the convolution.
  const int r = iweights.shape_.d[0];
  const int s = iweights.shape_.d[1];
  // TRT requires GKcRS, while TF depthwise has RSCK where c=1, C=G.
  const int c = iweights.shape_.d[2] / num_groups;
  const int k = iweights.shape_.d[3] * num_groups;
  VLOG(2) << "num_groups: " << num_groups << ", c: " << iweights.shape_.d[2]
          << " becomes " << c << ", k: " << iweights.shape_.d[3] << " becomes "
          << k << ", r: " << r << ", s: " << s;
  oweights->shape_.d[0] = k / num_groups;
  oweights->shape_.d[1] = c * num_groups;
  oweights->shape_.d[2] = r;
  oweights->shape_.d[3] = s;
  const nvinfer1::DimsNCHW istrides = {1, k, s * k * c, c * k};
  const nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1};
  switch (iweights.TrtDType()) {
    case nvinfer1::DataType::kFLOAT: {
      Reorder4({k, c, r, s}, static_cast<float const*>(iweights.GetValues()),
               istrides, static_cast<float*>(oweights->GetValues()), ostrides);
      break;
    }
    case nvinfer1::DataType::kHALF: {
      Reorder4({k, c, r, s},
               static_cast<Eigen::half const*>(iweights.GetValues()), istrides,
               static_cast<Eigen::half*>(oweights->GetValues()), ostrides);
      break;
    }

    default:
      LOG(FATAL) << "Unsupported type, expected fp32 or fp16 but got "
                 << DebugString(iweights.TrtDType());
  }
}

// Initializes a Dims object with an arbitrary number of dimensions.
nvinfer1::Dims InitDimsN(std::initializer_list<int> list) {
  nvinfer1::Dims dim;
  dim.nbDims = list.size();
  std::copy(list.begin(), list.end(), dim.d);
  return dim;
}

// Reorders 3D convolution weights from TF to TRT.
void ReorderDRSCKToKCDRS(const TRT_ShapedWeights& iweights,
                         TRT_ShapedWeights* oweights, const int num_groups) {
  DCHECK(iweights.TrtDType() == oweights->TrtDType());
  CHECK_EQ(iweights.size_bytes(), oweights->size_bytes());
  // K indexes over output channels, C over input channels, and D, R and S over
  // the depth, height and width of the convolution.
  const int d = iweights.shape_.d[0];
  const int r = iweights.shape_.d[1];
  const int s = iweights.shape_.d[2];
  // TRT requires GKcRS, while TF depthwise has RSCK where c=1, C=G.
  const int c = iweights.shape_.d[3] / num_groups;
  const int k = iweights.shape_.d[4] * num_groups;

  VLOG(2) << "num_groups: " << num_groups << ", c: " << iweights.shape_.d[3]
          << " becomes " << c << ", k: " << iweights.shape_.d[4] << " becomes "
          << k << ", d: " << d << ", r: " << r << ", s: " << s;

  oweights->shape_.d[0] = iweights.shape_.d[4];  // k / num_groups;
  oweights->shape_.d[1] = iweights.shape_.d[3];  // c * num_groups;
  oweights->shape_.d[2] = d;
  oweights->shape_.d[3] = r;
  oweights->shape_.d[4] = s;

  nvinfer1::Dims shape =
      InitDimsN({k, c, d, r, s});  // KCDRS shape (same as output)

  nvinfer1::Dims ostrides =
      InitDimsN({c * d * r * s, d * r * s, r * s, s,
                 1});  // Output = KCDRS = k*CDRS + c*DRS + d*RS + r*S + s

  nvinfer1::Dims istrides =
      InitDimsN({1, k, r * s * c * k, s * c * k,
                 c * k});  // Input = DRSCK = k*1 + c*K + d*RSCK + r*SCK + s*CK

  switch (iweights.TrtDType()) {
    case nvinfer1::DataType::kFLOAT: {
      Reorder5(shape, static_cast<float const*>(iweights.GetValues()), istrides,
               static_cast<float*>(oweights->GetValues()), ostrides);
      break;
    }
    case nvinfer1::DataType::kHALF: {
      Reorder5(shape, static_cast<Eigen::half const*>(iweights.GetValues()),
               istrides, static_cast<Eigen::half*>(oweights->GetValues()),
               ostrides);
      break;
    }
    default:
      LOG(FATAL) << "Unsupported type, expected fp32 or fp16 but got "
                 << DebugString(iweights.TrtDType());
  }
}

TRT_ShapedWeights TrtWeightStore::GetTempWeights(nvinfer1::DataType trt_dtype,
                                                 const nvinfer1::Dims& dims) {
  TensorShape shape;
  DataType tf_dtype;
  // TODO(laigd): make it return a status.
  TF_CHECK_OK(TensorShapeUtils::MakeShape(dims.d, dims.nbDims, &shape));
  TF_CHECK_OK(TrtDataTypeToTf(trt_dtype, &tf_dtype));
  // TODO(jie): check weights size_bytes. 0 means type error
  Tensor tensor(tf_dtype, shape);
  TRT_ShapedWeights weights(trt_dtype, dims, tensor);
  store_.emplace_back(std::move(tensor));
  return weights;
}

OpConverterParams::OpConverterParams(
    const NodeDef& node_def, const std::vector<TRT_TensorOrWeights>& inputs,
    std::vector<TRT_TensorOrWeights>* outputs, TrtWeightStore* weight_store,
    TrtPrecisionMode precision_mode, bool use_calibration,
    bool use_implicit_batch)
    : node_def(node_def),
      inputs(inputs),
      outputs(outputs),
      validation_only(true),
      weight_store(weight_store),
      precision_mode(precision_mode),
      use_calibration(use_calibration),
      use_implicit_batch(use_implicit_batch) {}

OpConverterParams::OpConverterParams(
    Converter* converter, const NodeDef& node_def,
    const std::vector<TRT_TensorOrWeights>& inputs,
    std::vector<TRT_TensorOrWeights>* outputs, TrtWeightStore* weight_store)
    : converter(converter),
      node_def(node_def),
      inputs(inputs),
      outputs(outputs),
      validation_only(false),
      weight_store(weight_store),
      precision_mode(converter->precision_mode()),
      use_calibration(converter->use_calibration()),
      use_implicit_batch(converter->use_implicit_batch()) {}

const std::set<string>* TrtNodeValidator::quantize_ops = new std::set<string>{
    "QuantizeAndDequantizeV2",
    "QuantizeAndDequantizeV3",
    "FakeQuantWithMinMaxVars",
    "FakeQuantWithMinMaxArgs",
};

TrtNodeValidator::TrtNodeValidator(
    const grappler::GraphProperties& graph_properties,
    TrtPrecisionMode precision_mode, bool use_calibration,
    bool use_implicit_batch)
    : graph_properties_(graph_properties),
      precision_mode_(precision_mode),
      use_calibration_(use_calibration),
      use_implicit_batch_(use_implicit_batch) {
  RegisterOpValidators();
}

Status TrtNodeValidator::ConvertToTensorOrWeights(
    const NodeDef& node_def, int output_port,
    TRT_TensorOrWeights* tensor_or_weights) {
  if (node_def.op() == "Const") {
    if (output_port != 0) {
      return errors::InvalidArgument("Const node should only have one output.");
    }
    // The output of the conversion will be used as input to other nodes to
    // determine whether TRT supports those nodes. If it cannot convert the
    // Const, it's very likely we cannot treat it as a tensor and make it an
    // input to the TRT network, since TRT removes the first dimension and
    // treats it as the batch size. Also, it's not likely that the converter
    // can support the op, and performance may suffer even if it can, so we
    // simply return an error if the conversion fails.
    std::vector<TRT_TensorOrWeights> inputs;
    return ConvertConstToWeights(node_def, inputs, tensor_or_weights);
  }
  if (!graph_properties_.HasOutputProperties(node_def.name())) {
    return errors::InvalidArgument("Shape and data type are unknown");
  }

  // Validate and convert shape and dtype.
  const auto& output_params =
      graph_properties_.GetOutputProperties(node_def.name());
  const auto& tensor_properties = output_params.at(output_port);
  const DataType dtype = tensor_properties.dtype();
  const PartialTensorShape shape = tensor_properties.shape();
  nvinfer1::DataType trt_dtype;
  nvinfer1::Dims trt_dims;
  int batch_size = -1;
  TF_RETURN_IF_ERROR(ValidateTensorProperties(
      node_def.op(), dtype, shape, use_implicit_batch_,
      /*validation_only=*/true, &trt_dtype, &trt_dims, &batch_size));

  // Adds a fake ITensor. This is fine since the op converter operates in
  // validation-only mode and it won't (and shouldn't) use the tensor to do
  // any TRT network operations.
  *tensor_or_weights = TRT_TensorOrWeights(trt_dtype, trt_dims, batch_size);
  return Status::OK();
}

Status TrtNodeValidator::IsTensorRTCandidate(const Node* node) {
  const string& op = node->def().op();
  // In INT8 mode, we will always apply the quantization ranges provided by
  // these ops to the relevant tensors. This happens regardless of the value of
  // use_calibration.
  bool is_supported_op = false;
  if (quantize_ops->count(op)) {
    is_supported_op = (precision_mode_ == TrtPrecisionMode::INT8);
  } else {
    is_supported_op = op_validators_.count(op);
  }
  if (!is_supported_op) {
    return errors::Unimplemented("Op type ", op, " is not supported.");
  }

  // Convert the input NodeDefs and corresponding output ports to
  // TRT_TensorOrWeights.
  std::vector<TRT_TensorOrWeights> inputs;
  std::vector<const Edge*> input_edges;
  TF_RETURN_IF_ERROR(node->input_edges(&input_edges));
  for (const Edge* edge : input_edges) {
    TRT_TensorOrWeights tensor_or_weights;
    const NodeDef& src_def = edge->src()->def();
    Status status = ConvertToTensorOrWeights(src_def, edge->src_output(),
                                             &tensor_or_weights);
    if (!status.ok()) {
      return errors::Internal(
          "Failed to convert input ", src_def.name(),
          " to a TRT_TensorOrWeights: ", status.error_message());
    }
    inputs.push_back(tensor_or_weights);
  }

  OpConverter validator = op_validators_[op];
  OpConverterParams params(node->def(), inputs, /*outputs=*/nullptr,
                           &weight_store_, precision_mode_, use_calibration_,
                           use_implicit_batch_);
  return validator(&params);
}

Status TrtNodeValidator::ConvertConstToWeights(
    const NodeDef& const_node_def,
    const std::vector<TRT_TensorOrWeights>& inputs,
    TRT_TensorOrWeights* output) {
  std::vector<TRT_TensorOrWeights> outputs;
  OpConverterParams params(const_node_def, inputs, &outputs, &weight_store_,
                           precision_mode_, use_calibration_,
                           use_implicit_batch_);
  Status status = op_validators_["Const"](&params);
  if (status.ok() && output) *output = outputs[0];
  return status;
}

static void InitializeTrtPlugins(nvinfer1::ILogger* trt_logger) {
  static mutex plugin_mutex(LINKER_INITIALIZED);
  static bool plugin_initialized = false;
  mutex_lock lock(plugin_mutex);
  if (plugin_initialized) return;

  LOG(INFO) << "Linked TensorRT version: " << GetLinkedTensorRTVersion();
  LOG(INFO) << "Loaded TensorRT version: " << GetLoadedTensorRTVersion();

  plugin_initialized = initLibNvInferPlugins(trt_logger, "");
  if (!plugin_initialized) {
    LOG(ERROR) << "Failed to initialize TensorRT plugins, and conversion may "
                  "fail later.";
  }

  int num_trt_plugins = 0;
  nvinfer1::IPluginCreator* const* trt_plugin_creator_list =
      getPluginRegistry()->getPluginCreatorList(&num_trt_plugins);
  if (!trt_plugin_creator_list) {
    LOG(WARNING) << "Can not find any TensorRT plugins in registry.";
  } else {
    VLOG(1) << "Found the following " << num_trt_plugins
            << " TensorRT plugins in registry:";
    for (int i = 0; i < num_trt_plugins; ++i) {
      if (!trt_plugin_creator_list[i]) {
        LOG(WARNING) << "TensorRT plugin at index " << i
                     << " is not accessible (null pointer returned by "
                        "getPluginCreatorList for this plugin)";
      } else {
        VLOG(1) << "  " << trt_plugin_creator_list[i]->getPluginName();
      }
    }
  }
}

// static
StatusOr<std::unique_ptr<Converter>> Converter::Create(
    TrtPrecisionMode precision_mode, bool use_calibration,
    nvinfer1::ILogger* trt_logger, const bool use_implicit_batch) {
  std::unique_ptr<Converter> converter = absl::WrapUnique(new Converter(
      precision_mode, use_calibration, trt_logger, use_implicit_batch));
  TF_RETURN_IF_ERROR(converter->Init(trt_logger));
  return converter;
}
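
// A minimal usage sketch of the factory above (error handling elided; the
// logger and NodeDef are placeholders): the returned StatusOr must be checked
// before the converter is used, since Init() can fail while creating the
// TensorRT builder or network, e.g.
//
//   auto converter = Converter::Create(TrtPrecisionMode::FP16,
//                                      /*use_calibration=*/false, &logger,
//                                      /*use_implicit_batch=*/true);
//   TF_RETURN_IF_ERROR(converter.status());
//   TF_RETURN_IF_ERROR((*converter)->ConvertNode(node_def));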

Converter::Converter(TrtPrecisionMode precision_mode, bool use_calibration,
                     nvinfer1::ILogger* trt_logger,
                     const bool use_implicit_batch)
    : precision_mode_(precision_mode),
      use_calibration_(use_calibration),
      use_implicit_batch_(use_implicit_batch) {
  InitializeTrtPlugins(trt_logger);
  this->RegisterOpConverters();
}

Status Converter::Init(nvinfer1::ILogger* trt_logger) {
  VLOG(1) << "Creating TensorRT builder";
  trt_builder_.reset(nvinfer1::createInferBuilder(*trt_logger));

  VLOG(1) << "Creating TensorRT network";
#if IS_TRT_VERSION_GE(6, 0, 0, 0)
  const uint32_t flags =
      use_implicit_batch_
          ? 0U
          : (1U << static_cast<int>(
                 nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));
  trt_network_.reset(trt_builder_->createNetworkV2(flags));
#else
  trt_network_.reset(trt_builder_->createNetwork());
#endif
  if (!trt_network_) {
    return errors::Internal("Failed to create TensorRT network object");
  }
  return Status::OK();
}

Status Converter::ConvertNode(const NodeDef& node_def) {
  std::vector<TRT_TensorOrWeights> inputs, outputs;
  TF_RETURN_IF_ERROR(this->GetInputs(node_def, &inputs));

  OpConverterParams params(this, node_def, inputs, &outputs, &weight_store_);
  const string& op = node_def.op();
  auto itr = op_registry_.find(op);
  if (itr == op_registry_.end()) {
    return errors::Unimplemented("No converter registered for op: ", op);
  }
  OpConverter op_converter = itr->second;
  TF_RETURN_IF_ERROR(op_converter(&params));

  for (size_t i = 0; i < outputs.size(); ++i) {
    TRT_TensorOrWeights& output = outputs[i];
    string output_name = node_def.name();
    if (i != 0) absl::StrAppend(&output_name, ":", i);
    // We need to check the name before setting it. If the input is one of the
    // engine inputs, setting the name here will overwrite the engine input
    // bindings, which will cause a runtime error.
    // TODO(tmorris): Remove this work-around once we use TRT's IIdentityLayer
    // in ConvertIdentity.
    if (output.is_tensor()) {
      const char* tensor_name = output.tensor()->getName();
      if (!IsEngineInput(tensor_name)) {
        // TRT initializes tensor names as "(Unnamed ITensor* N)". We rename
        // them to match their corresponding TensorFlow name.
        // Note: ITensors that we create internally within TF-TRT which are
        // not inputs or outputs of a node will not be renamed. This is a
        // potential cause of confusion if an error message or warning
        // mentions the unnamed tensor.
        output.tensor()->setName(output_name.c_str());
      }
    }
    VLOG(2) << "Adding out tensor " << output_name << ": "
            << output.DebugString();
    Status status = AddTensorOrWeights(output_name, output);
    if (!status.ok()) {
      return Status(status.code(),
                    StrCat("Failed to add output for node ", node_def.name(),
                           ": ", status.error_message()));
    }
  }
  return Status::OK();
}
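
// For example, a NodeDef named "my_op" (a hypothetical name) with three
// converted outputs is registered above as "my_op", "my_op:1" and "my_op:2",
// matching TensorFlow's tensor naming convention for multi-output nodes.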
1250
AddInputTensor(const string & name,nvinfer1::DataType dtype,const nvinfer1::Dims & dims,int batch_size)1251 Status Converter::AddInputTensor(const string& name, nvinfer1::DataType dtype,
1252 const nvinfer1::Dims& dims, int batch_size) {
1253 // We verify the batch size only for the input nodes, and rely on individual
1254 // op converter to ensure the batch size of the outputs is not changed.
1255 // TODO(laigd): we need to test this properties.
1256 Status status = MaybeUpdateBatchSize(batch_size);
1257 if (!status.ok()) {
1258 return Status(status.code(), StrCat("Batch size doesn't match for tensor ",
1259 name, ": ", status.error_message()));
1260 }
1261 nvinfer1::ITensor* tensor = network()->addInput(name.c_str(), dtype, dims);
1262 if (tensor == nullptr) {
1263 return errors::InvalidArgument("Failed to create Input layer tensor ", name,
1264 " rank=", dims.nbDims);
1265 }
1266 status = AddTensorOrWeights(name, TRT_TensorOrWeights(tensor));
1267 if (!status.ok()) {
1268 return Status(status.code(), StrCat("Failed to add input tensor ", name,
1269 ": ", status.error_message()));
1270 }
1271 return Status::OK();
1272 }
1273
RenameAndMarkOutputTensors(const std::vector<Converter::EngineOutputInfo> & output_tensors)1274 Status Converter::RenameAndMarkOutputTensors(
1275 const std::vector<Converter::EngineOutputInfo>& output_tensors) {
1276 for (const auto& output : output_tensors) {
1277 TRT_TensorOrWeights tensor_or_weights;
1278 TF_RETURN_IF_ERROR(
1279 GetTensorOrWeights(output.source_tensor_name, &tensor_or_weights));
1280 if (!tensor_or_weights.is_tensor()) {
1281 return errors::InvalidArgument("Output ", output.source_tensor_name,
1282 " is weights not tensor");
1283 }
1284 nvinfer1::ITensor* tensor = tensor_or_weights.tensor();
1285 if (tensor == nullptr) {
1286 return errors::NotFound("Output tensor not found: ",
1287 output.source_tensor_name);
1288 }
1289 // Check if this tensor has already been marked as an input or output.
1290 //
1291 // ConvertIdentity can cause the same tensor to be repeated in
1292 // output_tensors, which can cause us to overwrite the name of the output
1293 // tensor binding. For example, if we rename OutputPH_0 to OutputPH_1 then
1294 // we won't be able to locate OutputPH_0 during runtime. To fix this,
1295 // duplicate the tensor using no-op shuffle.
1296 //
1297 // TODO(tmorris): Remove this work-around once we use TRT's IIdentityLayer
1298 // in ConvertIdentity.
1299 if (IsEngineInput(tensor->getName()) || IsEngineOutput(tensor->getName())) {
1300 // Using shuffle layer for identity by not setting reshape or transpose.
1301 nvinfer1::IShuffleLayer* layer = network()->addShuffle(*tensor);
1302 TFTRT_RETURN_ERROR_IF_NULLPTR(
1303 layer, StrCat("Output Copy for ", tensor->getName()));
1304 MarkQuantizationRangesAsInferrable(tensor, layer->getOutput(0));
1305 tensor = layer->getOutput(0);
1306 }
1307 tensor->setName(output.dest_node_name.c_str());
1308 network()->markOutput(*tensor);
1309 // Set type after marking as output. TRT only supports setType for engine
1310 // outputs and inputs (type is inferred otherwise).
1311 tensor->setType(output.trt_dtype);
1312 VLOG(1) << "Marking output TRT tensor " << output.source_tensor_name
1313 << " with data type " << DebugString(output.trt_dtype)
1314 << ", which feeds TF node " << output.dest_node_name;
1315 }
1316 if (VLOG_IS_ON(2)) {
1317 VLOG(2) << "Created TensorRT network with the following layers:";
1318 for (int i = 0; i < network()->getNbLayers(); i++) {
1319 auto layer = network()->getLayer(i);
1320 VLOG(2) << " " << layer->getName() << " ("
1321 << "type: " << static_cast<int>(layer->getType())
1322 << ", precision: " << static_cast<int>(layer->getPrecision())
1323 << ")";
1324 }
1325 }
1326 return Status::OK();
1327 }
1328
BuildCudaEngine(TrtUniquePtrType<nvinfer1::ICudaEngine> * engine,int max_batch_size,size_t max_workspace_size_bytes,nvinfer1::IGpuAllocator * allocator,TRTInt8Calibrator * calibrator)1329 Status Converter::BuildCudaEngine(
1330 TrtUniquePtrType<nvinfer1::ICudaEngine>* engine, int max_batch_size,
1331 size_t max_workspace_size_bytes, nvinfer1::IGpuAllocator* allocator,
1332 TRTInt8Calibrator* calibrator) {
1333 VLOG(1) << "Configuring TensorRT builder";
1334 trt_builder_->setMaxBatchSize(max_batch_size);
1335 trt_builder_->setGpuAllocator(allocator);
1336 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
1337 // Create a network configuration and use it to build a TRT engine.
1338 TrtUniquePtrType<nvinfer1::IBuilderConfig> builder_config(
1339 trt_builder_->createBuilderConfig());
1340 builder_config->setMaxWorkspaceSize(max_workspace_size_bytes);
1341 if (precision_mode_ == TrtPrecisionMode::FP16) {
1342 builder_config->setFlag(nvinfer1::BuilderFlag::kFP16);
1343 } else if (precision_mode_ == TrtPrecisionMode::INT8) {
1344 builder_config->setFlag(nvinfer1::BuilderFlag::kFP16);
1345 builder_config->setFlag(nvinfer1::BuilderFlag::kINT8);
1346 if (use_calibration_) {
1347 builder_config->setInt8Calibrator(calibrator);
1348 } else {
1349 builder_config->setInt8Calibrator(nullptr);
1350 }
1351 }
1352
1353 VLOG(1) << "Building TensorRT engine";
1354 engine->reset(
1355 trt_builder_->buildEngineWithConfig(*network(), *builder_config));
1356 #else
1357 trt_builder_->setMaxWorkspaceSize(max_workspace_size_bytes);
1358 if (precision_mode_ == TrtPrecisionMode::FP16) {
1359 trt_builder_->setFp16Mode(true);
1360 } else if (precision_mode_ == TrtPrecisionMode::INT8) {
1361 // Setting FP16 mode as well allows TRT to also consider FP16 kernels and
1362 // use them in situations where they are faster than INT8 or where INT8 is
1363 // not supported for a given layer.
1364 trt_builder_->setFp16Mode(true);
1365 trt_builder_->setInt8Mode(true);
1366 if (use_calibration_) {
1367 trt_builder_->setInt8Calibrator(calibrator);
1368 } else {
1369 trt_builder_->setInt8Calibrator(nullptr);
1370 }
1371 }
1372
1373 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
1374 string precision_mode_str;
1375 TF_RETURN_IF_ERROR(
1376 TrtPrecisionModeToName(precision_mode_, &precision_mode_str));
1377 string trt_network_name = StrCat(
1378 "TF:", TF_VERSION_STRING, ", ", "TRT:", GetLoadedTensorRTVersion(), "-",
1379 "Precision:", precision_mode_str, ", ", "Calibration:", use_calibration_,
1380 ", ", "Max-Batch-Size:", max_batch_size, ", ",
1381 "Max-Workspace-Size:", max_workspace_size_bytes);
1382 VLOG(1) << "Setting TensorRT network name to " << trt_network_name;
1383 network()->setName(trt_network_name.c_str());
1384 #endif // #if IS_TRT_VERSION_GE(6, 0, 0, 0)
1385
1386 VLOG(1) << "Building TensorRT engine";
1387 engine->reset(trt_builder_->buildCudaEngine(*network()));
1388 #endif
1389 if (engine->get() == nullptr) {
1390 return errors::Internal("Failed to build TensorRT engine");
1391 }
1392 return Status::OK();
1393 }
1394
1395 Status Converter::MaybeUpdateBatchSize(int batch_size) {
1396   // OK iff either is unknown or they are equal to each other.
1397 if (this->batch_size_ < 0 || batch_size < 0 ||
1398 this->batch_size_ == batch_size) {
1399 if (this->batch_size_ < 0 && batch_size >= 0) {
1400 this->batch_size_ = batch_size;
1401 }
1402 return Status::OK();
1403 }
1404 return errors::InvalidArgument(
1405 "Provided batch size does not match converter batch size: ", batch_size,
1406 " vs ", batch_size_);
1407 }
1408
1409 Status Converter::AddTensorOrWeights(const string& name,
1410 TRT_TensorOrWeights input) {
1411 // Set the batch size of the tensor, using batch size collected from the
1412 // input tensors to the TRT subgraph at the beginning of the conversion.
1413 // We rely on the individual op converter to understand the semantics of the
1414 // TF node, and make sure it doesn't change the batch size nor introduce
1415 // intra-element dependency inside the batch.
1416 if (use_implicit_batch_ && input.is_tensor()) {
1417 input.set_batch_size(batch_size_);
1418 }
1419 if (trt_tensors_.insert({name, std::move(input)}).second) return Status::OK();
1420 return errors::AlreadyExists("tensor/weights ", name, " already exist.");
1421 }
1422
1423 Status Converter::GetTensorOrWeights(const string& name,
1424 TRT_TensorOrWeights* output) {
1425 if (!trt_tensors_.count(name)) {
1426 return errors::NotFound("Tensor or weights with name ", name,
1427 " could not be found.");
1428 }
1429 *output = trt_tensors_.at(name);
1430 return Status::OK();
1431 }
1432
1433 Status Converter::TransposeTensor(nvinfer1::ITensor* input_tensor,
1434 const std::vector<int>& order_with_batch_dim,
1435 absl::string_view name,
1436 nvinfer1::ITensor** output_tensor) {
1437 const auto dims = input_tensor->getDimensions();
1438
1439 if (order_with_batch_dim.size() - 1 != size_t(dims.nbDims)) {
1440 return errors::InvalidArgument(
1441 "Rank of perm for transpose does not match with that of the input.");
1442 }
1443 if (order_with_batch_dim[0] != 0) {
1444 return errors::Unimplemented(
1445 "Transpose at batch dimension is not supported.");
1446 }
1447
1448 nvinfer1::IShuffleLayer* layer = this->network()->addShuffle(*input_tensor);
1449 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, "TF-TRT Internal Transpose");
1450 layer->setName(std::basic_string<char>(name).c_str());
1451 MarkQuantizationRangesAsInferrable(input_tensor, layer->getOutput(0));
1452
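  // The TRT tensor dims exclude the batch dimension, so shift the TF
  // permutation (which includes the batch dim) down by one.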
1453 nvinfer1::Permutation permutation;
1454 for (int32_t i = 0; i < dims.nbDims; ++i) {
1455 permutation.order[i] = order_with_batch_dim[i + 1] - 1;
1456 }
1457 VLOG(1) << "TransposeTensor permutation: "
1458 << DebugString(permutation, dims.nbDims);
1459 layer->setFirstTranspose(permutation);
1460
1461 nvinfer1::Dims reshape_dims;
1462 reshape_dims.nbDims = dims.nbDims;
1463 for (int32_t i = 0; i < reshape_dims.nbDims; ++i) {
1464 reshape_dims.d[i] = 0;
1465     // TODO(aaroey): why not transpose the types as well?
1466 reshape_dims.type[i] = dims.type[i];
1467 }
1468 layer->setReshapeDimensions(reshape_dims);
1469
1470 *output_tensor = layer->getOutput(0);
1471 return Status::OK();
1472 }
1473
1474 Status Converter::GetWeightRange(const TRT_ShapedWeights& weights,
1475 float* out_min, float* out_max) const {
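  // Scans the weight values and returns their min/max as floats. Used below
  // in PrepareTensorForShape() to derive an INT8 quantization range for
  // constants created from weights.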
1476 switch (weights.TrtDType()) {
1477 case nvinfer1::DataType::kFLOAT: {
1478 auto inp = static_cast<float const*>(weights.GetValues());
1479 auto result = std::minmax_element(inp, inp + weights.count());
1480 *out_min = *result.first;
1481 *out_max = *result.second;
1482 break;
1483 }
1484 case nvinfer1::DataType::kHALF: {
1485 auto inp = static_cast<Eigen::half const*>(weights.GetValues());
1486 auto result = std::minmax_element(inp, inp + weights.count());
1487 *out_min = Eigen::half_impl::half_to_float(*result.first);
1488 *out_max = Eigen::half_impl::half_to_float(*result.second);
1489 break;
1490 }
1491 case nvinfer1::DataType::kINT32: {
1492 auto inp = static_cast<int const*>(weights.GetValues());
1493 auto result = std::minmax_element(inp, inp + weights.count());
1494 *out_min = static_cast<float>(*result.first);
1495 *out_max = static_cast<float>(*result.second);
1496 break;
1497 }
1498 default:
1499 return errors::Unimplemented(
1500 "Data type not supported for GetWeightRange: ",
1501 DebugString(weights.TrtDType()));
1502 }
1503 return Status::OK();
1504 }
1505
1506 Status Converter::PrepareTensorForShape(const TRT_TensorOrWeights& input,
1507 const nvinfer1::Dims& dims,
1508 const bool validation_only,
1509 nvinfer1::ITensor** tensor) {
1510 const nvinfer1::Dims input_dims = input.GetTrtDims();
1511   // If either input_dims or dims doesn't have a static shape, some of the dims
1512   // are unknown or need to be inferred, so we don't do further checks and
1513   // instead rely on the caller to not make mistakes.
1514   // Otherwise we do a simple check to make sure the total sizes are the same.
1515 // If an input is a weight, it is going to become a tensor via
1516 // CreateConstantLayer. So we can treat it as a tensor for
1517 // AreDimsStaticWithDifferentSize(). This really only matters for 0-D tensors.
1518 if (AreDimsStaticWithDifferentSize(input_dims, dims, /*is_tensor=*/true)) {
1519 return errors::InvalidArgument(
1520 "Incompatible shapes: ", DebugString(input_dims), " vs. ",
1521 DebugString(dims));
1522 }
1523 // ConstantLayer requires static shapes (cannot infer -1).
1524 if (input.is_weights() && !HasStaticShape(dims)) {
1525 return errors::InvalidArgument("Shape is not fully defined: ",
1526 DebugString(dims));
1527 }
1528 if (validation_only) {
1529 *tensor = nullptr;
1530 return Status::OK();
1531 }
1532
1533 if (input.is_tensor()) {
1534 if (DimsEqual(input_dims, dims)) {
1535 *tensor = input.tensor();
1536 } else {
1537 nvinfer1::IShuffleLayer* layer =
1538 this->network()->addShuffle(*input.tensor());
1539 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, "TF-TRT Internal Reshape");
1540 layer->setReshapeDimensions(dims);
1541 MarkQuantizationRangesAsInferrable(input.tensor(), layer->getOutput(0));
1542 *tensor = layer->getOutput(0);
1543 }
1544 } else {
1545 *tensor = CreateConstantLayer(input.weights(), dims);
1546 TFTRT_RETURN_ERROR_IF_NULLPTR(*tensor, "TF-TRT Internal Reshape");
1547 if (precision_mode() == TrtPrecisionMode::INT8 && !use_calibration()) {
1548 // If we are in int8 mode and not calibrating, we need to explicitly set a
1549 // quantization range for the output tensor of the IConstantLayer. Here we
1550 // set the range to [min(weights), max(weights)].
1551 float min_range = 0.0f;
1552 float max_range = 0.0f;
1553 TF_RETURN_IF_ERROR(
1554 GetWeightRange(input.weights(), &min_range, &max_range));
1555 // Avoid setting range to 0 because TRT will throw an error. If the
1556 // weights are zero then the range doesn't matter: using 127.0f should
1557 // ensure the quantized weight will be exactly zero.
1558 if (min_range == 0.0f && max_range == 0.0f) {
1559 min_range = -127.0f;
1560 max_range = 127.0f;
1561 }
1562 ProvideQuantizationRange(*tensor, min_range, max_range);
1563 }
1564 }
1565 return Status::OK();
1566 }
1567
1568 void Converter::MarkQuantizationRangesAsInferrable(nvinfer1::ITensor* input,
1569 nvinfer1::ITensor* output) {
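  // Record the edge in both directions so that PropagateQuantizationRanges()
  // can copy a known range from either tensor to the other.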
1570 quantization_infer_.push_back({input, output});
1571 quantization_infer_.push_back({output, input});
1572 }
1573
1574 void Converter::ProvideQuantizationRange(nvinfer1::ITensor* tensor,
1575 float min_range, float max_range) {
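  // Store a single symmetric bound max(|min|, |max|);
  // MaybeApplyQuantizationRanges() later applies it as
  // setDynamicRange(-range, range).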
1576 float symmetric_range = std::max(std::abs(min_range), std::abs(max_range));
1577 quantization_ranges_[tensor] = symmetric_range;
1578 }
1579
1580 namespace {
1581
1582 bool IsConvolution(const nvinfer1::ILayer* layer) {
1583 return layer->getType() == nvinfer1::LayerType::kCONVOLUTION;
1584 }
1585
1586 bool IsScale(const nvinfer1::ILayer* layer) {
1587 return layer->getType() == nvinfer1::LayerType::kSCALE;
1588 }
1589
1590 bool IsClipOrRelu(const nvinfer1::ILayer* layer) {
1591 if (layer->getType() != nvinfer1::LayerType::kACTIVATION) {
1592 return false;
1593 }
1594 auto activation_type = static_cast<const nvinfer1::IActivationLayer*>(layer)
1595 ->getActivationType();
1596 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
1597 return activation_type == nvinfer1::ActivationType::kRELU ||
1598 activation_type == nvinfer1::ActivationType::kCLIP;
1599 #else
1600 return activation_type == nvinfer1::ActivationType::kRELU;
1601 #endif
1602 }
1603
1604 bool IsAdd(const nvinfer1::ILayer* layer) {
1605 if (layer->getType() != nvinfer1::LayerType::kELEMENTWISE) {
1606 return false;
1607 }
1608 auto operation =
1609 static_cast<const nvinfer1::IElementWiseLayer*>(layer)->getOperation();
1610 return operation == nvinfer1::ElementWiseOperation::kSUM;
1611 }
1612
1613 } // namespace
1614
1615 void Converter::MaybeApplyQuantizationRanges() {
1616 if (precision_mode() != TrtPrecisionMode::INT8) return;
1617
1618 // Infer ranges across marked ops.
1619 PropagateQuantizationRanges();
1620 // Apply ranges.
1621 #if IS_TRT_VERSION_GE(5, 0, 0, 0)
1622 for (auto pair : quantization_ranges_) {
1623 nvinfer1::ITensor* tensor = pair.first;
1624 const float range = pair.second;
1625 VLOG(1) << "Setting range for: " << tensor->getName() << ": " << range;
1626 // TODO(laigd): if 'tensor' already has a range set which doesn't match
1627     // 'range', it should report an error.
1628 tensor->setDynamicRange(-range, range);
1629 }
1630 #endif
1631
1632 if (use_calibration()) return;
1633 #if !IS_TRT_VERSION_GE(6, 0, 0, 0)
1634 // Attempt to find tensors that are missing ranges, and set the corresponding
1635 // layer's precision to FP16 to avoid Builder::buildCudaEngine() failing.
1636   // This is only needed for TensorRT 5 and earlier because
1637   // TensorRT 6 falls back to FP16 internally.
1638   // TensorRT doesn't need ranges for intermediate tensors when layers are
1639   // fused, so find fused layers first.
1640 // Get all tensors from network and deduce fused ops.
1641 std::map<nvinfer1::ILayer*, std::vector<nvinfer1::ILayer*>> layer_consumers;
1642 std::map<nvinfer1::ITensor*, nvinfer1::ILayer*> tensor_layer;
1643 std::set<nvinfer1::ITensor*> all_tensors;
1644 for (int i = 0; i < this->network()->getNbLayers(); i++) {
1645 nvinfer1::ILayer* layer = this->network()->getLayer(i);
1646 layer_consumers[layer] = {};
1647 for (int j = 0; j < layer->getNbInputs(); j++) {
1648 all_tensors.insert(layer->getInput(j));
1649 }
1650 for (int j = 0; j < layer->getNbOutputs(); j++) {
1651 tensor_layer[layer->getOutput(j)] = layer;
1652 all_tensors.insert(layer->getOutput(j));
1653 }
1654 }
1655 for (int i = 0; i < this->network()->getNbLayers(); i++) {
1656 nvinfer1::ILayer* layer = this->network()->getLayer(i);
1657 layer_consumers[layer] = {};
1658 for (int j = 0; j < layer->getNbInputs(); j++) {
1659 nvinfer1::ITensor* input_tensor = layer->getInput(j);
1660 auto input_layer = tensor_layer.find(input_tensor);
1661 if (input_layer != tensor_layer.end()) {
1662 auto consumed_layer = layer_consumers.find(input_layer->second);
1663 if (consumed_layer != layer_consumers.end()) {
1664 consumed_layer->second.push_back(layer);
1665 }
1666 }
1667 all_tensors.insert(input_tensor);
1668 }
1669 }
1670 // Identify fused tensors.
1671 // Conv+BiasAdd+Add+Activation(Clip or Relu), Conv+BiasAdd+Add,
1672 // Conv+BiasAdd+Activation(Clip or Relu), Conv+BiasAdd,
1673 // Conv+Activation(Clip or Relu) are fused.
1674 std::set<nvinfer1::ITensor*> fused_tensors;
1675 typedef std::function<bool(const nvinfer1::ILayer*)> matcher;
1676 const std::vector<std::pair<string, std::vector<matcher>>> fused_patterns = {
1677 {"Fused Conv+Bias+Add+Activation",
1678 {
1679 IsConvolution,
1680 IsScale,
1681 IsAdd,
1682 IsClipOrRelu,
1683 }},
1684 {"Fused Conv+Bias+Add",
1685 {
1686 IsConvolution,
1687 IsScale,
1688 IsAdd,
1689 }},
1690 {"Fused Conv+Bias+Activation",
1691 {
1692 IsConvolution,
1693 IsScale,
1694 IsClipOrRelu,
1695 }},
1696 {"Fused Conv+Bias",
1697 {
1698 IsConvolution,
1699 IsScale,
1700 }},
1701 {"Fused Conv+Activation",
1702 {
1703 IsConvolution,
1704 IsClipOrRelu,
1705 }},
1706 };
1707 for (int i = 0; i < this->network()->getNbLayers(); i++) {
1708 for (const auto& pattern : fused_patterns) {
1709 size_t last_matcher = pattern.second.size() - 1;
1710 nvinfer1::ILayer* layer = this->network()->getLayer(i);
1711 // We should skip this layer if its outputs are already marked as fused,
1712 // but all the current patterns start with a convolution and are ordered
1713 // in decreasing pattern length, so that is not necessary (yet).
1714 std::vector<nvinfer1::ILayer*> fused_candidates;
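      // Walk forward along the single-consumer chain, checking each layer
      // against the pattern. All matched layers except the last are recorded
      // as fused; the last layer's output is the fusion's output and still
      // needs a range.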
1715 for (size_t index = 0; index <= last_matcher; ++index) {
1716 if ((!pattern.second[index](layer)) ||
1717 (index < last_matcher && layer_consumers[layer].size() != 1)) {
1718 fused_candidates.clear();
1719 break;
1720 }
1721 if (index < last_matcher) {
1722 fused_candidates.push_back(layer);
1723 layer = layer_consumers[layer].front();
1724 }
1725 }
1726 if (!fused_candidates.empty()) {
1727 VLOG(1) << pattern.first;
1728 for (const auto& fused_layer : fused_candidates) {
1729 for (int i = 0; i < fused_layer->getNbOutputs(); i++) {
1730 VLOG(1) << " Fused output tensor:"
1731 << fused_layer->getOutput(i)->getName();
1732 fused_tensors.insert(fused_layer->getOutput(i));
1733 }
1734 }
1735 break; // Don't try other patterns on this layer.
1736 }
1737 }
1738 }
1739 // Find tensors with no ranges that are not fused and force their layers to
1740 // not be quantized.
1741 for (auto tensor : all_tensors) {
1742 if (!quantization_ranges_.count(tensor) &&
1743 fused_tensors.find(tensor) == fused_tensors.end()) {
1744 // Note: there may be some warnings for "(Unnamed ITensor* N)". These
1745 // are tensors which are created internally by TF-TRT. The ranges for
1746 // these unnamed ITensors are always inferred from user provided ranges,
1747 // thus there will also be a warning for the range(s) the user missed.
1748 LOG(WARNING) << "Quantization range was not found for "
1749 << tensor->getName() << ". "
1750 << "Setting invalid quantization range.";
1751 // Set the range to something unusable so the engine will fail if it
1752 // tries to actually use the tensor's range.
1753 tensor->setDynamicRange(0, 0);
1754 auto layer = tensor_layer.find(tensor);
1755 // If the tensor is the output of a layer, set the layer's precision
1756 // to fp16 so that it isn't quantized.
1757 // Shuffle doesn't support setting precision.
1758 if (layer != tensor_layer.end() &&
1759 layer->second->getType() != nvinfer1::LayerType::kSHUFFLE) {
1760 VLOG(1) << "And setting layer " << layer->second->getName()
1761 << " precision to fp16.";
1762 layer->second->setPrecision(nvinfer1::DataType::kHALF);
1763 }
1764 }
1765 }
1766 #endif
1767 }
1768
1769 void Converter::PropagateQuantizationRanges() {
1770 // Propagate ranges across edges in quantization_infer_ until no new
1771 // information is added.
1772   // Note: this function modifies quantization_infer_; it might be better to
1773   // modify a copy instead if we need quantization_infer_ later for some
1774   // reason.
1775 bool information_added = true;
1776 while (information_added) {
1777 information_added = false;
1778 for (auto it = quantization_infer_.begin();
1779 it != quantization_infer_.end();) {
1780 auto input_tensor_range = quantization_ranges_.find(it->first);
1781 auto output_tensor_range = quantization_ranges_.find(it->second);
1782 if (input_tensor_range != quantization_ranges_.end() &&
1783 output_tensor_range == quantization_ranges_.end()) {
1784 // Input has range but output doesn't: copy range
1785         // TODO(laigd): consider reporting an error if a different range is
1786 // already set.
1787 quantization_ranges_[it->second] = input_tensor_range->second;
1788 information_added = true;
1789 VLOG(1) << "Copy quantization range: " << it->first->getName() << " -> "
1790 << it->second->getName();
1791 }
1792 // We can remove edges when the output range is known
1793 if (quantization_ranges_.find(it->second) != quantization_ranges_.end()) {
1794 it = quantization_infer_.erase(it);
1795 } else {
1796 ++it;
1797 }
1798 }
1799 }
1800 }
1801
1802 Status Converter::GetInputs(const NodeDef& node_def,
1803 std::vector<TRT_TensorOrWeights>* inputs) const {
1804 for (auto const& input_name : node_def.input()) {
1805 /*************************************************************************
1806 * TODO(jie): handle case 1) here.
1807 * Normalizes the inputs and extracts associated metadata:
1808 * 1) Inputs can contain a colon followed by a suffix of characters.
1809 * That suffix may be a single number (e.g. inputName:1) or several
1810 * word characters separated from a number by a colon
1811 * (e.g. inputName:foo:1). The
1812 * latter case is used to denote inputs and outputs of functions.
1813 * 2) Control dependency inputs contain caret at the beginning and we
1814 * remove this and annotate the edge as a control dependency.
1815 ************************************************************************/
1816 // skip control nodes
1817 if (input_name[0] == '^') continue;
1818 string name = input_name;
1819 auto last = name.find_last_of(':');
1820 // TODO(aaroey): use TensorId
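    // Only a trailing ":0" is stripped; e.g. "conv/Conv2D:0" becomes
    // "conv/Conv2D", while "conv/Conv2D:1" is left unchanged.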
1821 if (last != string::npos && last + 2 == name.size() &&
1822 name[last + 1] == '0') {
1823 name.erase(last);
1824 }
1825
1826 if (trt_tensors_.count(name)) {
1827 TRT_TensorOrWeights input = trt_tensors_.at(name);
1828 inputs->push_back(input);
1829 VLOG(2) << "Retrieved input " << name << ": " << input.DebugString();
1830 } else {
1831 // TODO(aaroey): this should not happen, make it a CHECK.
1832 // TODO(aaroey): use StrCat for pattern like this.
1833 string msg("Node ");
1834 StrAppend(&msg, node_def.name(), " should have an input named '", name,
1835 "' but it is not available");
1836 LOG(ERROR) << msg;
1837 return errors::InvalidArgument(msg);
1838 }
1839 }
1840 return Status::OK();
1841 }
1842
1843 // Checks that the number of inputs matches, and enforces that the inputs marked
1844 // as true are constant weights. true means that the input must be a weight,
1845 // while false means the input must be a tensor. In the future, false will mean
1846 // the input can be a tensor or weight.
1847 Status CheckInputsWeights(
1848 const OpConverterParams& params,
1849 const std::vector<std::pair<string, bool>>& inputs_is_weight) {
1850 const auto& inputs = params.inputs;
1851 const auto& node_def = params.node_def;
1852 if (inputs.size() != inputs_is_weight.size()) {
1853 return errors::InvalidArgument(
1854 node_def.op(), " got ", inputs.size(), " inputs but expected ",
1855 inputs_is_weight.size(), ", at ", node_def.name());
1856 }
1857 for (int i = 0; i < inputs.size(); i++) {
1858 if (inputs_is_weight[i].second && inputs.at(i).is_tensor()) {
1859 return errors::Unimplemented("The input \"", inputs_is_weight[i].first,
1860 "\" for ", node_def.op(),
1861 " must be a constant, at ", node_def.name());
1862 }
1863 // TODO(tmorris): Remove this check and provide a method to automatically
1864 // retrieve an input as a tensor, converting via CreateConstantLayer if it
1865 // was originally a weight. We will want a caching mechanism to prevent many
1866 // duplicate constants from being created.
1867 if (!inputs_is_weight[i].second && inputs.at(i).is_weights()) {
1868 return errors::Unimplemented("The input \"", inputs_is_weight[i].first,
1869 "\" for ", node_def.op(),
1870 " must be a tensor, at ", node_def.name());
1871 }
1872 }
1873 return Status::OK();
1874 }
1875
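// Checks that the node's dtype attribute (dtype_attr_name, "T" by default) is
// one of allowed_dtypes, returning Unimplemented if it is not.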
1876 Status AllowDataTypes(const OpConverterParams& params,
1877 const std::set<DataType>& allowed_dtypes,
1878 const char* dtype_attr_name = "T") {
1879 const auto& node_def = params.node_def;
1880 TFAttrs attrs(node_def);
1881 if (!attrs.count(dtype_attr_name)) {
1882 return errors::InvalidArgument("Attribute with name ", dtype_attr_name,
1883 " not found.");
1884 }
1885 const auto op_dtype = attrs.get<DataType>(dtype_attr_name);
1886 if (!allowed_dtypes.count(op_dtype)) {
1887 // Build string list of allowed types.
1888 std::ostringstream ss;
1889 for (auto it = allowed_dtypes.begin(); it != allowed_dtypes.end(); ++it) {
1890 if (it != allowed_dtypes.begin()) ss << ", ";
1891 ss << DataTypeString(*it);
1892 }
1893 return errors::Unimplemented("Data type ", DataTypeString(op_dtype),
1894 " is not supported for ", node_def.op(),
1895 ", must be one of [", ss.str(), "], at ",
1896 node_def.name());
1897 }
1898 return Status::OK();
1899 }
1900
1901 // ****************************************************************************
1902 // Constant folding functions for weights.
1903 // TODO(laigd): we should probably use eigen directly.
1904 // *****************************************************************************
1905 struct LambdaFactory {
1906 enum class OP_CATEGORY : int { RSQRT = 0, NEG, RECIP };
1907 OP_CATEGORY op;
1908
1909 template <typename T>
1910   std::function<T(T)> unary() {
1911 switch (op) {
1912 case OP_CATEGORY::RSQRT: {
1913 VLOG(2) << "RSQRT GETS DONE";
1914 return [](T t) -> T { return 1.0 / std::sqrt(t); };
1915 }
1916 case OP_CATEGORY::NEG:
1917 return [](T t) -> T { return -t; };
1918 case OP_CATEGORY::RECIP:
1919 return [](T t) -> T { return 1.0 / t; };
1920 default:
1921 LOG(ERROR) << "Not supported op for unary: " << static_cast<int>(op);
1922 return nullptr;
1923 }
1924 }
1925 };
1926
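// Specialization for Eigen::half: the value is cast to float, the op is
// applied, and the result is converted back to Eigen::half.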
1927 template <>
1928 std::function<Eigen::half(Eigen::half)> LambdaFactory::unary<Eigen::half>() {
1929 switch (op) {
1930 case OP_CATEGORY::RSQRT: {
1931 VLOG(2) << "RSQRT GETS DONE";
1932 return [](Eigen::half t) {
1933 return Eigen::half(1.0 / std::sqrt(static_cast<float>(t)));
1934 };
1935 }
1936 case OP_CATEGORY::NEG:
1937 return [](Eigen::half t) { return -t; };
1938 case OP_CATEGORY::RECIP:
1939 return [](Eigen::half t) {
1940 return Eigen::half(1.0 / static_cast<float>(t));
1941 };
1942 default:
1943 LOG(ERROR) << "Not supported op for unary: " << static_cast<int>(op);
1944 return nullptr;
1945 }
1946 }
1947
1948 Status UnaryCompute(const TRT_ShapedWeights& iweights,
1949 TRT_ShapedWeights* oweights, LambdaFactory unary_op) {
1950 CHECK(iweights.TrtDType() == oweights->TrtDType());
1951 switch (iweights.TrtDType()) {
1952 case nvinfer1::DataType::kFLOAT: {
1953 auto inp = static_cast<float const*>(iweights.GetValues());
1954 auto oup = static_cast<float*>(oweights->GetValues());
1955 std::transform(inp, inp + iweights.count(), oup, unary_op.unary<float>());
1956 break;
1957 }
1958 case nvinfer1::DataType::kHALF: {
1959 auto inp = static_cast<Eigen::half const*>(iweights.GetValues());
1960 auto oup = static_cast<Eigen::half*>(oweights->GetValues());
1961 std::transform(inp, inp + iweights.count(), oup,
1962 unary_op.unary<Eigen::half>());
1963 break;
1964 }
1965 default:
1966 return errors::Unimplemented("Data type not supported: ",
1967 DebugString(iweights.TrtDType()));
1968 }
1969 return Status::OK();
1970 }
1971
1972 // Before TRT 5.1.3, we have to calculate padding for convolutions ourselves.
1973 Status Conv2DPaddingHelper(OpConverterParams* params, const TFAttrs& attrs,
1974 const nvinfer1::DimsHW& kernel_size,
1975 const nvinfer1::DimsHW& dilation,
1976 const nvinfer1::DimsHW& stride,
1977 const std::vector<int64_t>& input_dims,
1978 nvinfer1::ITensor* tensor,
1979 std::vector<std::pair<int, int>>* padding,
1980 nvinfer1::ITensor** padded_tensor) {
1981 if (attrs.get<string>("padding") == "SAME") {
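    // With dilation d, the effective kernel extent is k + (k - 1) * (d - 1).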
1982 nvinfer1::DimsHW effective_kernel_size = kernel_size;
1983 effective_kernel_size.h() += (kernel_size.h() - 1) * (dilation.h() - 1);
1984 effective_kernel_size.w() += (kernel_size.w() - 1) * (dilation.w() - 1);
1985 *padding = CreateSamePadding(stride, effective_kernel_size, input_dims);
1986 } else {
1987 *padding = {{0, 0}, {0, 0}};
1988 }
1989
1990 // Handle asymmetric padding. TensorRT 5.1 added support for asymmetric
1991 // padding via setPrePadding and setPostPadding. Due to a bug in 5.1.2, we can
1992 // only use asymmetric padding in convolutions with 5.1.3+. But in 5.1.3, we
1993 // will always use setPaddingMode for simplicity.
1994 if ((*padding)[0].first != (*padding)[0].second ||
1995 (*padding)[1].first != (*padding)[1].second) {
1996 auto pad_layer = params->converter->network()->addPadding(
1997 *tensor, nvinfer1::DimsHW((*padding)[0].first, (*padding)[1].first),
1998 nvinfer1::DimsHW((*padding)[0].second, (*padding)[1].second));
1999 TFTRT_RETURN_ERROR_IF_NULLPTR(pad_layer, params->node_def.name());
2000 params->converter->MarkQuantizationRangesAsInferrable(
2001 tensor, pad_layer->getOutput(0));
2002 *padding = {{0, 0}, {0, 0}};
2003 tensor = pad_layer->getOutput(0);
2004 }
2005 *padded_tensor = tensor;
2006 return Status::OK();
2007 }
2008
2009 Status ConvertConv2DHelper(OpConverterParams* params, int group,
2010 bool is_conv2d_backprop_input) {
2011 const auto& inputs = params->inputs;
2012 const auto& node_def = params->node_def;
2013 TRT_TensorOrWeights backprop_output_size;
2014 nvinfer1::ITensor* tensor = nullptr;
2015 if (is_conv2d_backprop_input) {
2016 // In the case when Conv2dBackpropInput is used for conv2d_transpose, these
2017 // inputs correspond to: output size, filter, and input.
2018 TF_RETURN_IF_ERROR(CheckInputsWeights(
2019 *params,
2020 {{"input_sizes", true}, {"filter", true}, {"out_backprop", false}}));
2021 backprop_output_size = inputs.at(0);
2022 tensor = inputs.at(2).tensor();
2023 } else {
2024 TF_RETURN_IF_ERROR(
2025 CheckInputsWeights(*params, {{"input", false}, {"filter", true}}));
2026 tensor = inputs.at(0).tensor();
2027 }
2028 TF_RETURN_IF_ERROR(
2029 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
2030 TRT_ShapedWeights weights_rsck = inputs.at(1).weights();
2031 if (weights_rsck.shape_.nbDims != 4) {
2032 return errors::InvalidArgument("Conv2D expects kernel of dimension 4, at " +
2033 node_def.name());
2034 }
2035 TFAttrs attrs(node_def);
2036 auto data_format = attrs.get<string>("data_format");
2037 int c_index = (data_format == "NHWC") ? 3 : 1;
2038 int h_index = (data_format == "NHWC") ? 1 : 2;
2039 int w_index = (data_format == "NHWC") ? 2 : 3;
2040 auto tf_dilations = attrs.get<std::vector<int64>>("dilations");
2041 if (tf_dilations.size() != 4) {
2042 return errors::InvalidArgument(
2043 "Convolution dilations field must specify 4 dimensions, at ",
2044 node_def.name());
2045 }
2046 if (tf_dilations[0] != 1 || tf_dilations[c_index] != 1) {
2047 return errors::Unimplemented(
2048 "Dilation rate must be 1 for batch and channel dimensions, at ",
2049 node_def.name());
2050 }
2051 const nvinfer1::DimsHW dilation(tf_dilations[h_index], tf_dilations[w_index]);
2052 if (is_conv2d_backprop_input && (dilation.d[0] != 1 || dilation.d[1] != 1)) {
2053 return errors::Unimplemented(
2054 "Dilation with Conv2DBackpropInput (conv2d_transpose) is not supported",
2055 ", at ", node_def.name());
2056 }
2057
2058 const auto tf_stride = attrs.get<std::vector<int64>>("strides");
2059 if (tf_stride.size() != 4) {
2060 return errors::InvalidArgument(
2061 "Convolution strides field must specify 4 dimensions, at ",
2062 node_def.name());
2063 }
2064 if (tf_stride[0] != 1 || tf_stride[c_index] != 1) {
2065 return errors::Unimplemented(
2066 "Stride must be 1 for batch and channel dimensions, at ",
2067 node_def.name());
2068 }
2069 const nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]);
2070 if (params->validation_only) return Status::OK();
2071
2072 // Transpose to NCHW (NCHW is required for IConvLayer).
2073 const bool need_transpose = (data_format == "NHWC");
2074 if (need_transpose) {
2075 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2076 tensor, {0, 3, 1, 2}, StrCat(node_def.name(), "_to_NCHW"), &tensor));
2077 }
2078 // Dimensions of transposed tensor.
2079 const auto tensor_dim = tensor->getDimensions();
2080
2081 // group == 0 signifies that this is a depthwise convolution, so set
2082 // num_groups to size of input's channel dim. For a non-depthwise conv,
2083 // num_groups will be 1.
2084 const int num_groups = (group == 0) ? tensor_dim.d[0] : group;
2085
2086 // For conv, TF weights are RSCK, and TRT expects KCRS.
2087 // For backprop, TF weights are RSKC, and TRT expects CKRS.
2088 // Therefore, this reorder will work for both cases.
2089 TRT_ShapedWeights weights =
2090 params->weight_store->GetTempWeights(weights_rsck);
2091 ReorderRSCKToKCRS(weights_rsck, &weights, num_groups);
2092 TRT_ShapedWeights biases(weights.TrtDType());
2093 const int output_axis = is_conv2d_backprop_input ? 1 : 0;
2094 const int noutput = weights.shape_.d[output_axis] * num_groups;
2095 nvinfer1::DimsHW kernel_size;
2096 kernel_size.h() = weights.shape_.d[2];
2097 kernel_size.w() = weights.shape_.d[3];
2098
2099 // Before TRT 5.1.3, we have to calculate padding ourselves.
2100 #if !IS_TRT_VERSION_GE(5, 1, 3, 0)
2101 std::vector<std::pair<int, int>> padding;
2102 std::vector<int64_t> input_dims;
2103 if (is_conv2d_backprop_input) {
2104 // For backprop, calculate padding based on "input_sizes" input, which
2105 // actually corresponds to output size. ("input_sizes" makes sense in the
2106 // context of Conv2DBackpropInput).
2107     // We use h_index and w_index instead of 1 and 2 because we haven't
2108 // transposed backprop_output_size along with the input.
2109 auto output_size_weights =
2110 static_cast<int*>(backprop_output_size.weights().GetValues());
2111 input_dims = {output_size_weights[h_index], output_size_weights[w_index]};
2112 } else {
2113 // Use 1 and 2 because tensor_dim has the dimensions of the transposed
2114 // input.
2115 input_dims = {static_cast<int>(tensor_dim.d[1]),
2116 static_cast<int>(tensor_dim.d[2])};
2117 }
2118 nvinfer1::ITensor* padded_tensor = nullptr;
2119 TF_RETURN_IF_ERROR(Conv2DPaddingHelper(params, attrs, kernel_size, dilation,
2120 stride, input_dims, tensor, &padding,
2121 &padded_tensor));
2122 tensor = padded_tensor;
2123 #endif
2124
2125 // Add convolution.
2126 nvinfer1::ILayer* conv_layer = nullptr;
2127 if (is_conv2d_backprop_input) {
2128 nvinfer1::IDeconvolutionLayer* layer =
2129 params->converter->network()->addDeconvolution(
2130 *tensor, noutput, kernel_size, weights.GetTrtWeights(),
2131 biases.GetTrtWeights());
2132 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
2133 layer->setStride(stride);
2134 // TensorRT 5.1.3 added support for padding modes.
2135 #if IS_TRT_VERSION_GE(5, 1, 3, 0)
2136 // VALID padding is the default TRT behavior.
2137 if (attrs.get<string>("padding") == "SAME") {
2138 // SAME_UPPER means that post padding is preferred.
2139 layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
2140 }
2141 #else
2142 layer->setPadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
2143 #endif
2144 layer->setName(node_def.name().c_str());
2145 layer->setNbGroups(num_groups);
2146 conv_layer = layer;
2147 } else {
2148 nvinfer1::IConvolutionLayer* layer =
2149 params->converter->network()->addConvolution(
2150 *tensor, noutput, kernel_size, weights.GetTrtWeights(),
2151 biases.GetTrtWeights());
2152 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
2153 layer->setStride(stride);
2154 #if IS_TRT_VERSION_GE(5, 1, 3, 0)
2155 if (attrs.get<string>("padding") == "SAME") {
2156 layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
2157 }
2158 #else
2159 layer->setPadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
2160 #endif
2161 layer->setName(node_def.name().c_str());
2162 layer->setNbGroups(num_groups);
2163 layer->setDilation(dilation);
2164 conv_layer = layer;
2165 }
2166 nvinfer1::ITensor* output_tensor = conv_layer->getOutput(0);
2167   // Add extra padding for Deconv because TRT doesn't accept an
2168   // output_shape argument, so the TRT-computed output shape could be wrong
2169   // when strides > 1.
2170 if (is_conv2d_backprop_input) {
2171 auto tf_output_shape =
2172 static_cast<int*>(backprop_output_size.weights().GetValues());
2173 nvinfer1::Dims trt_output_shape = output_tensor->getDimensions();
2174 // What determines the padding size is the difference between the given
2175 // input_sizes (tf_output_shape) and TRT computed size.
2176 const int height_diff = tf_output_shape[h_index] - trt_output_shape.d[1];
2177 const int width_diff = tf_output_shape[w_index] - trt_output_shape.d[2];
2178 if ((height_diff < 0) || (width_diff < 0)) {
2179 return errors::InvalidArgument(
2180 "input_sizes argument of Conv2DBackprop (i.e. output_shape argument "
2181 "of conv2d_transpose) ",
2182 "is too small for the given out_backprop argument of Conv2DBackprop "
2183 "(i.e. input argument of conv2d_transpose). Expect: ",
2184 "(", tf_output_shape[h_index], ", ", tf_output_shape[w_index],
2185 ") >= ", "(", trt_output_shape.d[1], ", ", trt_output_shape.d[2],
2186 ") for op ", node_def.name());
2187 }
2188 // Only add a padding layer if padding sizes are larger than 0
2189 if ((height_diff > 0) || (width_diff > 0)) {
2190 nvinfer1::DimsHW pre_padding(0, 0);
2191 nvinfer1::DimsHW post_padding(height_diff, width_diff);
2192 nvinfer1::IPaddingLayer* padding_layer =
2193 params->converter->network()->addPadding(*output_tensor, pre_padding,
2194 post_padding);
2195 output_tensor = padding_layer->getOutput(0);
2196 }
2197 }
2198 // Restore transpose.
2199 if (need_transpose) {
2200 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2201 output_tensor, {0, 2, 3, 1}, StrCat(node_def.name(), "_to_NHWC"),
2202 &output_tensor));
2203 }
2204 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
2205 return Status::OK();
2206 }
2207
2208 Status ConvertTranspose(OpConverterParams* params) {
2209 const auto& inputs = params->inputs;
2210 TF_RETURN_IF_ERROR(
2211 CheckInputsWeights(*params, {{"x", false}, {"perm", true}}));
2212 TF_RETURN_IF_ERROR(AllowDataTypes(
2213 *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
2214 // Get the permutation from weights.
2215 TRT_ShapedWeights weights = inputs.at(1).weights();
2216 const int* weights_ptr = static_cast<int*>(weights.GetValues());
2217 std::vector<int> perm(weights_ptr, weights_ptr + weights.count());
2218
2219 // Verify the permutation.
2220 nvinfer1::ITensor* input_tensor = inputs.at(0).tensor();
2221 if (perm.size() - 1 != size_t(input_tensor->getDimensions().nbDims)) {
2222 return errors::InvalidArgument(
2223 "Rank of perm for transpose does not match with that of the input.");
2224 }
2225 if (perm[0] != 0) {
2226 return errors::Unimplemented(
2227 "Transpose at batch dimension is not supported.");
2228 }
2229
2230 if (params->validation_only) return Status::OK();
2231
2232 // Start conversion.
2233 nvinfer1::ITensor* output_tensor = nullptr;
2234 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2235 input_tensor, perm, params->node_def.name(), &output_tensor));
2236 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
2237 return Status::OK();
2238 }
2239
2240 Status ConvertReshape(OpConverterParams* params) {
2241 const auto& inputs = params->inputs;
2242 const auto& node_def = params->node_def;
2243 TF_RETURN_IF_ERROR(
2244 CheckInputsWeights(*params, {{"tensor", false}, {"shape", true}}));
2245 TF_RETURN_IF_ERROR(AllowDataTypes(
2246 *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
2247 const TRT_TensorOrWeights& input_tensor = inputs.at(0);
2248 TRT_ShapedWeights weights = inputs.at(1).weights();
2249 if (weights.count() == 0) {
2250 return errors::Unimplemented("Reshape to shape=[] is not supported, at ",
2251 node_def.name());
2252 }
2253
2254 const int* weights_ptr = static_cast<int*>(weights.GetValues());
2255
2256   // Check that it doesn't change the batch dimension. This check is
2257   // conservative: for example, when the first dim of the shape is -1 and the
2258   // input tensor shape is not fixed, it is still possible that the reshape
2259   // doesn't change the batch dim, but as long as there is a possibility that
2260   // it could change the batch dim, we reject the conversion. The parameters are:
2261 //
2262 // * reshape_batch_dim: the value of the first dim of the input shape constant
2263 // * reshape_dims: all other dims of the input shape constant
2264 // * input_batch_dim: the value of the first dim of the input tensor to
2265 // reshape
2266 // * input_dims: all other dims of the input tensor to reshape
2267 //
2268 // The validation logic is:
2269 //
2270 // if input_batch_dim is fixed:
2271 // if reshape_batch_dim == input_batch_dim:
2272 // ok
2273 // elif reshape_batch_dim == -1 (meaning reshape_dims are fixed) and
2274 // input_dims are fixed and
2275 // prod(input_dims) == prod(reshape_dims)
2276 // ok
2277 // else:
2278 // not ok
2279 // elif input_dims are fixed:
2280 // if reshape_dims are fixed and
2281 // prod(input_dims) == prod(reshape_dims):
2282 // ok
2283 // else:
2284 // not ok
2285 // else:
2286 // not ok
2287 //
2288 // Note that the following is ok no matter whether reshape_batch_dim is fixed
2289 // or not:
2290 //
2291 // ```
2292 // input_batch_dim is not fixed &&
2293 // reshape_dims are fixed &&
2294 // prod(input_dims) == prod(reshape_dims),
2295 // ```
2296 //
2297 // because the non-batch dims of the new and old shapes match, and TF runtime
2298 // should make sure the batch dim is not changed.
2299
2300 const int input_batch_dim = input_tensor.batch_size();
2301 const int reshape_batch_dim = weights_ptr[0];
2302 const nvinfer1::Dims input_dims = input_tensor.GetTrtDims();
2303
2304 nvinfer1::Dims reshape_dims;
2305 reshape_dims.nbDims = weights.count() - 1;
2306 for (int i = 1; i < weights.count(); i++) {
2307 reshape_dims.d[i - 1] = weights_ptr[i];
2308 }
2309
2310 // Check that it doesn't change the batch dimension according to the logic
2311 // mentioned above.
2312 bool reshape_may_change_batch_dim = false;
2313 if (input_batch_dim > 0) { // Batch size is fixed.
2314 if (reshape_batch_dim == -1) { // Other dims of the shape must be fixed.
2315 if (!AreDimsStaticWithSameSize(input_dims, reshape_dims,
2316 /*is_tensor=*/true)) {
2317 reshape_may_change_batch_dim = true;
2318 }
2319 } else if (reshape_batch_dim != input_batch_dim) {
2320 reshape_may_change_batch_dim = true;
2321 } else {
2322 // This means (input_batch_dim>0 && input_batch_dim==reshape_batch_dim),
2323 // and TF runtime should make sure non-batch dims are matched.
2324 }
2325 } else if (!AreDimsStaticWithSameSize(input_dims, reshape_dims,
2326 /*is_tensor=*/true)) {
2327 reshape_may_change_batch_dim = true;
2328 }
2329 VLOG(1) << "input_batch_dim=" << input_batch_dim
2330 << ", input_dims=" << DebugString(input_dims)
2331 << "\nreshape_batch_dim=" << reshape_batch_dim
2332 << ", reshape_dims=" << DebugString(reshape_dims);
2333 if (reshape_may_change_batch_dim) {
2334 const string msg = StrCat(
2335 "Reshape on batch dimension is not supported, at ", node_def.name(),
2336 ". input_batch_dim=", input_batch_dim, ", ", DebugString(input_dims),
2337 "; reshape_batch_dim=", reshape_batch_dim, ", ",
2338 DebugString(reshape_dims));
2339 return errors::Unimplemented(msg);
2340 }
2341
2342 // Start conversion.
2343 nvinfer1::ITensor* output_tensor = nullptr;
2344 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2345 input_tensor, reshape_dims, params->validation_only, &output_tensor));
2346 if (params->validation_only) return Status::OK();
2347
2348 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
2349 return Status::OK();
2350 }
2351
2352 Status ConvertExpandDims(OpConverterParams* params) {
2353 const auto& inputs = params->inputs;
2354 const auto& node_def = params->node_def;
2355 TF_RETURN_IF_ERROR(
2356 CheckInputsWeights(*params, {{"input", false}, {"axis", true}}));
2357 TF_RETURN_IF_ERROR(AllowDataTypes(
2358 *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
2359 // Get input shape as vector.
2360 const TRT_TensorOrWeights& input_tensor = inputs.at(0);
2361 const nvinfer1::Dims dims = input_tensor.GetTrtDims();
2362 std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
2363 // Get axis to expand on.
2364 auto axis = inputs.at(1).weights().GetSpan<int>();
2365 if (axis.size() != 1) {
2366 return errors::InvalidArgument("ExpandDims axis must be a scalar, at ",
2367 node_def.name());
2368 }
2369 // Use rank = nbDims + 1 for ConvertAxis's bounds checking to account for
2370 // ExpandDim's ability to add an axis at end of the shape.
2371 int trt_axis;
2372 TF_RETURN_IF_ERROR(ConvertAxis(axis[0], dims.nbDims + 1, node_def.name(),
2373 /*use_implicit_batch=*/true, &trt_axis));
2374 if (params->validation_only) return Status::OK();
2375
2376 // ExpandDims: Insert new dim of size 1.
2377 input_dims.insert(input_dims.begin() + trt_axis, 1);
2378 // Reshape tensor.
2379 nvinfer1::Dims new_dims;
2380 TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims));
2381 nvinfer1::ITensor* output_tensor = nullptr;
2382 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2383 input_tensor, new_dims, /*validation_only=*/false, &output_tensor));
2384 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
2385 return Status::OK();
2386 }
2387
2388 Status ConvertSqueeze(OpConverterParams* params) {
2389 const auto& inputs = params->inputs;
2390 const auto& node_def = params->node_def;
2391 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
2392 TF_RETURN_IF_ERROR(AllowDataTypes(
2393 *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
2394 // Get input shape.
2395 const TRT_TensorOrWeights& input_tensor = inputs.at(0);
2396 const nvinfer1::Dims dims = input_tensor.GetTrtDims();
2397 std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
2398 // Mark axes to remove by setting them to 0.
2399 TFAttrs attrs(node_def);
2400 auto squeeze_dims = attrs.get<std::vector<int64>>("squeeze_dims");
2401 if (squeeze_dims.empty()) {
2402 return errors::Unimplemented(
2403 "Squeeze is only implemented for explicit dims, at ", node_def.name());
2404 }
2405 for (int tf_axis : squeeze_dims) {
2406 // Make sure axis is valid.
2407 int trt_axis;
2408 TF_RETURN_IF_ERROR(ConvertAxis(tf_axis, dims.nbDims, node_def.name(),
2409 /*use_implicit_batch=*/true, &trt_axis));
2410 // Make sure target dimension is size 1.
2411 if (input_dims[trt_axis] != 1) {
2412 return errors::InvalidArgument(
2413 "Dimension ", tf_axis, " with size ", input_dims[trt_axis],
2414 " cannot be squeezed because it must be size 1, at ",
2415 node_def.name());
2416 }
2417 // Mark dim for removal by setting to 0.
2418 input_dims[trt_axis] = 0;
2419 }
2420 if (params->validation_only) return Status::OK();
2421
2422 // Remove all dims which are equal to 0.
2423 input_dims.erase(std::remove(input_dims.begin(), input_dims.end(), 0),
2424 input_dims.end());
2425 // Reshape tensor.
2426 nvinfer1::Dims new_dims;
2427 TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims));
2428 nvinfer1::ITensor* output_tensor = nullptr;
2429 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2430 input_tensor, new_dims, /*validation_only=*/false, &output_tensor));
2431 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
2432 return Status::OK();
2433 }
2434
2435 template <typename Container>
2436 Status ConvertStridedSliceHelper(OpConverterParams* params,
2437 const TRT_TensorOrWeights& input,
2438 Container begin, Container size,
2439 const Container& stride,
2440 const nvinfer1::Dims* final_shape = nullptr) {
2441 const auto& node_def = params->node_def;
2442 // Get input dims.
2443 nvinfer1::Dims dims = input.GetTrtDims();
2444 std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
2445 // Temporarily add batch dimension so that indexes line up properly.
2446 input_dims.insert(input_dims.begin(), -1);
2447 // Check bounds.
2448 for (int i = 1; i < input_dims.size(); i++) {
2449 if (begin[i] < 0 || begin[i] > input_dims[i]) {
2450 return errors::InvalidArgument("\"begin\" for dimension ",
2451 std::to_string(i), " in ", node_def.op(),
2452 " is out of range, at ", node_def.name());
2453 }
2454 const int end = begin[i] + size[i];
2455 if (end < 0 || end > input_dims[i]) {
2456 return errors::InvalidArgument("\"begin\" + \"size\" for dimension ",
2457 std::to_string(i), " in ", node_def.op(),
2458 " is out of range, at ", node_def.name());
2459 }
2460 if (size[i] <= 0) {
2461 return errors::InvalidArgument("\"size\" cannot be negative or zero for ",
2462 node_def.op(), ", at ", node_def.name());
2463 }
2464 }
2465 // TRT 5.1 adds ISliceLayer. For older versions, we attempt to use the
2466 // padding layer with negative padding.
2467 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
2468 nvinfer1::Dims begin_dims, size_dims, stride_dims;
2469 TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(begin, &begin_dims,
2470 /*ignore_first_dim=*/true));
2471 TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(size, &size_dims,
2472 /*ignore_first_dim=*/true));
2473 TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(stride, &stride_dims,
2474 /*ignore_first_dim=*/true));
2475 if (params->validation_only) return Status::OK();
2476
2477 nvinfer1::ISliceLayer* layer = params->converter->network()->addSlice(
2478 *input.tensor(), begin_dims, size_dims, stride_dims);
2479 nvinfer1::ITensor* tensor = layer->getOutput(0);
2480 // Reshape for shrink_axis.
2481 if (final_shape) {
2482 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2483 TRT_TensorOrWeights(tensor), *final_shape, /*validation_only=*/false,
2484 &tensor));
2485 }
2486 params->outputs->push_back(TRT_TensorOrWeights(tensor));
2487 return Status::OK();
2488 #else
2489 // Use IPaddingLayer.
2490 // Strides must be 1 in this case.
2491 for (int x : stride) {
2492 if (x != 1) {
2493 return errors::Unimplemented(
2494 "Strides other than 1 are not supported with this version of TRT, "
2495 "at ",
2496 node_def.name());
2497 }
2498 }
2499 // Rank must be 2, 3 or 4.
2500 if (input_dims.size() > 4) {
2501 return errors::Unimplemented(node_def.op(),
2502 " for tensors with rank > 4 is not supported "
2503 "in this version of TRT, at ",
2504 node_def.name());
2505 }
2506 // Reshape if necessary to 4-D, since IPaddingLayer requires a 4-D input.
2507 const bool need_reshape = (input_dims.size() != 4);
2508 int reshape_dims_added = 0;
2509 nvinfer1::Dims reshape_dims;
2510 if (need_reshape) {
2511 // Add new dims after batch dim until tensor is 4D.
2512 while (input_dims.size() < 4) {
2513 input_dims.insert(input_dims.begin() + 1, 1);
2514 begin.insert(begin.begin() + 1, 0);
2515 size.insert(size.begin() + 1, 1);
2516 reshape_dims_added++;
2517 }
2518 TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &reshape_dims,
2519 /*ignore_first_dim=*/true));
2520 }
2521 // Find dimensions which need to be sliced.
2522 std::vector<int> pad_dims;
2523 for (int i = 1; i < input_dims.size(); i++) {
2524 if ((begin[i] != 0) || (begin[i] + size[i] != input_dims[i])) {
2525 pad_dims.push_back(i);
2526 }
2527 }
2528 if (pad_dims.empty()) {
2529 // No dimensions are changed, so this is a no-op. We could just return the
2530     // input without creating a new layer. However, TRT will crash if we try
2531     // to build an engine with no layers, so we add a no-op shuffle to
2532     // prevent our unit tests from breaking.
2533 // TODO(tmorris): Allow empty engines in the unit tests and return the input
2534 // as output here.
2535 if (params->validation_only) return Status::OK();
2536 nvinfer1::IShuffleLayer* layer =
2537 params->converter->network()->addShuffle(*input.tensor());
2538 params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
2539 return Status::OK();
2540 } else if (pad_dims.size() == 1) {
2541     // Only one dim is modified but we need two, so mark a second dim which
2542 // will have padding of 0. The dim we add is chosen to avoid an unnecessary
2543 // transpose.
2544 if (pad_dims[0] != 2) {
2545 pad_dims.push_back(2);
2546 } else {
2547 pad_dims.push_back(3);
2548 }
2549 } else if (pad_dims.size() > 2) {
2550 return errors::Unimplemented(
2551 node_def.op(),
2552 " can only modify up to 2 dimensions in this version of TRT, at ",
2553 node_def.name());
2554 }
2555 std::sort(pad_dims.begin(), pad_dims.end());
2556 // Convert to pre/post padding values. Since TRT does not have a StridedSlice
2557 // or Slice layer prior to 5.1, we instead create an IPaddingLayer with
2558 // negative padding.
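  // The slice [begin, begin + size) maps to non-positive padding values:
  // pre = -begin crops from the front, post = (begin + size) - dim crops from
  // the back.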
2559 nvinfer1::DimsHW pre_padding, post_padding;
2560 for (int i = 0; i < pad_dims.size(); i++) {
2561 const int axis = pad_dims[i];
2562 pre_padding.d[i] = -begin[axis];
2563 post_padding.d[i] = (begin[axis] + size[axis]) - input_dims[axis];
2564 }
2565
2566 // IPaddingLayer will always apply the padding to dims 2,3 (input format is
2567 // NCHW).
2568 const bool need_transpose = !(pad_dims[0] == 2 && pad_dims[1] == 3);
2569 std::vector<int> transpose_order(input_dims.size());
2570 std::vector<int> inv_transpose_order(input_dims.size());
2571 if (need_transpose) {
2572 if (pad_dims[0] == 1 && pad_dims[1] == 3) {
2573 transpose_order = {0, 2, 1, 3};
2574 inv_transpose_order = {0, 2, 1, 3};
2575 } else if (pad_dims[0] == 1 && pad_dims[1] == 2) {
2576 transpose_order = {0, 3, 1, 2};
2577 inv_transpose_order = {0, 2, 3, 1};
2578 }
2579 }
2580 if (params->validation_only) return Status::OK();
2581
2582 // Start conversion.
2583 nvinfer1::ITensor* tensor = input.tensor();
2584 if (need_reshape) {
2585 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2586 input, reshape_dims, /*validation_only=*/false, &tensor));
2587 }
2588 if (need_transpose) {
2589 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2590 tensor, transpose_order, StrCat(node_def.name(), "_for_pad"), &tensor));
2591 }
2592 // Add padding layer
2593 nvinfer1::IPaddingLayer* layer = params->converter->network()->addPadding(
2594 *tensor, pre_padding, post_padding);
2595 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
2596 params->converter->MarkQuantizationRangesAsInferrable(tensor,
2597 layer->getOutput(0));
2598 tensor = layer->getOutput(0);
2599 // Restore transpose
2600 if (need_transpose) {
2601 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2602 tensor, inv_transpose_order, StrCat(node_def.name(), "_after_pad"),
2603 &tensor));
2604 }
2605 // Reshape for shrink_axis.
2606 if (final_shape) {
2607 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2608 TRT_TensorOrWeights(tensor), *final_shape, /*validation_only=*/false,
2609 &tensor));
2610 } else if (need_reshape) {
2611 // Restore reshape.
2612 // Calculate output dimensions
2613 for (int i = 0; i < pad_dims.size(); i++) {
2614 const int axis = pad_dims[i];
2615 input_dims[axis] = size[axis];
2616 }
2617 // Remove added 1 dimensions
2618 for (int i = 0; i < reshape_dims_added; i++) {
2619 int value = input_dims[1];
2620 if (value != 1) {
2621 return errors::Internal("StridedSlice error when reshaping, at ",
2622 node_def.name());
2623 }
2624 input_dims.erase(input_dims.begin() + 1);
2625 }
2626
2627 nvinfer1::Dims new_dims;
2628 TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims,
2629 /*ignore_first_dim=*/true));
2630 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2631 TRT_TensorOrWeights(tensor), new_dims, /*validation_only=*/false,
2632 &tensor));
2633 }
2634
2635 params->outputs->push_back(TRT_TensorOrWeights(tensor));
2636 return Status::OK();
2637 #endif
2638 }
2639
2640 Status ConvertSlice(OpConverterParams* params) {
2641 const auto& inputs = params->inputs;
2642 const auto& node_def = params->node_def;
2643 TF_RETURN_IF_ERROR(CheckInputsWeights(
2644 *params, {{"input", false}, {"begin", true}, {"size", true}}));
2645 TF_RETURN_IF_ERROR(AllowDataTypes(
2646 *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
2647 std::vector<int> begin = inputs.at(1).weights().ToVector<int>();
2648 std::vector<int> size = inputs.at(2).weights().ToVector<int>();
2649 // Get input dims.
2650 nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
2651 std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
2652 // Add batch dimension so that indexes line up properly.
2653 input_dims.insert(input_dims.begin(), inputs.at(0).batch_size());
2654 if (!AllLengthsEqual({input_dims, begin, size})) {
2655 return errors::InvalidArgument(
2656 "Length of begin and size arguments must equal rank of input for "
2657 "Slice, at ",
2658 node_def.name());
2659 }
2660 // Check that batch dimension is unmodified.
2661 const bool begin_is_modified = begin[0] != 0;
2662   // If size[0] is not -1, we can only know if the batch dimension is
2663 // unmodified when the batch size is defined. When the batch size is
2664 // undefined, we don't convert to be safe.
2665 const bool batch_size_is_defined = input_dims[0] > 0;
2666 const bool size_is_modified =
2667 size[0] != -1 && (!batch_size_is_defined ||
2668 (batch_size_is_defined && size[0] != input_dims[0]));
2669 if (begin_is_modified || size_is_modified) {
2670 return errors::Unimplemented(
2671 "TensorRT does not allow modifications to the batch dimension, at ",
2672 node_def.name());
2673 }
2674 // Size of -1 signifies to take all remaining elements.
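// e.g. with input_dims = {4, 10, 6}, begin = {0, 2, 0} and size = {-1, -1, 3},
// the loop below rewrites size to {-1, 8, 3} (the batch entry is left alone).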
2675 for (int i = 1; i < input_dims.size(); i++) {
2676 if (size[i] == -1) {
2677 size[i] = input_dims[i] - begin[i];
2678 }
2679 }
2680 // Stride is 1 for all dims.
2681 std::vector<int> stride(begin.size(), 1);
2682 return ConvertStridedSliceHelper(params, inputs.at(0), begin, size, stride);
2683 }
2684
2685 Status ConvertStridedSlice(OpConverterParams* params) {
2686 const auto& inputs = params->inputs;
2687 const auto& node_def = params->node_def;
2688 TF_RETURN_IF_ERROR(CheckInputsWeights(
2689 *params,
2690 {{"input", false}, {"begin", true}, {"end", true}, {"strides", true}}));
2691 TF_RETURN_IF_ERROR(AllowDataTypes(
2692 *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
2693
2694 TFAttrs attrs(node_def);
2695 // new_axis_mask is not supported.
2696 const int32 new_axis_mask = attrs.get<int64>("new_axis_mask");
2697 if (new_axis_mask != 0) {
2698 return errors::Unimplemented(
2699 "new_axis_mask is not supported for StridedSlice, at ",
2700 node_def.name());
2701 }
2702 const int32 begin_mask = attrs.get<int64>("begin_mask");
2703 const int32 end_mask = attrs.get<int64>("end_mask");
2704 const int32 ellipsis_mask = attrs.get<int64>("ellipsis_mask");
2705 const int32 shrink_axis_mask = attrs.get<int64>("shrink_axis_mask");
2706
2707 // Get input dims.
2708 nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
2709 std::vector<int64> input_dims(dims.d, dims.d + dims.nbDims);
2710 // Add batch dimension so that indexes line up properly. Set it to -1 if it's
2711 // unknown, so ValidateStridedSliceOp() can handle it correctly below.
2712 input_dims.insert(input_dims.begin(),
2713 std::max(-1, inputs.at(0).batch_size()));
2714
2715 const TRT_ShapedWeights& begin_weights = inputs.at(1).weights();
2716 const TRT_ShapedWeights& end_weights = inputs.at(2).weights();
2717 const TRT_ShapedWeights& stride_weights = inputs.at(3).weights();
2718 if (!AllLengthsEqual({begin_weights.ToVector<int>(),
2719 end_weights.ToVector<int>(),
2720 stride_weights.ToVector<int>()})) {
2721 return errors::InvalidArgument(
2722 "Length of begin, end, and stride must be equal, at ", node_def.name());
2723 }
2724
2725 PartialTensorShape input_shape(input_dims);
2726 PartialTensorShape processing_shape;
2727 PartialTensorShape final_shape;
2728 bool is_identity;
2729 bool is_simple_slice;
2730 bool slice_dim0;
2731 absl::InlinedVector<int64, 4> begin;
2732 absl::InlinedVector<int64, 4> end;
2733 absl::InlinedVector<int64, 4> strides;
2734 TF_RETURN_IF_ERROR(ValidateStridedSliceOp(
2735 &begin_weights.GetTensor(), &end_weights.GetTensor(),
2736 stride_weights.GetTensor(), input_shape, begin_mask, end_mask,
2737 ellipsis_mask, new_axis_mask, shrink_axis_mask, &processing_shape,
2738 &final_shape, &is_identity, &is_simple_slice, &slice_dim0, &begin, &end,
2739 &strides));
2740
2741 // Negative or zero strides currently not supported.
2742 for (int stride : strides) {
2743 if (stride <= 0) {
2744 return errors::Unimplemented(
2745 "Negative or zero stride values are not supported for StridedSlice, "
2746 "at ",
2747 node_def.name());
2748 }
2749 }
2750
2751 // If batch dimension is covered by the ellipsis mask, it means it's left
2752 // untouched. Otherwise we check whether it modifies the batch dimension here.
2753 if (!(ellipsis_mask & 1) ||
2754 begin_weights.shape_.nbDims >= input_dims.size()) {
2755 // Check that batch dimension is unmodified. We need to use the expanded
2756 // begin/end/strides array since the original array may be incorrect when
2757 // (ellipsis_mask&1)==1.
2758 const bool begin_is_modified = !(begin_mask & 1) && (begin[0] != 0);
2759 const bool stride_is_modified = (strides[0] != 1);
2760 // If the end mask is not set, we can only know whether the batch dimension
2761 // is unmodified when the batch size is defined. When the batch size is
2762 // undefined, we don't convert, to be safe.
2763 const bool batch_size_is_defined = (input_dims[0] > 0);
2764 const bool end_is_modified =
2765 !(end_mask & 1) && (!batch_size_is_defined ||
2766 (batch_size_is_defined && end[0] != input_dims[0]));
2767 if (begin_is_modified || stride_is_modified || end_is_modified) {
2768 return errors::Unimplemented(
2769 "TensorRT does not allow modifications to the batch dimension, at ",
2770 node_def.name());
2771 }
2772 }
2773 // Can't shrink axis on batch dimension.
2774 if (shrink_axis_mask & 1) {
2775 return errors::Unimplemented(
2776 "TensorRT does not allow modifications to the batch dimension, at ",
2777 node_def.name());
2778 }
2779 // TRT Slice layer uses (begin, size) instead of (begin, end)
2780 absl::InlinedVector<int64, 4> size(input_dims.size());
2781 for (int i = 0; i < input_dims.size(); i++) {
2782 // Divide by stride (round up)
2783 size[i] = (end[i] - begin[i] + strides[i] - 1) / strides[i];
2784 }
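// For example, begin = 1, end = 8, stride = 3 gives size = (8 - 1 + 3 - 1) / 3 = 3,
// i.e. the elements at indices 1, 4 and 7.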
2785
2786 // shrink_axis_mask requires a reshape after the slice.
2787 nvinfer1::Dims final_shape_dims;
2788 nvinfer1::Dims* final_shape_dims_ptr = nullptr;
2789 if (shrink_axis_mask) {
2790 final_shape_dims =
2791 TensorShapeToTrtDims(final_shape, /*ignore_first_dim=*/true);
2792 final_shape_dims_ptr = &final_shape_dims;
2793 }
2794 return ConvertStridedSliceHelper(params, inputs.at(0), begin, size, strides,
2795 final_shape_dims_ptr);
2796 }
2797
2798 Status ConvertConv2D(OpConverterParams* params) {
2799 return ConvertConv2DHelper(params, 1, /*is_conv2d_backprop_input=*/false);
2800 }
2801
2802 Status ConvertConv2DDepthwise(OpConverterParams* params) {
2803 return ConvertConv2DHelper(params, 0, /*is_conv2d_backprop_input=*/false);
2804 }
2805
2806 Status ConvertConv2DBackpropInput(OpConverterParams* params) {
2807 return ConvertConv2DHelper(params, 1, /*is_conv2d_backprop_input=*/true);
2808 }
2809
2810 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
2811 Status ConvertConv3DHelper(OpConverterParams* params, int group,
2812 bool is_conv3d_backprop_input = false) {
2813 const int kNumDims = 5;
2814 const auto& inputs = params->inputs;
2815 const auto& node_def = params->node_def;
2816 TRT_TensorOrWeights backprop_output_size;
2817 nvinfer1::ITensor* tensor = nullptr;
2818 if (is_conv3d_backprop_input) {
2819 // In the case when Conv3dBackpropInput is used for conv3d_transpose, these
2820 // inputs correspond to: output size, filter, and input.
2821 TF_RETURN_IF_ERROR(CheckInputsWeights(
2822 *params,
2823 {{"input_sizes", true}, {"filter", true}, {"out_backprop", false}}));
2824 backprop_output_size = inputs.at(0);
2825 tensor = inputs.at(2).tensor();
2826 } else {
2827 TF_RETURN_IF_ERROR(
2828 CheckInputsWeights(*params, {{"input", false}, {"filter", true}}));
2829 tensor = inputs.at(0).tensor();
2830 }
2831 TF_RETURN_IF_ERROR(
2832 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
2833 const TRT_ShapedWeights weights_drsck = inputs.at(1).weights();
2834 if (weights_drsck.shape_.nbDims != kNumDims) {
2835 return errors::InvalidArgument("Conv3D expects kernel of dimension 5, at ",
2836 node_def.name());
2837 }
2838 TFAttrs attrs(node_def);
2839 auto data_format = attrs.get<string>("data_format");
2840 const bool is_ndhwc = (data_format == "NDHWC"); // For NCDHW, the N/D/H/W/C indices are 0/2/3/4/1 instead of 0/1/2/3/4.
2841 const int d_index = is_ndhwc ? 1 : 2;
2842 const int h_index = is_ndhwc ? 2 : 3;
2843 const int w_index = is_ndhwc ? 3 : 4;
2844 const int c_index = is_ndhwc ? 4 : 1;
2845 auto tf_dilations = attrs.get<std::vector<int64>>("dilations");
2846 if (tf_dilations.size() != kNumDims) {
2847 return errors::InvalidArgument(
2848 "Convolution dilations field must specify 5 dimensions, at ",
2849 node_def.name());
2850 }
2851 if (tf_dilations[0] != 1 || tf_dilations[c_index] != 1) {
2852 return errors::Unimplemented(
2853 "Dilation rate must be 1 for batch and channel dimensions, at ",
2854 node_def.name());
2855 }
2856
2857 const nvinfer1::Dims3 dilation_dhw(
2858 tf_dilations[d_index], tf_dilations[h_index], tf_dilations[w_index]);
2859 if (is_conv3d_backprop_input &&
2860 (dilation_dhw.d[0] != 1 || dilation_dhw.d[1] != 1 ||
2861 dilation_dhw.d[2] != 1)) {
2862 return errors::Unimplemented(
2863 "Dilation with Conv3DBackpropInputV2 (conv3d_transpose) is not "
2864 "supported",
2865 ", at ", node_def.name());
2866 }
2867
2868 const auto tf_stride = attrs.get<std::vector<int64>>("strides");
2869 if (tf_stride.size() != kNumDims) {
2870 return errors::InvalidArgument(
2871 "Convolution strides field must specify 5 dimensions, at ",
2872 node_def.name());
2873 }
2874 if (tf_stride[0] != 1 || tf_stride[c_index] != 1) {
2875 return errors::Unimplemented(
2876 "Stride must be 1 for batch and channel dimensions, at ",
2877 node_def.name());
2878 }
2879
2880 const nvinfer1::Dims3 stride_dhw(tf_stride[d_index], tf_stride[h_index],
2881 tf_stride[w_index]);
2882 const auto tensor_dim = tensor->getDimensions();
2883
2884 // Asymmetric padding on Deconv not supported for now
2885 if (is_conv3d_backprop_input && attrs.get<string>("padding") == "SAME") {
2886 TRT_ShapedWeights weights =
2887 params->weight_store->GetTempWeights(weights_drsck);
2888
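// With dilation d, a kernel of size k spans k + (k - 1) * (d - 1) input
// elements, e.g. k = 3 with d = 2 spans 5 elements.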
2889 nvinfer1::Dims3 effective_kernel_size(
2890 weights.shape_.d[0] +
2891 (weights.shape_.d[0] - 1) * (dilation_dhw.d[0] - 1), // D
2892 weights.shape_.d[1] +
2893 (weights.shape_.d[1] - 1) * (dilation_dhw.d[1] - 1), // R
2894 weights.shape_.d[2] +
2895 (weights.shape_.d[2] - 1) * (dilation_dhw.d[2] - 1) // S
2896 );
2897
2898 const auto output_size_weights =
2899 static_cast<int*>(backprop_output_size.weights().GetValues());
2900 const std::vector<int64_t> input_dims = {output_size_weights[d_index],
2901 output_size_weights[h_index],
2902 output_size_weights[w_index]};
2903
2904 const std::vector<std::pair<int, int>> padding =
2905 CreateSamePadding(stride_dhw, effective_kernel_size, input_dims);
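// SAME padding is asymmetric (pre != post) whenever the total padding needed
// along a dimension is odd; that case is rejected for deconvolution below.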
2906
2907 if (padding[0].first != padding[0].second ||
2908 padding[1].first != padding[1].second ||
2909 padding[2].first != padding[2].second) {
2910 return errors::Unimplemented(
2911 "Asymmetric padding with Conv3DBackpropInputV2 (conv3d_transpose) is "
2912 "not supported, at ",
2913 node_def.name());
2914 }
2915 }
2916
2917 // Finished validation checks
2918 if (params->validation_only) return Status::OK();
2919
2920 // Transpose to NCDHW (NCDHW is required for IConvLayer).
2921 const bool need_transpose = is_ndhwc;
2922 if (need_transpose) {
2923 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2924 tensor, {0, 4, 1, 2, 3}, StrCat(node_def.name(), "_to_NCDHW"),
2925 &tensor));
2926 }
2927
2928 // group == 0 signifies that this is a depthwise convolution, so set
2929 // num_groups to size of input's channel dim. For a non-depthwise conv,
2930 // num_groups will be 1.
2931 const int num_groups = (group == 0) ? tensor_dim.d[0] : group;
2932
2933 // For conv, TF weights are DRSCK, and TRT expects KCDRS.
2934 // For backprop, TF weights are DRSKC, and TRT expects KCDRS.
2935 // Therefore, this reorder will work for both cases.
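// (D/R/S are the kernel depth/height/width, C the input channels and K the
// output channels; e.g. for num_groups == 1 a TF kernel of shape
// [3, 3, 3, 16, 32] becomes a TRT kernel of shape [32, 16, 3, 3, 3].)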
2936 TRT_ShapedWeights weights =
2937 params->weight_store->GetTempWeights(weights_drsck);
2938 ReorderDRSCKToKCDRS(weights_drsck, &weights, num_groups);
2939 TRT_ShapedWeights biases(weights.TrtDType());
2940 const int output_axis = is_conv3d_backprop_input ? 1 : 0;
2941 const int noutput = weights.shape_.d[output_axis] * num_groups;
2942 nvinfer1::Dims3 kernel_size_drs(weights.shape_.d[2], // D
2943 weights.shape_.d[3], // R
2944 weights.shape_.d[4] // S
2945 );
2946
2947 // Add convolution.
2948 nvinfer1::ILayer* conv_layer = nullptr;
2949 if (is_conv3d_backprop_input) {
2950 nvinfer1::IDeconvolutionLayer* layer =
2951 params->converter->network()->addDeconvolutionNd(
2952 *tensor, noutput, kernel_size_drs, weights.GetTrtWeights(),
2953 biases.GetTrtWeights());
2954 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
2955 layer->setStrideNd(stride_dhw);  // Set the (D, H, W) stride.
2956
2957 // TensorRT 5.1.3 added support for padding modes.
2958 if (attrs.get<string>("padding") == "SAME") {
2959 VLOG(2) << "Using SAME padding";
2960 // SAME_UPPER means that post padding is preferred.
2961 layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
2962 }
2963
2964 layer->setName(node_def.name().c_str());
2965 layer->setNbGroups(num_groups);
2966 conv_layer = layer;
2967 } else {
2968 nvinfer1::IConvolutionLayer* layer =
2969 params->converter->network()->addConvolutionNd(
2970 *tensor, noutput, kernel_size_drs, weights.GetTrtWeights(),
2971 biases.GetTrtWeights());
2972 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
2973 layer->setStrideNd(stride_dhw);
2974
2975 if (attrs.get<string>("padding") == "SAME") {
2976 VLOG(2) << "Using SAME padding";
2977 layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
2978 }
2979
2980 layer->setName(node_def.name().c_str());
2981 layer->setNbGroups(num_groups);
2982 layer->setDilationNd(dilation_dhw);
2983 conv_layer = layer;
2984 }
2985 nvinfer1::ITensor* output_tensor = conv_layer->getOutput(0);
2986
2987 // Restore transpose.
2988 if (need_transpose) {
2989 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2990 output_tensor, {0, 2, 3, 4, 1}, StrCat(node_def.name(), "_to_NDHWC"),
2991 &output_tensor));
2992 }
2993 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
2994 return Status::OK();
2995 }
2996
2997 Status ConvertConv3D(OpConverterParams* params) {
2998 return ConvertConv3DHelper(params, 1, /*is_conv3d_backprop_input=*/false);
2999 }
3000
3001 Status ConvertConv3DBackpropInputV2(OpConverterParams* params) {
3002 return ConvertConv3DHelper(params, 1, /*is_conv3d_backprop_input=*/true);
3003 }
3004
3005 Status ConvertPool3D(OpConverterParams* params) {
3006 const int kNumDims = 5;
3007 const auto& inputs = params->inputs;
3008 const auto& node_def = params->node_def;
3009 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
3010 TF_RETURN_IF_ERROR(
3011 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3012 nvinfer1::PoolingType type;
3013 if (node_def.op() == "MaxPool3D") {
3014 type = nvinfer1::PoolingType::kMAX;
3015 } else if (node_def.op() == "AvgPool3D") {
3016 type = nvinfer1::PoolingType::kAVERAGE;
3017 } else {
3018 return errors::Unimplemented("Unsupported pooling type: ", node_def.op(),
3019 ", at ", node_def.name());
3020 }
3021 TFAttrs attrs(node_def);
3022 const string padding_type = attrs.get<string>("padding");
3023 if ((padding_type != "SAME") && (padding_type != "VALID")) {
3024 return errors::Unimplemented("Unsupported padding type: ", padding_type,
3025 ", at ", node_def.name());
3026 }
3027 const auto data_format = attrs.get<string>("data_format");
3028 const bool is_ndhwc = (data_format == "NDHWC");
3029 const int c_index = is_ndhwc ? 4 : 1;
3030 const int d_index = is_ndhwc ? 1 : 2;
3031 const int h_index = is_ndhwc ? 2 : 3;
3032 const int w_index = is_ndhwc ? 3 : 4;
3033 const auto tf_stride = attrs.get<std::vector<int64>>("strides");
3034 if (tf_stride.size() != kNumDims) {
3035 return errors::InvalidArgument(
3036 "Pooling strides field must specify 5 dimensions, at ",
3037 node_def.name());
3038 }
3039 if (tf_stride[0] != 1 || tf_stride[c_index] != 1) {
3040 return errors::Unimplemented(
3041 "stride must be 1 for batch and channel dimensions, at ",
3042 node_def.name());
3043 }
3044 const auto tf_kernel = attrs.get<std::vector<int64>>("ksize");
3045 if (tf_kernel.size() != kNumDims) {
3046 return errors::InvalidArgument(
3047 "Pooling ksize field must specify 5 dimensions, at ", node_def.name());
3048 }
3049 if (tf_kernel[0] != 1 || tf_kernel[c_index] != 1) {
3050 return errors::Unimplemented(
3051 "ksize must be 1 for batch and channel dimensions, at ",
3052 node_def.name());
3053 }
3054 if (params->validation_only) return Status::OK();
3055
3056 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
3057 if (data_format == "NDHWC") {
3058 // NDHWC => NCDHW
3059 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
3060 tensor, {0, 4, 1, 2, 3}, StrCat(node_def.name(), "_to_NCDHW"),
3061 &tensor));
3062 }
3063
3064 const nvinfer1::Dims3 stride(tf_stride[d_index], tf_stride[h_index],
3065 tf_stride[w_index]);
3066 const nvinfer1::Dims3 ksize(tf_kernel[d_index], tf_kernel[h_index],
3067 tf_kernel[w_index]);
3068
3069 nvinfer1::IPoolingLayer* layer =
3070 params->converter->network()->addPoolingNd(*tensor, type, ksize);
3071 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3072
3073 params->converter->MarkQuantizationRangesAsInferrable(tensor,
3074 layer->getOutput(0));
3075
3076 layer->setStrideNd(stride);
3077 // VALID padding is the default TRT behavior.
3078 if (padding_type == "SAME") {
3079 // SAME_UPPER means that post padding is preferred.
3080 layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
3081 }
3082 layer->setName(node_def.name().c_str());
3083 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3084
3085 if (data_format == "NDHWC") {
3086 // NCDHW => NDHWC
3087 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
3088 output_tensor, {0, 2, 3, 4, 1}, StrCat(node_def.name(), "_to_NDHWC"),
3089 &output_tensor));
3090 }
3091
3092 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3093 return Status::OK();
3094 }
3095 #endif // #if IS_TRT_VERSION_GE(6, 0, 0, 0)
3096
3097 Status ConvertFusedConv2DBiasActivation(OpConverterParams* params) {
3098 const auto& inputs = params->inputs;
3099 const auto& node_def = params->node_def;
3100
3101 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false},
3102 {"filter", true},
3103 {"bias", true},
3104 {"side_input", true},
3105 {"conv_input_scale", true},
3106 {"side_input_scale", true}}));
3107 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
3108 TF_RETURN_IF_ERROR(
3109 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3110 TRT_ShapedWeights weights = inputs.at(1).weights();
3111 if (weights.shape_.nbDims != 4) {
3112 return errors::InvalidArgument(
3113 "FusedConv2DBiasActivation expects kernel of dimension 4, at " +
3114 node_def.name());
3115 }
3116 TFAttrs attrs(node_def);
3117 auto data_format = attrs.get<string>("data_format");
3118 if (data_format != "NHWC" && data_format != "NCHW") {
3119 return errors::InvalidArgument("Unsupported data_format:", data_format,
3120 " at ", node_def.name());
3121 }
3122
3123 int c_index = (data_format == "NHWC") ? 3 : 1;
3124 int h_index = (data_format == "NHWC") ? 1 : 2;
3125 int w_index = (data_format == "NHWC") ? 2 : 3;
3126 auto tf_dilations = attrs.get<std::vector<int64>>("dilations");
3127 if (tf_dilations.size() != 4) {
3128 return errors::InvalidArgument(
3129 "Convolution dilations field must specify 4 dimensions, at ",
3130 node_def.name());
3131 }
3132 if (tf_dilations[0] != 1 || tf_dilations[c_index] != 1) {
3133 return errors::Unimplemented(
3134 "Dilation rate must be 1 for batch and channel dimensions, at ",
3135 node_def.name());
3136 }
3137 const nvinfer1::DimsHW dilation(tf_dilations[h_index], tf_dilations[w_index]);
3138
3139 const auto tf_stride = attrs.get<std::vector<int64>>("strides");
3140 if (tf_stride.size() != 4) {
3141 return errors::InvalidArgument(
3142 "Convolution strides field must specify 4 dimensions, at ",
3143 node_def.name());
3144 }
3145 if (tf_stride[0] != 1 || tf_stride[c_index] != 1) {
3146 return errors::Unimplemented(
3147 "Stride must be 1 for batch and channel dimensions, at ",
3148 node_def.name());
3149 }
3150 const nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]);
3151 const auto activation_mode = attrs.get<string>("activation_mode");
3152 auto op_pair = ActivationTypeMap()->find(activation_mode);
3153 if (op_pair == ActivationTypeMap()->end() && activation_mode != "None") {
3154 return errors::Unimplemented("Activation mode: ", activation_mode,
3155 " not supported at: ", node_def.name());
3156 }
3157
3158 const auto filter_format = attrs.get<string>("filter_format");
3159 if (filter_format != "HWIO" && filter_format != "OIHW") {
3160 return errors::InvalidArgument("Unsupported filter_format:", filter_format,
3161 " at ", node_def.name());
3162 }
3163 // Check that there's no side_input and that conv_input_scale is 1.
3164 TRT_ShapedWeights side_input = inputs.at(3).weights();
3165 if (side_input.count() != 0) {
3166 return errors::InvalidArgument(
3167 "FusedConv2DBiasActivation doesn't yet support side_input, at " +
3168 node_def.name());
3169 }
3170 TRT_ShapedWeights conv_input_scale = inputs.at(4).weights();
3171 if (conv_input_scale.count() != 1 ||
3172 conv_input_scale.TrtDType() != nvinfer1::DataType::kFLOAT ||
3173 conv_input_scale.GetSpan<float>()[0] != 1.0) {
3174 return errors::InvalidArgument(
3175 "FusedConv2DBiasActivation doesn't yet support conv_input_scale, at " +
3176 node_def.name());
3177 }
3178 if (params->validation_only) return Status::OK();
3179
3180 // Transpose to NCHW (NCHW is required for IConvLayer).
3181 const bool need_transpose = (data_format == "NHWC");
3182 if (need_transpose) {
3183 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
3184 tensor, {0, 3, 1, 2}, StrCat(node_def.name(), "_to_NCHW"), &tensor));
3185 }
3186
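// filter_format HWIO stores the kernel as (height, width, in_channels,
// out_channels), while OIHW stores it as (out_channels, in_channels, height,
// width), so the spatial dimensions live at different indices.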
3187 nvinfer1::DimsHW kernel_size;
3188 if (filter_format == "OIHW") {
3189 kernel_size.h() = weights.shape_.d[2];
3190 kernel_size.w() = weights.shape_.d[3];
3191 } else {
3192 // HWIO.
3193 DCHECK_EQ(filter_format, "HWIO");
3194 kernel_size.h() = weights.shape_.d[0];
3195 kernel_size.w() = weights.shape_.d[1];
3196 }
3197 // Before TRT 5.1.3, we have to calculate padding ourselves.
3198 #if !IS_TRT_VERSION_GE(5, 1, 3, 0)
3199 const auto tensor_dim = tensor->getDimensions();
3200 std::vector<int64_t> input_dims;
3201 // Use 1 and 2 because tensor_dim has the dimensions of the transposed
3202 // input.
3203 input_dims = {static_cast<int>(tensor_dim.d[1]),
3204 static_cast<int>(tensor_dim.d[2])};
3205 std::vector<std::pair<int, int>> padding;
3206 nvinfer1::ITensor* padded_tensor = nullptr;
3207 TF_RETURN_IF_ERROR(Conv2DPaddingHelper(params, attrs, kernel_size, dilation,
3208 stride, input_dims, tensor, &padding,
3209 &padded_tensor));
3210 tensor = padded_tensor;
3211 #endif
3212
3213 // Add convolution.
3214 TRT_ShapedWeights biases = inputs.at(2).weights();
3215 nvinfer1::IConvolutionLayer* conv_layer = nullptr;
3216 if (filter_format == "OIHW") {
3217 // Weights are already in the right order.
3218 conv_layer = params->converter->network()->addConvolution(
3219 *tensor, weights.shape_.d[0], kernel_size, weights.GetTrtWeights(),
3220 biases.GetTrtWeights());
3221 } else {
3222 // For conv, TF weights are RSCK, and TRT expects KCRS.
3223 DCHECK_EQ(filter_format, "HWIO");
3224 TRT_ShapedWeights weights_kcrs =
3225 params->weight_store->GetTempWeights(weights);
3226 ReorderRSCKToKCRS(weights, &weights_kcrs, 1);
3227 conv_layer = params->converter->network()->addConvolution(
3228 *tensor, weights.shape_.d[3], kernel_size, weights_kcrs.GetTrtWeights(),
3229 biases.GetTrtWeights());
3230 }
3231 TFTRT_RETURN_ERROR_IF_NULLPTR(conv_layer, node_def.name());
3232 conv_layer->setStride(stride);
3233 #if IS_TRT_VERSION_GE(5, 1, 3, 0)
3234 if (attrs.get<string>("padding") == "SAME") {
3235 conv_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
3236 }
3237 #else
3238 conv_layer->setPadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
3239 #endif
3240 conv_layer->setName(node_def.name().c_str());
3241 conv_layer->setNbGroups(1);
3242 conv_layer->setDilation(dilation);
3243 nvinfer1::ITensor* output_tensor = conv_layer->getOutput(0);
3244
3245 // Add activation if there is one.
3246 if (op_pair != ActivationTypeMap()->end()) {
3247 nvinfer1::IActivationLayer* activation_layer =
3248 params->converter->network()->addActivation(*output_tensor,
3249 op_pair->second);
3250 TFTRT_RETURN_ERROR_IF_NULLPTR(activation_layer, node_def.name());
3251 output_tensor = activation_layer->getOutput(0);
3252 }
3253 // Restore transpose.
3254 if (need_transpose) {
3255 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
3256 output_tensor, {0, 2, 3, 1}, StrCat(node_def.name(), "_to_NHWC"),
3257 &output_tensor));
3258 }
3259 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3260 return Status::OK();
3261 }
3262
3263 Status ConvertPool(OpConverterParams* params) {
3264 const auto& inputs = params->inputs;
3265 const auto& node_def = params->node_def;
3266 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
3267 TF_RETURN_IF_ERROR(
3268 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3269 nvinfer1::PoolingType type;
3270 if (node_def.op() == "MaxPool") {
3271 type = nvinfer1::PoolingType::kMAX;
3272 } else if (node_def.op() == "AvgPool") {
3273 type = nvinfer1::PoolingType::kAVERAGE;
3274 } else {
3275 return errors::Unimplemented("Unsupported pooling type: ", node_def.op(),
3276 ", at ", node_def.name());
3277 }
3278 TFAttrs attrs(node_def);
3279 const string padding_type = attrs.get<string>("padding");
3280 if ((padding_type != "SAME") && (padding_type != "VALID")) {
3281 return errors::Unimplemented("Unsupported padding type: ", padding_type,
3282 ", at ", node_def.name());
3283 }
3284 if (params->validation_only) return Status::OK();
3285
3286 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
3287 int h_index = 2;
3288 int w_index = 3;
3289 const auto data_format = attrs.get<string>("data_format");
3290 if (data_format == "NHWC") {
3291 h_index = 1;
3292 w_index = 2;
3293 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
3294 tensor, {0, 3, 1, 2}, StrCat(node_def.name(), "_to_NCHW"), &tensor));
3295 }
3296
3297 const auto tf_stride = attrs.get<std::vector<int64>>("strides");
3298 const nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]);
3299
3300 const auto tf_kernel = attrs.get<std::vector<int64>>("ksize");
3301 const nvinfer1::DimsHW ksize(tf_kernel[h_index], tf_kernel[w_index]);
3302
3303 // Before TRT 5.1.3, we have to calculate padding ourselves.
3304 #if !IS_TRT_VERSION_GE(5, 1, 3, 0)
3305 auto tensor_dim = tensor->getDimensions();
3306 std::vector<std::pair<int, int>> padding;
3307 if (padding_type == "SAME") {
3308 // This is NCHW tensor with no batch dimension.
3309 // 1 -> h
3310 // 2 -> w
3311 padding = CreateSamePadding(
3312 stride, ksize,
3313 {static_cast<int>(tensor_dim.d[1]), static_cast<int>(tensor_dim.d[2])});
3314 } else if (padding_type == "VALID") {
3315 padding = {{0, 0}, {0, 0}};
3316 }
3317 #endif
3318 // TensorRT 5.1 added support for asymmetric padding. Before that, we need an
3319 // extra padding layer.
3320 #if !IS_TRT_VERSION_GE(5, 1, 0, 0)
3321 // Asymmetric padding case.
3322 if (padding[0].first != padding[0].second ||
3323 padding[1].first != padding[1].second) {
3324 auto pad_layer = params->converter->network()->addPadding(
3325 *tensor, nvinfer1::DimsHW(padding[0].first, padding[1].first),
3326 nvinfer1::DimsHW(padding[0].second, padding[1].second));
3327 TFTRT_RETURN_ERROR_IF_NULLPTR(pad_layer, node_def.name());
3328 params->converter->MarkQuantizationRangesAsInferrable(
3329 tensor, pad_layer->getOutput(0));
3330 padding = {{0, 0}, {0, 0}};
3331 tensor = pad_layer->getOutput(0);
3332 }
3333 #endif
3334
3335 nvinfer1::IPoolingLayer* layer =
3336 params->converter->network()->addPooling(*tensor, type, ksize);
3337 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3338 // TODO(tmorris): Average pooling may not be entirely safe to infer
3339 // quantization range through (at least forwards - backwards should be fine).
3340 // Max pooling is okay.
3341 params->converter->MarkQuantizationRangesAsInferrable(tensor,
3342 layer->getOutput(0));
3343
3344 layer->setStride(stride);
3345 #if IS_TRT_VERSION_GE(5, 1, 3, 0)
3346 // VALID padding is the default TRT behavior.
3347 if (attrs.get<string>("padding") == "SAME") {
3348 // SAME_UPPER means that post padding is preferred.
3349 layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
3350 }
3351 #elif IS_TRT_VERSION_GE(5, 1, 0, 0)
3352 layer->setPrePadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
3353 layer->setPostPadding(nvinfer1::DimsHW{padding[0].second, padding[1].second});
3354 #else
3355 layer->setPadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
3356 #endif
3357 layer->setName(node_def.name().c_str());
3358 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3359
3360 if (data_format == "NHWC") {
3361 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
3362 output_tensor, {0, 2, 3, 1}, StrCat(node_def.name(), "_to_NHWC"),
3363 &output_tensor));
3364 }
3365 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3366 return Status::OK();
3367 }
3368
3369 Status ConvertLeakyRelu(OpConverterParams* params) {
3370 const auto& inputs = params->inputs;
3371 const auto& node_def = params->node_def;
3372 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
3373 TF_RETURN_IF_ERROR(
3374 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3375 TFAttrs attrs(node_def);
3376 const float alpha = attrs.get<float>("alpha");
3377
3378 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
3379 // Use IActivationLayer when available.
3380 if (params->validation_only) return Status::OK();
3381
3382 nvinfer1::IActivationLayer* layer =
3383 params->converter->network()->addActivation(
3384 *inputs.at(0).tensor(), nvinfer1::ActivationType::kLEAKY_RELU);
3385 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3386 layer->setAlpha(alpha);
3387 params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
3388 return Status::OK();
3389 #else
3390 // Use elementwise ops when IActivationLayer is not available.
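// Since alpha is restricted to [0, 1] below, LeakyRelu(x) = max(x, alpha * x).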
3391 if (alpha < 0.0f || alpha > 1.0f) {
3392 return errors::Unimplemented(
3393 "Alpha value for LeakyRelu must be between 0 and 1, at ",
3394 node_def.name());
3395 }
3396 if (params->validation_only) return Status::OK();
3397
3398 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
3399 // Create const for alpha.
3400 nvinfer1::ITensor* const_alpha_tensor = nullptr;
3401 TF_RETURN_IF_ERROR(CreateBroadcastableScalarConstant(
3402 params, alpha, tensor->getDimensions(), &const_alpha_tensor));
3403 // alpha * x
3404 nvinfer1::IElementWiseLayer* mul_layer =
3405 params->converter->network()->addElementWise(
3406 *tensor, *const_alpha_tensor, nvinfer1::ElementWiseOperation::kPROD);
3407 TFTRT_RETURN_ERROR_IF_NULLPTR(mul_layer, node_def.name());
3408 // max(x, alpha * x)
3409 nvinfer1::IElementWiseLayer* max_layer =
3410 params->converter->network()->addElementWise(
3411 *tensor, *mul_layer->getOutput(0),
3412 nvinfer1::ElementWiseOperation::kMAX);
3413 TFTRT_RETURN_ERROR_IF_NULLPTR(max_layer, node_def.name());
3414 nvinfer1::ITensor* output_tensor = max_layer->getOutput(0);
3415 params->converter->MarkQuantizationRangesAsInferrable(
3416 output_tensor, mul_layer->getOutput(0));
3417
3418 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3419 return Status::OK();
3420 #endif
3421 }
3422
3423 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
3424 Status ConvertClipByValue(OpConverterParams* params) {
3425 const auto& inputs = params->inputs;
3426 const auto& node_def = params->node_def;
3427 // TODO(tmorris): We can also allow the case where min and max are tensors by
3428 // using elementwise min and max layers.
3429 TF_RETURN_IF_ERROR(CheckInputsWeights(
3430 *params,
3431 {{"t", false}, {"clip_value_min", true}, {"clip_value_max", true}}));
3432 TF_RETURN_IF_ERROR(
3433 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3434 if (params->validation_only) return Status::OK();
3435
3436 TFAttrs attrs(node_def);
3437 const DataType dtype = attrs.get<DataType>("T");
3438 float clip_value_min = 0.0f;
3439 float clip_value_max = 0.0f;
3440 // TODO(tmorris): Add a templated helper function to get scalar weights of
3441 // InType casted to OutType.
3442 if (dtype == DataType::DT_FLOAT) {
3443 clip_value_min = inputs.at(1).weights().GetSpan<float>()[0];
3444 clip_value_max = inputs.at(2).weights().GetSpan<float>()[0];
3445 } else if (dtype == DataType::DT_HALF) {
3446 clip_value_min = Eigen::half_impl::half_to_float(
3447 inputs.at(1).weights().GetSpan<Eigen::half>()[0]);
3448 clip_value_max = Eigen::half_impl::half_to_float(
3449 inputs.at(2).weights().GetSpan<Eigen::half>()[0]);
3450 }
3451
3452 nvinfer1::IActivationLayer* layer =
3453 params->converter->network()->addActivation(
3454 *inputs.at(0).tensor(), nvinfer1::ActivationType::kCLIP);
3455 layer->setAlpha(clip_value_min);
3456 layer->setBeta(clip_value_max);
3457 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3458 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3459 params->converter->ProvideQuantizationRange(output_tensor, clip_value_min,
3460 clip_value_max);
3461 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3462 return Status::OK();
3463 }
3464 #endif
3465
3466 const std::unordered_map<string, nvinfer1::ActivationType>*
3467 ActivationTypeMap() {
3468 static auto* const m =
3469 new std::unordered_map<string, nvinfer1::ActivationType>({
3470 {"Relu", nvinfer1::ActivationType::kRELU},
3471 {"Sigmoid", nvinfer1::ActivationType::kSIGMOID},
3472 {"Tanh", nvinfer1::ActivationType::kTANH},
3473 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
3474 {"Elu", nvinfer1::ActivationType::kELU},
3475 {"Selu", nvinfer1::ActivationType::kSELU},
3476 {"Softsign", nvinfer1::ActivationType::kSOFTSIGN},
3477 {"Softplus", nvinfer1::ActivationType::kSOFTPLUS},
3478 #endif
3479 });
3480 return m;
3481 }
3482
3483 Status ConvertActivation(OpConverterParams* params) {
3484 const auto& inputs = params->inputs;
3485 const auto& node_def = params->node_def;
3486 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
3487 TF_RETURN_IF_ERROR(
3488 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3489 auto op_pair = ActivationTypeMap()->find(node_def.op());
3490 if (op_pair == ActivationTypeMap()->end()) {
3491 return errors::Unimplemented("Activation op: ", node_def.op(),
3492 " not supported at: ", node_def.name());
3493 }
3494 if (params->validation_only) return Status::OK();
3495
3496 // Start conversion.
3497 nvinfer1::IActivationLayer* layer =
3498 params->converter->network()->addActivation(*inputs.at(0).tensor(),
3499 op_pair->second);
3500 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3501 // Set parameters.
3502 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
3503 if (node_def.op() == "Elu") {
3504 layer->setAlpha(1.0f);
3505 } else if (node_def.op() == "Selu") {
3506 // From tensorflow/core/kernels/relu_op_functor.h
3507 layer->setAlpha(1.7580993408473768599402175208123f);
3508 layer->setBeta(1.0507009873554804934193349852946f);
3509 } else if (node_def.op() == "Softplus") {
3510 layer->setAlpha(1.0f);
3511 layer->setBeta(1.0f);
3512 }
3513 #endif
3514 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3515 // Set quantization range for output when known.
3516 if (node_def.op() == "Sigmoid") {
3517 params->converter->ProvideQuantizationRange(output_tensor, 0.0f, 1.0f);
3518 } else if (node_def.op() == "Tanh") {
3519 params->converter->ProvideQuantizationRange(output_tensor, -1.0f, 1.0f);
3520 } else if (node_def.op() == "Softsign") {
3521 params->converter->ProvideQuantizationRange(output_tensor, -1.0f, 1.0f);
3522 }
3523 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3524 return Status::OK();
3525 }
3526
3527 Status ConvertQuantize(OpConverterParams* params) {
3528 const auto& inputs = params->inputs;
3529 const auto& node_def = params->node_def;
3530 if (node_def.op() == "FakeQuantWithMinMaxArgs") {
3531 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
3532 } else if (node_def.op() == "FakeQuantWithMinMaxVars") {
3533 TF_RETURN_IF_ERROR(CheckInputsWeights(
3534 *params, {{"input", false}, {"min", true}, {"max", true}}));
3535 } else if (node_def.op() == "QuantizeAndDequantizeV2") {
3536 TF_RETURN_IF_ERROR(CheckInputsWeights(
3537 *params, {{"input", false}, {"input_min", true}, {"input_max", true}}));
3538 } else if (node_def.op() == "QuantizeAndDequantizeV3") {
3539 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false},
3540 {"input_min", true},
3541 {"input_max", true},
3542 {"num_bits", true}}));
3543 }
3544 float min_range = 0.0f;
3545 float max_range = 0.0f;
3546 if (node_def.op() == "FakeQuantWithMinMaxArgs") {
3547 // Get ranges via node attributes.
3548 TFAttrs attrs(node_def);
3549 if (attrs.count("min") == 0 || attrs.count("max") == 0) {
3550 return errors::InvalidArgument("Min or max attribute not found for ",
3551 node_def.op(), " at ", node_def.name());
3552 }
3553 min_range = attrs.get<float>("min");
3554 max_range = attrs.get<float>("max");
3555 } else if (node_def.op() == "FakeQuantWithMinMaxVars" ||
3556 node_def.op() == "QuantizeAndDequantizeV2" ||
3557 node_def.op() == "QuantizeAndDequantizeV3") {
3558 // Get ranges via inputs.
3559 auto get_weights_value = [&inputs](int index) {
3560 auto raw_weights =
3561 static_cast<float*>(inputs.at(index).weights().GetValues());
3562 return raw_weights[0];
3563 };
3564 min_range = get_weights_value(1);
3565 max_range = get_weights_value(2);
3566 } else {
3567 return errors::InvalidArgument("Unknown quantization op ", node_def.op(),
3568 ", at ", node_def.name());
3569 }
3570 if (params->validation_only) return Status::OK();
3571
3572 // Store ranges for tensor
3573 params->converter->ProvideQuantizationRange(inputs.at(0).tensor(), min_range,
3574 max_range);
3575 // Sometimes, TRT may not quantize a tensor, either because it chooses to
3576 // execute a higher precision kernel or because of op fusion. In these cases,
3577 // accuracy will suffer if the model was trained to expect quantization at
3578 // that tensor. We should consider adding a clip(tensor, min_range, max_range)
3579 // operation here to ensure that any arbitrarily placed quantize node will
3580 // execute as expected. However, this will negatively affect performance. If
3581 // users train their models in a way that models inference as closely as
3582 // possible (i.e. not quantizing in places where fusion will occur), then there
3583 // is no problem with the current implementation.
3584 params->outputs->push_back(inputs.at(0));
3585 return Status::OK();
3586 }
3587
3588 Status ConvertRelu6(OpConverterParams* params) {
3589 const auto& inputs = params->inputs;
3590 const auto& node_def = params->node_def;
3591 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
3592 TF_RETURN_IF_ERROR(
3593 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3594 if (params->validation_only) return Status::OK();
3595
3596 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
3597 // Use IActivationLayer for TRT >= 5.1
3598 nvinfer1::IActivationLayer* layer =
3599 params->converter->network()->addActivation(
3600 *inputs.at(0).tensor(), nvinfer1::ActivationType::kCLIP);
3601 layer->setAlpha(0.0f);
3602 layer->setBeta(6.0f);
3603 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3604 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3605 params->converter->ProvideQuantizationRange(output_tensor, 0.0f, 6.0f);
3606 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3607 return Status::OK();
3608 #else
3609 // Convert using min(Relu(x), 6) before TRT 5.1
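// i.e. Relu6(x) = min(max(x, 0), 6).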
3610 // Input Tensor
3611 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
3612
3613 // Relu operation i.e. Relu(x) = max(0, x)
3614 nvinfer1::IActivationLayer* relu_layer =
3615 params->converter->network()->addActivation(
3616 *tensor, nvinfer1::ActivationType::kRELU);
3617 TFTRT_RETURN_ERROR_IF_NULLPTR(relu_layer, node_def.name());
3618
3619 // Large range of relu is problematic during quantization in INT8 precision
3620 // mode. Setting dynamic range of relu = [0.f, 6.0f] helps with quantization.
3621 // TRT only uses dynamic ranges in INT8 precision mode,
3622 // and this does not affect the FP32 path.
3623 params->converter->ProvideQuantizationRange(relu_layer->getOutput(0), 0.0f,
3624 6.0f);
3625
3626 // Create a constant layer to store the floating point weight i.e. 6.0f
3627 nvinfer1::ITensor* const6_tensor = nullptr;
3628 TF_RETURN_IF_ERROR(CreateBroadcastableScalarConstant(
3629 params, 6.0f, relu_layer->getOutput(0)->getDimensions(), &const6_tensor));
3630
3631 // ElementWise Min Operation
3632 // Min op is a nop for INT8 execution path, as the input tensor
3633 // to this layer will only have values in range [0.f, 6.0f].
3634 nvinfer1::IElementWiseLayer* relu6_layer =
3635 params->converter->network()->addElementWise(
3636 *relu_layer->getOutput(0), *const6_tensor,
3637 nvinfer1::ElementWiseOperation::kMIN);
3638 TFTRT_RETURN_ERROR_IF_NULLPTR(relu6_layer, node_def.name());
3639 nvinfer1::ITensor* output_tensor = relu6_layer->getOutput(0);
3640 params->converter->ProvideQuantizationRange(output_tensor, 0.0f, 6.0f);
3641
3642 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3643 return Status::OK();
3644 #endif
3645 }
3646
3647 Status ConvertBiasAddInt8WithoutCalibration(OpConverterParams* params) {
3648 const auto& inputs = params->inputs;
3649 const auto& node_def = params->node_def;
3650 TF_RETURN_IF_ERROR(
3651 CheckInputsWeights(*params, {{"value", false}, {"bias", true}}));
3652 TF_RETURN_IF_ERROR(
3653 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3654 if (params->validation_only) return Status::OK();
3655
3656 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
3657 const nvinfer1::Dims original_dims = tensor->getDimensions();
3658 TFAttrs attrs(node_def);
3659 const string data_format = attrs.get<string>("data_format");
3660 const int channel_index =
3661 (data_format == "NHWC" ? original_dims.nbDims - 1 : 0);
3662
3663 nvinfer1::Permutation permutation;
3664 if (channel_index != 0) {
3665 // Permute the dimensions so that the channel dimension is the first
3666 // dimension.
3667 for (int i = 0; i < original_dims.nbDims; ++i) {
3668 permutation.order[i] = i;
3669 }
3670 permutation.order[0] = channel_index;
3671 permutation.order[channel_index] = 0;
3672 VLOG(1) << "ConvertBiasAdd permutation: "
3673 << DebugString(permutation, original_dims.nbDims);
3674 }
3675
3676 // TensorRT addScale requires input to be of rank 3, we need to apply
3677 // transpose as well as reshape.
3678 // TODO(laigd): this doesn't match what the TRT doc says, fix the doc?
3679 if (channel_index != 0 || original_dims.nbDims != 3) {
3680 nvinfer1::IShuffleLayer* shuffle_layer =
3681 params->converter->network()->addShuffle(*tensor);
3682 TFTRT_RETURN_ERROR_IF_NULLPTR(shuffle_layer, node_def.name());
3683 params->converter->MarkQuantizationRangesAsInferrable(
3684 tensor, shuffle_layer->getOutput(0));
3685
3686 // NOTE(laigd): for some reason we need to apply the reshape
3687 // unconditionally. The default shape has nbDims==-1 and it seems the
3688 // behavior is undefined in some cases.
3689 nvinfer1::Dims reshape_dims;
3690 reshape_dims.nbDims = 3;
3691 // 0 means copying from input; -1 means inferring from the rest.
3692 reshape_dims.d[0] = 0;
3693 reshape_dims.d[1] = original_dims.nbDims >= 2 ? 0 : 1;
3694 reshape_dims.d[2] = original_dims.nbDims >= 3 ? -1 : 1;
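// e.g. a (C, H, W, D) input becomes (C, H, W * D), and a rank-1 (C) input
// becomes (C, 1, 1).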
3695 shuffle_layer->setReshapeDimensions(reshape_dims);
3696
3697 if (channel_index != 0) {
3698 shuffle_layer->setFirstTranspose(permutation);
3699 }
3700 tensor = shuffle_layer->getOutput(0);
3701 }
3702
3703 TRT_ShapedWeights weights = inputs.at(1).weights();
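// kCHANNEL applies one bias value per channel (dim 0 after the shuffle above);
// kUNIFORM applies a single bias value across the whole tensor.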
3704 nvinfer1::ScaleMode mode = nvinfer1::ScaleMode::kCHANNEL;
3705 if (weights.shape_.d[0] == 1) {
3706 mode = nvinfer1::ScaleMode::kUNIFORM;
3707 }
3708
3709 TRT_ShapedWeights empty_weights(weights.TrtDType());
3710 nvinfer1::IScaleLayer* layer = params->converter->network()->addScale(
3711 *tensor, mode, weights.GetTrtWeights(), empty_weights.GetTrtWeights(),
3712 empty_weights.GetTrtWeights());
3713 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3714
3715 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3716
3717 // Restore transpose & reshape.
3718 if (channel_index != 0 || original_dims.nbDims != 3) {
3719 nvinfer1::IShuffleLayer* shuffle_layer =
3720 params->converter->network()->addShuffle(*output_tensor);
3721 TFTRT_RETURN_ERROR_IF_NULLPTR(shuffle_layer, node_def.name());
3722 // NOTE: for same reason as mentioned above we need to apply the reshape
3723 // unconditionally.
3724 nvinfer1::Dims reshape_dims = original_dims;
3725 if (channel_index != 0) {
3726 // NOTE: according to NVIDIA dimension types are deprecated, so we don't
3727 // need to copy them back.
3728 reshape_dims.d[channel_index] = original_dims.d[0];
3729 reshape_dims.d[0] = original_dims.d[channel_index];
3730 }
3731 shuffle_layer->setReshapeDimensions(reshape_dims);
3732
3733 if (channel_index != 0) {
3734 shuffle_layer->setSecondTranspose(permutation);
3735 }
3736 params->converter->MarkQuantizationRangesAsInferrable(
3737 output_tensor, shuffle_layer->getOutput(0));
3738 output_tensor = shuffle_layer->getOutput(0);
3739 }
3740
3741 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3742 return Status::OK();
3743 }
3744
3745 Status ConvertBiasAdd(OpConverterParams* params) {
3746 if (params->precision_mode == TrtPrecisionMode::INT8 &&
3747 !params->use_calibration) {
3748 // NOTE(laigd): based on some observation, it seems TensorRT cannot fuse
3749 // IConvolutionLayer and IElementwiseLayer and will require range
3750 // information for the output of Conv2D. Using IScaleLayer will fix the
3751 // problem.
3752 return ConvertBiasAddInt8WithoutCalibration(params);
3753 }
3754 const auto& inputs = params->inputs;
3755 const auto& node_def = params->node_def;
3756
3757 if (inputs.size() != 2) {
3758 return errors::InvalidArgument(
3759 "BiasAdd expects exactly 2 inputs, but received ", inputs.size());
3760 }
3761
3762 if (inputs[0].is_weights() && inputs[1].is_weights()) {
3763 return errors::InvalidArgument(
3764 "All inputs are weights, but Grappler is expected to fold them.");
3765 }
3766
3767 TF_RETURN_IF_ERROR(
3768 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3769
3770 TFAttrs attrs(node_def);
3771 const string& data_format = attrs.get<string>("data_format");
3772
3773 nvinfer1::Dims input_shape = inputs.at(0).GetTrtDims();
3774 nvinfer1::Dims bias_shape = inputs.at(1).GetTrtDims();
3775 // If the input is NCHW, then we need to unsqueeze the bias such that its last
3776 // dimensions are 1s (and the first dimension is C).
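// e.g. a bias of shape (C) with an input of dims (C, H, W) is reshaped to
// (C, 1, 1) so that it broadcasts along H and W.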
3777 if (data_format == "NCHW") {
3778 bias_shape.nbDims = input_shape.nbDims;
3779 std::fill(bias_shape.d + 1, bias_shape.d + bias_shape.nbDims, 1);
3780 } else {
3781 // Next, broadcast the bias across the input.
3782 TF_RETURN_IF_ERROR(GetTrtBroadcastShape(inputs.at(0), inputs.at(1),
3783 /*check_feasibility=*/true,
3784 params->use_implicit_batch,
3785 &input_shape, &bias_shape));
3786 }
3787
3788 // Convert input to a TRT tensor
3789 nvinfer1::ITensor* input_tensor{nullptr};
3790 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
3791 inputs.at(0), input_shape, params->validation_only, &input_tensor));
3792
3793 // Finally, reshape bias. Since the bias is usually a constant, this will
3794 // normally happen at conversion-time.
3795 nvinfer1::ITensor* bias_tensor{nullptr};
3796 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
3797 inputs.at(1), bias_shape, params->validation_only, &bias_tensor));
3798 VLOG(2) << "Bias shape adjusted to " << DebugString(bias_shape);
3799
3800 if (params->validation_only) return Status::OK();
3801
3802 nvinfer1::IElementWiseLayer* layer =
3803 params->converter->network()->addElementWise(
3804 *input_tensor, *bias_tensor, nvinfer1::ElementWiseOperation::kSUM);
3805 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3806 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3807
3808 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3809 return Status::OK();
3810 }
3811
3812 void GetTensorDimsWithProtoShape(const Tensor& tensor, nvinfer1::Dims* dims) {
3813 if (tensor.dims() > 0) {
3814 *dims = GetTrtDimsForTensor(tensor);
3815 } else {
3816 dims->nbDims = 1;
3817 // No dimension provided. Flatten it.
3818 dims->d[0] = tensor.NumElements();
3819 dims->type[0] = nvinfer1::DimensionType::kSPATIAL;
3820 for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; ++i) {
3821 dims->d[i] = 0;
3822 }
3823 }
3824 }
3825
3826 template <typename Input>
3827 inline bool IsIntegerInInt32Bounds(const Input& inp) {
3828 static_assert(std::is_integral<Input>::value,
3829 "This function is only implemented for integral types.");
3830 // If Input is always within the range of int32, return true.
3831 if (sizeof(Input) < sizeof(int32) || std::is_same<Input, int32>::value) {
3832 return true;
3833 }
3834 // Otherwise, we need to check the value of the input. If the input is
3835 // unsigned, we only check the upper bound.
3836 if (!std::numeric_limits<Input>::is_signed) {
3837 return inp <= static_cast<Input>(std::numeric_limits<int32>::max());
3838 }
3839 // We can safely cast lowest() here since we now know that Input is signed and
3840 // sizeof(Input) >= sizeof(int32)
3841 return (inp >= static_cast<Input>(std::numeric_limits<int32>::lowest()) &&
3842 inp <= static_cast<Input>(std::numeric_limits<int32>::max()));
3843 }
3844
3845 template <DataType dtype>
3846 Status CopyToTrtInt32Array(const Tensor& tensor, int32* dst) {
3847 typedef typename EnumToDataType<dtype>::Type CType;
3848 const CType* src = tensor.flat<CType>().data();
3849 for (int i = 0; i < tensor.NumElements(); ++i) {
3850 // This becomes a no-op if CType is within bounds of int32
3851 if (!IsIntegerInInt32Bounds(src[i])) {
3852 return errors::InvalidArgument("Value at index ", i,
3853 " is outside the range of int32");
3854 }
3855 dst[i] = static_cast<int32>(src[i]);
3856 }
3857 return Status::OK();
3858 }
3859
3860 Status TfTensorToTrtWeights(const Tensor& tensor, TrtWeightStore* weight_store,
3861 TRT_ShapedWeights* weights) {
3862 const DataType dtype = tensor.dtype();
3863
3864 // We always convert the integer constants to INT32.
3865 //
3866 // TODO(aaroey): FP16 will remain in half format and is not converted to
3867 // FP32, but the converter currently uses all float weights as FP32. Fix
3868 // this.
3869 DataType converted_dtype = DataTypeIsInteger(dtype) ? DT_INT32 : dtype;
3870
3871 // Verify that the dtype is supported by TensorRT. Otherwise, return an error.
3872 nvinfer1::DataType trt_dtype;
3873 TF_RETURN_IF_ERROR(TfDataTypeToTrt(converted_dtype, &trt_dtype));
3874
3875 if (tensor.NumElements() == 0) {
3876 // Return empty weights.
3877 *weights = TRT_ShapedWeights(trt_dtype);
3878 return Status::OK();
3879 }
3880
3881 nvinfer1::Dims weight_dims;
3882 GetTensorDimsWithProtoShape(tensor, &weight_dims);
3883 *weights = weight_store->GetTempWeights(trt_dtype, weight_dims);
3884
3885 // Copy the tensor directly if the tensor does not require cast to the
3886 // supported type.
3887 if (converted_dtype == dtype) {
3888 char* dst = static_cast<char*>(weights->GetValues());
3889 memcpy(dst, tensor.tensor_data().data(), tensor.TotalBytes());
3890 return Status::OK();
3891 }
3892
3893 Status status = Status::OK();
3894 // Copy tensor elements after casting them to the converted DataType.
3895 int32* dst = static_cast<int32*>(weights->GetValues());
3896 switch (dtype) {
3897 case DT_INT8:
3898 status = CopyToTrtInt32Array<DT_INT8>(tensor, dst);
3899 break;
3900 case DT_UINT8:
3901 status = CopyToTrtInt32Array<DT_UINT8>(tensor, dst);
3902 break;
3903 case DT_INT16:
3904 status = CopyToTrtInt32Array<DT_INT16>(tensor, dst);
3905 break;
3906 case DT_UINT16:
3907 status = CopyToTrtInt32Array<DT_UINT16>(tensor, dst);
3908 break;
3909 case DT_UINT32:
3910 status = CopyToTrtInt32Array<DT_UINT32>(tensor, dst);
3911 break;
3912 case DT_INT64:
3913 status = CopyToTrtInt32Array<DT_INT64>(tensor, dst);
3914 break;
3915 case DT_UINT64:
3916 status = CopyToTrtInt32Array<DT_UINT64>(tensor, dst);
3917 break;
3918 default:
3919 return errors::Internal("Unexpected DataType: ", DataTypeString(dtype));
3920 }
3921 return status;
3922 }
3923
3924 // Convert a Const NodeDef to TRT_ShapedWeights. This is a special converter: it
3925 // always ignores the params->validation_only parameter and adds the converted
3926 // weights to params->outputs. We do this because TrtNodeValidator needs the
3927 // weights as inputs to other nodes, and uses them to determine whether those
3928 // nodes are supported by TRT.
3929 Status ConvertConst(OpConverterParams* params) {
3930 const auto& inputs = params->inputs;
3931 const auto& node_def = params->node_def;
3932 if (!inputs.empty()) {
3933 return errors::InvalidArgument(
3934 "Constant node is expected to have empty input list: ",
3935 node_def.name());
3936 }
3937
3938 // Create shaped weights as output
3939 const auto& tensor_proto = node_def.attr().at("value").tensor();
3940 Tensor tensor;
3941 if (!tensor.FromProto(tensor_proto)) {
3942 return errors::Internal("Cannot parse weight tensor proto: ",
3943 node_def.name());
3944 }
3945
3946 TFAttrs attrs(node_def);
3947 const DataType dtype = attrs.get<DataType>("dtype");
3948 if (dtype != tensor.dtype()) {
3949 return errors::InvalidArgument("DataType mismatch between attr (",
3950 DataTypeString(dtype), ") and tensor (",
3951 DataTypeString(tensor.dtype()), ")");
3952 }
3953
3954 TRT_ShapedWeights weights;
3955 TF_RETURN_IF_ERROR(
3956 TfTensorToTrtWeights(tensor, params->weight_store, &weights));
3957
3958 if (params->outputs != nullptr) {
3959 params->outputs->push_back(TRT_TensorOrWeights(weights));
3960 }
3961 return Status::OK();
3962 }
3963
3964 Status ConvertIdentity(OpConverterParams* params) {
3965 // TODO(tmorris): TRT's Identity layer does not get optimized away as of TRT
3966 // 5.0, however once we know that it does it would be nice to use that
3967 // instead.
3968 if (params->validation_only) return Status::OK();
3969 params->outputs->push_back(params->inputs.at(0));
3970 return Status::OK();
3971 }
3972
3973 const std::unordered_map<string, nvinfer1::ElementWiseOperation>*
3974 BinaryOperationMap() {
3975 static auto* const m =
3976 new std::unordered_map<string, nvinfer1::ElementWiseOperation> {
3977 {"Add", nvinfer1::ElementWiseOperation::kSUM},
3978 {"AddV2", nvinfer1::ElementWiseOperation::kSUM},
3979 {"Mul", nvinfer1::ElementWiseOperation::kPROD},
3980 {"Sub", nvinfer1::ElementWiseOperation::kSUB},
3981 {"Div", nvinfer1::ElementWiseOperation::kDIV},
3982 #if IS_TRT_VERSION_GE(5, 1, 0, 0)
3983 // This op applies Floor after Div.
3984 {"FloorDiv", nvinfer1::ElementWiseOperation::kDIV},
3985 #endif
3986 {"RealDiv", nvinfer1::ElementWiseOperation::kDIV},
3987 {"Minimum", nvinfer1::ElementWiseOperation::kMIN},
3988 {"Maximum", nvinfer1::ElementWiseOperation::kMAX},
3989 {"Pow", nvinfer1::ElementWiseOperation::kPOW},
3990 };
3991 return m;
3992 }
3993
3994 Status ConvertBinary(OpConverterParams* params) {
3995 const auto& inputs = params->inputs;
3996 const auto& node_def = params->node_def;
3997 if (inputs.size() != 2) {
3998 return errors::InvalidArgument(node_def.op(), " got ", inputs.size(),
3999 " inputs but expected 2, at ",
4000 node_def.name());
4001 }
4002 TF_RETURN_IF_ERROR(
4003 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4004
4005 // Constant folding should have been done by TensorFlow
4006 if (inputs.at(0).is_weights() && inputs.at(1).is_weights()) {
4007 return errors::Unimplemented(
4008 "Constant folding should have been done by TensorFlow; binary op received "
4009 "both inputs as constants, at: ",
4010 node_def.name());
4011 }
4012 const TRT_TensorOrWeights& operand_l = inputs.at(0);
4013 const TRT_TensorOrWeights& operand_r = inputs.at(1);
4014
4015 auto op_pair = BinaryOperationMap()->find(node_def.op());
4016 if (op_pair == BinaryOperationMap()->end()) {
4017 return errors::Unimplemented("Binary op ", node_def.op(),
4018 " not supported at: ", node_def.name());
4019 }
4020
4021 nvinfer1::Dims broadcasted_dims_l, broadcasted_dims_r;
4022 TF_RETURN_IF_ERROR(GetTrtBroadcastShape(
4023 operand_l, operand_r, /*check_feasibility=*/true,
4024 params->use_implicit_batch, &broadcasted_dims_l, &broadcasted_dims_r));
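  // Illustrative example of the broadcast: a tensor operand with non-batch
  // dims [3, 4, 5] and a constant operand with dims [5] are broadcast to
  // [3, 4, 5] and [1, 1, 5], so both sides have equal rank for the
  // elementwise layer below.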
4025 nvinfer1::ITensor* tensor_l = nullptr;
4026 nvinfer1::ITensor* tensor_r = nullptr;
4027 // This will also convert constants to tensors, and set quantization ranges.
4028 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
4029 operand_l, broadcasted_dims_l, params->validation_only, &tensor_l));
4030 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
4031 operand_r, broadcasted_dims_r, params->validation_only, &tensor_r));
4032 if (params->validation_only) return Status::OK();
4033
4034 // Add ElementWise layer.
4035 nvinfer1::ILayer* layer = params->converter->network()->addElementWise(
4036 *tensor_l, *tensor_r, op_pair->second);
4037 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4038 nvinfer1::ITensor* trt_tensor = layer->getOutput(0);
4039
4040 #if IS_TRT_VERSION_GE(5, 1, 0, 0)
4041 if (node_def.op() == "FloorDiv") {
4042 layer = params->converter->network()->addUnary(
4043 *trt_tensor, nvinfer1::UnaryOperation::kFLOOR);
4044 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4045 trt_tensor = layer->getOutput(0);
4046 }
4047 #endif
4048 params->outputs->push_back(TRT_TensorOrWeights(trt_tensor));
4049 return Status::OK();
4050 }
4051
ConvertRsqrt(OpConverterParams * params)4052 Status ConvertRsqrt(OpConverterParams* params) {
4053 const auto& inputs = params->inputs;
4054 const auto& node_def = params->node_def;
4055 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}}));
4056 TF_RETURN_IF_ERROR(
4057 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4058 if (params->validation_only) return Status::OK();
4059
4060 // TODO(tmorris): params->converter is null during validation. Allow
4061 // precision_mode and use_calibration to be accessed during validation and
4062 // include this check in validation.
4063 // We will need a quantization range for intermediate tensor if not using
4064 // calibration.
4065 //
4066 // x -> [Sqrt] -> sqrt(x) -> [Recip] -> 1/sqrt(x)
4067 // ^
4068 // need range here
4069 if (params->converter->precision_mode() == TrtPrecisionMode::INT8 &&
4070 !params->converter->use_calibration()) {
4071 return errors::Unimplemented(
4072 "Intermediate quantization range cannot be determined without"
4073 " calibration for Rsqrt, consider replacing with "
4074 "Sqrt -> FakeQuant -> Reciprocal ops, at ",
4075 node_def.name());
4076 }
4077 // Start conversion.
4078 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
4079 // Sqrt
4080 nvinfer1::IUnaryLayer* sqrt_layer = params->converter->network()->addUnary(
4081 *tensor, nvinfer1::UnaryOperation::kSQRT);
4082 TFTRT_RETURN_ERROR_IF_NULLPTR(sqrt_layer, node_def.name());
4083 // Recip
4084 nvinfer1::IUnaryLayer* recip_layer = params->converter->network()->addUnary(
4085 *sqrt_layer->getOutput(0), nvinfer1::UnaryOperation::kRECIP);
4086 TFTRT_RETURN_ERROR_IF_NULLPTR(recip_layer, node_def.name());
4087 params->outputs->push_back(TRT_TensorOrWeights(recip_layer->getOutput(0)));
4088 return Status::OK();
4089 }
4090
4091 const std::unordered_map<string, nvinfer1::UnaryOperation>*
UnaryOperationMap()4092 UnaryOperationMap() {
4093 static auto* const m =
4094 new std::unordered_map<string, nvinfer1::UnaryOperation>({
4095 {"Neg", nvinfer1::UnaryOperation::kNEG},
4096 {"Exp", nvinfer1::UnaryOperation::kEXP},
4097 {"Log", nvinfer1::UnaryOperation::kLOG},
4098 {"Sqrt", nvinfer1::UnaryOperation::kSQRT},
4099 {"Abs", nvinfer1::UnaryOperation::kABS},
4100 {"Reciprocal", nvinfer1::UnaryOperation::kRECIP},
4101 #if IS_TRT_VERSION_GE(5, 1, 0, 0)
4102 {"Sin", nvinfer1::UnaryOperation::kSIN},
4103 {"Cos", nvinfer1::UnaryOperation::kCOS},
4104 {"Tan", nvinfer1::UnaryOperation::kTAN},
4105 {"Sinh", nvinfer1::UnaryOperation::kSINH},
4106 {"Cosh", nvinfer1::UnaryOperation::kCOSH},
4107 {"Asin", nvinfer1::UnaryOperation::kASIN},
4108 {"Acos", nvinfer1::UnaryOperation::kACOS},
4109 {"Atan", nvinfer1::UnaryOperation::kATAN},
4110 {"Asinh", nvinfer1::UnaryOperation::kASINH},
4111 {"Acosh", nvinfer1::UnaryOperation::kACOSH},
4112 {"Atanh", nvinfer1::UnaryOperation::kATANH},
4113 {"Ceil", nvinfer1::UnaryOperation::kCEIL},
4114 {"Floor", nvinfer1::UnaryOperation::kFLOOR},
4115 #endif
4116 });
4117 return m;
4118 }
4119
ConvertUnary(OpConverterParams * params)4120 Status ConvertUnary(OpConverterParams* params) {
4121 const auto& inputs = params->inputs;
4122 const auto& node_def = params->node_def;
4123 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}}));
4124 TF_RETURN_IF_ERROR(
4125 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4126 auto op_pair = UnaryOperationMap()->find(node_def.op());
4127 if (op_pair == UnaryOperationMap()->end()) {
4128 return errors::Unimplemented("Unary op: ", node_def.op(),
4129 " not supported at: ", node_def.name());
4130 }
4131 if (params->validation_only) return Status::OK();
4132
4133 // Start conversion.
4134 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
4135 nvinfer1::IUnaryLayer* layer =
4136 params->converter->network()->addUnary(*tensor, op_pair->second);
4137 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4138 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4139
4140 // Set quantization ranges.
4141 if (node_def.op() == "Sin" || node_def.op() == "Cos") {
4142 params->converter->ProvideQuantizationRange(output_tensor, -1.0f, 1.0f);
4143 } else if (node_def.op() == "Asin" || node_def.op() == "Atan") {
4144 params->converter->ProvideQuantizationRange(output_tensor, -M_PI_2, M_PI_2);
4145 } else if (node_def.op() == "Acos") {
4146 params->converter->ProvideQuantizationRange(output_tensor, 0.0f, M_PI);
4147 } else if (node_def.op() == "Neg" || node_def.op() == "Abs") {
4148     // Neg and Abs will have the same range as the input since TRT uses
4149     // symmetric quantization.
4150 // TODO(tmorris): Should we infer ranges for Ceil and Floor as well?
4151 params->converter->MarkQuantizationRangesAsInferrable(tensor,
4152 output_tensor);
4153 }
4154 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4155 return Status::OK();
4156 }
4157
ConvertSquare(OpConverterParams * params)4158 Status ConvertSquare(OpConverterParams* params) {
4159 const auto& inputs = params->inputs;
4160 const auto& node_def = params->node_def;
4161 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}}));
4162 TF_RETURN_IF_ERROR(
4163 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4164 if (params->validation_only) return Status::OK();
4165
4166 // Constant 2 with same rank as input
4167 nvinfer1::ITensor* const2_tensor = nullptr;
4168 TF_RETURN_IF_ERROR(CreateBroadcastableScalarConstant(
4169 params, 2.0f, inputs.at(0).GetTrtDims(), &const2_tensor));
4170
4171 // ElementWise Pow Operation
4172 nvinfer1::IElementWiseLayer* layer =
4173 params->converter->network()->addElementWise(
4174 *inputs.at(0).tensor(), *const2_tensor,
4175 nvinfer1::ElementWiseOperation::kPOW);
4176 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4177 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4178
4179 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4180 return Status::OK();
4181 }
4182
ConvertReduce(OpConverterParams * params)4183 Status ConvertReduce(OpConverterParams* params) {
4184 const auto& inputs = params->inputs;
4185 const auto& node_def = params->node_def;
4186 TF_RETURN_IF_ERROR(
4187 CheckInputsWeights(*params, {{"input", false}, {"axis", true}}));
4188 TF_RETURN_IF_ERROR(
4189 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4190
4191 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
4192 auto tf_axes_list = inputs.at(1).weights().GetSpan<int>();
4193
4194 TFAttrs attrs(node_def);
4195   // Only DT_INT32 is supported for the Tidx attribute for now.
4196 if (attrs.get<DataType>("Tidx") != DataType::DT_INT32) {
4197 return errors::Unimplemented("Tidx supports only DT_INT32");
4198 }
4199
4200 int axes = 0;
4201 if (tf_axes_list.size() == 0) {
4202 return errors::InvalidArgument(
4203         "TRT cannot support reduce on all (batch) dimensions, at ",
4204 node_def.name());
4205 }
4206 for (int i = 0; i < tf_axes_list.size(); i++) {
4207 int trt_axis;
4208 TF_RETURN_IF_ERROR(
4209 ConvertAxis(tf_axes_list[i], tensor->getDimensions().nbDims,
4210 node_def.name(), /*use_implicit_batch=*/true, &trt_axis));
4211 axes |= (1 << trt_axis);
4212 }
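  // Illustrative example: for a TF input of shape [N, 2, 3, 4] with reduction
  // axes {1, 3}, the TRT axes (batch excluded) are 0 and 2, so the bitmask
  // passed to addReduce() below is (1 << 0) | (1 << 2) = 0b101.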
4213
4214 nvinfer1::ReduceOperation reduce_operation;
4215 if (node_def.op() == "Sum") {
4216 reduce_operation = nvinfer1::ReduceOperation::kSUM;
4217 } else if (node_def.op() == "Prod") {
4218 reduce_operation = nvinfer1::ReduceOperation::kPROD;
4219 } else if (node_def.op() == "Max") {
4220 reduce_operation = nvinfer1::ReduceOperation::kMAX;
4221 } else if (node_def.op() == "Min") {
4222 reduce_operation = nvinfer1::ReduceOperation::kMIN;
4223 } else if (node_def.op() == "Mean") {
4224 reduce_operation = nvinfer1::ReduceOperation::kAVG;
4225 } else {
4226 return errors::Unimplemented("Op not supported ", node_def.op(), ", at ",
4227 node_def.name());
4228 }
4229 if (params->validation_only) return Status::OK();
4230
4231 const auto keep_dims = attrs.get<bool>("keep_dims");
4232 nvinfer1::ILayer* layer = params->converter->network()->addReduce(
4233 *tensor, reduce_operation, axes, keep_dims);
4234 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4235
4236 params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
4237 return Status::OK();
4238 }
4239
4240 // TensorRT does not support the Pack op natively. Therefore, Pack op is
4241 // converted by first expanding input tensors by adding a new dimension of size
4242 // one at the specified axis and then concatenating the tensors at the same
4243 // axis.
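// For example (illustrative): packing two inputs of non-batch shape [2, 3]
// with axis=1 (TRT axis 0 after removing the implicit batch dim) reshapes
// each input to [1, 2, 3] and concatenates along that axis, yielding a
// tensor of non-batch shape [2, 2, 3].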
ConvertPack(OpConverterParams * params)4244 Status ConvertPack(OpConverterParams* params) {
4245 const auto& inputs = params->inputs;
4246 const auto& node_def = params->node_def;
4247
4248 TFAttrs attrs(node_def);
4249 const int num_inputs = attrs.get<int64>("N");
4250 if (num_inputs != inputs.size()) {
4251 return errors::InvalidArgument(
4252 "Number of inputs for Pack is inconsistent with N attribute, at ",
4253 node_def.name());
4254 }
4255
4256 // Validate inputs. Values must be tensors for now.
4257 std::vector<std::pair<string, bool>> inputs_is_weight;
4258 for (int i = 0; i < num_inputs; ++i) {
4259 inputs_is_weight.push_back({StrCat("values_", i), false});
4260 }
4261 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, inputs_is_weight));
4262
4263 // TODO(hinsu): Enable INT32 with TensorRT version 5.1.3 after testing.
4264 TF_RETURN_IF_ERROR(
4265 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4266
4267 if (num_inputs > 1) {
4268 // Verify that inputs are compatible for concatenation after the expansion.
4269 TF_RETURN_IF_ERROR(
4270 VerifyShapesMatch(inputs, /*masked_dim=*/-1, node_def.name()));
4271 }
4272
4273 // Convert axis from the TensorFlow format to TensorRT format.
4274 const nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
4275 const int64 tf_axis = attrs.get<int64>("axis");
4276 int trt_axis;
4277 TF_RETURN_IF_ERROR(ConvertAxis(tf_axis, dims.nbDims + 1, node_def.name(),
4278 /*use_implicit_batch=*/true, &trt_axis));
4279
4280 // Compute expanded dimensions and then reshape input tensors.
4281 std::vector<int> tensor_dims(dims.d, dims.d + dims.nbDims);
4282 tensor_dims.insert(tensor_dims.begin() + trt_axis, 1);
4283 nvinfer1::Dims expanded_dims;
4284 TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(tensor_dims, &expanded_dims));
4285 std::vector<nvinfer1::ITensor*> expanded_tensors;
4286 for (const TRT_TensorOrWeights& tensor : inputs) {
4287 nvinfer1::ITensor* expanded_tensor = nullptr;
4288 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
4289 tensor, expanded_dims, params->validation_only, &expanded_tensor));
4290 if (!params->validation_only) {
4291 expanded_tensors.push_back(expanded_tensor);
4292 }
4293 }
4294 if (params->validation_only) return Status::OK();
4295
4296 // If there is only one tensor in the input, return the expanded tensor.
4297 if (num_inputs == 1) {
4298 params->outputs->push_back(TRT_TensorOrWeights(expanded_tensors[0]));
4299 return Status::OK();
4300 }
4301
4302 // Otherwise, concatenate expanded tensors.
4303 nvinfer1::IConcatenationLayer* layer =
4304 params->converter->network()->addConcatenation(
4305 const_cast<nvinfer1::ITensor**>(expanded_tensors.data()),
4306 expanded_tensors.size());
4307 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4308 // Note that trt_axis stays the same even after expanding tensors at the axis.
4309 layer->setAxis(trt_axis);
4310 params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
4311 return Status::OK();
4312 }
4313
ConvertPad(OpConverterParams * params)4314 Status ConvertPad(OpConverterParams* params) {
4315 const auto& inputs = params->inputs;
4316 const auto& node_def = params->node_def;
4317 TF_RETURN_IF_ERROR(
4318 CheckInputsWeights(*params, {{"tensor", false}, {"paddings", true}}));
4319 TF_RETURN_IF_ERROR(
4320 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4321
4322   // Padding is implemented with TRT's IPaddingLayer, which only supports
4322   // padding on the H and W dimensions.
4323 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
4324 const auto dims = tensor->getDimensions();
4325 // Restore implicit batch dimension
4326 const int nb_dims = dims.nbDims + 1;
4327
4328 TRT_ShapedWeights pads = inputs.at(1).weights();
4329
4330 TFAttrs attrs(node_def);
4331   // The paddings index type is read as a TF DataType so that EnumToDataType
4332   // can be leveraged for the cast below.
4333 auto padding_type = attrs.get<DataType>("Tpaddings");
4334 // TODO(jie): handle data type conversion for TRT?
4335
4336 if (pads.shape_.d[0] != nb_dims || pads.shape_.d[1] != 2) {
4337 return errors::InvalidArgument(
4338         "Pad only supports explicit padding on a 4-dimensional tensor, at ",
4339 node_def.name());
4340 }
4341
4342   // Only DT_INT32 is supported for the Tpaddings attribute for now.
4343 if (padding_type != DataType::DT_INT32) {
4344 return errors::Unimplemented("Tpaddings supports only DT_INT32");
4345 }
4346 auto pad_data = static_cast<int*>(pads.GetValues());
4347
4348 std::vector<int32_t> pad_index;
4349 for (int i = 0; i < nb_dims; i++) {
4350 if (pad_data[2 * i] != 0 || pad_data[2 * i + 1] != 0) {
4351 pad_index.push_back(i);
4352 }
4353 }
4354
4355 // No padding at all, we should exit
4356 if (pad_index.empty()) {
4357 params->outputs->push_back(inputs.at(0));
4358 return Status::OK();
4359 }
4360
4361   // Only padding on at most 2 axes is supported (GIE-2579).
4362 if (pad_index.size() > 2) {
4363 return errors::InvalidArgument(
4364         "Padding layer does not support padding on more than 2 axes");
4365 }
4366
4367 // Padding on batch dimension is not supported
4368 if (pad_index[0] == 0) {
4369 return errors::InvalidArgument(
4370 "Padding layer does not support padding on batch dimension");
4371 }
4372
4373   // Not fully general: padding on dimensions 1 and 3 at the same time is not
4374   // handled. TODO(jie): implement pad as in the UFF parser.
4375 if (pad_index.size() == 2 && pad_index[0] == 0 && pad_index[1] == 3) {
4376 return errors::Unimplemented(
4377 "Padding layer does not support padding on dimension 1 and 3 yet");
4378 }
4379 if (params->validation_only) return Status::OK();
4380
4381 bool legit_pad = true;
4382 nvinfer1::DimsHW pre_padding(0, 0);
4383 nvinfer1::DimsHW post_padding(0, 0);
4384
4385 std::vector<int32_t> permuted_pad_index(pad_index);
4386 if (pad_index[0] == 1) {
4387 legit_pad = false;
4388 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
4389 tensor, {0, 3, 2, 1}, StrCat(node_def.name(), "_to_pad"), &tensor));
4390 permuted_pad_index[0] = 3;
4391 }
4392
4393 for (size_t i = 0; i < pad_index.size(); i++) {
4394 int index = pad_index[i];
4395 if (permuted_pad_index[i] == 2) {
4396 pre_padding.h() = pad_data[index * 2];
4397 post_padding.h() = pad_data[index * 2 + 1];
4398 } else if (permuted_pad_index[i] == 3) {
4399 pre_padding.w() = pad_data[index * 2];
4400 post_padding.w() = pad_data[index * 2 + 1];
4401 }
4402 }
4403
4404 nvinfer1::IPaddingLayer* layer = params->converter->network()->addPadding(
4405 *tensor, pre_padding, post_padding);
4406 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4407 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4408 params->converter->MarkQuantizationRangesAsInferrable(tensor, output_tensor);
4409
4410 if (!legit_pad) {
4411 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
4412 output_tensor, {0, 3, 2, 1}, StrCat(node_def.name(), "_from_pad"),
4413 &output_tensor));
4414 }
4415
4416 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4417 return Status::OK();
4418 }
4419
ConvertSplitHelper(OpConverterParams * params,const TRT_TensorOrWeights & input,int tf_axis,int num_splits,bool squeeze_after)4420 Status ConvertSplitHelper(OpConverterParams* params,
4421 const TRT_TensorOrWeights& input, int tf_axis,
4422 int num_splits, bool squeeze_after) {
4423 const auto& node_def = params->node_def;
4424 const nvinfer1::Dims dims = input.GetTrtDims();
4425 // Convert axis.
4426 int trt_axis;
4427 TF_RETURN_IF_ERROR(ConvertAxis(tf_axis, dims.nbDims, node_def.name(),
4428 /*use_implicit_batch=*/true, &trt_axis));
4429 // Dimension must equal num_splits for Unstack (when squeeze_after is true)
4430 if (squeeze_after && dims.d[trt_axis] != num_splits) {
4431 return errors::InvalidArgument(
4432 "Dimension ", tf_axis, " has size ", dims.d[trt_axis],
4433         " which is not equal to num_splits (", num_splits, "), at ", node_def.name());
4434 }
4435 // Dimension must be evenly divisible by num_splits.
4436 if (dims.d[trt_axis] % num_splits != 0) {
4437 return errors::InvalidArgument(
4438 "Dimension ", tf_axis, " of size ", dims.d[trt_axis],
4439         " is not evenly divisible by ", num_splits, ", at ", node_def.name());
4440 }
4441
4442 // Create parameters for StridedSliceHelper.
4443 // Slice will begin on zero for all dims, except the one being split which
4444 // will change.
4445 std::vector<int> begin(dims.nbDims, 0);
4446 // Determine size of split. Slice will get the full length of all dims, except
4447 // the one being split.
4448 std::vector<int> size(dims.d, dims.d + dims.nbDims);
4449 const int split_size_on_axis = dims.d[trt_axis] / num_splits;
4450 size[trt_axis] = split_size_on_axis;
4451 // Stride will always be 1
4452 std::vector<int> stride(dims.nbDims, 1);
4453 // Add dummy batch dimension
4454 begin.insert(begin.begin(), 0);
4455 size.insert(size.begin(), 1);
4456 stride.insert(stride.begin(), 1);
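  // Illustrative example: for an input of non-batch shape [6, 4] split into
  // num_splits=3 along trt_axis=0, each slice uses size [1, 2, 4] and stride
  // [1, 1, 1] (dummy batch dim included), while begin steps through
  // [0, 0, 0], [0, 2, 0], [0, 4, 0] in the loop below.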
4457 // Create final shape for Unpack/Unstack, where split axis is squeezed.
4458 nvinfer1::Dims final_shape_for_unpack;
4459 nvinfer1::Dims* final_shape_for_unpack_ptr = nullptr;
4460 if (squeeze_after) {
4461 std::vector<int> size_after_squeeze(size);
4462 size_after_squeeze.erase(size_after_squeeze.begin() + trt_axis + 1);
4463 TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(
4464         size_after_squeeze, &final_shape_for_unpack, /*ignore_first_dim=*/true));
4465 final_shape_for_unpack_ptr = &final_shape_for_unpack;
4466 }
4467
4468 // Slice the input. ConvertStridedSliceHelper will push the outputs onto
4469 // params->outputs.
4470 for (int i = 0; i < num_splits; ++i) {
4471 begin[trt_axis + 1] = i * split_size_on_axis;
4472 TF_RETURN_IF_ERROR(ConvertStridedSliceHelper(
4473 params, input, begin, size, stride, final_shape_for_unpack_ptr));
4474 }
4475 return Status::OK();
4476 }
4477
ConvertSplit(OpConverterParams * params)4478 Status ConvertSplit(OpConverterParams* params) {
4479 const auto& inputs = params->inputs;
4480 const auto& node_def = params->node_def;
4481 TF_RETURN_IF_ERROR(
4482 CheckInputsWeights(*params, {{"axis", true}, {"value", false}}));
4483 TF_RETURN_IF_ERROR(AllowDataTypes(*params, {
4484 DataType::DT_FLOAT, DataType::DT_HALF,
4485 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
4486 DataType::DT_INT32,
4487 #endif
4488 }));
4489 int tf_axis = inputs.at(0).weights().GetSpan<int>()[0];
4490 TFAttrs attrs(node_def);
4491 const int num_split = attrs.get<int64>("num_split");
4492
4493 return ConvertSplitHelper(params, inputs.at(1), tf_axis, num_split, false);
4494 }
4495
ConvertUnpack(OpConverterParams * params)4496 Status ConvertUnpack(OpConverterParams* params) {
4497 const auto& inputs = params->inputs;
4498 const auto& node_def = params->node_def;
4499 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"value", false}}));
4500 TF_RETURN_IF_ERROR(AllowDataTypes(*params, {
4501 DataType::DT_FLOAT, DataType::DT_HALF,
4502 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
4503 DataType::DT_INT32,
4504 #endif
4505 }));
4506   // Input needs a non-batch dim (TF rank >= 2); we can't unpack the batch axis.
4507 if (inputs.at(0).GetTrtDims().nbDims == 0) {
4508 return errors::Unimplemented(
4509 "Input \"value\" for Unpack must be rank 2 or greater, at ",
4510 node_def.name());
4511 }
4512 TFAttrs attrs(node_def);
4513 const int tf_axis = attrs.get<int64>("axis");
4514 const int num = attrs.get<int64>("num");
4515
4516 return ConvertSplitHelper(params, inputs.at(0), tf_axis, num, true);
4517 }
4518
ConvertConcat(OpConverterParams * params)4519 Status ConvertConcat(OpConverterParams* params) {
4520 const auto& inputs = params->inputs;
4521 const auto& node_def = params->node_def;
4522 TFAttrs attrs(node_def);
4523 // Get number of tensor inputs.
4524 const int num_inputs = attrs.get<int64>("N");
4525 if (num_inputs != static_cast<int>(inputs.size()) - 1) {
4526 return errors::InvalidArgument(
4527 "Number of inputs for ConcatV2 is inconsistent with N attribute, at ",
4528 node_def.name());
4529 }
4530 // Validate inputs. Values must be tensors for now.
4531 std::vector<std::pair<string, bool>> inputs_is_weight;
4532 for (int i = 0; i < num_inputs; ++i) {
4533 inputs_is_weight.push_back({StrCat("values_", i), false});
4534 }
4535 inputs_is_weight.push_back({"axis", true});
4536 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, inputs_is_weight));
4537 // TODO(tmorris): There is a bug with Concat and INT32 in TRT - it is supposed
4538 // to be supported.
4539 TF_RETURN_IF_ERROR(
4540 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4541 const auto axis = inputs.at(num_inputs).weights().GetSpan<int>();
4542 if (axis.size() != 1) {
4543 return errors::InvalidArgument("Axis for ConcatV2 must be a scalar, at ",
4544 node_def.name());
4545 }
4546 int trt_axis = 0;
4547 const auto dim = inputs.at(0).GetTrtDims();
4548 TF_RETURN_IF_ERROR(ConvertAxis(axis[0], dim.nbDims, node_def.name(),
4549 /*use_implicit_batch=*/true, &trt_axis));
4550 // Check that dimensions match on non-concatenate axis.
4551 TF_RETURN_IF_ERROR(VerifyShapesMatch(
4552 absl::Span<const TRT_TensorOrWeights>(inputs).first(num_inputs), trt_axis,
4553 node_def.name()));
4554 if (params->validation_only) return Status::OK();
4555
4556 // Gather inputs as tensors
4557 std::vector<nvinfer1::ITensor const*> input_tensors;
4558 for (int i = 0; i < num_inputs; i++) {
4559 input_tensors.push_back(inputs.at(i).tensor());
4560 }
4561 nvinfer1::IConcatenationLayer* layer =
4562 params->converter->network()->addConcatenation(
4563 const_cast<nvinfer1::ITensor* const*>(input_tensors.data()),
4564 input_tensors.size());
4565 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4566 layer->setAxis(trt_axis);
4567 params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
4568 return Status::OK();
4569 }
4570
ConvertFusedBatchNorm(OpConverterParams * params)4571 Status ConvertFusedBatchNorm(OpConverterParams* params) {
4572 const auto& inputs = params->inputs;
4573 const auto& node_def = params->node_def;
4574 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false},
4575 {"scale", true},
4576 {"offset", true},
4577 {"mean", true},
4578 {"variance", true}}));
4579 TF_RETURN_IF_ERROR(
4580 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4581 TFAttrs attrs(node_def);
4582 float epsilon = attrs.get<float>("epsilon");
4583 auto data_format = attrs.get<string>("data_format");
4584 if (data_format != "NCHW") {
4585 return errors::Unimplemented(
4586 node_def.op(), " only supports data_format=NCHW, at ", node_def.name());
4587 }
4588 bool is_training = attrs.get<bool>("is_training");
4589 if (is_training) {
4590 // Trying to use batchnorm in training mode is a very common problem.
4591 // Because the error message will only be printed in VLOG(1) by the
4592 // segmenter, we issue a special warning so that users will actually see it.
4593 LOG(WARNING) << node_def.op() << " only supports is_training=false. If you "
4594 << "are using Keras, please call "
4595 << "keras.backend.set_learning_phase(0) before constructing "
4596 << "your model. At " << node_def.name();
4597 return errors::Unimplemented(node_def.op(),
4598 " only supports is_training=false, at ",
4599 node_def.name());
4600 }
4601 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
4602
4603 // Check parameter types
4604 auto parameter_type = inputs.at(1).weights().TrtDType();
4605 if ((parameter_type != nvinfer1::DataType::kFLOAT) &&
4606 (parameter_type != nvinfer1::DataType::kHALF)) {
4607 return errors::Unimplemented(
4608 "Only float32 or float16 weight data type is supported, for node ",
4609 node_def.name(), " got ", DebugString(parameter_type));
4610 }
4611 for (int i = 1; i < 5; i++) {
4612 if (inputs.at(i).weights().TrtDType() != parameter_type) {
4613 return errors::Unimplemented(
4614 "Inconsistent parameter type for batchnorm is not supported, at: " +
4615 node_def.name());
4616 }
4617 }
4618
4619 TRT_ShapedWeights dummy_power_weights(parameter_type);
4620 size_t nweight = 0;
4621 for (int i = 1; i < 5; i++) {
4622 nweight = std::max<size_t>(nweight, inputs.at(i).weights().count());
4623 }
4624 const TRT_ShapedWeights* ptr_shape_weights = nullptr;
4625 for (int i = 1; i < 5; i++) {
4626 if (inputs.at(i).weights().count() == nweight) {
4627 ptr_shape_weights = &(inputs.at(i).weights());
4628 } else if (inputs.at(i).weights().count() != 1) {
4629 return errors::InvalidArgument(
4630 "Inconsistent batchnorm parameter count, at: " + node_def.name());
4631 }
4632 }
4633 if (params->validation_only) return Status::OK();
4634
4635   // We could technically have two weights with different shapes; that would
4636   // require two addScale ops, which is arguably less performant.
4637 TRT_ShapedWeights combined_scale_weights =
4638 params->weight_store->GetTempWeights(*ptr_shape_weights);
4639 TRT_ShapedWeights combined_offset_weights =
4640 params->weight_store->GetTempWeights(*ptr_shape_weights);
4641
4642 const Eigen::half* cast_vals_array[4];
4643 const float* vals_array[4];
4644 for (int j = 0; j < 4; j++) {
4645 cast_vals_array[j] =
4646 static_cast<Eigen::half const*>(inputs.at(j + 1).weights().GetValues());
4647 vals_array[j] =
4648 static_cast<float const*>(inputs.at(j + 1).weights().GetValues());
4649 }
4650 Eigen::half* cast_combined_scale_vals =
4651 static_cast<Eigen::half*>(combined_scale_weights.GetValues());
4652 Eigen::half* cast_combined_offset_vals =
4653 static_cast<Eigen::half*>(combined_offset_weights.GetValues());
4654 float* combined_scale_vals =
4655 static_cast<float*>(combined_scale_weights.GetValues());
4656 float* combined_offset_vals =
4657 static_cast<float*>(combined_offset_weights.GetValues());
4658
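  // Fold the four batchnorm parameters into a single scale/offset pair so the
  // op maps onto one IScaleLayer:
  //   y = scale * (x - mean) / sqrt(variance + epsilon) + offset
  //     = combined_scale * x + combined_offset
  // where combined_scale  = scale / sqrt(variance + epsilon)
  //   and combined_offset = offset - mean * combined_scale.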
4659 for (size_t i = 0; i < nweight; ++i) {
4660 float batchnorm_data[4];
4661 for (int j = 0; j < 4; j++) {
4662 if (inputs.at(j + 1).weights().count() != 1) {
4663 if (parameter_type == nvinfer1::DataType::kFLOAT) {
4664 batchnorm_data[j] = vals_array[j][i];
4665 } else if (parameter_type == nvinfer1::DataType::kHALF) {
4666 batchnorm_data[j] =
4667 Eigen::half_impl::half_to_float(cast_vals_array[j][i]);
4668 }
4669 } else {
4670 if (parameter_type == nvinfer1::DataType::kFLOAT) {
4671 batchnorm_data[j] = vals_array[j][0];
4672 } else if (parameter_type == nvinfer1::DataType::kHALF) {
4673 batchnorm_data[j] =
4674 Eigen::half_impl::half_to_float(cast_vals_array[j][0]);
4675 }
4676 }
4677 }
4678 float scale = batchnorm_data[0];
4679 float offset = batchnorm_data[1];
4680 float mean = batchnorm_data[2];
4681 float variance = batchnorm_data[3];
4682 float combined_scale_val = scale / sqrtf(variance + epsilon);
4683 float combined_offset_val = offset - mean * combined_scale_val;
4684 if (parameter_type == nvinfer1::DataType::kFLOAT) {
4685 combined_scale_vals[i] = combined_scale_val;
4686 combined_offset_vals[i] = combined_offset_val;
4687 } else if (parameter_type == nvinfer1::DataType::kHALF) {
4688 cast_combined_scale_vals[i] = Eigen::half(combined_scale_val);
4689 cast_combined_offset_vals[i] = Eigen::half(combined_offset_val);
4690 }
4691 }
4692
4693 nvinfer1::ScaleMode mode = nweight == 1 ? nvinfer1::ScaleMode::kUNIFORM
4694 : nvinfer1::ScaleMode::kCHANNEL;
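  // kUNIFORM applies a single scale/offset pair to the whole tensor, while
  // kCHANNEL applies per-channel values.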
4695 nvinfer1::IScaleLayer* layer = params->converter->network()->addScale(
4696 *tensor, mode, combined_offset_weights.GetTrtWeights(),
4697 combined_scale_weights.GetTrtWeights(),
4698 dummy_power_weights.GetTrtWeights());
4699 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4700 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4701 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4702 return Status::OK();
4703 }
4704
ConvertGather(OpConverterParams * params)4705 Status ConvertGather(OpConverterParams* params) {
4706 const auto& inputs = params->inputs;
4707 const auto& node_def = params->node_def;
4708 // TODO(tmorris): Use CheckInputsWeights by changing bool to enum with an
4709 // option for an input to be either tensor or weight.
4710 if (inputs.size() != 3) {
4711 return errors::InvalidArgument("GatherV2 got ", inputs.size(),
4712 " inputs but expected 3, at ",
4713 node_def.name());
4714 }
4715 const auto& params_input = inputs.at(0);
4716 const auto& indices_input = inputs.at(1);
4717 const auto& axis_input = inputs.at(2);
4718 if (!axis_input.is_weights()) {
4719 return errors::Unimplemented(
4720 "The input \"axis\" for GatherV2 must be a constant, at ",
4721 node_def.name());
4722 }
4723 if (!indices_input.is_tensor()) {
4724 return errors::Unimplemented(
4725 "The input \"indices\" for GatherV2 must be a tensor, at ",
4726 node_def.name());
4727 }
4728
4729 TF_RETURN_IF_ERROR(AllowDataTypes(
4730 *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32},
4731 /*dtype_attr_name=*/"Tparams"));
4732 TF_RETURN_IF_ERROR(AllowDataTypes(*params, {DataType::DT_INT32},
4733 /*dtype_attr_name=*/"Tindices"));
4734
4735 absl::Span<const int> axis = axis_input.weights().GetSpan<int>();
4736 if (axis.size() != 1) {
4737 return errors::InvalidArgument("Axis for GatherV2 must be a scalar, at ",
4738 node_def.name());
4739 }
4740 int trt_axis = 0;
4741 TF_RETURN_IF_ERROR(ConvertAxis(axis[0], params_input.GetTrtDims().nbDims,
4742 node_def.name(), params_input.is_tensor(),
4743 &trt_axis));
4744 if (params_input.is_weights() && trt_axis != 0) {
4745 return errors::Unimplemented(
4746 "The input axis must be zero when params is a weight.");
4747 }
4748 if (params_input.is_tensor() && indices_input.batch_size() != 1) {
4749 return errors::Unimplemented(
4750 "Indices must have a batch size of 1 when params is a tensor.");
4751 }
4752   // When both inputs are tensors, the TF gather result will have rank:
4753 // (params.nbDims + 1) + (indices.nbDims + 1) - 1,
4754 // where "+ 1" adds the batch dim. If params is a weight, the TRT rank matches
4755 // the TF rank so we don't have to add + 1.
4756 const int params_tf_rank =
4757 params_input.GetTrtDims().nbDims + (params_input.is_tensor() ? 1 : 0);
4758 const int indices_tf_rank = indices_input.GetTrtDims().nbDims + 1;
4759 const int tf_gather_output_rank = params_tf_rank + indices_tf_rank - 1;
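  // Illustrative example: params as a tensor with TRT dims [4, 5] (TF rank 3
  // including the batch dim) and indices with TRT dims [2] (TF rank 2) give
  // tf_gather_output_rank = 3 + 2 - 1 = 4.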
4760 if (tf_gather_output_rank > nvinfer1::Dims::MAX_DIMS + 1) {
4761 return errors::InvalidArgument(
4762 "Result of gather has dimension greater than ",
4763 nvinfer1::Dims::MAX_DIMS + 1);
4764 }
4765 if (params->validation_only) return Status::OK();
4766
4767   // Convert params to a tensor if it is a weight.
4768 nvinfer1::ITensor* params_tensor = nullptr;
4769 if (params_input.is_weights()) {
4770 params_tensor = params->converter->CreateConstantLayer(
4771 params_input.weights(), params_input.GetTrtDims());
4772 } else {
4773 params_tensor = params_input.tensor();
4774 }
4775
4776 // Note on how IGatherLayer works: if both the data and indices tensors have
4777 // a batch size dimension of size N, it performs:
4778 // for batchid in xrange(N):
4779 // output[batchid, a0, ..., an, i, ..., j, b0, ..., bn] = (
4780 // data[batchid, a0, ..., an, indices[batchid, i, ..., j] b0, ..., bn])
4781 nvinfer1::IGatherLayer* layer = params->converter->network()->addGather(
4782 *params_tensor, *indices_input.tensor(), trt_axis);
4783 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4784
4785 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4786 nvinfer1::Dims trt_gather_output_dims = output_tensor->getDimensions();
4787 // Note for the "- 2": one is for the output batch dim encapsulated by TF-TRT,
4788 // and the other is for the output dimension that is squeezed by IGatherLayer
4789 // because of the implicit batch dim in the indices (see the above note).
4790 const int expected_trt_output_rank =
4791 tf_gather_output_rank - (params_input.is_tensor() ? 2 : 1);
4792 if (trt_gather_output_dims.nbDims != expected_trt_output_rank) {
4793 return errors::Internal(
4794 "Get unexpected output dimensions of IGatherLayer. Expect nbDims: ",
4795 expected_trt_output_rank,
4796 ", actual nbDims: ", trt_gather_output_dims.nbDims);
4797 }
4798 // Reshape the output so after adding the implicit batch dim it'll match the
4799 // output shape of TF GatherV2.
4800 if (params_input.is_tensor()) {
4801 for (int i = trt_gather_output_dims.nbDims; i > trt_axis; --i) {
4802 trt_gather_output_dims.d[i] = trt_gather_output_dims.d[i - 1];
4803 }
4804 trt_gather_output_dims.d[trt_axis] = 1;
4805 ++trt_gather_output_dims.nbDims;
4806
4807 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
4808 TRT_TensorOrWeights(output_tensor), trt_gather_output_dims,
4809 /*validation_only=*/false, &output_tensor));
4810 }
4811
4812 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4813 return Status::OK();
4814 }
4815
ConvertFullyConnectedHelper(OpConverterParams * params,nvinfer1::ITensor * tensor_a,TRT_ShapedWeights weights_b,bool transpose_b,const string & node_name)4816 Status ConvertFullyConnectedHelper(OpConverterParams* params,
4817 nvinfer1::ITensor* tensor_a,
4818 TRT_ShapedWeights weights_b,
4819 bool transpose_b, const string& node_name) {
4820 // Reshape input to 3D - this will be a no-op unless using int8 precision.
4821 auto input_dim = tensor_a->getDimensions();
4822 while (input_dim.nbDims < 3) {
4823 input_dim.d[input_dim.nbDims++] = 1;
4824 }
4825 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
4826 TRT_TensorOrWeights(tensor_a), input_dim, /*validation_only=*/false,
4827 &tensor_a));
4828
4829 // FC layer will transpose weights, so we need to pre-transpose.
4830 TRT_ShapedWeights weights(weights_b.TrtDType());
4831 if (!transpose_b) {
4832 weights = params->weight_store->GetTempWeights(weights_b);
4833 ReorderCKtoKC(weights_b, &weights);
4834 } else {
4835 weights = weights_b;
4836 }
4837 TRT_ShapedWeights biases(weights.TrtDType());
4838 const int noutput = weights.shape_.d[0];
4839 nvinfer1::IFullyConnectedLayer* layer =
4840 params->converter->network()->addFullyConnected(
4841 *tensor_a, noutput, weights.GetTrtWeights(), biases.GetTrtWeights());
4842
4843 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_name);
4844 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4845
4846 // Reshape output to 1D - this will be a no-op unless using int8 precision.
4847 auto output_dim = output_tensor->getDimensions();
4848 output_dim.nbDims = 1;
4849 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
4850 TRT_TensorOrWeights(output_tensor), output_dim, /*validation_only=*/false,
4851 &output_tensor));
4852
4853 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4854 return Status::OK();
4855 }
4856
ConvertMatMulHelper(OpConverterParams * params,TRT_TensorOrWeights input_a,TRT_TensorOrWeights input_b,bool transpose_a,bool transpose_b,string node_name)4857 Status ConvertMatMulHelper(OpConverterParams* params,
4858 TRT_TensorOrWeights input_a,
4859 TRT_TensorOrWeights input_b, bool transpose_a,
4860 bool transpose_b, string node_name) {
4861 // TODO: ReorderCKtoKC is currently not general enough to transpose weights
4862 // that are not 2D.
4863 if ((transpose_a && input_a.is_weights() &&
4864 input_a.GetTrtDims().nbDims != 2) ||
4865 (transpose_b && input_b.is_weights() &&
4866 input_b.GetTrtDims().nbDims != 2)) {
4867 return errors::InvalidArgument(
4868 "Cannot currently transpose constant input if it is not 2 dimensional");
4869 }
4870
4871 // If A is a tensor, we can only transpose if it is at least 3D in TF,
4872 // or TRT will not do the correct transposition.
4873 if (transpose_a && input_a.is_tensor() && input_a.GetTrtDims().nbDims < 2) {
4874 return errors::InvalidArgument(
4875 "Cannot transpose first input if it is a tensor with fewer than 2 "
4876 "non-batch dimensions.");
4877 }
4878
4879 // If B is a tensor, then it must be at least 3D in TF,
4880 // or TRT won't be able to handle the multiply correctly.
4881 if (input_b.is_tensor() && input_b.GetTrtDims().nbDims < 2) {
4882 return errors::InvalidArgument(
4883 "Second input must either be a constant, or contain at least 2 "
4884 "non-batch dimensions.");
4885 }
4886 if (params->validation_only) return Status::OK();
4887
4888 // If an FC layer can be used and would be faster, use that instead.
4889 const bool can_use_fc =
4890 !transpose_a && input_a.is_tensor() && input_b.is_weights();
4891 const bool should_use_fc = can_use_fc && input_a.GetTrtDims().nbDims >= 3 &&
4892 input_b.GetTrtDims().nbDims == 2;
4893 // If int8 is specified, FC must be used unless it is not compatible, as MM
4894 // does not support int8 at this time.
4895 if (should_use_fc || (can_use_fc && params->converter->precision_mode() ==
4896 TrtPrecisionMode::INT8)) {
4897 return ConvertFullyConnectedHelper(
4898 params, input_a.tensor(), input_b.weights(), transpose_b, node_name);
4899 }
4900
4901 const auto get_matrix_op = [](nvinfer1::ITensor* in,
4902 bool transpose) -> nvinfer1::MatrixOperation {
4903 return (in->getDimensions().nbDims < 2)
4904 ? nvinfer1::MatrixOperation::kVECTOR
4905 : (transpose) ? nvinfer1::MatrixOperation::kTRANSPOSE
4906 : nvinfer1::MatrixOperation::kNONE;
4907 };
4908
4909 // If the MatMul operand is a constant, applies transposes at conversion-time
4910 // as necessary. If the operand is a tensor, does nothing. If required
4911 // transposes were applied, sets transpose to false.
4912 const auto prepare_matmul_operand =
4913       [&params](TRT_TensorOrWeights operand,
4914 bool* transpose) -> nvinfer1::ITensor* {
4915 if (operand.is_tensor()) {
4916 return operand.tensor();
4917 } else {
4918 TRT_ShapedWeights weights(operand.weights().TrtDType());
4919 if (*transpose) {
4920 weights = params->weight_store->GetTempWeights(operand.weights());
4921 ReorderCKtoKC(operand.weights(), &weights);
4922 // Weights have been transposed, can set transpose to false
4923 *transpose = false;
4924 } else {
4925 weights = operand.weights();
4926 }
4927 return params->converter->CreateConstantLayer(weights, weights.shape_);
4928 }
4929 };
4930
4931 nvinfer1::ITensor* tensor_a = prepare_matmul_operand(input_a, &transpose_a);
4932 nvinfer1::ITensor* tensor_b = prepare_matmul_operand(input_b, &transpose_b);
4933
4934 nvinfer1::IMatrixMultiplyLayer* layer =
4935 params->converter->network()->addMatrixMultiply(
4936 *tensor_a, get_matrix_op(tensor_a, transpose_a), *tensor_b,
4937 get_matrix_op(tensor_b, transpose_b));
4938
4939 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_name);
4940 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4941 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4942 return Status::OK();
4943 }
4944
4945 // inputs are both two dimensional (ops::MatMul)
ConvertMatMul(OpConverterParams * params)4946 Status ConvertMatMul(OpConverterParams* params) {
4947 const auto& inputs = params->inputs;
4948 const auto& node_def = params->node_def;
4949 if (inputs.size() != 2) {
4950 return errors::InvalidArgument(node_def.op(), " got ", inputs.size(),
4951 " inputs but expected 2, at ",
4952 node_def.name());
4953 }
4954 TF_RETURN_IF_ERROR(
4955 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4956
4957 TFAttrs attrs(node_def);
4958 bool transpose_a = attrs.get<bool>("transpose_a");
4959 bool transpose_b = attrs.get<bool>("transpose_b");
4960
4961 return ConvertMatMulHelper(params, inputs.at(0), inputs.at(1), transpose_a,
4962 transpose_b, node_def.name());
4963 }
4964
ConvertBatchMatMul(OpConverterParams * params)4965 Status ConvertBatchMatMul(OpConverterParams* params) {
4966 const auto& inputs = params->inputs;
4967 const auto& node_def = params->node_def;
4968 if (inputs.size() != 2) {
4969 return errors::InvalidArgument(node_def.op(), " got ", inputs.size(),
4970 " inputs but expected 2, at ",
4971 node_def.name());
4972 }
4973 // TODO(tmorris): Enable once false is updated to mean either tensor or weight
4974 // TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}, {"y",
4975 // false}}));
4976 TF_RETURN_IF_ERROR(
4977 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4978 if (inputs.at(0).is_weights() && inputs.at(1).is_weights()) {
4979 return errors::InvalidArgument(
4980 "All inputs are weights, but Grappler is expected to fold them.");
4981 }
4982 if (inputs.at(0).is_tensor() && inputs.at(1).is_tensor() &&
4983 inputs.at(0).GetTrtDims().nbDims != inputs.at(1).GetTrtDims().nbDims) {
4984 return errors::Unimplemented(
4985 "Inputs must have the same rank if they are both tensors.");
4986 }
4987
4988 TFAttrs attrs(node_def);
4989 const bool transpose_a = attrs.get<bool>("adj_x");
4990 const bool transpose_b = attrs.get<bool>("adj_y");
4991
4992 // There is no way to batch constants in TRT. Example:
4993 // Tensor with TF Dims: 12 5 3 -> TRT Dims: 5 3
4994 // Weight with TF Dims: 12 3 6 -> TRT Dims: 12 3 6
4995 // It is not possible to treat the weight input as a batched [3, 6] tensor.
4996 const auto check_weight_is_not_batched =
4997 [](const TRT_TensorOrWeights& input_l,
4998 const TRT_TensorOrWeights& input_r) {
4999 // If input_l is a weight, then input_r must be a tensor because
5000 // otherwise the op would be handled by Grappler.
5001 if (input_l.is_weights() &&
5002 input_l.GetTrtDims().nbDims > input_r.GetTrtDims().nbDims &&
5003 input_l.GetTrtDims().d[0] != 1) {
5004 return errors::Unimplemented(
5005 "TensorRT does not support batched constants.");
5006 }
5007 return Status::OK();
5008 };
5009 TF_RETURN_IF_ERROR(check_weight_is_not_batched(inputs.at(0), inputs.at(1)));
5010 TF_RETURN_IF_ERROR(check_weight_is_not_batched(inputs.at(1), inputs.at(0)));
5011
5012 // Broadcast inputs. We don't check feasibility since the dimensions in a
5013 // MatMul don't need to match. For example, consider a valid set of inputs
5014 // which would produce an output of shape [N, T, K]:
5015 // input 0: [N, T, C]
5016 // input 1: [1, C, K]
5017   //   Since C != K and T != C, the feasibility check would fail.
5018 nvinfer1::Dims broadcasted_dims_l, broadcasted_dims_r;
5019 TF_RETURN_IF_ERROR(GetTrtBroadcastShape(
5020 inputs.at(0), inputs.at(1), /*check_feasibility=*/false,
5021 params->use_implicit_batch, &broadcasted_dims_l, &broadcasted_dims_r));
5022 nvinfer1::ITensor* tensor_l = nullptr;
5023 nvinfer1::ITensor* tensor_r = nullptr;
5024 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
5025 inputs.at(0), broadcasted_dims_l, params->validation_only, &tensor_l));
5026 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
5027 inputs.at(1), broadcasted_dims_r, params->validation_only, &tensor_r));
5028 if (params->validation_only) return Status::OK();
5029
5030 return ConvertMatMulHelper(params, TRT_TensorOrWeights(tensor_l),
5031 TRT_TensorOrWeights(tensor_r), transpose_a,
5032 transpose_b, node_def.name());
5033 }
5034
ConvertSoftmax(OpConverterParams * params)5035 Status ConvertSoftmax(OpConverterParams* params) {
5036 const auto& inputs = params->inputs;
5037 const auto& node_def = params->node_def;
5038 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"logits", false}}));
5039 TF_RETURN_IF_ERROR(
5040 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
5041 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
5042
5043 const int num_trt_dims = tensor->getDimensions().nbDims;
5044 if (num_trt_dims == 0) {
5045 return errors::InvalidArgument(
5046         "TensorRT Softmax cannot be applied to the batch dimension, at ",
5047 node_def.name());
5048 }
5049 if (params->validation_only) return Status::OK();
5050
5051 nvinfer1::ISoftMaxLayer* layer =
5052 params->converter->network()->addSoftMax(*tensor);
5053 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
5054   // TensorFlow Softmax applies softmax along the last dimension.
5055 layer->setAxes(1 << (num_trt_dims - 1));
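  // Illustrative example: for a tensor with three non-batch dims, the axes
  // bitmask is 1 << 2 = 0b100, selecting the innermost dimension.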
5056
5057 nvinfer1::ITensor* output_tensor = layer->getOutput(0);
5058 // Quantization range for SoftMax is always (0, 1)
5059 params->converter->ProvideQuantizationRange(output_tensor, 0.0f, 1.0f);
5060 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
5061 return Status::OK();
5062 }
5063
ConvertArgMinMax(OpConverterParams * params)5064 Status ConvertArgMinMax(OpConverterParams* params) {
5065 const auto& inputs = params->inputs;
5066 const auto& node_def = params->node_def;
5067 TF_RETURN_IF_ERROR(
5068 CheckInputsWeights(*params, {{"input", false}, {"dimension", true}}));
5069 TF_RETURN_IF_ERROR(
5070 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
5071 // INT64 outputs are not supported by TRT.
5072 TFAttrs attrs(node_def);
5073 DataType output_dtype = attrs.get<DataType>("output_type");
5074 if (output_dtype != DataType::DT_INT32) {
5075 return errors::Unimplemented("Output type ", DataTypeString(output_dtype),
5076 " is not supported, at ", node_def.name());
5077 }
5078 int tf_axis = inputs.at(1).weights().GetSpan<int>()[0];
5079 int trt_axis;
5080 nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
5081 TF_RETURN_IF_ERROR(ConvertAxis(tf_axis, dims.nbDims, node_def.name(),
5082 /*use_implicit_batch=*/true, &trt_axis));
5083 nvinfer1::TopKOperation topk_op;
5084 if (node_def.op() == "ArgMin") {
5085 topk_op = nvinfer1::TopKOperation::kMIN;
5086 } else if (node_def.op() == "ArgMax") {
5087 topk_op = nvinfer1::TopKOperation::kMAX;
5088 } else {
5089 return errors::InvalidArgument("Unsupported ArgMin/Max operation");
5090 }
5091 if (params->validation_only) return Status::OK();
5092
5093 // Use TopK with k = 1. Only indices output is needed (output 1).
5094 const uint32_t reduce_axes = 1 << trt_axis;
5095 nvinfer1::ITopKLayer* layer = params->converter->network()->addTopK(
5096 *inputs.at(0).tensor(), topk_op, 1, reduce_axes);
5097 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
5098 nvinfer1::ITensor* output_indices_tensor = layer->getOutput(1);
5099
5100 // Squeeze on axis.
5101 std::vector<int> size(dims.d, dims.d + dims.nbDims);
5102 size.erase(size.begin() + trt_axis);
5103 nvinfer1::Dims new_dims;
5104 TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(size, &new_dims));
5105 nvinfer1::ITensor* output_tensor = nullptr;
5106 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
5107 TRT_TensorOrWeights(output_indices_tensor), new_dims,
5108 /*validation_only=*/false, &output_tensor));
5109
5110 params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
5111 return Status::OK();
5112 }
5113
ConvertTopK(OpConverterParams * params)5114 Status ConvertTopK(OpConverterParams* params) {
5115 const auto& inputs = params->inputs;
5116 const auto& node_def = params->node_def;
5117 TF_RETURN_IF_ERROR(
5118 CheckInputsWeights(*params, {{"input", false}, {"k", true}}));
5119 TF_RETURN_IF_ERROR(
5120 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
5121 TFAttrs attrs(node_def);
5122 const bool sorted = attrs.get<bool>("sorted");
5123 if (!sorted) {
5124     // TensorRT only supports sorted output. Although the TensorFlow API
5125     // doesn't specify the order of output elements when sorted=false, it's
5126     // safer not to convert, because the TensorRT output might differ from
5127     // TensorFlow's and cause confusion.
5128     return errors::InvalidArgument("Only sorted=True is supported, at ",
5129 node_def.name());
5130 }
5131
5132 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
5133 const int num_dims = tensor->getDimensions().nbDims;
5134 if (num_dims == 0) {
5135 return errors::InvalidArgument(
5136         "TensorRT TopK cannot be applied to the batch dimension, at ", node_def.name());
5137 }
5138
5139 TRT_ShapedWeights k_w = inputs.at(1).weights();
5140 if (k_w.count() != 1) {
5141     return errors::InvalidArgument("k value of TopK should be a scalar, at ",
5142 node_def.name());
5143 }
5144   // Note that ITopKLayer always has sorted outputs, so we don't need to handle
5145 // the 'sorted' attribute of the node.
5146 if (params->validation_only) return Status::OK();
5147
5148 const nvinfer1::TopKOperation op = nvinfer1::TopKOperation::kMAX;
5149 const int k = *(static_cast<int*>(k_w.GetValues()));
5150 const uint32_t reduce_axes = 1 << (num_dims - 1);
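  // TopK is applied along the last non-batch dimension, matching TF's TopKV2
  // which operates on the last dimension of the input.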
5151 nvinfer1::ITopKLayer* layer =
5152 params->converter->network()->addTopK(*tensor, op, k, reduce_axes);
5153 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
5154
5155 nvinfer1::ITensor* output_value_tensor = layer->getOutput(0);
5156 nvinfer1::ITensor* output_indices_tensor = layer->getOutput(1);
5157 params->outputs->push_back(TRT_TensorOrWeights(output_value_tensor));
5158 params->outputs->push_back(TRT_TensorOrWeights(output_indices_tensor));
5159 return Status::OK();
5160 }
5161
ConvertDepthSpaceShuffle(OpConverterParams * params)5162 Status ConvertDepthSpaceShuffle(OpConverterParams* params) {
5163 const auto& inputs = params->inputs;
5164 const auto& node_def = params->node_def;
5165 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
5166 TF_RETURN_IF_ERROR(AllowDataTypes(
5167 *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
5168 TFAttrs attrs(node_def);
5169 const int block_size = attrs.get<int64>("block_size");
5170 if (block_size < 2) {
5171 return errors::InvalidArgument("Block size must be 2 or greater, at ",
5172 node_def.name());
5173 }
5174 const string data_format = attrs.get<string>("data_format");
5175 if (data_format != "NCHW" && data_format != "NHWC") {
5176 return errors::Unimplemented("Data format ", data_format,
5177 " is not supported, at ", node_def.name());
5178 }
5179 nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
5180 if (dims.nbDims != 3) {
5181 return errors::InvalidArgument("The input to ", node_def.op(),
5182 " must be rank 4, at ", node_def.name());
5183 }
5184 const int num_channels = data_format == "NCHW" ? dims.d[0] : dims.d[2];
5185 const int h = data_format == "NCHW" ? dims.d[1] : dims.d[0];
5186 const int w = data_format == "NCHW" ? dims.d[2] : dims.d[1];
5187 // Get shuffle parameters.
5188 nvinfer1::Dims first_shuffle_shape;
5189 nvinfer1::Permutation transpose_perm;
5190 nvinfer1::Dims second_shuffle_shape;
5191 if (node_def.op() == "DepthToSpace") {
5192 if (num_channels % (block_size * block_size) != 0) {
5193 return errors::InvalidArgument(
5194 "Number of channels must be divisible by block_size*block_size, at ",
5195 node_def.name());
5196 }
5197     // First Reshape [C, H, W] -> [r, r, C/(r*r), H, W]
5198 first_shuffle_shape = {
5199 /*nbDims=*/5,
5200 /*d=*/{block_size, block_size, num_channels / (block_size * block_size),
5201 h, w}};
5202 // Transpose [r, r, C/(r*r), H, W] -> [C/(r*r), H, r, W, r]
5203 transpose_perm = {2, 3, 0, 4, 1};
5204 // Second Reshape [C/(r*r), H, r, W, r] -> [C/(r*r), H * r, W * r]
5205 second_shuffle_shape =
5206 nvinfer1::DimsCHW(num_channels / (block_size * block_size),
5207 h * block_size, w * block_size);
5208 } else if (node_def.op() == "SpaceToDepth") {
5209 if (h % block_size != 0 || w % block_size != 0) {
5210 return errors::InvalidArgument(
5211 "Width and height must be divisible by block_size, at ",
5212 node_def.name());
5213 }
5214 // First Reshape [C, H, W] -> [C, H/r, r, W/r, r]
5215 first_shuffle_shape = {/*nbDims=*/5,
5216 /*d=*/{num_channels, h / block_size, block_size,
5217 w / block_size, block_size}};
5218 // Transpose [C, H/r, r, W/r, r] -> [r, r, C, H/r, W/r]
5219 transpose_perm = {2, 4, 0, 1, 3};
5220 // Second Reshape [r, r, C, H/r, W/r] -> [C*r*r, H/r, W/r]
5221 second_shuffle_shape = nvinfer1::DimsCHW(
5222 num_channels * block_size * block_size, h / block_size, w / block_size);
5223 }
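  // Illustrative example for DepthToSpace with block_size=2 and an NCHW input
  // of non-batch shape [8, 4, 6]: the first reshape gives [2, 2, 2, 4, 6],
  // the transpose gives [2, 4, 2, 6, 2], and the second reshape gives
  // [2, 8, 12].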
5224 if (params->validation_only) return Status::OK();
5225
5226 nvinfer1::IShuffleLayer* first_shuffle =
5227 params->converter->network()->addShuffle(*inputs.at(0).tensor());
5228 TFTRT_RETURN_ERROR_IF_NULLPTR(first_shuffle, node_def.name());
5229 if (data_format == "NHWC") {
5230 first_shuffle->setFirstTranspose({2, 0, 1});
5231 }
5232 first_shuffle->setReshapeDimensions(first_shuffle_shape);
5233 first_shuffle->setSecondTranspose(transpose_perm);
5234
5235 nvinfer1::IShuffleLayer* second_shuffle =
5236 params->converter->network()->addShuffle(*first_shuffle->getOutput(0));
5237 TFTRT_RETURN_ERROR_IF_NULLPTR(second_shuffle, node_def.name());
5238 second_shuffle->setReshapeDimensions(second_shuffle_shape);
5239 if (data_format == "NHWC") {
5240 second_shuffle->setSecondTranspose({1, 2, 0});
5241 }
5242
5243 params->converter->MarkQuantizationRangesAsInferrable(
5244 inputs.at(0).tensor(), first_shuffle->getOutput(0));
5245 params->converter->MarkQuantizationRangesAsInferrable(
5246 first_shuffle->getOutput(0), second_shuffle->getOutput(0));
5247 params->outputs->push_back(TRT_TensorOrWeights(second_shuffle->getOutput(0)));
5248 return Status::OK();
5249 }
5250
ConvertSquaredDifference(OpConverterParams * params)5251 Status ConvertSquaredDifference(OpConverterParams* params) {
5252 TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}, {"y", false}}));
5253 TF_RETURN_IF_ERROR(
5254 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
5255 const auto& inputs = params->inputs;
5256 const auto& node_def = params->node_def;
5257 // Broadcast inputs.
5258 nvinfer1::Dims broadcasted_dims_l, broadcasted_dims_r;
5259 TF_RETURN_IF_ERROR(GetTrtBroadcastShape(
5260 inputs.at(0), inputs.at(1), /*check_feasibility=*/true,
5261 params->use_implicit_batch, &broadcasted_dims_l, &broadcasted_dims_r));
5262 nvinfer1::ITensor* tensor_l = nullptr;
5263 nvinfer1::ITensor* tensor_r = nullptr;
5264 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
5265 inputs.at(0), broadcasted_dims_l, params->validation_only, &tensor_l));
5266 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
5267 inputs.at(1), broadcasted_dims_r, params->validation_only, &tensor_r));
5268 if (params->validation_only) return Status::OK();
5269
5270 // Subtract x - y.
5271 nvinfer1::IElementWiseLayer* sub =
5272 params->converter->network()->addElementWise(
5273 *tensor_l, *tensor_r, nvinfer1::ElementWiseOperation::kSUB);
5274 TFTRT_RETURN_ERROR_IF_NULLPTR(sub, node_def.name());
5275 // Multiply (x - y) * (x - y).
5276 nvinfer1::IElementWiseLayer* mul =
5277 params->converter->network()->addElementWise(
5278 *sub->getOutput(0), *sub->getOutput(0),
5279 nvinfer1::ElementWiseOperation::kPROD);
5280 TFTRT_RETURN_ERROR_IF_NULLPTR(mul, node_def.name());
5281
5282 params->outputs->push_back(TRT_TensorOrWeights(mul->getOutput(0)));
5283 return Status::OK();
5284 }
5285
5286 #if IS_TRT_VERSION_GE(5, 1, 0, 0)
ConvertCombinedNMS(OpConverterParams * params)5287 Status ConvertCombinedNMS(OpConverterParams* params) {
5288 TF_RETURN_IF_ERROR(
5289 CheckInputsWeights(*params, {{"boxes", false},
5290 {"scores", false},
5291 {"max_output_size_per_class", true},
5292 {"max_total_size", true},
5293 {"iou_threshold", true},
5294 {"score_threshold", true}}));
5295 const auto& inputs = params->inputs;
5296 const auto& node_def = params->node_def;
5297
5298 nvinfer1::ITensor* boxes_tensor = inputs.at(0).tensor();
5299 nvinfer1::ITensor* scores_tensor = inputs.at(1).tensor();
5300 TRT_ShapedWeights output_size_per_class = inputs.at(2).weights();
5301 TRT_ShapedWeights total_size = inputs.at(3).weights();
5302 TRT_ShapedWeights iou_threshold = inputs.at(4).weights();
5303 TRT_ShapedWeights score_threshold = inputs.at(5).weights();
5304
5305 // Validate tensors and weights (also set some of the needed plugin fields)
5306 const auto boxes_dims = boxes_tensor->getDimensions();
5307 const auto scores_dims = scores_tensor->getDimensions();
5308 if (boxes_dims.nbDims != 3) {
5309 return errors::InvalidArgument(
5310 "TensorRT BatchedNMS Plugin input boxes must be 3-D excluding batch ",
5311 node_def.name());
5312 }
5313 const int num_classes = scores_dims.d[1];
5314 bool box_check = boxes_dims.d[1] == 1 || boxes_dims.d[1] == num_classes;
5315 if (!box_check) {
5316 return errors::InvalidArgument(
5317 "TensorRT BatchedNMS Plugin third dimension of boxes must be either 1 "
5318 "or num_classes ",
5319 node_def.name());
5320 }
5321 if (output_size_per_class.shape_.nbDims != 1) {
5322 return errors::InvalidArgument(
5323 "TensorRT BatchedNMS Plugin max_output_size_per_class must be 0-D ",
5324 node_def.name());
5325 }
5326 int max_size_per_class =
5327 *(static_cast<int*>(output_size_per_class.GetValues()));
5328 if (max_size_per_class <= 0) {
5329 return errors::InvalidArgument(
5330         "TensorRT BatchedNMS Plugin max_output_size_per_class should be > 0 ",
5331 node_def.name());
5332 }
5333 if (total_size.shape_.nbDims != 1) {
5334 return errors::InvalidArgument(
5335 "TensorRT BatchedNMS Plugin max_total_size must be 0-D ",
5336 node_def.name());
5337 }
5338 int max_total_size = *(static_cast<int*>(total_size.GetValues()));
5339 if (max_total_size <= 0) {
5340 return errors::InvalidArgument(
5341         "TensorRT BatchedNMS Plugin max_total_size should be > 0 ",
5342 node_def.name());
5343 }
5344 if (iou_threshold.shape_.nbDims != 1) {
5345 return errors::InvalidArgument(
5346 "TensorRT BatchedNMS Plugin iou_threshold must be 0-D ",
5347 node_def.name());
5348 }
5349 float iou_thresh = *(static_cast<float*>(iou_threshold.GetValues()));
5350 if (iou_thresh < 0.0 || iou_thresh > 1.0) {
5351 return errors::InvalidArgument(
5352         "TensorRT BatchedNMS Plugin iou_threshold must be in [0, 1] ",
5353 node_def.name());
5354 }
5355 if (score_threshold.shape_.nbDims != 1) {
5356 return errors::InvalidArgument(
5357 "TensorRT BatchedNMS Plugin score_threshold must be 0-D ",
5358 node_def.name());
5359 }
5360
5361 if (params->validation_only) return Status::OK();
5362
5363 // TF op CombinedNonMaxSuppression doesn't have the option of
5364 // not normalizing coordinates.
5365 const bool is_normalized = true;
5366 // Set plugin fields and the field collection
5367 TFAttrs attrs(node_def);
5368 bool share_location = (boxes_dims.d[1] == 1);
5369 const bool pad_per_class = attrs.get<bool>("pad_per_class");
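  // When pad_per_class is true, TF pads the output to
  // min(max_size_per_class * num_classes, max_total_size); otherwise to
  // max_total_size. Use that value for both the plugin's topK and keepTopK.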
5370 int top_k;
5371 if (pad_per_class) {
5372 top_k = std::min(max_size_per_class * num_classes, max_total_size);
5373 } else {
5374 top_k = max_total_size;
5375 }
5376 const int keep_top_k = top_k;
5377 float score_thresh = *(static_cast<float*>(score_threshold.GetValues()));
5378 const int background_id = -1;
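  // The plugin creator consumes these fields by name, so names and types must
  // match what BatchedNMS_TRT expects.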
5379 nvinfer1::PluginField fields[8] = {
5380 nvinfer1::PluginField{"shareLocation", &share_location,
5381 nvinfer1::PluginFieldType::kINT32, 1},
5382 nvinfer1::PluginField{"backgroundLabelId", &background_id,
5383 nvinfer1::PluginFieldType::kINT32, 1},
5384 nvinfer1::PluginField{"numClasses", &num_classes,
5385 nvinfer1::PluginFieldType::kINT32, 1},
5386 nvinfer1::PluginField{"topK", &top_k, nvinfer1::PluginFieldType::kINT32,
5387 1},
5388 nvinfer1::PluginField{"keepTopK", &keep_top_k,
5389 nvinfer1::PluginFieldType::kINT32, 1},
5390 nvinfer1::PluginField{"scoreThreshold", &score_thresh,
5391 nvinfer1::PluginFieldType::kFLOAT32, 1},
5392 nvinfer1::PluginField{"iouThreshold", &iou_thresh,
5393 nvinfer1::PluginFieldType::kFLOAT32, 1},
5394 nvinfer1::PluginField{"isNormalized", &is_normalized,
5395 nvinfer1::PluginFieldType::kINT32, 1},
5396 };
5397 nvinfer1::PluginFieldCollection fc{8, fields};
5398
5399 // Get plugin creator
5400 auto creator =
5401 getPluginRegistry()->getPluginCreator("BatchedNMS_TRT", "1", "");
5402 TFTRT_RETURN_ERROR_IF_NULLPTR(creator, node_def.name());
5403
5404 // Create plugin
5405 TrtUniquePtrType<nvinfer1::IPluginV2> plugin(
5406 creator->createPlugin(node_def.name().c_str(), &fc));
5407 TFTRT_RETURN_ERROR_IF_NULLPTR(plugin, node_def.name());
5408
5409 // Set plugin inputs
5410 std::vector<nvinfer1::ITensor*> plugin_inputs;
5411 plugin_inputs.push_back(boxes_tensor);
5412 plugin_inputs.push_back(scores_tensor);
5413
5414 // Add plugin to network
5415 nvinfer1::IPluginV2Layer* layer = params->converter->network()->addPluginV2(
5416 &plugin_inputs[0], static_cast<int>(plugin_inputs.size()), *plugin);
5417 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
5418
5419 // Set plugin outputs
5420 nvinfer1::ITensor* output_nmsed_boxes = layer->getOutput(1);
5421 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
5422 // TRT6 fixes (removes) the extra last dimension in CombinedNMS outputs
5423 nvinfer1::ITensor* output_num_detections = layer->getOutput(0);
5424 nvinfer1::ITensor* output_nmsed_scores = layer->getOutput(2);
5425 nvinfer1::ITensor* output_nmsed_classes = layer->getOutput(3);
5426 #else
5427 nvinfer1::ITensor* output_num_detections = nullptr;
5428 nvinfer1::ITensor* output_nmsed_scores = nullptr;
5429 nvinfer1::ITensor* output_nmsed_classes = nullptr;
5430
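  // Before TRT6 the plugin emits a trailing dimension of size 1 on the
  // num_detections, scores, and classes outputs; strip it so the engine
  // outputs match the shapes of the TF op.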
5431 auto shrink_last_dim = [params](nvinfer1::ITensor* in_tensor,
5432 nvinfer1::ITensor** out_tensor) {
5433 nvinfer1::Dims dims = in_tensor->getDimensions();
5434 if (dims.d[dims.nbDims - 1] != 1) {
5435       return errors::Internal("Expect last dim to be 1, for tensor ",
5436 DebugString(*in_tensor));
5437 }
5438 --dims.nbDims;
5439 TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
5440 TRT_TensorOrWeights(in_tensor), dims,
5441 /*validation_only=*/false, out_tensor));
5442 return Status::OK();
5443 };
5444 TF_RETURN_IF_ERROR(
5445 shrink_last_dim(layer->getOutput(2), &output_nmsed_scores));
5446 TF_RETURN_IF_ERROR(
5447 shrink_last_dim(layer->getOutput(3), &output_nmsed_classes));
5448 TF_RETURN_IF_ERROR(
5449 shrink_last_dim(layer->getOutput(0), &output_num_detections));
5450 #endif // IS_TRT_VERSION_GE(6, 0, 0, 0)
5451
5452 params->outputs->push_back(TRT_TensorOrWeights(output_nmsed_boxes));
5453 params->outputs->push_back(TRT_TensorOrWeights(output_nmsed_scores));
5454 params->outputs->push_back(TRT_TensorOrWeights(output_nmsed_classes));
5455 params->outputs->push_back(TRT_TensorOrWeights(output_num_detections));
5456
5457 return Status::OK();
5458 }
5459 #endif // IS_TRT_VERSION_GE(5, 1, 0, 0)
5460
5461 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
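// Converts ResizeBilinear and ResizeNearestNeighbor using TRT's IResizeLayer.
// The input is transposed from NHWC to NCHW for the resize and transposed
// back afterwards.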
5462 Status ConvertResize(OpConverterParams* params) {
5463 const auto& inputs = params->inputs;
5464 const auto& node_def = params->node_def;
5465 TF_RETURN_IF_ERROR(
5466 CheckInputsWeights(*params, {{"input", false}, {"size", true}}));
5467 TF_RETURN_IF_ERROR(AllowDataTypes(
5468 *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
5469
5470 // Get input tensor. Transpose it from NHWC to NCHW.
5471 nvinfer1::ITensor* tensor = inputs.at(0).tensor();
5472 TFTRT_RETURN_ERROR_IF_NULLPTR(tensor, params->node_def.name());
5473
5474   // Get output size. It must contain two values, i.e. [H_out, W_out].
5475 TRT_ShapedWeights weights = inputs.at(1).weights();
5476 if (weights.count() != 2) {
5477 return errors::Unimplemented("Resize to shape=[] is not supported, at ",
5478 node_def.name());
5479 }
5480 const int* weights_ptr = static_cast<int*>(weights.GetValues());
5481
5482 // Verify and consume node attributes.
5483 TFAttrs attrs(node_def);
5484 bool align_corners = attrs.get<bool>("align_corners");
5485 TF_RETURN_IF_ERROR(
5486 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
5487
5488 // Verify resize mode. Initialize resize mode if supported.
5489 nvinfer1::ResizeMode resize_mode;
5490 if (node_def.op() == "ResizeBilinear") {
5491 resize_mode = nvinfer1::ResizeMode::kLINEAR;
5492 } else if (node_def.op() == "ResizeNearestNeighbor") {
5493 resize_mode = nvinfer1::ResizeMode::kNEAREST;
5494 } else {
5495 return errors::Unimplemented(node_def.op(), " is not yet implemented at ",
5496 node_def.name());
5497 }
5498
5499   // Return after validation if only validation is requested.
5500 if (params->validation_only) return Status::OK();
5501
5502 // Transpose tensor from NHWC to NCHW format.
5503 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
5504 tensor, {0, 3, 1, 2}, StrCat(node_def.name(), "_to_NCHW"), &tensor));
5505
5506 // Calculate output dimensions.
5507 // Given input dimensions [N, C, H, W] and output size [H_out, W_out],
5508   // output dimensions equal [N, C, H_out, W_out].
5509 nvinfer1::Dims output_dimensions;
5510 output_dimensions.nbDims = tensor->getDimensions().nbDims;
5511 for (int i = 0; i < output_dimensions.nbDims; ++i) {
5512 output_dimensions.d[i] = tensor->getDimensions().d[i];
5513 }
5514 output_dimensions.d[output_dimensions.nbDims - 2] = weights_ptr[0];
5515 output_dimensions.d[output_dimensions.nbDims - 1] = weights_ptr[1];
5516
5517 // Add resize layer.
5518 nvinfer1::IResizeLayer* layer =
5519 params->converter->network()->addResize(*tensor);
5520 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
5521
5522 // Set layer parameters.
5523 layer->setResizeMode(resize_mode);
5524 layer->setOutputDimensions(output_dimensions);
5525 layer->setAlignCorners(align_corners);
5526
5527 // Get output tensor. Transpose it from NCHW to NHWC.
5528 nvinfer1::ITensor* output = layer->getOutput(0);
5529
5530 TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
5531 output, {0, 2, 3, 1}, StrCat(node_def.name(), "_to_NHWC"), &output));
5532 params->outputs->push_back(TRT_TensorOrWeights(output));
5533 // Success
5534 return Status::OK();
5535 } // ConvertResize
5536 #endif // IS_TRT_VERSION_GE(6, 0, 0, 0)
5537
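// Converts AddN by chaining elementwise kSUM layers. Weight inputs are turned
// into constant layers; since AddN does not broadcast, such weights must have
// batch dimension 1.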
5538 Status ConvertAddN(OpConverterParams* params) {
5539 const auto& inputs = params->inputs;
5540 const auto& node_def = params->node_def;
5541 TF_RETURN_IF_ERROR(
5542 AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
5543 TFAttrs attrs(node_def);
5544 const int num_inputs = attrs.get<int64>("N");
5545 if (num_inputs < 2) {
5546 return errors::InvalidArgument("AddN requires at least two inputs, at ",
5547 node_def.name());
5548 }
5549 if (inputs.size() != num_inputs) {
5550 return errors::InvalidArgument("Got ", inputs.size(),
5551 " inputs but expected ", num_inputs, ", at ",
5552 node_def.name());
5553 }
5554 for (const auto& input : inputs) {
5555 if (!input.is_tensor() && input.weights().shape_.d[0] != 1) {
5556 return errors::InvalidArgument(
5557 "Weights input to AddN is required to have batch dimension 1.");
5558 }
5559 }
5560 if (params->validation_only) return Status::OK();
5561
5562 // AddN doesn't support broadcast.
5563 std::vector<nvinfer1::ITensor*> tensor_inputs;
5564 for (const auto& input : inputs) {
5565 if (input.is_tensor()) {
5566 tensor_inputs.push_back(input.tensor());
5567 } else {
5568 auto dims = input.weights().shape_;
5569 TF_RETURN_IF_ERROR(RemoveBatchDimension(&dims));
5570 tensor_inputs.push_back(
5571 params->converter->CreateConstantLayer(input.weights(), dims));
5572 }
5573 }
5574 nvinfer1::ITensor* lhs = tensor_inputs[0];
5575 for (int i = 1; i < num_inputs; ++i) {
5576 nvinfer1::ITensor* rhs = tensor_inputs[i];
5577 nvinfer1::ILayer* layer = params->converter->network()->addElementWise(
5578 *lhs, *rhs, nvinfer1::ElementWiseOperation::kSUM);
5579 TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
5580 lhs = layer->getOutput(0);
5581 }
5582 params->outputs->push_back(TRT_TensorOrWeights(lhs));
5583 return Status::OK();
5584 }
5585
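// Populates the op-name-to-converter map that is shared by the validator
// (TrtNodeValidator) and the converter (Converter).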
5586 static void RegisterValidatableOpConverters(
5587 std::unordered_map<string, OpConverter>* registration) {
5588 (*registration)["BiasAdd"] = ConvertBiasAdd;
5589 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
5590 (*registration)["ClipByValue"] = ConvertClipByValue;
5591 #endif
5592 #if IS_TRT_VERSION_GE(5, 1, 0, 0)
5593 (*registration)["CombinedNonMaxSuppression"] = ConvertCombinedNMS;
5594 #endif
5595 (*registration)["AddN"] = ConvertAddN;
5596 (*registration)["ConcatV2"] = ConvertConcat;
5597 (*registration)["Const"] = ConvertConst;
5598 (*registration)["Conv2D"] = ConvertConv2D;
5599 (*registration)["Conv2DBackpropInput"] = ConvertConv2DBackpropInput;
5600 (*registration)["DepthToSpace"] = ConvertDepthSpaceShuffle;
5601 (*registration)["DepthwiseConv2dNative"] = ConvertConv2DDepthwise;
5602 (*registration)["ExpandDims"] = ConvertExpandDims;
5603 (*registration)["FusedConv2DBiasActivation"] =
5604 ConvertFusedConv2DBiasActivation;
5605 (*registration)["GatherV2"] = ConvertGather;
5606 (*registration)["LeakyRelu"] = ConvertLeakyRelu;
5607 (*registration)["MatMul"] = ConvertMatMul;
5608 (*registration)["Pack"] = ConvertPack;
5609 (*registration)["Pad"] = ConvertPad;
5610 (*registration)["Relu6"] = ConvertRelu6;
5611 (*registration)["Reshape"] = ConvertReshape;
5612 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
5613 (*registration)["Conv3D"] = ConvertConv3D;
5614 (*registration)["Conv3DBackpropInputV2"] = ConvertConv3DBackpropInputV2;
5615 for (auto resize_mode : {"ResizeBilinear", "ResizeNearestNeighbor"}) {
5616 (*registration)[resize_mode] = ConvertResize;
5617 }
5618 for (auto pool_op_type : {"AvgPool3D", "MaxPool3D"}) {
5619 (*registration)[pool_op_type] = ConvertPool3D;
5620 }
5621 #endif
5622 (*registration)["Rsqrt"] = ConvertRsqrt;
5623 (*registration)["Slice"] = ConvertSlice;
5624 (*registration)["Softmax"] = ConvertSoftmax;
5625 (*registration)["SpaceToDepth"] = ConvertDepthSpaceShuffle;
5626 (*registration)["Split"] = ConvertSplit;
5627 (*registration)["Square"] = ConvertSquare;
5628 (*registration)["SquaredDifference"] = ConvertSquaredDifference;
5629 (*registration)["Squeeze"] = ConvertSqueeze;
5630 (*registration)["StridedSlice"] = ConvertStridedSlice;
5631 (*registration)["TopKV2"] = ConvertTopK;
5632 (*registration)["Transpose"] = ConvertTranspose;
5633 (*registration)["Unpack"] = ConvertUnpack;
5634
5635 for (auto quantization_op_type :
5636 {"QuantizeAndDequantizeV2", "QuantizeAndDequantizeV3",
5637 "FakeQuantWithMinMaxVars", "FakeQuantWithMinMaxArgs"}) {
5638 (*registration)[quantization_op_type] = ConvertQuantize;
5639 }
5640 for (const auto& binary_op_pair : *BinaryOperationMap()) {
5641 (*registration)[binary_op_pair.first] = ConvertBinary;
5642 }
5643 for (const auto& activation_op_pair : *ActivationTypeMap()) {
5644 (*registration)[activation_op_pair.first] = ConvertActivation;
5645 }
5646 for (auto pool_op_type : {"AvgPool", "MaxPool"}) {
5647 (*registration)[pool_op_type] = ConvertPool;
5648 }
5649 for (auto normalization_op_type :
5650 {"FusedBatchNorm", "FusedBatchNormV2", "FusedBatchNormV3"}) {
5651 (*registration)[normalization_op_type] = ConvertFusedBatchNorm;
5652 }
5653 for (auto unary_op_pair : *UnaryOperationMap()) {
5654 (*registration)[unary_op_pair.first] = ConvertUnary;
5655 }
5656 for (auto reduce_op_type : {"Sum", "Prod", "Max", "Min", "Mean"}) {
5657 (*registration)[reduce_op_type] = ConvertReduce;
5658 }
5659 for (auto arg_minmax_type : {"ArgMin", "ArgMax"}) {
5660 (*registration)[arg_minmax_type] = ConvertArgMinMax;
5661 }
5662 // The following are no-ops during inference and will not be mapped to any TRT
5663 // layer.
5664 for (auto identity_op_type : {"Identity", "Snapshot", "StopGradient"}) {
5665 (*registration)[identity_op_type] = ConvertIdentity;
5666 }
5667 for (auto batch_matmul_type : {"BatchMatMul", "BatchMatMulV2"}) {
5668 (*registration)[batch_matmul_type] = ConvertBatchMatMul;
5669 }
5670 }
5671
5672 void TrtNodeValidator::RegisterOpValidators() {
5673 RegisterValidatableOpConverters(&op_validators_);
5674 }
5675
5676 void Converter::RegisterOpConverters() {
5677 RegisterValidatableOpConverters(&op_registry_);
5678 }
5679
5680 Status ConvertGraphDefToEngine(
5681 const GraphDef& gdef, TrtPrecisionMode precision_mode, int max_batch_size,
5682 size_t max_workspace_size_bytes,
5683 const std::vector<PartialTensorShape>& input_shapes,
5684 nvinfer1::ILogger* trt_logger, nvinfer1::IGpuAllocator* allocator,
5685 TRTInt8Calibrator* calibrator,
5686 TrtUniquePtrType<nvinfer1::ICudaEngine>* engine, bool use_calibration,
5687 const bool use_implicit_batch, bool* convert_successfully) {
5688 engine->reset();
5689 if (convert_successfully) *convert_successfully = false;
5690
5691 // Creating converter, TensorRT builder and network
5692 auto statusor = Converter::Create(precision_mode, use_calibration, trt_logger,
5693 use_implicit_batch);
5694 TF_RETURN_IF_ERROR(statusor.status());
5695 auto converter = std::move(statusor.ValueOrDie());
5696
5697 VLOG(1) << "Starting to convert TensorFlow ops to TensorRT layers";
5698 std::vector<Converter::EngineOutputInfo> output_tensors;
5699 // Graph nodes are already topologically sorted during construction
5700 for (const auto& node_def : gdef.node()) {
5701 const string& node_name = node_def.name();
5702 VLOG(2) << "Converting node " << node_name << ", op=" << node_def.op();
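    // Engine inputs are Placeholder or _Arg nodes; recover the input slot
    // either from the node name suffix or from the "index" attribute.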
5703 if (IsEngineInput(node_name)) {
5704 int32 slot_number = -1;
5705 string type_key;
5706 if (node_def.op() == "Placeholder") {
5707 if (!strings::safe_strto32( // non-absl ok
5708 node_name.c_str() + strlen(IONamePrefixes::kInputPHName),
5709 &slot_number)) {
5710 return errors::InvalidArgument("Failed to parse slot number from ",
5711 node_name);
5712 }
5713 type_key = "dtype";
5714 } else if (tensorflow::grappler::IsArg(node_def)) {
5715 // Maybe remove the dependence on grappler and re-implement IsArg,
5716 // which is pretty simple (but could change if new Arg nodes are added)
5717 slot_number = node_def.attr().at("index").i();
5718 type_key = "T";
5719 } else {
5720 return errors::InvalidArgument(
5721 "Node ", node_name,
5722           " is neither Placeholder nor Arg, instead ", node_def.op());
5723 }
5724 nvinfer1::DataType trt_dtype;
5725 nvinfer1::Dims trt_dims;
5726 int batch_size = -1;
5727 auto shape = input_shapes.at(slot_number);
5728 auto status = ValidateTensorProperties(
5729 node_def.op(), node_def.attr().at(type_key).type(), shape,
5730 use_implicit_batch, /*validation_only=*/false, &trt_dtype, &trt_dims,
5731 &batch_size);
5732 if (!status.ok()) {
5733 const string error_message =
5734 StrCat("Validation failed for ", node_name, " and input slot ",
5735 slot_number, ": ", status.error_message());
5736 LOG(WARNING) << error_message;
5737 return Status(status.code(), error_message);
5738 }
5739 VLOG(2) << "Adding engine input tensor " << node_name << " with shape "
5740 << DebugString(trt_dims);
5741 // TODO(laigd): the conversion should always happen at runtime where all
5742 // the shapes are known, and we can provide a mode to generate the
5743 // engines offline, by calling sess.run() and cache/serialize the engines.
5744 TF_RETURN_IF_ERROR(converter->AddInputTensor(node_name, trt_dtype,
5745 trt_dims, batch_size));
5746 } else if (IsEngineOutput(node_name)) {
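      // Engine outputs are Identity or _Retval nodes; recover the output slot
      // either from the node name suffix or from the "index" attribute.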
5747 int32 slot_number = -1;
5748 if (node_def.op() == "Identity") {
5749 if (!strings::safe_strto32( // non-absl ok
5750 node_name.c_str() + strlen(IONamePrefixes::kOutputPHName),
5751 &slot_number)) {
5752 return errors::InvalidArgument("Failed to parse slot number from ",
5753 node_name);
5754 }
5755 } else if (tensorflow::grappler::IsRetval(node_def)) {
5756 slot_number = node_def.attr().at("index").i();
5757 } else {
5758 return errors::InvalidArgument(
5759 "Node with name ", node_name,
5760 " starting with IONamePrefixes::kOutputPHName is "
5761 "neither Identity nor Retval, instead ",
5762 node_def.op());
5763 }
5764 // Get output type that TensorFlow expects
5765 TFAttrs attrs(node_def);
5766 DataType tf_dtype = attrs.get<DataType>("T");
5767 nvinfer1::DataType trt_dtype;
5768 TF_RETURN_IF_ERROR(TfDataTypeToTrt(tf_dtype, &trt_dtype));
5769 if (output_tensors.size() <= slot_number) {
5770 output_tensors.resize(slot_number + 1);
5771 }
5772 output_tensors.at(slot_number) = {node_def.input(0), node_name,
5773 trt_dtype};
5774 } else {
5775 TF_RETURN_IF_ERROR(converter->ConvertNode(node_def));
5776 }
5777 }
5778 TF_RETURN_IF_ERROR(converter->RenameAndMarkOutputTensors(output_tensors));
5779 if (convert_successfully) *convert_successfully = true;
5780
5781 // Apply user provided quantization ranges to tensors
5782 converter->MaybeApplyQuantizationRanges();
5783
5784 // Build the engine.
5785 TF_RETURN_IF_ERROR(converter->BuildCudaEngine(
5786 engine, max_batch_size, max_workspace_size_bytes, allocator, calibrator));
5787
5788 VLOG(1) << "Finished conversion";
5789 return Status::OK();
5790 }
5791
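// Copies a TRT candidate segment into a standalone GraphDef: boundary inputs
// become _Arg nodes, boundary outputs become _Retval nodes, and control
// inputs coming from outside the segment are dropped.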
5792 Status ConvertSegmentToGraphDef(
5793 const Graph* graph, const grappler::GraphProperties& graph_properties,
5794 const std::vector<const Node*>& subgraph_nodes, // In topological order
5795 std::vector<EngineConnection>* connections, GraphDef* segment_def,
5796 string* scope_name) {
5797 std::set<string> marker_nodes;
5798 // Update connection shapes/data types and add corresponding input/output
5799 // nodes in the segment graphdef.
5800 for (size_t i = 0; i < connections->size(); ++i) {
5801 auto& connection = connections->at(i);
5802 if (connection.is_control_edge()) continue;
5803 auto outside_node = graph->FindNodeId(connection.outside_id);
5804 if (!outside_node) {
5805 // This should never happen, unless the original graph is problematic.
5806 return errors::NotFound("Cannot find node with id ",
5807 connection.outside_id, " in the graph.");
5808 }
5809 // Updates the shape and data types of input/output connections.
5810 DataType dtype;
5811 PartialTensorShape partial_shape;
5812 if (connection.is_input_edge) {
5813 GetOutputProperties(graph_properties,
5814 graph->FindNodeId(connection.outside_id),
5815 connection.outside_port, &partial_shape, &dtype);
5816 connection.outside_shape = partial_shape;
5817 } else {
5818 GetInputProperties(graph_properties,
5819 graph->FindNodeId(connection.outside_id),
5820 connection.outside_port, &partial_shape, &dtype);
5821 connection.inside_shape = partial_shape;
5822 }
5823 connection.connection_type = dtype;
5824
5825 // Add dummy input/output nodes to the segment graphdef.
5826 if (connection.is_input_edge) {
5827 const string node_name =
5828 StrCat(IONamePrefixes::kInputPHName, connection.port_number);
5829 if (marker_nodes.count(node_name)) {
5830 VLOG(1) << "Reusing input " << node_name << " for the edge "
5831 << connection.outside_node_name << ":"
5832 << connection.outside_port << " -> "
5833 << connection.inside_node_name << ":" << connection.inside_port;
5834 continue;
5835 }
5836 marker_nodes.insert(node_name);
5837 auto seg_node = segment_def->add_node();
5838 NodeDefBuilder builder(node_name, "_Arg");
5839 auto status = builder.Attr("shape", partial_shape)
5840 .Attr("T", dtype)
5841 .Attr("index", connection.port_number)
5842 .Finalize(seg_node);
5843 VLOG(1) << "Constructing input " << node_name << " for the edge "
5844 << connection.outside_node_name << ":" << connection.outside_port
5845 << " -> " << connection.inside_node_name << ":"
5846 << connection.inside_port;
5847 } else {
5848 const string node_name =
5849 StrCat(IONamePrefixes::kOutputPHName, connection.port_number);
5850 if (marker_nodes.count(node_name)) {
5851 VLOG(1) << "Reusing output " << node_name << " for the edge "
5852 << connection.inside_node_name << ":" << connection.inside_port
5853 << " -> " << connection.outside_node_name << ":"
5854 << connection.outside_port;
5855 continue;
5856 }
5857 marker_nodes.insert(node_name);
5858 auto seg_node = segment_def->add_node();
5859 NodeDefBuilder builder(node_name, "_Retval");
5860 auto status =
5861 builder.Attr("T", dtype)
5862 .Attr("index", connection.port_number)
5863 .Input(connection.inside_node_name, connection.inside_port, dtype)
5864 .Finalize(seg_node);
5865 VLOG(1) << "Constructing output " << node_name << " for the edge "
5866 << connection.inside_node_name << ":" << connection.inside_port
5867 << " -> " << connection.outside_node_name << ":"
5868 << connection.outside_port;
5869 }
5870 } // for each connection.
5871
5872 std::unordered_map<int, int> old_to_new_id_map;
5873 // Copy internal nodes to new graphdef
5874 string local_scope = subgraph_nodes.front()->name();
5875 for (const Node* node : subgraph_nodes) {
5876 local_scope = GetCommonNameScope(local_scope, node->name());
5877 old_to_new_id_map[node->id()] = segment_def->node_size();
5878 auto snode = segment_def->add_node();
5879 *snode = node->def();
5880 VLOG(2) << "Copying " << snode->name() << " to subgraph";
5881 }
5882 // Update the inputs of the new input nodes to point to placeholder nodes.
5883 for (int i = 0; i < connections->size(); ++i) {
5884 auto& connection = connections->at(i);
5885 if (connection.is_control_edge() || !connection.is_input_edge) continue;
5886 auto snode =
5887 segment_def->mutable_node(old_to_new_id_map[connection.inside_id]);
5888 const string arg_name =
5889 StrCat(IONamePrefixes::kInputPHName, connection.port_number);
5890 VLOG(1) << "Updating " << snode->name() << ":" << connection.inside_port
5891 << " from " << snode->input(connection.inside_port) << " to "
5892 << arg_name;
5893 snode->set_input(connection.inside_port, arg_name);
5894 }
5895 std::set<string> subgraph_node_names;
5896 for (const Node* node : subgraph_nodes) {
5897 subgraph_node_names.insert(node->name());
5898 }
5899
5900 // Remove control inputs that are not inside the segment.
5901 for (int i = 0; i < segment_def->node_size(); ++i) {
5902 auto snode = segment_def->mutable_node(i);
5903 const int input_size = snode->input_size();
5904 int input_idx = 0;
5905 int actual_input_idx = 0;
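    // Keep valid inputs compacted at the front; trailing slots left over
    // after this loop are removed below.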
5906 while (input_idx < input_size) {
5907 TensorId input = ParseTensorName(snode->input(input_idx));
5908 if (!subgraph_node_names.count(
5909 string(input.first.data(), input.first.size())) &&
5910 !IsEngineInput(input.first)) {
5911 if (input.second == Graph::kControlSlot) {
5912           VLOG(1) << "... removing control input " << input.first
5913 << " from subgraph.";
5914 ++input_idx;
5915 continue;
5916 } else {
5917 return errors::InvalidArgument(
5918 "Found non control input outside the segment that is not an "
5919 "engine connection to ",
5920 snode->name(), ": ", input.first);
5921 }
5922 }
5923 if (actual_input_idx != input_idx) {
5924 snode->set_input(actual_input_idx, snode->input(input_idx));
5925 }
5926 ++input_idx;
5927 ++actual_input_idx;
5928 }
5929 for (int remove = input_size - actual_input_idx; remove > 0; --remove) {
5930 snode->mutable_input()->RemoveLast();
5931 }
5932 }
5933 *scope_name = local_scope;
5934 return Status::OK();
5935 }
5936
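// Returns true if the edge may remain an engine output edge; edges coming
// from Const nodes are rejected.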
5937 bool OutputEdgeValidator::operator()(const Edge* out_edge) const {
5938 if (out_edge->IsControlEdge()) return true;
5939 if (out_edge->src()->type_string() == "Const") {
5940 VLOG(1) << "--> Need to remove output node " << out_edge->src()->name()
5941 << " which is a Const.";
5942 return false;
5943 }
5944 return true;
5945 }
5946
5947 } // namespace convert
5948 } // namespace tensorrt
5949 } // namespace tensorflow
5950
5951 #endif // GOOGLE_TENSORRT
5952 #endif // GOOGLE_CUDA
5953