1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h"
17 
18 #include <algorithm>
19 #include <cmath>
20 #include <cstring>
21 #include <map>
22 #include <memory>
23 #include <set>
24 #include <unordered_map>
25 #include <utility>
26 #include <vector>
27 
28 #include "absl/memory/memory.h"
29 #include "absl/strings/match.h"
30 #include "absl/strings/str_cat.h"
31 #include "absl/strings/string_view.h"
32 #include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
33 #include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
34 #include "tensorflow/core/framework/node_def.pb.h"  // NOLINT
35 #include "tensorflow/core/framework/node_def_builder.h"
36 #include "tensorflow/core/framework/tensor.pb.h"  // NOLINT
37 #include "tensorflow/core/framework/tensor_shape.h"
38 #include "tensorflow/core/framework/tensor_shape.pb.h"  // NOLINT
39 #include "tensorflow/core/framework/types.h"
40 #include "tensorflow/core/graph/algorithm.h"
41 #include "tensorflow/core/graph/graph.h"
42 #include "tensorflow/core/graph/graph_constructor.h"
43 #include "tensorflow/core/grappler/op_types.h"
44 #include "tensorflow/core/lib/core/errors.h"
45 #include "tensorflow/core/lib/core/status.h"
46 #include "tensorflow/core/lib/strings/numbers.h"
47 #include "tensorflow/core/lib/strings/str_util.h"
48 #include "tensorflow/core/lib/strings/strcat.h"
49 #include "tensorflow/core/platform/logging.h"
50 #include "tensorflow/core/platform/mutex.h"
51 #include "tensorflow/core/platform/protobuf.h"
52 #include "tensorflow/core/platform/tensor_coding.h"
53 #include "tensorflow/core/platform/types.h"
54 #include "tensorflow/core/public/version.h"
55 #include "tensorflow/core/util/strided_slice_op.h"
56 
57 #if GOOGLE_CUDA
58 #if GOOGLE_TENSORRT
59 #include "third_party/tensorrt/NvInfer.h"
60 #include "third_party/tensorrt/NvInferPlugin.h"
61 
62 // Check if the types are equal. Cast to int first so that the failure log
63 // message works.
64 #define TFTRT_CHECK_EQ_TYPE(val1, val2) CHECK_EQ((int)val1, (int)val2)
65 
66 #define TFTRT_INTERNAL_ERROR_AT_NODE(node)                           \
67   do {                                                               \
68     return errors::Internal("TFTRT::", __FUNCTION__, ":", __LINE__,  \
69                             " failed to add TRT layer, at: ", node); \
70   } while (0)
71 
72 #define TFTRT_RETURN_ERROR_IF_NULLPTR(ptr, node) \
73   do {                                           \
74     if (ptr == nullptr) {                        \
75       TFTRT_INTERNAL_ERROR_AT_NODE(node);        \
76     }                                            \
77   } while (0)
78 
79 namespace tensorflow {
80 namespace tensorrt {
81 namespace convert {
82 
83 bool IsEngineInput(absl::string_view name) {
84   return absl::StartsWith(name, IONamePrefixes::kInputPHName);
85 }
86 bool IsEngineOutput(absl::string_view name) {
87   return absl::StartsWith(name, IONamePrefixes::kOutputPHName);
88 }
89 
90 using absl::StrAppend;
91 using absl::StrCat;
92 
93 inline Status TfDataTypeToTrt(DataType tf_dtype,
94                               nvinfer1::DataType* trt_dtype) {
95   switch (tf_dtype) {
96     case DataType::DT_FLOAT:
97       *trt_dtype = nvinfer1::DataType::kFLOAT;
98       break;
99     case DataType::DT_HALF:
100       *trt_dtype = nvinfer1::DataType::kHALF;
101       break;
102     case DataType::DT_INT32:
103       *trt_dtype = nvinfer1::DataType::kINT32;
104       break;
105     default:
106       return errors::InvalidArgument("Unsupported data type ",
107                                      DataTypeString(tf_dtype));
108   }
109   return Status::OK();
110 }
111 
112 inline Status TrtDataTypeToTf(nvinfer1::DataType trt_dtype,
113                               DataType* tf_dtype) {
114   switch (trt_dtype) {
115     case nvinfer1::DataType::kFLOAT:
116       *tf_dtype = DataType::DT_FLOAT;
117       break;
118     case nvinfer1::DataType::kHALF:
119       *tf_dtype = DataType::DT_HALF;
120       break;
121     case nvinfer1::DataType::kINT32:
122       *tf_dtype = DataType::DT_INT32;
123       break;
124     default:
125       return errors::InvalidArgument("Unsupported data type ",
126                                      DebugString(trt_dtype));
127   }
128   return Status::OK();
129 }
130 
131 class TFAttrs {
132  public:
133   explicit TFAttrs(const NodeDef& tf_node) {
134     for (const auto& attr : tf_node.attr()) {
135       attrs_.insert({attr.first, &attr.second});
136     }
137   }
138 
139   bool count(const string& key) const { return attrs_.count(key); }
140 
141   AttrValue const* at(const string& key) const {
142     if (!attrs_.count(key)) {
143       LOG(FATAL) << "Attribute not found: " << key;
144     }
145     return attrs_.at(key);
146   }
147 
148   template <typename T>
149   T get(const string& key) const;
150 
151   template <typename T>
152   T get(const string& key, const T& default_value) const {
153     return attrs_.count(key) ? this->get<T>(key) : default_value;
154   }
155 
156  private:
157   std::map<string, AttrValue const*> attrs_;
158 };
159 
160 template <>
161 string TFAttrs::get<string>(const string& key) const {
162   return this->at(key)->s();
163 }
164 
165 template <>
166 std::vector<int64> TFAttrs::get<std::vector<int64>>(const string& key) const {
167   auto attr = this->at(key)->list().i();
168   return std::vector<int64>(attr.begin(), attr.end());
169 }
170 
171 template <>
172 std::vector<float> TFAttrs::get<std::vector<float>>(const string& key) const {
173   auto attr = this->at(key)->list().f();
174   return std::vector<float>(attr.begin(), attr.end());
175 }
176 
177 template <>
178 nvinfer1::DataType TFAttrs::get<nvinfer1::DataType>(const string& key) const {
179   nvinfer1::DataType trt_dtype(nvinfer1::DataType::kFLOAT);
180   TF_CHECK_OK(TfDataTypeToTrt(this->at(key)->type(), &trt_dtype));
181   return trt_dtype;
182 }
183 
184 template <>
185 DataType TFAttrs::get<DataType>(const string& key) const {
186   return this->at(key)->type();
187 }
188 
189 template <>
190 float TFAttrs::get<float>(const string& key) const {
191   return this->at(key)->f();
192 }
193 
194 template <>
195 bool TFAttrs::get<bool>(const string& key) const {
196   return this->at(key)->b();
197 }
198 
199 template <>
200 int64 TFAttrs::get<int64>(const string& key) const {
201   return this->at(key)->i();
202 }
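
// Illustrative usage sketch (hypothetical attribute names, not taken from this
// file): TFAttrs wraps a NodeDef's attribute map for typed lookups, e.g.
//
//   TFAttrs attrs(node_def);
//   string fmt = attrs.get<string>("data_format", string("NHWC"));  // default
//   DataType dt = attrs.get<DataType>("T");  // LOG(FATAL)s if "T" is missing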
203 
204 template <typename Container>
205 Status TensorShapeArrayToTrtDims(const Container& shape, nvinfer1::Dims* out,
206                                  bool ignore_first_dim = false) {
207   PartialTensorShape tensor_shape;
208   TF_RETURN_IF_ERROR(TensorShapeUtils::MakeShape(shape, &tensor_shape));
209   *out = TensorShapeToTrtDims(tensor_shape, ignore_first_dim);
210   return Status::OK();
211 }
212 
213 // TODO(laigd): use this utility function in more places.
214 Status RemoveBatchDimension(nvinfer1::Dims* dims) {
215   if (dims->nbDims < 2) {
216     return errors::InvalidArgument(
217         "Dropping batch dimension requires dims with rank>=2.");
218   }
219   std::copy(dims->d + 1, dims->d + dims->nbDims, dims->d);
220   dims->nbDims--;
221   return Status::OK();
222 }
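
// For example (illustrative), dims [8, 3, 224, 224] (nbDims = 4) become
// [3, 224, 224] (nbDims = 3); a rank-1 input is rejected with InvalidArgument.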
223 
224 void GetOutputProperties(const grappler::GraphProperties& graph_properties,
225                          const Node* node, const int out_port,
226                          PartialTensorShape* shape, DataType* dtype) {
227   if (graph_properties.HasOutputProperties(node->name())) {
228     auto output_params = graph_properties.GetOutputProperties(node->name());
229     auto out_shape = output_params.at(out_port);
230     *dtype = out_shape.dtype();
231     *shape = out_shape.shape();
232   } else {
233     LOG(INFO) << "Unknown output shape " << node->name();
234     *dtype = node->output_type(out_port);
235   }
236 }
237 
238 void GetInputProperties(const grappler::GraphProperties& graph_properties,
239                         const Node* node, const int in_port,
240                         PartialTensorShape* shape, DataType* dtype) {
241   if (graph_properties.HasInputProperties(node->name())) {
242     auto input_params = graph_properties.GetInputProperties(node->name());
243     auto in_shape = input_params.at(in_port);
244     *dtype = in_shape.dtype();
245     *shape = in_shape.shape();
246   } else {
247     *dtype = node->input_type(in_port);
248   }
249 }
250 
251 Status ValidateTensorProperties(const string& producer_node_type,
252                                 const DataType dtype,
253                                 const PartialTensorShape& shape,
254                                 const bool use_implicit_batch,
255                                 bool validation_only,
256                                 nvinfer1::DataType* trt_dtype,
257                                 nvinfer1::Dims* trt_dims, int* batch_size) {
258   // Convert data type.
259   TF_RETURN_IF_ERROR(TfDataTypeToTrt(dtype, trt_dtype));
260 
261   // Convert shape.
262   if (shape.dims() < 0) {
263     return errors::InvalidArgument("Input tensor rank is unknown.");
264   }
265   // Add 1 to maximum rank for implicit batch dim.
266   const int max_rank = nvinfer1::Dims::MAX_DIMS + (use_implicit_batch ? 1 : 0);
267   if (shape.dims() > max_rank) {
268     return errors::OutOfRange("Input tensor rank is greater than ", max_rank);
269   }
270   if (use_implicit_batch && (producer_node_type != "Const") &&
271       (shape.dims() < 1)) {
272     return errors::InvalidArgument(
273         "Scalar input tensor is not supported since the first dimension "
274         "is treated as batch dimension by TRT");
275   }
276   *trt_dims = TensorShapeToTrtDims(shape,
277                                    /*ignore_first_dim=*/use_implicit_batch);
278   // Get batch size for tensor if it will not be included in the shape.
279   if (use_implicit_batch) {
280     *batch_size = shape.dim_size(0);
281   }
282 
283   // Don't convert empty tensors (dim value of 0).
284   const int first_trt_dim = use_implicit_batch ? 1 : 0;
285   for (int d = first_trt_dim; d < shape.dims(); ++d) {
286     if (shape.dim_size(d) == 0) {
287       return errors::Unimplemented(
288           "Input tensor with shape ", shape.DebugString(),
289           " is an empty tensor, which is not supported by TRT");
290     }
291   }
292 
293   if (validation_only) return Status::OK();
294   // Following are validations at runtime.
295 
296   for (int d = first_trt_dim; d < shape.dims(); ++d) {
297     if (shape.dim_size(d) < 0) {
298       return errors::InvalidArgument(
299           "Input tensor with shape ", shape.DebugString(),
300           " has an unknown non-batch dimension at dim ", d);
301     }
302   }
303   return Status::OK();
304 }
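
// Illustrative example (assumed shapes): with use_implicit_batch = true, a
// known shape [8, 224, 224, 3] validates to trt_dims = [224, 224, 3] and
// batch_size = 8, while a scalar produced by a non-Const node is rejected.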
305 
306 Status GetTrtBroadcastShape(const TRT_TensorOrWeights& operand_l,
307                             const TRT_TensorOrWeights& operand_r,
308                             const bool check_feasibility,
309                             const bool use_implicit_batch,
310                             nvinfer1::Dims* operand_l_new_dims,
311                             nvinfer1::Dims* operand_r_new_dims) {
312   // TensorRT elementwise ops support broadcasting but require both tensors to
313   // be of identical rank.
314   //
315   // We consider case of:
316   //   1. operand_l to be a Tensor & operand_r to be a Const;
317   //   2. operand_l to be a Tensor & operand_r to be a Tensor;
318   // note: const op const (constant folding) should fallback to TensorFlow
319   //
320   // broadcast scheme:
321   //       T:  1 3 5    (tensor would not have batch dimension)
322   //       W:  1 1 3 1  (weight would have all explicit dimensions)
323   // i. fill in explicit dimensions
324   //    -> T: -1 1 3 5  (we put a -1 for batch dimension)
325   //    -> W:  1 1 3 1
326   // ii. compare broadcast feasibility
327   //
328   // We cannot support the following since TensorRT does not allow manipulation
329   // on batch dimension, we cannot generate output with proper shape
330   //    T: 3 5 1
331   //    W: 1 1 1  1 3 5 1
332   // -> T: 1 1 1 -1 3 5 1
333   // -> W: 1 1 1  1 3 5 1
334   // ***************************************************************************
335   if (!operand_l.is_tensor() && !operand_r.is_tensor()) {
336     return errors::InvalidArgument(
337         "Broadcasting requires at least one of the operands be tensors");
338   }
339 
340   const int max_nb_dims = nvinfer1::Dims::MAX_DIMS + 1;
341   auto compute_output_dims = [use_implicit_batch](
342                                  const TRT_TensorOrWeights& input,
343                                  int broadcast_num_dims, int* output_dims_array,
344                                  nvinfer1::Dims* output_dims) {
345     const nvinfer1::Dims input_dims = input.GetTrtDims();
346     std::fill(output_dims_array, output_dims_array + max_nb_dims, 1);
347     std::copy(input_dims.d, input_dims.d + input_dims.nbDims,
348               output_dims_array + broadcast_num_dims - input_dims.nbDims);
349     if (use_implicit_batch && input.is_tensor()) {
350       const int true_input_dims = input_dims.nbDims + 1;
351       if (true_input_dims < broadcast_num_dims) {
352         return errors::InvalidArgument(
353             "Broadcasting beyond batch dimension is not supported ",
354             "(tensor #dims ", true_input_dims, " vs broadcast #dims ",
355             broadcast_num_dims, ")");
356       }
357       // Set the batch dimension to -1, since batch size is not supposed to
358       // be broadcasted.
359       output_dims_array[0] = -1;
360     }
361     // Copy to output dimensions
362     if (use_implicit_batch) {
363       // Strip batch dimension while copying
364       output_dims->nbDims = broadcast_num_dims - 1;
365       std::copy(output_dims_array + 1, output_dims_array + broadcast_num_dims,
366                 output_dims->d);
367     } else {
368       output_dims->nbDims = broadcast_num_dims;
369       std::copy(output_dims_array, output_dims_array + broadcast_num_dims,
370                 output_dims->d);
371     }
372 
373     return Status::OK();
374   };
375 
376   // Compute the output dimensions.
377   const int broadcast_num_dims =
378       std::max(operand_l.GetTrtDims().nbDims +
379                    (use_implicit_batch && operand_l.is_tensor()),
380                operand_r.GetTrtDims().nbDims +
381                    (use_implicit_batch && operand_r.is_tensor()));
382   int output_l[max_nb_dims], output_r[max_nb_dims];
383   TF_RETURN_IF_ERROR(compute_output_dims(operand_l, broadcast_num_dims,
384                                          output_l, operand_l_new_dims));
385   TF_RETURN_IF_ERROR(compute_output_dims(operand_r, broadcast_num_dims,
386                                          output_r, operand_r_new_dims));
387 
388   // Compare broadcast feasibility
389   if (check_feasibility) {
390     for (int i = 0; i < broadcast_num_dims; ++i) {
391       if ((output_l[i] != output_r[i]) && (output_l[i] != 1) &&
392           (output_r[i] != 1)) {
393         return errors::InvalidArgument("Infeasible broadcast scheme (",
394                                        "batch_dim: ", output_l[0], ", ",
395                                        DebugString(*operand_l_new_dims), " vs ",
396                                        "batch_dim: ", output_r[0], ", ",
397                                        DebugString(*operand_r_new_dims), ")");
398       }
399     }
400   }
401   return Status::OK();
402 }
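
// Illustrative example (implicit batch, shapes assumed): a tensor with TRT
// dims [1, 3, 5] and weights with dims [1, 1, 3, 1] broadcast to
// operand_l_new_dims = [1, 3, 5] and operand_r_new_dims = [1, 3, 1], with the
// batch dimension stripped from the tensor side.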
403 
404 nvinfer1::ITensor* Converter::CreateConstantLayer(
405     const TRT_ShapedWeights& weights, const nvinfer1::Dims& dims) {
406   nvinfer1::Weights trt_weights = weights.GetTrtWeights();
407   nvinfer1::IConstantLayer* layer = network()->addConstant(dims, trt_weights);
408   if (!layer) return nullptr;
409   nvinfer1::ITensor* trt_tensor = layer->getOutput(0);
410 #if !IS_TRT_VERSION_GE(5, 1, 3, 0)
411   // TODO(laigd): there is a bug in TensorRT 5.0 library that, if we don't set
412   // the data type below, it will always be kFLOAT regardless of what the data
413   // type of the weights is. Once NVIDIA fixes this bug, we should remove the
414   // data type setting logic below and the test should still pass.
415   trt_tensor->setType(trt_weights.type);
416 #endif
417   return trt_tensor;
418 }
419 
420 Status CreateBroadcastableScalarConstant(OpConverterParams* params, float value,
421                                          const nvinfer1::Dims& dims,
422                                          nvinfer1::ITensor** tensor,
423                                          const char* dtype_attr_name = "T") {
424   nvinfer1::DataType trt_dtype =
425       nvinfer1::DataType::kFLOAT;  // Default to FP32.
426   TFAttrs attrs(params->node_def);
427   if (attrs.count(dtype_attr_name)) {
428     DataType dtype = attrs.get<DataType>(dtype_attr_name);
429     TF_RETURN_IF_ERROR(TfDataTypeToTrt(dtype, &trt_dtype));
430   }
431 
432   // In order to be broadcastable, the number of dims has to match.
433   nvinfer1::Dims broadcastable_dims(dims);
434   for (int i = 0; i < broadcastable_dims.nbDims; i++) {
435     broadcastable_dims.d[i] = 1;
436   }
437   TRT_ShapedWeights weights =
438       params->weight_store->GetTempWeights(trt_dtype, broadcastable_dims);
439   void* raw_ptr = weights.GetValues();
440   switch (trt_dtype) {
441     case nvinfer1::DataType::kFLOAT:
442       static_cast<float*>(raw_ptr)[0] = value;
443       break;
444     case nvinfer1::DataType::kHALF:
445       static_cast<Eigen::half*>(raw_ptr)[0] = Eigen::half(value);
446       break;
447     default:
448       return errors::InvalidArgument("Unsupported data type ",
449                                      DebugString(trt_dtype));
450   }
451   *tensor = params->converter->CreateConstantLayer(weights, broadcastable_dims);
452   TFTRT_RETURN_ERROR_IF_NULLPTR(*tensor, params->node_def.name());
453   params->converter->ProvideQuantizationRange(*tensor, value, value);
454   return Status::OK();
455 }
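
// E.g. (illustrative): value = 1.0f with a rank-3 dims argument yields a
// [1, 1, 1] constant tensor (FP32 unless the node's dtype attribute requests
// FP16), with its quantization range set to [1.0f, 1.0f].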
456 
457 // Convert an axis from TF format to TRT format while validating. TF format
458 // includes the batch dimension, while TRT does not if implicit batching is used
459 // (i.e. for tensors). TF can also use negative indices.
460 Status ConvertAxis(int tf_axis, int trt_nb_dims, absl::string_view node_name,
461                    bool use_implicit_batch, int* trt_axis) {
462   const int tf_nb_dims = trt_nb_dims + (use_implicit_batch ? 1 : 0);
463   // Check bounds.
464   if (tf_axis < -tf_nb_dims || tf_axis >= tf_nb_dims) {
465     return errors::InvalidArgument(
466         "Axis value of ", tf_axis, " is out of bounds, must be in range [",
467         -tf_nb_dims, ", ", tf_nb_dims, "), at ", node_name);
468   }
469   // Make negative axis positive.
470   if (tf_axis < 0) tf_axis += tf_nb_dims;
471   // Don't allow axis to be the batch dimension.
472   if (use_implicit_batch && tf_axis == 0) {
473     return errors::Unimplemented(
474         "TensorRT does not allow manipulation of the batch dimension, at ",
475         node_name);
476   }
477   // Remove batch dimension if it is implicit.
478   *trt_axis = use_implicit_batch ? tf_axis - 1 : tf_axis;
479   return Status::OK();
480 }
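
// E.g. (illustrative): with use_implicit_batch = true and trt_nb_dims = 3
// (TF rank 4 including the batch), tf_axis = -1 maps to trt_axis = 2, and
// tf_axis = 0 is rejected because it addresses the batch dimension.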
481 
482 inline bool DimsEqual(const nvinfer1::Dims& dim_l,
483                       const nvinfer1::Dims& dim_r) {
484   if (dim_l.nbDims != dim_r.nbDims) {
485     return false;
486   }
487   for (int i = 0; i < dim_l.nbDims; i++) {
488     if (dim_l.d[i] != dim_r.d[i]) {
489       return false;
490     }
491   }
492   return true;
493 }
494 
495 bool AllLengthsEqual(const std::vector<std::vector<int>>& inputs) {
496   if (inputs.size() == 0) return true;
497   int length = inputs.at(0).size();
498   for (int i = 1; i < inputs.size(); i++) {
499     if (inputs.at(i).size() != length) return false;
500   }
501   return true;
502 }
503 
504 inline nvinfer1::Dims GetTrtDimsForTensor(const Tensor& tensor) {
505   nvinfer1::Dims dims;
506   dims.nbDims = tensor.dims();
507   for (int i = 0; i < dims.nbDims; i++) {
508     dims.d[i] = tensor.dim_size(i);
509   }
510   return dims;
511 }
512 
513 int64_t Prod(const nvinfer1::Dims& dims) {
514   int64_t count = 1;
515   for (int d = 0; d < dims.nbDims; ++d) {
516     count *= dims.d[d];
517   }
518   return count;
519 }
520 
521 // Returns total number of elements in a TensorRT weights dimensions.
522 // Returning 0 means either some dim is 0 or the number of dims is 0 (TensorRT
523 // doesn't allow scalar weights).
524 // Note that for TF scalar constant, we always convert to dims [1].
525 int64_t TrtWeightDimsNumElements(const nvinfer1::Dims& dims) {
526   if (dims.nbDims == 0) return 0;
527   return Prod(dims);
528 }
529 
530 // Returns total number of elements in an ITensor dimension.
531 // Returns 1 if the number of dims is 0 (the total number is fully determined by
532 // the batch size).
533 // Returns -1 if any dimension is unknown.
534 int64_t TrtTensorDimsNumElements(const nvinfer1::Dims& dims) {
535   if (!HasStaticShape(dims)) return -1;
536   return Prod(dims);
537 }
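
// E.g. (illustrative): for weights, dims [2, 3] -> 6 and an empty Dims
// (nbDims = 0) -> 0; for a tensor, dims [-1, 3] -> -1 since the shape is not
// fully static.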
538 
539 bool DimsHaveSameSize(const nvinfer1::Dims& lhs, const nvinfer1::Dims& rhs,
540                       bool is_tensor) {
541   if (is_tensor) {
542     return TrtTensorDimsNumElements(lhs) == TrtTensorDimsNumElements(rhs);
543   }
544   return TrtWeightDimsNumElements(lhs) == TrtWeightDimsNumElements(rhs);
545 }
546 
547 // Returns whether both shapes are fully specified and their total numbers of
548 // elements are equal.
549 bool AreDimsStaticWithSameSize(const nvinfer1::Dims& lhs,
550                                const nvinfer1::Dims& rhs, bool is_tensor) {
551   if (!HasStaticShape(lhs) || !HasStaticShape(rhs)) return false;
552   return DimsHaveSameSize(lhs, rhs, is_tensor);
553 }
554 
555 bool AreDimsStaticWithDifferentSize(const nvinfer1::Dims& lhs,
556                                     const nvinfer1::Dims& rhs, bool is_tensor) {
557   if (!HasStaticShape(lhs) || !HasStaticShape(rhs)) return false;
558   return !DimsHaveSameSize(lhs, rhs, is_tensor);
559 }
560 
561 static std::vector<std::pair<int, int>> CreateSamePadding(
562     const nvinfer1::Dims& stride, const nvinfer1::Dims& kernel,
563     const std::vector<int64_t>& input_dims) {
564   std::vector<std::pair<int, int>> padding(input_dims.size());
565   CHECK_EQ(stride.nbDims, input_dims.size());  // TODO(jie): N+C? NC+?
566 
567   for (size_t i = 0; i < input_dims.size(); ++i) {
568     // Formula to calculate the padding
569     int p = ((input_dims[i] - 1) / stride.d[i]) * stride.d[i] + kernel.d[i] -
570             input_dims[i];
571     p = (p > 0) ? p : 0;
572 
573     // Right precedence padding, like in TensorFlow
574     int left = p / 2;
575     int right = p - left;
576 
577     VLOG(2) << "PADDING_" << i << " pre: " << left << ", post: " << right
578             << "paras: " << input_dims[i] << ", " << stride.d[i] << ", "
579             << "kernel: " << kernel.d[i];
580     padding[i] = {left, right};
581   }
582   return padding;
583 }
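
// Worked example (illustrative): input_dim = 5, stride = 2, kernel = 2 gives
// p = ((5 - 1) / 2) * 2 + 2 - 5 = 1, split as {left = 0, right = 1}, i.e. the
// extra padding goes on the right as in TensorFlow's SAME padding.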
584 
585 string GetCommonNameScope(const string& op_name_a, const string& op_name_b) {
586   size_t last_scope_separator = 0;
587   const size_t min_size = std::min(op_name_a.size(), op_name_b.size());
588   for (size_t i = 0; i < min_size; ++i) {
589     if (op_name_a[i] != op_name_b[i]) break;
590     if (op_name_a[i] == '/') last_scope_separator = i + 1;
591   }
592   return op_name_a.substr(0, last_scope_separator);
593 }
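
// E.g. (illustrative): GetCommonNameScope("model/block1/conv",
// "model/block1/bias") returns "model/block1/", and inputs that share no
// scope return "".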
594 
595 // Verifies that shapes of the given inputs match after masking the specified
596 // dimension.
597 Status VerifyShapesMatch(absl::Span<const TRT_TensorOrWeights> inputs,
598                          int masked_dim, absl::string_view node_name) {
599   size_t num_inputs = inputs.size();
600   if (num_inputs <= 1) return Status::OK();
601 
602   const nvinfer1::Dims dims_0 = inputs.at(0).GetTrtDims();
603   for (size_t i = 1; i < num_inputs; ++i) {
604     const nvinfer1::Dims dim_i = inputs.at(i).GetTrtDims();
605     if (dim_i.nbDims != dims_0.nbDims) {
606       return errors::InvalidArgument(
607           "Received inputs with inconsistent rank, at ", node_name);
608     }
609     for (size_t j = 0; j < dims_0.nbDims; ++j) {
610       if (dim_i.d[j] != dims_0.d[j] && j != masked_dim) {
611         return errors::InvalidArgument(
612             "Received inputs with inconsistent shape, at ", node_name);
613       }
614     }
615   }
616   return Status::OK();
617 }
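
// E.g. (illustrative): inputs with dims [4, 8, 32] and [4, 16, 32] pass when
// masked_dim = 1 (the concatenation axis) but fail for any other masked_dim.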
618 
619 TRT_ShapedWeights::TRT_ShapedWeights(nvinfer1::DataType type) : type_(type) {
620   shape_.nbDims = 0;
621 }
622 
623 TRT_ShapedWeights::TRT_ShapedWeights(nvinfer1::DataType type,
624                                      nvinfer1::Dims dims, Tensor tensor)
625     : shape_(dims), type_(type), tensor_(tensor) {}
626 
627 TRT_ShapedWeights::TRT_ShapedWeights(const TRT_ShapedWeights& rhs)
628     : shape_(rhs.shape_), type_(rhs.type_), tensor_(rhs.tensor_) {}
629 
630 int64_t TRT_ShapedWeights::count() const {
631   return TrtWeightDimsNumElements(shape_);
632 }
633 
634 nvinfer1::Weights TRT_ShapedWeights::GetTrtWeights() const {
635   return nvinfer1::Weights{type_, GetValues(), count()};
636 }
637 
638 size_t TRT_ShapedWeights::size_bytes() const {
639   size_t data_type_size = -1;
640   switch (type_) {
641     case nvinfer1::DataType::kFLOAT:
642     case nvinfer1::DataType::kINT32:
643       data_type_size = 4;
644       break;
645     case nvinfer1::DataType::kHALF:
646       data_type_size = 2;
647       break;
648     case nvinfer1::DataType::kINT8:
649       data_type_size = 1;
650       break;
651   }
652   return this->count() * data_type_size;
653 }
654 
655 string TRT_ShapedWeights::DebugString() const {
656   return StrCat(
657       "TRT_ShapedWeights(shape=", tensorflow::tensorrt::DebugString(shape_),
658       ", type=", tensorflow::tensorrt::DebugString(type_),
659       ", values=", reinterpret_cast<uintptr_t>(GetValues()), ")");
660 }
661 
662 // A fake ITensor implementation used to check whether the TF-TRT converter can
663 // handle a specific node. We only need shape and type information, and the
664 // converter won't (and shouldn't) use this to build the TRT network.
665 class TRT_TensorOrWeights::SimpleITensor : public nvinfer1::ITensor {
666  public:
667   SimpleITensor(nvinfer1::DataType trt_dtype, const nvinfer1::Dims& trt_dims)
668       : trt_dtype_(trt_dtype), trt_dims_(trt_dims) {}
669 
670   void setName(const char* name) override {}
671 
672   const char* getName() const override { return ""; }
673 
674   void setDimensions(nvinfer1::Dims dimensions) override {
675     trt_dims_ = dimensions;
676   }
677 
678   nvinfer1::Dims getDimensions() const override { return trt_dims_; }
679 
680   void setType(nvinfer1::DataType trt_dtype) override {
681     trt_dtype_ = trt_dtype;
682   }
683 
684   nvinfer1::DataType getType() const override { return trt_dtype_; }
685 
686   bool isNetworkInput() const override { return false; }
687 
688   bool isNetworkOutput() const override { return false; }
689 
690   void setBroadcastAcrossBatch(bool broadcastAcrossBatch) override {}
691 
692   bool getBroadcastAcrossBatch() const override { return false; }
693 
694   nvinfer1::TensorLocation getLocation() const override {
695     // This is arbitrary, since we don't use it.
696     return nvinfer1::TensorLocation::kDEVICE;
697   }
698 
699   void setLocation(nvinfer1::TensorLocation location) override {}
700 
701 #if IS_TRT_VERSION_GE(5, 0, 0, 0)
702   bool setDynamicRange(float min, float max) override { return true; }
703 
704   float getDynamicRange() const override { return 0; }
705 #endif
706 
707 #if IS_TRT_VERSION_GE(5, 1, 0, 0)
708   bool dynamicRangeIsSet() const override { return true; }
709 
710   void resetDynamicRange() override {}
711 
712   float getDynamicRangeMin() const override { return 0.f; }
713 
714   float getDynamicRangeMax() const override { return 0.f; }
715 #endif
716 
717 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
718   void setAllowedFormats(nvinfer1::TensorFormats formats) override {}
719 
720   nvinfer1::TensorFormats getAllowedFormats() const override { return 1; }
721 
722   bool isShapeTensor() const override { return false; }
723 
724   bool isExecutionTensor() const override { return true; }
725 #endif
726 
727  private:
728   nvinfer1::DataType trt_dtype_;
729   nvinfer1::Dims trt_dims_;
730 };
731 
732 TRT_TensorOrWeights::TRT_TensorOrWeights(nvinfer1::ITensor* tensor,
733                                          int batch_size)
734     : tensor_(tensor),
735       batch_size_(batch_size),
736       initialized_(true),
737       is_tensor_(true) {}
738 
739 TRT_TensorOrWeights::TRT_TensorOrWeights(nvinfer1::DataType trt_dtype,
740                                          const nvinfer1::Dims& trt_dims,
741                                          int batch_size)
742     : simple_itensor_(new SimpleITensor(trt_dtype, trt_dims)),
743       batch_size_(batch_size),
744       initialized_(true),
745       is_tensor_(true) {}
746 
747 TRT_TensorOrWeights::TRT_TensorOrWeights(const TRT_ShapedWeights& weights)
748     : weights_(weights), initialized_(true), is_tensor_(false) {}
749 
750 TRT_TensorOrWeights::TRT_TensorOrWeights(const TRT_TensorOrWeights& rhs)
751     : tensor_(rhs.tensor_),
752       simple_itensor_(rhs.simple_itensor_),
753       batch_size_(rhs.batch_size_),
754       weights_(rhs.weights_),
755       initialized_(rhs.initialized_),
756       is_tensor_(rhs.is_tensor_) {}
757 
758 void TRT_TensorOrWeights::operator=(const TRT_TensorOrWeights& rhs) {
759   tensor_ = rhs.tensor_;
760   simple_itensor_ = rhs.simple_itensor_;
761   batch_size_ = rhs.batch_size_;
762   weights_ = rhs.weights_;
763   initialized_ = rhs.initialized_;
764   is_tensor_ = rhs.is_tensor_;
765 }
766 
767 nvinfer1::ITensor* TRT_TensorOrWeights::tensor() const {
768   CHECK(is_tensor());
769   return tensor_ == nullptr ? simple_itensor_.get() : tensor_;
770 }
771 
772 nvinfer1::Dims TRT_TensorOrWeights::GetTrtDims() const {
773   if (is_tensor()) {
774     return tensor()->getDimensions();
775   } else {
776     return weights().shape_;
777   }
778 }
779 
780 string TRT_TensorOrWeights::DebugString() const {
781   string output = "TRT_TensorOrWeights(type=";
782   if (is_tensor()) {
783     StrAppend(&output, "tensor=", tensorflow::tensorrt::DebugString(*tensor()),
784               ", batch_size=", batch_size_);
785   } else {
786     StrAppend(&output, "weights=", weights_.DebugString());
787   }
788   StrAppend(&output, ")");
789   return output;
790 }
791 
792 // Performs a 5-dimensional reorder of data on the CPU. This is done once at
793 // convert time and does not affect GPU inference performance.
794 // Example: reorder NDHWC (TensorFlow) -> NCDHW (TensorRT).
795 template <typename T>
796 void Reorder5(const nvinfer1::Dims& shape, const T* idata,
797               const nvinfer1::Dims& istrides, T* odata,
798               const nvinfer1::Dims& ostrides) {
799   for (int k = 0; k < shape.d[0]; ++k) {
800     for (int c = 0; c < shape.d[1]; ++c) {
801       for (int d = 0; d < shape.d[2]; ++d) {
802         for (int r = 0; r < shape.d[3]; ++r) {
803           for (int s = 0; s < shape.d[4]; ++s) {
804             odata[k * ostrides.d[0] + c * ostrides.d[1] + d * ostrides.d[2] +
805                   r * ostrides.d[3] + s * ostrides.d[4]] =
806                 idata[k * istrides.d[0] + c * istrides.d[1] +
807                       d * istrides.d[2] + r * istrides.d[3] +
808                       s * istrides.d[4]];
809           }
810         }
811       }
812     }
813   }
814 }
815 
816 // TODO(jie): reorder4 & reorder2 should be merged?
817 // TODO(aaroey): fix the order of parameters.
818 template <typename T>
819 void Reorder4(const nvinfer1::DimsNCHW& shape, const T* idata,
820               const nvinfer1::DimsNCHW& istrides, T* odata,
821               const nvinfer1::DimsNCHW& ostrides) {
822   for (int n = 0; n < shape.n(); ++n) {
823     for (int c = 0; c < shape.c(); ++c) {
824       for (int h = 0; h < shape.h(); ++h) {
825         for (int w = 0; w < shape.w(); ++w) {
826           odata[n * ostrides.n() + c * ostrides.c() + h * ostrides.h() +
827                 w * ostrides.w()] = idata[n * istrides.n() + c * istrides.c() +
828                                           h * istrides.h() + w * istrides.w()];
829         }
830       }
831     }
832   }
833 }
834 
835 template <typename T>
836 void Reorder2(const nvinfer1::DimsHW& shape, const T* idata,
837               const nvinfer1::DimsHW& istrides, T* odata,
838               const nvinfer1::DimsHW& ostrides) {
839   for (int h = 0; h < shape.h(); ++h) {
840     for (int w = 0; w < shape.w(); ++w) {
841       odata[h * ostrides.h() + w * ostrides.w()] =
842           idata[h * istrides.h() + w * istrides.w()];
843     }
844   }
845 }
846 
847 // TODO(jie): fallback to tensorflow!!
848 void ReorderCKtoKC(const TRT_ShapedWeights& iweights,
849                    TRT_ShapedWeights* oweights) {
850   const int c = iweights.shape_.d[0];
851   const int k = iweights.shape_.d[1];
852   oweights->shape_.d[0] = k;
853   oweights->shape_.d[1] = c;
854   const nvinfer1::DimsHW istrides = {1, k};
855   const nvinfer1::DimsHW ostrides = {c, 1};
856   switch (iweights.TrtDType()) {
857     case nvinfer1::DataType::kFLOAT: {
858       Reorder2({k, c}, static_cast<float const*>(iweights.GetValues()),
859                istrides, static_cast<float*>(oweights->GetValues()), ostrides);
860       break;
861     }
862     case nvinfer1::DataType::kHALF: {
863       Reorder2({k, c}, static_cast<Eigen::half const*>(iweights.GetValues()),
864                istrides, static_cast<Eigen::half*>(oweights->GetValues()),
865                ostrides);
866       break;
867     }
868     default:
869       LOG(FATAL) << "Unsupported type in reorder expected fp32 or fp16 but got "
870                  << DebugString(iweights.TrtDType());
871   }
872 }
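
// E.g. (illustrative): a row-major CK matrix with C = 2, K = 3 and values
// {1, 2, 3, 4, 5, 6} is transposed into the KC order {1, 4, 2, 5, 3, 6}.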
873 
874 void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights,
875                        TRT_ShapedWeights* oweights, const int num_groups) {
876   CHECK(iweights.TrtDType() == oweights->TrtDType());
877   CHECK_EQ(iweights.size_bytes(), oweights->size_bytes());
878   // K indexes over output channels, C over input channels, and R and S over the
879   // height and width of the convolution
880   const int r = iweights.shape_.d[0];
881   const int s = iweights.shape_.d[1];
882   // TRT requires GKcRS, while TF depthwise has RSCK where c=1, C=G
883   const int c = iweights.shape_.d[2] / num_groups;
884   const int k = iweights.shape_.d[3] * num_groups;
885   VLOG(2) << "num_groups: " << num_groups << "c" << iweights.shape_.d[2]
886           << " then " << c << "k" << iweights.shape_.d[3] << " then " << k
887           << "r" << iweights.shape_.d[0] << " then " << r << "s"
888           << iweights.shape_.d[1] << " then " << s;
889   oweights->shape_.d[0] = k / num_groups;
890   oweights->shape_.d[1] = c * num_groups;
891   oweights->shape_.d[2] = r;
892   oweights->shape_.d[3] = s;
893   const nvinfer1::DimsNCHW istrides = {1, k, s * k * c, c * k};
894   const nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1};
895   switch (iweights.TrtDType()) {
896     case nvinfer1::DataType::kFLOAT: {
897       Reorder4({k, c, r, s}, static_cast<float const*>(iweights.GetValues()),
898                istrides, static_cast<float*>(oweights->GetValues()), ostrides);
899       break;
900     }
901     case nvinfer1::DataType::kHALF: {
902       Reorder4({k, c, r, s},
903                static_cast<Eigen::half const*>(iweights.GetValues()), istrides,
904                static_cast<Eigen::half*>(oweights->GetValues()), ostrides);
905       break;
906     }
907 
908     default:
909       LOG(FATAL) << "Unsupported type, expected fp32 or fp16 but got "
910                  << DebugString(iweights.TrtDType());
911   }
912 }
913 
914 // Initializes a Dims object from an arbitrary list of dimensions.
915 nvinfer1::Dims InitDimsN(std::initializer_list<int> list) {
916   nvinfer1::Dims dim;
917   dim.nbDims = list.size();
918   std::copy(list.begin(), list.end(), dim.d);
919   return dim;
920 }
921 
922 // Reorder 3D convolution weights from TF to TRT
923 void ReorderDRSCKToKCDRS(const TRT_ShapedWeights& iweights,
924                          TRT_ShapedWeights* oweights, const int num_groups) {
925   DCHECK(iweights.TrtDType() == oweights->TrtDType());
926   CHECK_EQ(iweights.size_bytes(), oweights->size_bytes());
927   // K indexes over output channels, C over input channels, and R, S, D over the
928   // height, width, depth
929   const int d = iweights.shape_.d[0];
930   const int r = iweights.shape_.d[1];
931   const int s = iweights.shape_.d[2];
932   // TRT requires GKcRS, while TF depthwise has RSCK where c=1, C=G
933   const int c = iweights.shape_.d[3] / num_groups;
934   const int k = iweights.shape_.d[4] * num_groups;
935 
936   VLOG(2) << "num_groups: " << num_groups << ", c: " << iweights.shape_.d[3]
937           << " becomes " << c << ", k: " << iweights.shape_.d[4] << " becomes "
938           << k << ", d: " << d << ", r: " << r << ", s: " << s;
939 
940   oweights->shape_.d[0] = iweights.shape_.d[4];  // k / num_groups;
941   oweights->shape_.d[1] = iweights.shape_.d[3];  // c * num_groups;
942   oweights->shape_.d[2] = d;
943   oweights->shape_.d[3] = r;
944   oweights->shape_.d[4] = s;
945 
946   nvinfer1::Dims shape =
947       InitDimsN({k, c, d, r, s});  // KCDRS shape (same as output)
948 
949   nvinfer1::Dims ostrides =
950       InitDimsN({c * d * r * s, d * r * s, r * s, s,
951                  1});  // Output = KCDRS = k*CDRS + c*DRS + d*RS + r*S + s
952 
953   nvinfer1::Dims istrides =
954       InitDimsN({1, k, r * s * c * k, s * c * k,
955                  c * k});  // Input = DRSCK = k*1 + c*K + d*RSCK + r*SCK + s*CK
956 
957   switch (iweights.TrtDType()) {
958     case nvinfer1::DataType::kFLOAT: {
959       Reorder5(shape, static_cast<float const*>(iweights.GetValues()), istrides,
960                static_cast<float*>(oweights->GetValues()), ostrides);
961       break;
962     }
963     case nvinfer1::DataType::kHALF: {
964       Reorder5(shape, static_cast<Eigen::half const*>(iweights.GetValues()),
965                istrides, static_cast<Eigen::half*>(oweights->GetValues()),
966                ostrides);
967       break;
968     }
969     default:
970       LOG(FATAL) << "Unsupported type, expected fp32 or fp16 but got "
971                  << DebugString(iweights.TrtDType());
972   }
973 }
974 
975 TRT_ShapedWeights TrtWeightStore::GetTempWeights(nvinfer1::DataType trt_dtype,
976                                                  const nvinfer1::Dims& dims) {
977   TensorShape shape;
978   DataType tf_dtype;
979   // TODO(laigd): make it return a status.
980   TF_CHECK_OK(TensorShapeUtils::MakeShape(dims.d, dims.nbDims, &shape));
981   TF_CHECK_OK(TrtDataTypeToTf(trt_dtype, &tf_dtype));
982   // TODO(jie): check weights size_bytes. 0 means type error
983   Tensor tensor(tf_dtype, shape);
984   TRT_ShapedWeights weights(trt_dtype, dims, tensor);
985   store_.emplace_back(std::move(tensor));
986   return weights;
987 }
988 
989 OpConverterParams::OpConverterParams(
990     const NodeDef& node_def, const std::vector<TRT_TensorOrWeights>& inputs,
991     std::vector<TRT_TensorOrWeights>* outputs, TrtWeightStore* weight_store,
992     TrtPrecisionMode precision_mode, bool use_calibration,
993     bool use_implicit_batch)
994     : node_def(node_def),
995       inputs(inputs),
996       outputs(outputs),
997       validation_only(true),
998       weight_store(weight_store),
999       precision_mode(precision_mode),
1000       use_calibration(use_calibration),
1001       use_implicit_batch(use_implicit_batch) {}
1002 
1003 OpConverterParams::OpConverterParams(
1004     Converter* converter, const NodeDef& node_def,
1005     const std::vector<TRT_TensorOrWeights>& inputs,
1006     std::vector<TRT_TensorOrWeights>* outputs, TrtWeightStore* weight_store)
1007     : converter(converter),
1008       node_def(node_def),
1009       inputs(inputs),
1010       outputs(outputs),
1011       validation_only(false),
1012       weight_store(weight_store),
1013       precision_mode(converter->precision_mode()),
1014       use_calibration(converter->use_calibration()),
1015       use_implicit_batch(converter->use_implicit_batch()) {}
1016 
1017 const std::set<string>* TrtNodeValidator::quantize_ops = new std::set<string>{
1018     "QuantizeAndDequantizeV2",
1019     "QuantizeAndDequantizeV3",
1020     "FakeQuantWithMinMaxVars",
1021     "FakeQuantWithMinMaxArgs",
1022 };
1023 
1024 TrtNodeValidator::TrtNodeValidator(
1025     const grappler::GraphProperties& graph_properties,
1026     TrtPrecisionMode precision_mode, bool use_calibration,
1027     bool use_implicit_batch)
1028     : graph_properties_(graph_properties),
1029       precision_mode_(precision_mode),
1030       use_calibration_(use_calibration),
1031       use_implicit_batch_(use_implicit_batch) {
1032   RegisterOpValidators();
1033 }
1034 
1035 Status TrtNodeValidator::ConvertToTensorOrWeights(
1036     const NodeDef& node_def, int output_port,
1037     TRT_TensorOrWeights* tensor_or_weights) {
1038   if (node_def.op() == "Const") {
1039     if (output_port != 0) {
1040       return errors::InvalidArgument("Const node should only have one output.");
1041     }
1042     // The output of the conversion will be used as input to other nodes to
1043     // determine whether TRT supports those nodes. If it cannot convert the
1044     // Const, it's very likely we cannot treat it as a tensor and make it an
1045     // input to the TRT network, since TRT removes the first dimension and
1046     // treats it as batch size. Also, it's not likely that the converter can
1047     // support the op, and performance may suffer even if it can, so we simply
1048     // return an error if the conversion fails.
1049     std::vector<TRT_TensorOrWeights> inputs;
1050     return ConvertConstToWeights(node_def, inputs, tensor_or_weights);
1051   }
1052   if (!graph_properties_.HasOutputProperties(node_def.name())) {
1053     return errors::InvalidArgument("Shape and data type are unknown");
1054   }
1055 
1056   // Validate and convert shape and dtype.
1057   const auto& output_params =
1058       graph_properties_.GetOutputProperties(node_def.name());
1059   const auto& tensor_properties = output_params.at(output_port);
1060   const DataType dtype = tensor_properties.dtype();
1061   const PartialTensorShape shape = tensor_properties.shape();
1062   nvinfer1::DataType trt_dtype;
1063   nvinfer1::Dims trt_dims;
1064   int batch_size = -1;
1065   TF_RETURN_IF_ERROR(ValidateTensorProperties(
1066       node_def.op(), dtype, shape, use_implicit_batch_,
1067       /*validation_only_=*/true, &trt_dtype, &trt_dims, &batch_size));
1068 
1069   // Adds a fake ITensor. This is fine since the op converter operates in
1070   // validation-only mode and it won't (and shouldn't) use the tensor to do
1071   // any TRT network operations.
1072   *tensor_or_weights = TRT_TensorOrWeights(trt_dtype, trt_dims, batch_size);
1073   return Status::OK();
1074 }
1075 
1076 Status TrtNodeValidator::IsTensorRTCandidate(const Node* node) {
1077   const string& op = node->def().op();
1078   // In INT8 mode, we will always apply the quantization ranges provided by
1079   // these ops to the relevant tensors. This happens regardless of the value of
1080   // use_calibration.
1081   bool is_supported_op = false;
1082   if (quantize_ops->count(op)) {
1083     is_supported_op = (precision_mode_ == TrtPrecisionMode::INT8);
1084   } else {
1085     is_supported_op = op_validators_.count(op);
1086   }
1087   if (!is_supported_op) {
1088     return errors::Unimplemented("Op type ", op, " is not supported.");
1089   }
1090 
1091   // Convert input NodeDef and corresponding output ports to
1092   // TRT_TensorOrWeights.
1093   std::vector<TRT_TensorOrWeights> inputs;
1094   std::vector<const Edge*> input_edges;
1095   TF_RETURN_IF_ERROR(node->input_edges(&input_edges));
1096   for (const Edge* edge : input_edges) {
1097     TRT_TensorOrWeights tensor_or_weights;
1098     const NodeDef& src_def = edge->src()->def();
1099     Status status = ConvertToTensorOrWeights(src_def, edge->src_output(),
1100                                              &tensor_or_weights);
1101     if (!status.ok()) {
1102       return errors::Internal(
1103           "Failed to convert input ", src_def.name(),
1104           " to a TRT_TensorOrWeights: ", status.error_message());
1105     }
1106     inputs.push_back(tensor_or_weights);
1107   }
1108 
1109   OpConverter validator = op_validators_[op];
1110   OpConverterParams params(node->def(), inputs, /*arg_outputs=*/nullptr,
1111                            &weight_store_, precision_mode_, use_calibration_,
1112                            use_implicit_batch_);
1113   return validator(&params);
1114 }
1115 
1116 Status TrtNodeValidator::ConvertConstToWeights(
1117     const NodeDef& const_node_def,
1118     const std::vector<TRT_TensorOrWeights>& inputs,
1119     TRT_TensorOrWeights* output) {
1120   std::vector<TRT_TensorOrWeights> outputs;
1121   OpConverterParams params(const_node_def, inputs, &outputs, &weight_store_,
1122                            precision_mode_, use_calibration_,
1123                            use_implicit_batch_);
1124   Status status = op_validators_["Const"](&params);
1125   if (status.ok() && output) *output = outputs[0];
1126   return status;
1127 }
1128 
1129 static void InitializeTrtPlugins(nvinfer1::ILogger* trt_logger) {
1130   static mutex plugin_mutex(LINKER_INITIALIZED);
1131   static bool plugin_initialized = false;
1132   mutex_lock lock(plugin_mutex);
1133   if (plugin_initialized) return;
1134 
1135   LOG(INFO) << "Linked TensorRT version: " << GetLinkedTensorRTVersion();
1136   LOG(INFO) << "Loaded TensorRT version: " << GetLoadedTensorRTVersion();
1137 
1138   plugin_initialized = initLibNvInferPlugins(trt_logger, "");
1139   if (!plugin_initialized) {
1140     LOG(ERROR) << "Failed to initialize TensorRT plugins, and conversion may "
1141                   "fail later.";
1142   }
1143 
1144   int num_trt_plugins = 0;
1145   nvinfer1::IPluginCreator* const* trt_plugin_creator_list =
1146       getPluginRegistry()->getPluginCreatorList(&num_trt_plugins);
1147   if (!trt_plugin_creator_list) {
1148     LOG(WARNING) << "Can not find any TensorRT plugins in registry.";
1149   } else {
1150     VLOG(1) << "Found the following " << num_trt_plugins
1151             << " TensorRT plugins in registry:";
1152     for (int i = 0; i < num_trt_plugins; ++i) {
1153       if (!trt_plugin_creator_list[i]) {
1154         LOG(WARNING) << "TensorRT plugin at index " << i
1155                      << " is not accessible (null pointer returned by "
1156                         "getPluginCreatorList for this plugin)";
1157       } else {
1158         VLOG(1) << "  " << trt_plugin_creator_list[i]->getPluginName();
1159       }
1160     }
1161   }
1162 }
1163 
1164 // static
1165 StatusOr<std::unique_ptr<Converter>> Converter::Create(
1166     TrtPrecisionMode precision_mode, bool use_calibration,
1167     nvinfer1::ILogger* trt_logger, const bool use_implicit_batch) {
1168   std::unique_ptr<Converter> converter = absl::WrapUnique(new Converter(
1169       precision_mode, use_calibration, trt_logger, use_implicit_batch));
1170   TF_RETURN_IF_ERROR(converter->Init(trt_logger));
1171   return converter;
1172 }
1173 
1174 Converter::Converter(TrtPrecisionMode precision_mode, bool use_calibration,
1175                      nvinfer1::ILogger* trt_logger,
1176                      const bool use_implicit_batch)
1177     : precision_mode_(precision_mode),
1178       use_calibration_(use_calibration),
1179       use_implicit_batch_(use_implicit_batch) {
1180   InitializeTrtPlugins(trt_logger);
1181   this->RegisterOpConverters();
1182 }
1183 
1184 Status Converter::Init(nvinfer1::ILogger* trt_logger) {
1185   VLOG(1) << "Creating TensorRT builder";
1186   trt_builder_.reset(nvinfer1::createInferBuilder(*trt_logger));
1187 
1188   VLOG(1) << "Creating TensorRT network";
1189 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
1190   const uint32_t flags =
1191       use_implicit_batch_
1192           ? 0U
1193           : (1U << static_cast<int>(
1194                  nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));
1195   trt_network_.reset(trt_builder_->createNetworkV2(flags));
1196 #else
1197   trt_network_.reset(trt_builder_->createNetwork());
1198 #endif
1199   if (!trt_network_) {
1200     return errors::Internal("Failed to create TensorRT network object");
1201   }
1202   return Status::OK();
1203 }
1204 
1205 Status Converter::ConvertNode(const NodeDef& node_def) {
1206   std::vector<TRT_TensorOrWeights> inputs, outputs;
1207   TF_RETURN_IF_ERROR(this->GetInputs(node_def, &inputs));
1208 
1209   OpConverterParams params(this, node_def, inputs, &outputs, &weight_store_);
1210   const string& op = node_def.op();
1211   auto itr = op_registry_.find(op);
1212   if (itr == op_registry_.end()) {
1213     return errors::Unimplemented("No converter registered for op: ", op);
1214   }
1215   OpConverter op_converter = itr->second;
1216   TF_RETURN_IF_ERROR(op_converter(&params));
1217 
1218   for (size_t i = 0; i < outputs.size(); ++i) {
1219     TRT_TensorOrWeights& output = outputs[i];
1220     string output_name = node_def.name();
1221     if (i != 0) absl::StrAppend(&output_name, ":", i);
1222     // We need to check the name before setting it. If the input is one of the
1223     // engine inputs, setting the name here will overwrite the engine input
1224     // bindings, which will cause a runtime error.
1225     // TODO(tmorris): Remove this work-around once we use TRT's IIdentityLayer
1226     // in ConvertIdentity.
1227     if (output.is_tensor()) {
1228       const char* tensor_name = output.tensor()->getName();
1229       if (!IsEngineInput(tensor_name)) {
1230         // TRT initializes tensor names as "(Unnamed ITensor* N)". We rename
1231         // them to match their corresponding TensorFlow name.
1232         // Note: ITensors that we create internally within TF-TRT which are
1233         // not inputs or outputs of a node will not be renamed. This is a
1234         // potential cause of confusion if an error message or warning
1235         // mentions the unnamed tensor.
1236         output.tensor()->setName(output_name.c_str());
1237       }
1238     }
1239     VLOG(2) << "Adding out tensor " << output_name << ": "
1240             << output.DebugString();
1241     Status status = AddTensorOrWeights(output_name, output);
1242     if (!status.ok()) {
1243       return Status(status.code(),
1244                     StrCat("Failed to add output for node ", node_def.name(),
1245                            ": ", status.error_message()));
1246     }
1247   }
1248   return Status::OK();
1249 }
1250 
1251 Status Converter::AddInputTensor(const string& name, nvinfer1::DataType dtype,
1252                                  const nvinfer1::Dims& dims, int batch_size) {
1253   // We verify the batch size only for the input nodes, and rely on the
1254   // individual op converters to ensure the batch size of the outputs is not
1255   // changed. TODO(laigd): we need to test these properties.
1256   Status status = MaybeUpdateBatchSize(batch_size);
1257   if (!status.ok()) {
1258     return Status(status.code(), StrCat("Batch size doesn't match for tensor ",
1259                                         name, ": ", status.error_message()));
1260   }
1261   nvinfer1::ITensor* tensor = network()->addInput(name.c_str(), dtype, dims);
1262   if (tensor == nullptr) {
1263     return errors::InvalidArgument("Failed to create Input layer tensor ", name,
1264                                    " rank=", dims.nbDims);
1265   }
1266   status = AddTensorOrWeights(name, TRT_TensorOrWeights(tensor));
1267   if (!status.ok()) {
1268     return Status(status.code(), StrCat("Failed to add input tensor ", name,
1269                                         ": ", status.error_message()));
1270   }
1271   return Status::OK();
1272 }
1273 
1274 Status Converter::RenameAndMarkOutputTensors(
1275     const std::vector<Converter::EngineOutputInfo>& output_tensors) {
1276   for (const auto& output : output_tensors) {
1277     TRT_TensorOrWeights tensor_or_weights;
1278     TF_RETURN_IF_ERROR(
1279         GetTensorOrWeights(output.source_tensor_name, &tensor_or_weights));
1280     if (!tensor_or_weights.is_tensor()) {
1281       return errors::InvalidArgument("Output ", output.source_tensor_name,
1282                                      " is weights not tensor");
1283     }
1284     nvinfer1::ITensor* tensor = tensor_or_weights.tensor();
1285     if (tensor == nullptr) {
1286       return errors::NotFound("Output tensor not found: ",
1287                               output.source_tensor_name);
1288     }
1289     // Check if this tensor has already been marked as an input or output.
1290     //
1291     // ConvertIdentity can cause the same tensor to be repeated in
1292     // output_tensors, which can cause us to overwrite the name of the output
1293     // tensor binding. For example, if we rename OutputPH_0 to OutputPH_1 then
1294     // we won't be able to locate OutputPH_0 during runtime. To fix this,
1295     // duplicate the tensor using no-op shuffle.
1296     //
1297     // TODO(tmorris): Remove this work-around once we use TRT's IIdentityLayer
1298     // in ConvertIdentity.
1299     if (IsEngineInput(tensor->getName()) || IsEngineOutput(tensor->getName())) {
1300       // Using shuffle layer for identity by not setting reshape or transpose.
1301       nvinfer1::IShuffleLayer* layer = network()->addShuffle(*tensor);
1302       TFTRT_RETURN_ERROR_IF_NULLPTR(
1303           layer, StrCat("Output Copy for ", tensor->getName()));
1304       MarkQuantizationRangesAsInferrable(tensor, layer->getOutput(0));
1305       tensor = layer->getOutput(0);
1306     }
1307     tensor->setName(output.dest_node_name.c_str());
1308     network()->markOutput(*tensor);
1309     // Set type after marking as output. TRT only supports setType for engine
1310     // outputs and inputs (type is inferred otherwise).
1311     tensor->setType(output.trt_dtype);
1312     VLOG(1) << "Marking output TRT tensor " << output.source_tensor_name
1313             << " with data type " << DebugString(output.trt_dtype)
1314             << ", which feeds TF node " << output.dest_node_name;
1315   }
1316   if (VLOG_IS_ON(2)) {
1317     VLOG(2) << "Created TensorRT network with the following layers:";
1318     for (int i = 0; i < network()->getNbLayers(); i++) {
1319       auto layer = network()->getLayer(i);
1320       VLOG(2) << "    " << layer->getName() << " ("
1321               << "type: " << static_cast<int>(layer->getType())
1322               << ", precision: " << static_cast<int>(layer->getPrecision())
1323               << ")";
1324     }
1325   }
1326   return Status::OK();
1327 }
1328 
1329 Status Converter::BuildCudaEngine(
1330     TrtUniquePtrType<nvinfer1::ICudaEngine>* engine, int max_batch_size,
1331     size_t max_workspace_size_bytes, nvinfer1::IGpuAllocator* allocator,
1332     TRTInt8Calibrator* calibrator) {
1333   VLOG(1) << "Configuring TensorRT builder";
1334   trt_builder_->setMaxBatchSize(max_batch_size);
1335   trt_builder_->setGpuAllocator(allocator);
1336 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
1337   // Create a network configuration and use it to build a TRT engine.
1338   TrtUniquePtrType<nvinfer1::IBuilderConfig> builder_config(
1339       trt_builder_->createBuilderConfig());
1340   builder_config->setMaxWorkspaceSize(max_workspace_size_bytes);
1341   if (precision_mode_ == TrtPrecisionMode::FP16) {
1342     builder_config->setFlag(nvinfer1::BuilderFlag::kFP16);
1343   } else if (precision_mode_ == TrtPrecisionMode::INT8) {
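    // Note: enabling the FP16 flag alongside INT8 lets TRT also consider FP16
    // kernels where they are faster than INT8 or where INT8 is not supported
    // for a given layer, mirroring the pre-TRT6 path below.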
1344     builder_config->setFlag(nvinfer1::BuilderFlag::kFP16);
1345     builder_config->setFlag(nvinfer1::BuilderFlag::kINT8);
1346     if (use_calibration_) {
1347       builder_config->setInt8Calibrator(calibrator);
1348     } else {
1349       builder_config->setInt8Calibrator(nullptr);
1350     }
1351   }
1352 
1353   VLOG(1) << "Building TensorRT engine";
1354   engine->reset(
1355       trt_builder_->buildEngineWithConfig(*network(), *builder_config));
1356 #else
1357   trt_builder_->setMaxWorkspaceSize(max_workspace_size_bytes);
1358   if (precision_mode_ == TrtPrecisionMode::FP16) {
1359     trt_builder_->setFp16Mode(true);
1360   } else if (precision_mode_ == TrtPrecisionMode::INT8) {
1361     // Setting FP16 mode as well allows TRT to also consider FP16 kernels and
1362     // use them in situations where they are faster than INT8 or where INT8 is
1363     // not supported for a given layer.
1364     trt_builder_->setFp16Mode(true);
1365     trt_builder_->setInt8Mode(true);
1366     if (use_calibration_) {
1367       trt_builder_->setInt8Calibrator(calibrator);
1368     } else {
1369       trt_builder_->setInt8Calibrator(nullptr);
1370     }
1371   }
1372 
1373 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
1374   string precision_mode_str;
1375   TF_RETURN_IF_ERROR(
1376       TrtPrecisionModeToName(precision_mode_, &precision_mode_str));
1377   string trt_network_name = StrCat(
1378       "TF:", TF_VERSION_STRING, ", ", "TRT:", GetLoadedTensorRTVersion(), "-",
1379       "Precision:", precision_mode_str, ", ", "Calibration:", use_calibration_,
1380       ", ", "Max-Batch-Size:", max_batch_size, ", ",
1381       "Max-Workspace-Size:", max_workspace_size_bytes);
1382   VLOG(1) << "Setting TensorRT network name to " << trt_network_name;
1383   network()->setName(trt_network_name.c_str());
1384 #endif  // #if IS_TRT_VERSION_GE(6, 0, 0, 0)
1385 
1386   VLOG(1) << "Building TensorRT engine";
1387   engine->reset(trt_builder_->buildCudaEngine(*network()));
1388 #endif
1389   if (engine->get() == nullptr) {
1390     return errors::Internal("Failed to build TensorRT engine");
1391   }
1392   return Status::OK();
1393 }
1394 
1395 Status Converter::MaybeUpdateBatchSize(int batch_size) {
1396   // OK iff either batch size is unknown or the two are equal.
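  // For example, batch_size_ == -1 with batch_size == 8 adopts 8;
  // batch_size_ == 8 with batch_size == 8 (or -1) is accepted unchanged;
  // batch_size_ == 8 with batch_size == 16 is rejected below.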
1397   if (this->batch_size_ < 0 || batch_size < 0 ||
1398       this->batch_size_ == batch_size) {
1399     if (this->batch_size_ < 0 && batch_size >= 0) {
1400       this->batch_size_ = batch_size;
1401     }
1402     return Status::OK();
1403   }
1404   return errors::InvalidArgument(
1405       "Provided batch size does not match converter batch size: ", batch_size,
1406       " vs ", batch_size_);
1407 }
1408 
1409 Status Converter::AddTensorOrWeights(const string& name,
1410                                      TRT_TensorOrWeights input) {
1411   // Set the batch size of the tensor, using batch size collected from the
1412   // input tensors to the TRT subgraph at the beginning of the conversion.
1413   // We rely on the individual op converter to understand the semantics of the
1414   // TF node, and make sure it doesn't change the batch size nor introduce
1415   // intra-element dependency inside the batch.
1416   if (use_implicit_batch_ && input.is_tensor()) {
1417     input.set_batch_size(batch_size_);
1418   }
1419   if (trt_tensors_.insert({name, std::move(input)}).second) return Status::OK();
1420   return errors::AlreadyExists("tensor/weights ", name, " already exist.");
1421 }
1422 
1423 Status Converter::GetTensorOrWeights(const string& name,
1424                                      TRT_TensorOrWeights* output) {
1425   if (!trt_tensors_.count(name)) {
1426     return errors::NotFound("Tensor or weights with name ", name,
1427                             " could not be found.");
1428   }
1429   *output = trt_tensors_.at(name);
1430   return Status::OK();
1431 }
1432 
1433 Status Converter::TransposeTensor(nvinfer1::ITensor* input_tensor,
1434                                   const std::vector<int>& order_with_batch_dim,
1435                                   absl::string_view name,
1436                                   nvinfer1::ITensor** output_tensor) {
1437   const auto dims = input_tensor->getDimensions();
1438 
1439   if (order_with_batch_dim.size() - 1 != size_t(dims.nbDims)) {
1440     return errors::InvalidArgument(
1441         "Rank of perm for transpose does not match with that of the input.");
1442   }
1443   if (order_with_batch_dim[0] != 0) {
1444     return errors::Unimplemented(
1445         "Transpose at batch dimension is not supported.");
1446   }
1447 
1448   nvinfer1::IShuffleLayer* layer = this->network()->addShuffle(*input_tensor);
1449   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, "TF-TRT Internal Transpose");
1450   layer->setName(std::basic_string<char>(name).c_str());
1451   MarkQuantizationRangesAsInferrable(input_tensor, layer->getOutput(0));
1452 
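  // For example, an NHWC -> NCHW transpose uses order_with_batch_dim
  // {0, 3, 1, 2}; dropping the batch dimension below yields the TRT
  // permutation {2, 0, 1}.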
1453   nvinfer1::Permutation permutation;
1454   for (int32_t i = 0; i < dims.nbDims; ++i) {
1455     permutation.order[i] = order_with_batch_dim[i + 1] - 1;
1456   }
1457   VLOG(1) << "TransposeTensor permutation: "
1458           << DebugString(permutation, dims.nbDims);
1459   layer->setFirstTranspose(permutation);
1460 
1461   nvinfer1::Dims reshape_dims;
1462   reshape_dims.nbDims = dims.nbDims;
1463   for (int32_t i = 0; i < reshape_dims.nbDims; ++i) {
1464     reshape_dims.d[i] = 0;
1465     // TODO(aaroey): why not transposing the types as well?
1466     reshape_dims.type[i] = dims.type[i];
1467   }
1468   layer->setReshapeDimensions(reshape_dims);
1469 
1470   *output_tensor = layer->getOutput(0);
1471   return Status::OK();
1472 }
1473 
1474 Status Converter::GetWeightRange(const TRT_ShapedWeights& weights,
1475                                  float* out_min, float* out_max) const {
1476   switch (weights.TrtDType()) {
1477     case nvinfer1::DataType::kFLOAT: {
1478       auto inp = static_cast<float const*>(weights.GetValues());
1479       auto result = std::minmax_element(inp, inp + weights.count());
1480       *out_min = *result.first;
1481       *out_max = *result.second;
1482       break;
1483     }
1484     case nvinfer1::DataType::kHALF: {
1485       auto inp = static_cast<Eigen::half const*>(weights.GetValues());
1486       auto result = std::minmax_element(inp, inp + weights.count());
1487       *out_min = Eigen::half_impl::half_to_float(*result.first);
1488       *out_max = Eigen::half_impl::half_to_float(*result.second);
1489       break;
1490     }
1491     case nvinfer1::DataType::kINT32: {
1492       auto inp = static_cast<int const*>(weights.GetValues());
1493       auto result = std::minmax_element(inp, inp + weights.count());
1494       *out_min = static_cast<float>(*result.first);
1495       *out_max = static_cast<float>(*result.second);
1496       break;
1497     }
1498     default:
1499       return errors::Unimplemented(
1500           "Data type not supported for GetWeightRange: ",
1501           DebugString(weights.TrtDType()));
1502   }
1503   return Status::OK();
1504 }
1505 
1506 Status Converter::PrepareTensorForShape(const TRT_TensorOrWeights& input,
1507                                         const nvinfer1::Dims& dims,
1508                                         const bool validation_only,
1509                                         nvinfer1::ITensor** tensor) {
1510   const nvinfer1::Dims input_dims = input.GetTrtDims();
1511   // If one of input_dims and dims doesn't have a static shape, it means some
1512   // of the dims are unknown or need to be inferred, and we don't do further
1513   // checks but rely on the caller to not make mistakes.
1514   // Otherwise we do a simple check to make sure the total sizes are the same.
1515   // If an input is a weight, it is going to become a tensor via
1516   // CreateConstantLayer. So we can treat it as a tensor for
1517   // AreDimsStaticWithDifferentSize(). This really only matters for 0-D tensors.
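  // For example, reshaping dims [6, 1] to [2, 3] passes this check (both hold
  // 6 elements), while reshaping [6, 1] to [2, 4] is rejected as incompatible.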
1518   if (AreDimsStaticWithDifferentSize(input_dims, dims, /*is_tensor=*/true)) {
1519     return errors::InvalidArgument(
1520         "Incompatible shapes: ", DebugString(input_dims), " vs. ",
1521         DebugString(dims));
1522   }
1523   // ConstantLayer requires static shapes (cannot infer -1).
1524   if (input.is_weights() && !HasStaticShape(dims)) {
1525     return errors::InvalidArgument("Shape is not fully defined: ",
1526                                    DebugString(dims));
1527   }
1528   if (validation_only) {
1529     *tensor = nullptr;
1530     return Status::OK();
1531   }
1532 
1533   if (input.is_tensor()) {
1534     if (DimsEqual(input_dims, dims)) {
1535       *tensor = input.tensor();
1536     } else {
1537       nvinfer1::IShuffleLayer* layer =
1538           this->network()->addShuffle(*input.tensor());
1539       TFTRT_RETURN_ERROR_IF_NULLPTR(layer, "TF-TRT Internal Reshape");
1540       layer->setReshapeDimensions(dims);
1541       MarkQuantizationRangesAsInferrable(input.tensor(), layer->getOutput(0));
1542       *tensor = layer->getOutput(0);
1543     }
1544   } else {
1545     *tensor = CreateConstantLayer(input.weights(), dims);
1546     TFTRT_RETURN_ERROR_IF_NULLPTR(*tensor, "TF-TRT Internal Reshape");
1547     if (precision_mode() == TrtPrecisionMode::INT8 && !use_calibration()) {
1548       // If we are in int8 mode and not calibrating, we need to explicitly set a
1549       // quantization range for the output tensor of the IConstantLayer. Here we
1550       // set the range to [min(weights), max(weights)].
1551       float min_range = 0.0f;
1552       float max_range = 0.0f;
1553       TF_RETURN_IF_ERROR(
1554           GetWeightRange(input.weights(), &min_range, &max_range));
1555       // Avoid setting range to 0 because TRT will throw an error. If the
1556       // weights are zero then the range doesn't matter: using 127.0f should
1557       // ensure the quantized weight will be exactly zero.
1558       if (min_range == 0.0f && max_range == 0.0f) {
1559         min_range = -127.0f;
1560         max_range = 127.0f;
1561       }
1562       ProvideQuantizationRange(*tensor, min_range, max_range);
1563     }
1564   }
1565   return Status::OK();
1566 }
1567 
1568 void Converter::MarkQuantizationRangesAsInferrable(nvinfer1::ITensor* input,
1569                                                    nvinfer1::ITensor* output) {
1570   quantization_infer_.push_back({input, output});
1571   quantization_infer_.push_back({output, input});
1572 }
1573 
1574 void Converter::ProvideQuantizationRange(nvinfer1::ITensor* tensor,
1575                                          float min_range, float max_range) {
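  // The stored range is symmetric: e.g. min_range = -3.0f and max_range = 5.0f
  // yield symmetric_range = 5.0f, which MaybeApplyQuantizationRanges() later
  // applies as the dynamic range [-5, 5].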
1576   float symmetric_range = std::max(std::abs(min_range), std::abs(max_range));
1577   quantization_ranges_[tensor] = symmetric_range;
1578 }
1579 
1580 namespace {
1581 
1582 bool IsConvolution(const nvinfer1::ILayer* layer) {
1583   return layer->getType() == nvinfer1::LayerType::kCONVOLUTION;
1584 }
1585 
1586 bool IsScale(const nvinfer1::ILayer* layer) {
1587   return layer->getType() == nvinfer1::LayerType::kSCALE;
1588 }
1589 
1590 bool IsClipOrRelu(const nvinfer1::ILayer* layer) {
1591   if (layer->getType() != nvinfer1::LayerType::kACTIVATION) {
1592     return false;
1593   }
1594   auto activation_type = static_cast<const nvinfer1::IActivationLayer*>(layer)
1595                              ->getActivationType();
1596 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
1597   return activation_type == nvinfer1::ActivationType::kRELU ||
1598          activation_type == nvinfer1::ActivationType::kCLIP;
1599 #else
1600   return activation_type == nvinfer1::ActivationType::kRELU;
1601 #endif
1602 }
1603 
1604 bool IsAdd(const nvinfer1::ILayer* layer) {
1605   if (layer->getType() != nvinfer1::LayerType::kELEMENTWISE) {
1606     return false;
1607   }
1608   auto operation =
1609       static_cast<const nvinfer1::IElementWiseLayer*>(layer)->getOperation();
1610   return operation == nvinfer1::ElementWiseOperation::kSUM;
1611 }
1612 
1613 }  // namespace
1614 
1615 void Converter::MaybeApplyQuantizationRanges() {
1616   if (precision_mode() != TrtPrecisionMode::INT8) return;
1617 
1618   // Infer ranges across marked ops.
1619   PropagateQuantizationRanges();
1620   // Apply ranges.
1621 #if IS_TRT_VERSION_GE(5, 0, 0, 0)
1622   for (auto pair : quantization_ranges_) {
1623     nvinfer1::ITensor* tensor = pair.first;
1624     const float range = pair.second;
1625     VLOG(1) << "Setting range for: " << tensor->getName() << ": " << range;
1626     // TODO(laigd): if 'tensor' already has a range set which doesn't match
1627     // 'range', it should report an error.
1628     tensor->setDynamicRange(-range, range);
1629   }
1630 #endif
1631 
1632   if (use_calibration()) return;
1633 #if !IS_TRT_VERSION_GE(6, 0, 0, 0)
1634   // Attempt to find tensors that are missing ranges, and set the corresponding
1635   // layer's precision to FP16 to avoid Builder::buildCudaEngine() failing.
1636   // This is only needed for TensorRT 5 and earlier, because TensorRT 6 falls
1637   // back to FP16 internally.
1638   // TensorRT doesn't need ranges for intermediate tensors when layers are
1639   // fused, so find fused layers first.
1640   // Get all tensors from the network and deduce fused ops.
1641   std::map<nvinfer1::ILayer*, std::vector<nvinfer1::ILayer*>> layer_consumers;
1642   std::map<nvinfer1::ITensor*, nvinfer1::ILayer*> tensor_layer;
1643   std::set<nvinfer1::ITensor*> all_tensors;
1644   for (int i = 0; i < this->network()->getNbLayers(); i++) {
1645     nvinfer1::ILayer* layer = this->network()->getLayer(i);
1646     layer_consumers[layer] = {};
1647     for (int j = 0; j < layer->getNbInputs(); j++) {
1648       all_tensors.insert(layer->getInput(j));
1649     }
1650     for (int j = 0; j < layer->getNbOutputs(); j++) {
1651       tensor_layer[layer->getOutput(j)] = layer;
1652       all_tensors.insert(layer->getOutput(j));
1653     }
1654   }
1655   for (int i = 0; i < this->network()->getNbLayers(); i++) {
1656     nvinfer1::ILayer* layer = this->network()->getLayer(i);
1657     layer_consumers[layer] = {};
1658     for (int j = 0; j < layer->getNbInputs(); j++) {
1659       nvinfer1::ITensor* input_tensor = layer->getInput(j);
1660       auto input_layer = tensor_layer.find(input_tensor);
1661       if (input_layer != tensor_layer.end()) {
1662         auto consumed_layer = layer_consumers.find(input_layer->second);
1663         if (consumed_layer != layer_consumers.end()) {
1664           consumed_layer->second.push_back(layer);
1665         }
1666       }
1667       all_tensors.insert(input_tensor);
1668     }
1669   }
1670   // Identify fused tensors.
1671   // Conv+BiasAdd+Add+Activation(Clip or Relu), Conv+BiasAdd+Add,
1672   // Conv+BiasAdd+Activation(Clip or Relu), Conv+BiasAdd,
1673   // Conv+Activation(Clip or Relu) are fused.
1674   std::set<nvinfer1::ITensor*> fused_tensors;
1675   typedef std::function<bool(const nvinfer1::ILayer*)> matcher;
1676   const std::vector<std::pair<string, std::vector<matcher>>> fused_patterns = {
1677       {"Fused Conv+Bias+Add+Activation",
1678        {
1679            IsConvolution,
1680            IsScale,
1681            IsAdd,
1682            IsClipOrRelu,
1683        }},
1684       {"Fused Conv+Bias+Add",
1685        {
1686            IsConvolution,
1687            IsScale,
1688            IsAdd,
1689        }},
1690       {"Fused Conv+Bias+Activation",
1691        {
1692            IsConvolution,
1693            IsScale,
1694            IsClipOrRelu,
1695        }},
1696       {"Fused Conv+Bias",
1697        {
1698            IsConvolution,
1699            IsScale,
1700        }},
1701       {"Fused Conv+Activation",
1702        {
1703            IsConvolution,
1704            IsClipOrRelu,
1705        }},
1706   };
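  // For example, a chain Convolution -> Scale -> ElementWise(kSUM) ->
  // Activation(ReLU), where each intermediate layer has exactly one consumer,
  // matches the first pattern; every layer in the chain except the last has
  // its output tensors marked as fused below.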
1707   for (int i = 0; i < this->network()->getNbLayers(); i++) {
1708     for (const auto& pattern : fused_patterns) {
1709       size_t last_matcher = pattern.second.size() - 1;
1710       nvinfer1::ILayer* layer = this->network()->getLayer(i);
1711       // We should skip this layer if its outputs are already marked as fused,
1712       // but all the current patterns start with a convolution and are ordered
1713       // in decreasing pattern length, so that is not necessary (yet).
1714       std::vector<nvinfer1::ILayer*> fused_candidates;
1715       for (size_t index = 0; index <= last_matcher; ++index) {
1716         if ((!pattern.second[index](layer)) ||
1717             (index < last_matcher && layer_consumers[layer].size() != 1)) {
1718           fused_candidates.clear();
1719           break;
1720         }
1721         if (index < last_matcher) {
1722           fused_candidates.push_back(layer);
1723           layer = layer_consumers[layer].front();
1724         }
1725       }
1726       if (!fused_candidates.empty()) {
1727         VLOG(1) << pattern.first;
1728         for (const auto& fused_layer : fused_candidates) {
1729           for (int i = 0; i < fused_layer->getNbOutputs(); i++) {
1730             VLOG(1) << "  Fused output tensor:"
1731                     << fused_layer->getOutput(i)->getName();
1732             fused_tensors.insert(fused_layer->getOutput(i));
1733           }
1734         }
1735         break;  // Don't try other patterns on this layer.
1736       }
1737     }
1738   }
1739   // Find tensors with no ranges that are not fused and force their layers to
1740   // not be quantized.
1741   for (auto tensor : all_tensors) {
1742     if (!quantization_ranges_.count(tensor) &&
1743         fused_tensors.find(tensor) == fused_tensors.end()) {
1744       // Note: there may be some warnings for "(Unnamed ITensor* N)". These
1745       // are tensors which are created internally by TF-TRT. The ranges for
1746       // these unnamed ITensors are always inferred from user provided ranges,
1747       // thus there will also be a warning for the range(s) the user missed.
1748       LOG(WARNING) << "Quantization range was not found for "
1749                    << tensor->getName() << ". "
1750                    << "Setting invalid quantization range.";
1751       // Set the range to something unusable so the engine will fail if it
1752       // tries to actually use the tensor's range.
1753       tensor->setDynamicRange(0, 0);
1754       auto layer = tensor_layer.find(tensor);
1755       // If the tensor is the output of a layer, set the layer's precision
1756       // to fp16 so that it isn't quantized.
1757       // Shuffle doesn't support setting precision.
1758       if (layer != tensor_layer.end() &&
1759           layer->second->getType() != nvinfer1::LayerType::kSHUFFLE) {
1760         VLOG(1) << "And setting layer " << layer->second->getName()
1761                 << " precision to fp16.";
1762         layer->second->setPrecision(nvinfer1::DataType::kHALF);
1763       }
1764     }
1765   }
1766 #endif
1767 }
1768 
1769 void Converter::PropagateQuantizationRanges() {
1770   // Propagate ranges across edges in quantization_infer_ until no new
1771   // information is added.
1772   // Note: this function modifies quantization_infer_; it might be better to
1773   // modify a copy instead if we need quantization_infer_ later for some
1774   // reason.
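  // For example, if tensor A has a known range and the pair (A, B) was marked
  // as inferrable, B inherits A's range on some sweep and that edge is then
  // removed; sweeping repeats until no new ranges are added.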
1775   bool information_added = true;
1776   while (information_added) {
1777     information_added = false;
1778     for (auto it = quantization_infer_.begin();
1779          it != quantization_infer_.end();) {
1780       auto input_tensor_range = quantization_ranges_.find(it->first);
1781       auto output_tensor_range = quantization_ranges_.find(it->second);
1782       if (input_tensor_range != quantization_ranges_.end() &&
1783           output_tensor_range == quantization_ranges_.end()) {
1784         // Input has a range but output doesn't: copy the range.
1785         // TODO(laigd): consider reporting an error if a different range is
1786         // already set.
1787         quantization_ranges_[it->second] = input_tensor_range->second;
1788         information_added = true;
1789         VLOG(1) << "Copy quantization range: " << it->first->getName() << " -> "
1790                 << it->second->getName();
1791       }
1792       // We can remove edges when the output range is known
1793       if (quantization_ranges_.find(it->second) != quantization_ranges_.end()) {
1794         it = quantization_infer_.erase(it);
1795       } else {
1796         ++it;
1797       }
1798     }
1799   }
1800 }
1801 
1802 Status Converter::GetInputs(const NodeDef& node_def,
1803                             std::vector<TRT_TensorOrWeights>* inputs) const {
1804   for (auto const& input_name : node_def.input()) {
1805     /*************************************************************************
1806      * TODO(jie): handle case 1) here.
1807      * Normalizes the inputs and extracts associated metadata:
1808      * 1) Inputs can contain a colon followed by a suffix of characters.
1809      *    That suffix may be a single number (e.g. inputName:1) or several
1810      *    word characters separated from a number by a colon
1811      *    (e.g. inputName:foo:1). The
1812      *    latter case is used to denote inputs and outputs of functions.
1813      * 2) Control dependency inputs contain a caret at the beginning; we
1814      *    remove it and annotate the edge as a control dependency.
1815      ************************************************************************/
1816     // skip control nodes
1817     if (input_name[0] == '^') continue;
1818     string name = input_name;
1819     auto last = name.find_last_of(':');
1820     // TODO(aaroey): use TensorId
1821     if (last != string::npos && last + 2 == name.size() &&
1822         name[last + 1] == '0') {
1823       name.erase(last);
1824     }
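    // For example, "conv/BiasAdd:0" is looked up as "conv/BiasAdd", while
    // "conv/BiasAdd:1" keeps its suffix, matching the "name:i" keys that
    // ConvertNode() registers for outputs with i > 0.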
1825 
1826     if (trt_tensors_.count(name)) {
1827       TRT_TensorOrWeights input = trt_tensors_.at(name);
1828       inputs->push_back(input);
1829       VLOG(2) << "Retrieved input " << name << ": " << input.DebugString();
1830     } else {
1831       // TODO(aaroey): this should not happen, make it a CHECK.
1832       // TODO(aaroey): use StrCat for pattern like this.
1833       string msg("Node ");
1834       StrAppend(&msg, node_def.name(), " should have an input named '", name,
1835                 "' but it is not available");
1836       LOG(ERROR) << msg;
1837       return errors::InvalidArgument(msg);
1838     }
1839   }
1840   return Status::OK();
1841 }
1842 
1843 // Checks that the number of inputs matches, and enforces that the inputs marked
1844 // as true are constant weights. true means that the input must be a weight,
1845 // while false means the input must be a tensor. In the future, false will mean
1846 // the input can be a tensor or weight.
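// For example, the Conv2D converter below calls
//   CheckInputsWeights(*params, {{"input", false}, {"filter", true}});
// to require a runtime tensor for "input" and a constant weight for "filter".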
1847 Status CheckInputsWeights(
1848     const OpConverterParams& params,
1849     const std::vector<std::pair<string, bool>>& inputs_is_weight) {
1850   const auto& inputs = params.inputs;
1851   const auto& node_def = params.node_def;
1852   if (inputs.size() != inputs_is_weight.size()) {
1853     return errors::InvalidArgument(
1854         node_def.op(), " got ", inputs.size(), " inputs but expected ",
1855         inputs_is_weight.size(), ", at ", node_def.name());
1856   }
1857   for (int i = 0; i < inputs.size(); i++) {
1858     if (inputs_is_weight[i].second && inputs.at(i).is_tensor()) {
1859       return errors::Unimplemented("The input \"", inputs_is_weight[i].first,
1860                                    "\" for ", node_def.op(),
1861                                    " must be a constant, at ", node_def.name());
1862     }
1863     // TODO(tmorris): Remove this check and provide a method to automatically
1864     // retrieve an input as a tensor, converting via CreateConstantLayer if it
1865     // was originally a weight. We will want a caching mechanism to prevent many
1866     // duplicate constants from being created.
1867     if (!inputs_is_weight[i].second && inputs.at(i).is_weights()) {
1868       return errors::Unimplemented("The input \"", inputs_is_weight[i].first,
1869                                    "\" for ", node_def.op(),
1870                                    " must be a tensor, at ", node_def.name());
1871     }
1872   }
1873   return Status::OK();
1874 }
1875 
1876 Status AllowDataTypes(const OpConverterParams& params,
1877                       const std::set<DataType>& allowed_dtypes,
1878                       const char* dtype_attr_name = "T") {
1879   const auto& node_def = params.node_def;
1880   TFAttrs attrs(node_def);
1881   if (!attrs.count(dtype_attr_name)) {
1882     return errors::InvalidArgument("Attribute with name ", dtype_attr_name,
1883                                    " not found.");
1884   }
1885   const auto op_dtype = attrs.get<DataType>(dtype_attr_name);
1886   if (!allowed_dtypes.count(op_dtype)) {
1887     // Build string list of allowed types.
1888     std::ostringstream ss;
1889     for (auto it = allowed_dtypes.begin(); it != allowed_dtypes.end(); ++it) {
1890       if (it != allowed_dtypes.begin()) ss << ", ";
1891       ss << DataTypeString(*it);
1892     }
1893     return errors::Unimplemented("Data type ", DataTypeString(op_dtype),
1894                                  " is not supported for ", node_def.op(),
1895                                  ", must be one of [", ss.str(), "], at ",
1896                                  node_def.name());
1897   }
1898   return Status::OK();
1899 }
1900 
1901 // ****************************************************************************
1902 // Constant folding functions for weights.
1903 // TODO(laigd): we should probably use eigen directly.
1904 // *****************************************************************************
1905 struct LambdaFactory {
1906   enum class OP_CATEGORY : int { RSQRT = 0, NEG, RECIP };
1907   OP_CATEGORY op;
1908 
1909   template <typename T>
1910   std::function<T(T)> unary() {
1911     switch (op) {
1912       case OP_CATEGORY::RSQRT: {
1913         VLOG(2) << "RSQRT GETS DONE";
1914         return [](T t) -> T { return 1.0 / std::sqrt(t); };
1915       }
1916       case OP_CATEGORY::NEG:
1917         return [](T t) -> T { return -t; };
1918       case OP_CATEGORY::RECIP:
1919         return [](T t) -> T { return 1.0 / t; };
1920       default:
1921         LOG(ERROR) << "Not supported op for unary: " << static_cast<int>(op);
1922         return nullptr;
1923     }
1924   }
1925 };
1926 
1927 template <>
1928 std::function<Eigen::half(Eigen::half)> LambdaFactory::unary<Eigen::half>() {
1929   switch (op) {
1930     case OP_CATEGORY::RSQRT: {
1931       VLOG(2) << "RSQRT GETS DONE";
1932       return [](Eigen::half t) {
1933         return Eigen::half(1.0 / std::sqrt(static_cast<float>(t)));
1934       };
1935     }
1936     case OP_CATEGORY::NEG:
1937       return [](Eigen::half t) { return -t; };
1938     case OP_CATEGORY::RECIP:
1939       return [](Eigen::half t) {
1940         return Eigen::half(1.0 / static_cast<float>(t));
1941       };
1942     default:
1943       LOG(ERROR) << "Not supported op for unary: " << static_cast<int>(op);
1944       return nullptr;
1945   }
1946 }
1947 
1948 Status UnaryCompute(const TRT_ShapedWeights& iweights,
1949                     TRT_ShapedWeights* oweights, LambdaFactory unary_op) {
1950   CHECK(iweights.TrtDType() == oweights->TrtDType());
1951   switch (iweights.TrtDType()) {
1952     case nvinfer1::DataType::kFLOAT: {
1953       auto inp = static_cast<float const*>(iweights.GetValues());
1954       auto oup = static_cast<float*>(oweights->GetValues());
1955       std::transform(inp, inp + iweights.count(), oup, unary_op.unary<float>());
1956       break;
1957     }
1958     case nvinfer1::DataType::kHALF: {
1959       auto inp = static_cast<Eigen::half const*>(iweights.GetValues());
1960       auto oup = static_cast<Eigen::half*>(oweights->GetValues());
1961       std::transform(inp, inp + iweights.count(), oup,
1962                      unary_op.unary<Eigen::half>());
1963       break;
1964     }
1965     default:
1966       return errors::Unimplemented("Data type not supported: ",
1967                                    DebugString(iweights.TrtDType()));
1968   }
1969   return Status::OK();
1970 }
1971 
1972 // Before TRT 5.1.3, we have to calculate padding for convolutions ourselves.
1973 Status Conv2DPaddingHelper(OpConverterParams* params, const TFAttrs& attrs,
1974                            const nvinfer1::DimsHW& kernel_size,
1975                            const nvinfer1::DimsHW& dilation,
1976                            const nvinfer1::DimsHW& stride,
1977                            const std::vector<int64_t>& input_dims,
1978                            nvinfer1::ITensor* tensor,
1979                            std::vector<std::pair<int, int>>* padding,
1980                            nvinfer1::ITensor** padded_tensor) {
1981   if (attrs.get<string>("padding") == "SAME") {
1982     nvinfer1::DimsHW effective_kernel_size = kernel_size;
1983     effective_kernel_size.h() += (kernel_size.h() - 1) * (dilation.h() - 1);
1984     effective_kernel_size.w() += (kernel_size.w() - 1) * (dilation.w() - 1);
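    // For example, a 3x3 kernel with dilation 2 covers an effective 5x5
    // window: 3 + (3 - 1) * (2 - 1) = 5.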
1985     *padding = CreateSamePadding(stride, effective_kernel_size, input_dims);
1986   } else {
1987     *padding = {{0, 0}, {0, 0}};
1988   }
1989 
1990   // Handle asymmetric padding. TensorRT 5.1 added support for asymmetric
1991   // padding via setPrePadding and setPostPadding. Due to a bug in 5.1.2, we can
1992   // only use asymmetric padding in convolutions with 5.1.3+. But in 5.1.3, we
1993   // will always use setPaddingMode for simplicity.
1994   if ((*padding)[0].first != (*padding)[0].second ||
1995       (*padding)[1].first != (*padding)[1].second) {
1996     auto pad_layer = params->converter->network()->addPadding(
1997         *tensor, nvinfer1::DimsHW((*padding)[0].first, (*padding)[1].first),
1998         nvinfer1::DimsHW((*padding)[0].second, (*padding)[1].second));
1999     TFTRT_RETURN_ERROR_IF_NULLPTR(pad_layer, params->node_def.name());
2000     params->converter->MarkQuantizationRangesAsInferrable(
2001         tensor, pad_layer->getOutput(0));
2002     *padding = {{0, 0}, {0, 0}};
2003     tensor = pad_layer->getOutput(0);
2004   }
2005   *padded_tensor = tensor;
2006   return Status::OK();
2007 }
2008 
2009 Status ConvertConv2DHelper(OpConverterParams* params, int group,
2010                            bool is_conv2d_backprop_input) {
2011   const auto& inputs = params->inputs;
2012   const auto& node_def = params->node_def;
2013   TRT_TensorOrWeights backprop_output_size;
2014   nvinfer1::ITensor* tensor = nullptr;
2015   if (is_conv2d_backprop_input) {
2016     // In the case when Conv2dBackpropInput is used for conv2d_transpose, these
2017     // inputs correspond to: output size, filter, and input.
2018     TF_RETURN_IF_ERROR(CheckInputsWeights(
2019         *params,
2020         {{"input_sizes", true}, {"filter", true}, {"out_backprop", false}}));
2021     backprop_output_size = inputs.at(0);
2022     tensor = inputs.at(2).tensor();
2023   } else {
2024     TF_RETURN_IF_ERROR(
2025         CheckInputsWeights(*params, {{"input", false}, {"filter", true}}));
2026     tensor = inputs.at(0).tensor();
2027   }
2028   TF_RETURN_IF_ERROR(
2029       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
2030   TRT_ShapedWeights weights_rsck = inputs.at(1).weights();
2031   if (weights_rsck.shape_.nbDims != 4) {
2032     return errors::InvalidArgument("Conv2D expects kernel of dimension 4, at " +
2033                                    node_def.name());
2034   }
2035   TFAttrs attrs(node_def);
2036   auto data_format = attrs.get<string>("data_format");
2037   int c_index = (data_format == "NHWC") ? 3 : 1;
2038   int h_index = (data_format == "NHWC") ? 1 : 2;
2039   int w_index = (data_format == "NHWC") ? 2 : 3;
2040   auto tf_dilations = attrs.get<std::vector<int64>>("dilations");
2041   if (tf_dilations.size() != 4) {
2042     return errors::InvalidArgument(
2043         "Convolution dilations field must specify 4 dimensions, at ",
2044         node_def.name());
2045   }
2046   if (tf_dilations[0] != 1 || tf_dilations[c_index] != 1) {
2047     return errors::Unimplemented(
2048         "Dilation rate must be 1 for batch and channel dimensions, at ",
2049         node_def.name());
2050   }
2051   const nvinfer1::DimsHW dilation(tf_dilations[h_index], tf_dilations[w_index]);
2052   if (is_conv2d_backprop_input && (dilation.d[0] != 1 || dilation.d[1] != 1)) {
2053     return errors::Unimplemented(
2054         "Dilation with Conv2DBackpropInput (conv2d_transpose) is not supported",
2055         ", at ", node_def.name());
2056   }
2057 
2058   const auto tf_stride = attrs.get<std::vector<int64>>("strides");
2059   if (tf_stride.size() != 4) {
2060     return errors::InvalidArgument(
2061         "Convolution strides field must specify 4 dimensions, at ",
2062         node_def.name());
2063   }
2064   if (tf_stride[0] != 1 || tf_stride[c_index] != 1) {
2065     return errors::Unimplemented(
2066         "Stride must be 1 for batch and channel dimensions, at ",
2067         node_def.name());
2068   }
2069   const nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]);
2070   if (params->validation_only) return Status::OK();
2071 
2072   // Transpose to NCHW (NCHW is required for IConvLayer).
2073   const bool need_transpose = (data_format == "NHWC");
2074   if (need_transpose) {
2075     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2076         tensor, {0, 3, 1, 2}, StrCat(node_def.name(), "_to_NCHW"), &tensor));
2077   }
2078   // Dimensions of transposed tensor.
2079   const auto tensor_dim = tensor->getDimensions();
2080 
2081   // group == 0 signifies that this is a depthwise convolution, so set
2082   // num_groups to the size of the input's channel dim. For a non-depthwise conv,
2083   // num_groups will be 1.
2084   const int num_groups = (group == 0) ? tensor_dim.d[0] : group;
2085 
2086   // For conv, TF weights are RSCK, and TRT expects KCRS.
2087   // For backprop, TF weights are RSKC, and TRT expects CKRS.
2088   // Therefore, this reorder will work for both cases.
2089   TRT_ShapedWeights weights =
2090       params->weight_store->GetTempWeights(weights_rsck);
2091   ReorderRSCKToKCRS(weights_rsck, &weights, num_groups);
2092   TRT_ShapedWeights biases(weights.TrtDType());
2093   const int output_axis = is_conv2d_backprop_input ? 1 : 0;
2094   const int noutput = weights.shape_.d[output_axis] * num_groups;
2095   nvinfer1::DimsHW kernel_size;
2096   kernel_size.h() = weights.shape_.d[2];
2097   kernel_size.w() = weights.shape_.d[3];
2098 
2099 // Before TRT 5.1.3, we have to calculate padding ourselves.
2100 #if !IS_TRT_VERSION_GE(5, 1, 3, 0)
2101   std::vector<std::pair<int, int>> padding;
2102   std::vector<int64_t> input_dims;
2103   if (is_conv2d_backprop_input) {
2104     // For backprop, calculate padding based on "input_sizes" input, which
2105     // actually corresponds to output size. ("input_sizes" makes sense in the
2106     // context of Conv2DBackpropInput).
2107     // We use h_index and w_index instead of 1 and 2 because we haven't
2108     // transposed backprop_output_size along with the input.
2109     auto output_size_weights =
2110         static_cast<int*>(backprop_output_size.weights().GetValues());
2111     input_dims = {output_size_weights[h_index], output_size_weights[w_index]};
2112   } else {
2113     // Use 1 and 2 because tensor_dim has the dimensions of the transposed
2114     // input.
2115     input_dims = {static_cast<int>(tensor_dim.d[1]),
2116                   static_cast<int>(tensor_dim.d[2])};
2117   }
2118   nvinfer1::ITensor* padded_tensor = nullptr;
2119   TF_RETURN_IF_ERROR(Conv2DPaddingHelper(params, attrs, kernel_size, dilation,
2120                                          stride, input_dims, tensor, &padding,
2121                                          &padded_tensor));
2122   tensor = padded_tensor;
2123 #endif
2124 
2125   // Add convolution.
2126   nvinfer1::ILayer* conv_layer = nullptr;
2127   if (is_conv2d_backprop_input) {
2128     nvinfer1::IDeconvolutionLayer* layer =
2129         params->converter->network()->addDeconvolution(
2130             *tensor, noutput, kernel_size, weights.GetTrtWeights(),
2131             biases.GetTrtWeights());
2132     TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
2133     layer->setStride(stride);
2134 // TensorRT 5.1.3 added support for padding modes.
2135 #if IS_TRT_VERSION_GE(5, 1, 3, 0)
2136     // VALID padding is the default TRT behavior.
2137     if (attrs.get<string>("padding") == "SAME") {
2138       // SAME_UPPER means that post padding is preferred.
2139       layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
2140     }
2141 #else
2142     layer->setPadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
2143 #endif
2144     layer->setName(node_def.name().c_str());
2145     layer->setNbGroups(num_groups);
2146     conv_layer = layer;
2147   } else {
2148     nvinfer1::IConvolutionLayer* layer =
2149         params->converter->network()->addConvolution(
2150             *tensor, noutput, kernel_size, weights.GetTrtWeights(),
2151             biases.GetTrtWeights());
2152     TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
2153     layer->setStride(stride);
2154 #if IS_TRT_VERSION_GE(5, 1, 3, 0)
2155     if (attrs.get<string>("padding") == "SAME") {
2156       layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
2157     }
2158 #else
2159     layer->setPadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
2160 #endif
2161     layer->setName(node_def.name().c_str());
2162     layer->setNbGroups(num_groups);
2163     layer->setDilation(dilation);
2164     conv_layer = layer;
2165   }
2166   nvinfer1::ITensor* output_tensor = conv_layer->getOutput(0);
2167   // Add extra padding for Deconv because TRT doesn't accept the
2168   // output_shape argument, and thus the TRT output shape could be wrong
2169   // when strides > 1.
2170   if (is_conv2d_backprop_input) {
2171     auto tf_output_shape =
2172         static_cast<int*>(backprop_output_size.weights().GetValues());
2173     nvinfer1::Dims trt_output_shape = output_tensor->getDimensions();
2174     // What determines the padding size is the difference between the given
2175     // input_sizes (tf_output_shape) and TRT computed size.
2176     const int height_diff = tf_output_shape[h_index] - trt_output_shape.d[1];
2177     const int width_diff = tf_output_shape[w_index] - trt_output_shape.d[2];
2178     if ((height_diff < 0) || (width_diff < 0)) {
2179       return errors::InvalidArgument(
2180           "input_sizes argument of Conv2DBackprop (i.e. output_shape argument "
2181           "of conv2d_transpose) ",
2182           "is too small for the given out_backprop argument of Conv2DBackprop "
2183           "(i.e. input argument of conv2d_transpose). Expect: ",
2184           "(", tf_output_shape[h_index], ", ", tf_output_shape[w_index],
2185           ") >= ", "(", trt_output_shape.d[1], ", ", trt_output_shape.d[2],
2186           ") for op ", node_def.name());
2187     }
2188     // Only add a padding layer if padding sizes are larger than 0
2189     if ((height_diff > 0) || (width_diff > 0)) {
2190       nvinfer1::DimsHW pre_padding(0, 0);
2191       nvinfer1::DimsHW post_padding(height_diff, width_diff);
2192       nvinfer1::IPaddingLayer* padding_layer =
2193           params->converter->network()->addPadding(*output_tensor, pre_padding,
2194                                                    post_padding);
2195       output_tensor = padding_layer->getOutput(0);
2196     }
2197   }
2198   // Restore transpose.
2199   if (need_transpose) {
2200     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2201         output_tensor, {0, 2, 3, 1}, StrCat(node_def.name(), "_to_NHWC"),
2202         &output_tensor));
2203   }
2204   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
2205   return Status::OK();
2206 }
2207 
2208 Status ConvertTranspose(OpConverterParams* params) {
2209   const auto& inputs = params->inputs;
2210   TF_RETURN_IF_ERROR(
2211       CheckInputsWeights(*params, {{"x", false}, {"perm", true}}));
2212   TF_RETURN_IF_ERROR(AllowDataTypes(
2213       *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
2214   // Get the permutation from weights.
2215   TRT_ShapedWeights weights = inputs.at(1).weights();
2216   const int* weights_ptr = static_cast<int*>(weights.GetValues());
2217   std::vector<int> perm(weights_ptr, weights_ptr + weights.count());
2218 
2219   // Verify the permutation.
2220   nvinfer1::ITensor* input_tensor = inputs.at(0).tensor();
2221   if (perm.size() - 1 != size_t(input_tensor->getDimensions().nbDims)) {
2222     return errors::InvalidArgument(
2223         "Rank of perm for transpose does not match with that of the input.");
2224   }
2225   if (perm[0] != 0) {
2226     return errors::Unimplemented(
2227         "Transpose at batch dimension is not supported.");
2228   }
2229 
2230   if (params->validation_only) return Status::OK();
2231 
2232   // Start conversion.
2233   nvinfer1::ITensor* output_tensor = nullptr;
2234   TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2235       input_tensor, perm, params->node_def.name(), &output_tensor));
2236   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
2237   return Status::OK();
2238 }
2239 
2240 Status ConvertReshape(OpConverterParams* params) {
2241   const auto& inputs = params->inputs;
2242   const auto& node_def = params->node_def;
2243   TF_RETURN_IF_ERROR(
2244       CheckInputsWeights(*params, {{"tensor", false}, {"shape", true}}));
2245   TF_RETURN_IF_ERROR(AllowDataTypes(
2246       *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
2247   const TRT_TensorOrWeights& input_tensor = inputs.at(0);
2248   TRT_ShapedWeights weights = inputs.at(1).weights();
2249   if (weights.count() == 0) {
2250     return errors::Unimplemented("Reshape to shape=[] is not supported, at ",
2251                                  node_def.name());
2252   }
2253 
2254   const int* weights_ptr = static_cast<int*>(weights.GetValues());
2255 
2256   // Check that it doesn't change the batch dimension. This check is
2257   // conservative: for example, when the first dim of the shape is -1 and the
2258   // input tensor shape is not fixed, it is still possible that the reshape
2259   // doesn't change the batch dim, but as long as there is a possibility that
2260   // it could change the batch dim, we reject the conversion. The parameters are:
2261   //
2262   // * reshape_batch_dim: the value of the first dim of the input shape constant
2263   // * reshape_dims: all other dims of the input shape constant
2264   // * input_batch_dim: the value of the first dim of the input tensor to
2265   //   reshape
2266   // * input_dims: all other dims of the input tensor to reshape
2267   //
2268   // The validation logic is:
2269   //
2270   // if input_batch_dim is fixed:
2271   //   if reshape_batch_dim == input_batch_dim:
2272   //     ok
2273   //   elif reshape_batch_dim == -1 (meaning reshape_dims are fixed) and
2274   //        input_dims are fixed and
2275   //        prod(input_dims) == prod(reshape_dims)
2276   //     ok
2277   //   else:
2278   //     not ok
2279   // elif input_dims are fixed:
2280   //   if reshape_dims are fixed and
2281   //      prod(input_dims) == prod(reshape_dims):
2282   //     ok
2283   //   else:
2284   //     not ok
2285   // else:
2286   //   not ok
2287   //
2288   // Note that the following is ok no matter whether reshape_batch_dim is fixed
2289   // or not:
2290   //
2291   // ```
2292   // input_batch_dim is not fixed &&
2293   //     reshape_dims are fixed &&
2294   //     prod(input_dims) == prod(reshape_dims),
2295   // ```
2296   //
2297   // because the non-batch dims of the new and old shapes match, and TF runtime
2298   // should make sure the batch dim is not changed.
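  // For example, a tensor with batch size 8 and non-batch dims [2, 3] may be
  // reshaped with shape constants [8, 6], [-1, 6] or [8, 3, 2], but [4, 12] is
  // rejected because it could change the batch dimension.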
2299 
2300   const int input_batch_dim = input_tensor.batch_size();
2301   const int reshape_batch_dim = weights_ptr[0];
2302   const nvinfer1::Dims input_dims = input_tensor.GetTrtDims();
2303 
2304   nvinfer1::Dims reshape_dims;
2305   reshape_dims.nbDims = weights.count() - 1;
2306   for (int i = 1; i < weights.count(); i++) {
2307     reshape_dims.d[i - 1] = weights_ptr[i];
2308   }
2309 
2310   // Check that it doesn't change the batch dimension according to the logic
2311   // mentioned above.
2312   bool reshape_may_change_batch_dim = false;
2313   if (input_batch_dim > 0) {        // Batch size is fixed.
2314     if (reshape_batch_dim == -1) {  // Other dims of the shape must be fixed.
2315       if (!AreDimsStaticWithSameSize(input_dims, reshape_dims,
2316                                      /*is_tensor=*/true)) {
2317         reshape_may_change_batch_dim = true;
2318       }
2319     } else if (reshape_batch_dim != input_batch_dim) {
2320       reshape_may_change_batch_dim = true;
2321     } else {
2322       // This means (input_batch_dim>0 && input_batch_dim==reshape_batch_dim),
2323       // and TF runtime should make sure non-batch dims are matched.
2324     }
2325   } else if (!AreDimsStaticWithSameSize(input_dims, reshape_dims,
2326                                         /*is_tensor=*/true)) {
2327     reshape_may_change_batch_dim = true;
2328   }
2329   VLOG(1) << "input_batch_dim=" << input_batch_dim
2330           << ", input_dims=" << DebugString(input_dims)
2331           << "\nreshape_batch_dim=" << reshape_batch_dim
2332           << ", reshape_dims=" << DebugString(reshape_dims);
2333   if (reshape_may_change_batch_dim) {
2334     const string msg = StrCat(
2335         "Reshape on batch dimension is not supported, at ", node_def.name(),
2336         ". input_batch_dim=", input_batch_dim, ", ", DebugString(input_dims),
2337         "; reshape_batch_dim=", reshape_batch_dim, ", ",
2338         DebugString(reshape_dims));
2339     return errors::Unimplemented(msg);
2340   }
2341 
2342   // Start conversion.
2343   nvinfer1::ITensor* output_tensor = nullptr;
2344   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2345       input_tensor, reshape_dims, params->validation_only, &output_tensor));
2346   if (params->validation_only) return Status::OK();
2347 
2348   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
2349   return Status::OK();
2350 }
2351 
2352 Status ConvertExpandDims(OpConverterParams* params) {
2353   const auto& inputs = params->inputs;
2354   const auto& node_def = params->node_def;
2355   TF_RETURN_IF_ERROR(
2356       CheckInputsWeights(*params, {{"input", false}, {"axis", true}}));
2357   TF_RETURN_IF_ERROR(AllowDataTypes(
2358       *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
2359   // Get input shape as vector.
2360   const TRT_TensorOrWeights& input_tensor = inputs.at(0);
2361   const nvinfer1::Dims dims = input_tensor.GetTrtDims();
2362   std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
2363   // Get axis to expand on.
2364   auto axis = inputs.at(1).weights().GetSpan<int>();
2365   if (axis.size() != 1) {
2366     return errors::InvalidArgument("ExpandDims axis must be a scalar, at ",
2367                                    node_def.name());
2368   }
2369   // Use rank = nbDims + 1 for ConvertAxis's bounds checking to account for
2370   // ExpandDim's ability to add an axis at the end of the shape.
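  // For example, with non-batch dims [2, 3] (nbDims = 2), a TF axis of 3 or -1
  // should map to trt_axis = 2, producing [2, 3, 1] after the insert below.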
2371   int trt_axis;
2372   TF_RETURN_IF_ERROR(ConvertAxis(axis[0], dims.nbDims + 1, node_def.name(),
2373                                  /*use_implicit_batch=*/true, &trt_axis));
2374   if (params->validation_only) return Status::OK();
2375 
2376   // ExpandDims: Insert new dim of size 1.
2377   input_dims.insert(input_dims.begin() + trt_axis, 1);
2378   // Reshape tensor.
2379   nvinfer1::Dims new_dims;
2380   TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims));
2381   nvinfer1::ITensor* output_tensor = nullptr;
2382   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2383       input_tensor, new_dims, /*validation_only=*/false, &output_tensor));
2384   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
2385   return Status::OK();
2386 }
2387 
2388 Status ConvertSqueeze(OpConverterParams* params) {
2389   const auto& inputs = params->inputs;
2390   const auto& node_def = params->node_def;
2391   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
2392   TF_RETURN_IF_ERROR(AllowDataTypes(
2393       *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
2394   // Get input shape.
2395   const TRT_TensorOrWeights& input_tensor = inputs.at(0);
2396   const nvinfer1::Dims dims = input_tensor.GetTrtDims();
2397   std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
2398   // Mark axes to remove by setting them to 0.
2399   TFAttrs attrs(node_def);
2400   auto squeeze_dims = attrs.get<std::vector<int64>>("squeeze_dims");
2401   if (squeeze_dims.empty()) {
2402     return errors::Unimplemented(
2403         "Squeeze is only implemented for explicit dims, at ", node_def.name());
2404   }
2405   for (int tf_axis : squeeze_dims) {
2406     // Make sure axis is valid.
2407     int trt_axis;
2408     TF_RETURN_IF_ERROR(ConvertAxis(tf_axis, dims.nbDims, node_def.name(),
2409                                    /*use_implicit_batch=*/true, &trt_axis));
2410     // Make sure target dimension is size 1.
2411     if (input_dims[trt_axis] != 1) {
2412       return errors::InvalidArgument(
2413           "Dimension ", tf_axis, " with size ", input_dims[trt_axis],
2414           " cannot be squeezed because it must be size 1, at ",
2415           node_def.name());
2416     }
2417     // Mark dim for removal by setting to 0.
2418     input_dims[trt_axis] = 0;
2419   }
2420   if (params->validation_only) return Status::OK();
2421 
2422   // Remove all dims which are equal to 0.
2423   input_dims.erase(std::remove(input_dims.begin(), input_dims.end(), 0),
2424                    input_dims.end());
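  // Example (illustrative): marked dims [3, 0, 5, 0] collapse to [3, 5],
  // which becomes the reshape target below.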
2425   // Reshape tensor.
2426   nvinfer1::Dims new_dims;
2427   TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims));
2428   nvinfer1::ITensor* output_tensor = nullptr;
2429   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2430       input_tensor, new_dims, /*validation_only=*/false, &output_tensor));
2431   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
2432   return Status::OK();
2433 }
2434 
2435 template <typename Container>
2436 Status ConvertStridedSliceHelper(OpConverterParams* params,
2437                                  const TRT_TensorOrWeights& input,
2438                                  Container begin, Container size,
2439                                  const Container& stride,
2440                                  const nvinfer1::Dims* final_shape = nullptr) {
2441   const auto& node_def = params->node_def;
2442   // Get input dims.
2443   nvinfer1::Dims dims = input.GetTrtDims();
2444   std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
2445   // Temporarily add batch dimension so that indexes line up properly.
2446   input_dims.insert(input_dims.begin(), -1);
2447   // Check bounds.
2448   for (int i = 1; i < input_dims.size(); i++) {
2449     if (begin[i] < 0 || begin[i] > input_dims[i]) {
2450       return errors::InvalidArgument("\"begin\" for dimension ",
2451                                      std::to_string(i), " in ", node_def.op(),
2452                                      " is out of range, at ", node_def.name());
2453     }
2454     const int end = begin[i] + size[i];
2455     if (end < 0 || end > input_dims[i]) {
2456       return errors::InvalidArgument("\"begin\" + \"size\" for dimension ",
2457                                      std::to_string(i), " in ", node_def.op(),
2458                                      " is out of range, at ", node_def.name());
2459     }
2460     if (size[i] <= 0) {
2461       return errors::InvalidArgument("\"size\" cannot be negative or zero for ",
2462                                      node_def.op(), ", at ", node_def.name());
2463     }
2464   }
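  // Example (illustrative): for input_dims[i] == 8, begin[i] == 6 with
  // size[i] == 3 is rejected because 6 + 3 > 8.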
2465 // TRT 5.1 adds ISliceLayer. For older versions, we attempt to use the
2466 // padding layer with negative padding.
2467 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
2468   nvinfer1::Dims begin_dims, size_dims, stride_dims;
2469   TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(begin, &begin_dims,
2470                                                /*ignore_first_dim=*/true));
2471   TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(size, &size_dims,
2472                                                /*ignore_first_dim=*/true));
2473   TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(stride, &stride_dims,
2474                                                /*ignore_first_dim=*/true));
2475   if (params->validation_only) return Status::OK();
2476 
2477   nvinfer1::ISliceLayer* layer = params->converter->network()->addSlice(
2478       *input.tensor(), begin_dims, size_dims, stride_dims);
2479   nvinfer1::ITensor* tensor = layer->getOutput(0);
2480   // Reshape for shrink_axis.
2481   if (final_shape) {
2482     TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2483         TRT_TensorOrWeights(tensor), *final_shape, /*validation_only=*/false,
2484         &tensor));
2485   }
2486   params->outputs->push_back(TRT_TensorOrWeights(tensor));
2487   return Status::OK();
2488 #else
2489   // Use IPaddingLayer.
2490   // Strides must be 1 in this case.
2491   for (int x : stride) {
2492     if (x != 1) {
2493       return errors::Unimplemented(
2494           "Strides other than 1 are not supported with this version of TRT, "
2495           "at ",
2496           node_def.name());
2497     }
2498   }
2499   // Rank must be 2, 3 or 4.
2500   if (input_dims.size() > 4) {
2501     return errors::Unimplemented(node_def.op(),
2502                                  " for tensors with rank > 4 is not supported "
2503                                  "in this version of TRT, at ",
2504                                  node_def.name());
2505   }
2506   // Reshape if necessary to 4-D, since IPaddingLayer requires a 4-D input.
2507   const bool need_reshape = (input_dims.size() != 4);
2508   int reshape_dims_added = 0;
2509   nvinfer1::Dims reshape_dims;
2510   if (need_reshape) {
2511     // Add new dims after batch dim until tensor is 4D.
2512     while (input_dims.size() < 4) {
2513       input_dims.insert(input_dims.begin() + 1, 1);
2514       begin.insert(begin.begin() + 1, 0);
2515       size.insert(size.begin() + 1, 1);
2516       reshape_dims_added++;
2517     }
2518     TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &reshape_dims,
2519                                                  /*ignore_first_dim=*/true));
2520   }
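  // Example (illustrative): dims [N, A, B] (including batch) become
  // [N, 1, A, B]; begin and size get matching 0 and 1 entries at index 1.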
2521   // Find dimensions which need to be sliced.
2522   std::vector<int> pad_dims;
2523   for (int i = 1; i < input_dims.size(); i++) {
2524     if ((begin[i] != 0) || (begin[i] + size[i] != input_dims[i])) {
2525       pad_dims.push_back(i);
2526     }
2527   }
2528   if (pad_dims.empty()) {
2529     // No dimensions are changed, so this is a no-op. We could just return the
2530     // input without creating a new layer. TRT will crash if we try to build
2531     // an engine with no layers, so we add a no-op shuffle to
2532     // prevent our unit tests from breaking.
2533     // TODO(tmorris): Allow empty engines in the unit tests and return the input
2534     // as output here.
2535     if (params->validation_only) return Status::OK();
2536     nvinfer1::IShuffleLayer* layer =
2537         params->converter->network()->addShuffle(*input.tensor());
2538     params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
2539     return Status::OK();
2540   } else if (pad_dims.size() == 1) {
2541     // Only one dim is modified, but the padding layer needs two, so mark a
2542     // second dim which will get padding of 0. The dim we add is chosen to
2543     // avoid an unnecessary transpose.
2544     if (pad_dims[0] != 2) {
2545       pad_dims.push_back(2);
2546     } else {
2547       pad_dims.push_back(3);
2548     }
2549   } else if (pad_dims.size() > 2) {
2550     return errors::Unimplemented(
2551         node_def.op(),
2552         " can only modify up to 2 dimensions in this version of TRT, at ",
2553         node_def.name());
2554   }
2555   std::sort(pad_dims.begin(), pad_dims.end());
2556   // Convert to pre/post padding values. Since TRT does not have a StridedSlice
2557   // or Slice layer prior to 5.1, we instead create an IPaddingLayer with
2558   // negative padding.
2559   nvinfer1::DimsHW pre_padding, post_padding;
2560   for (int i = 0; i < pad_dims.size(); i++) {
2561     const int axis = pad_dims[i];
2562     pre_padding.d[i] = -begin[axis];
2563     post_padding.d[i] = (begin[axis] + size[axis]) - input_dims[axis];
2564   }
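  // Example (illustrative): for an axis of length 6 with begin == 1 and
  // size == 3, pre_padding == -1 and post_padding == (1 + 3) - 6 == -2,
  // i.e. one element is trimmed from the front and two from the back.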
2565 
2566   // IPaddingLayer will always apply the padding to dims 2,3 (input format is
2567   // NCHW).
2568   const bool need_transpose = !(pad_dims[0] == 2 && pad_dims[1] == 3);
2569   std::vector<int> transpose_order(input_dims.size());
2570   std::vector<int> inv_transpose_order(input_dims.size());
2571   if (need_transpose) {
2572     if (pad_dims[0] == 1 && pad_dims[1] == 3) {
2573       transpose_order = {0, 2, 1, 3};
2574       inv_transpose_order = {0, 2, 1, 3};
2575     } else if (pad_dims[0] == 1 && pad_dims[1] == 2) {
2576       transpose_order = {0, 3, 1, 2};
2577       inv_transpose_order = {0, 2, 3, 1};
2578     }
2579   }
2580   if (params->validation_only) return Status::OK();
2581 
2582   // Start conversion.
2583   nvinfer1::ITensor* tensor = input.tensor();
2584   if (need_reshape) {
2585     TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2586         input, reshape_dims, /*validation_only=*/false, &tensor));
2587   }
2588   if (need_transpose) {
2589     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2590         tensor, transpose_order, StrCat(node_def.name(), "_for_pad"), &tensor));
2591   }
2592   // Add padding layer
2593   nvinfer1::IPaddingLayer* layer = params->converter->network()->addPadding(
2594       *tensor, pre_padding, post_padding);
2595   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
2596   params->converter->MarkQuantizationRangesAsInferrable(tensor,
2597                                                         layer->getOutput(0));
2598   tensor = layer->getOutput(0);
2599   // Restore transpose
2600   if (need_transpose) {
2601     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2602         tensor, inv_transpose_order, StrCat(node_def.name(), "_after_pad"),
2603         &tensor));
2604   }
2605   // Reshape for shrink_axis.
2606   if (final_shape) {
2607     TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2608         TRT_TensorOrWeights(tensor), *final_shape, /*validation_only=*/false,
2609         &tensor));
2610   } else if (need_reshape) {
2611     // Restore reshape.
2612     // Calculate output dimensions
2613     for (int i = 0; i < pad_dims.size(); i++) {
2614       const int axis = pad_dims[i];
2615       input_dims[axis] = size[axis];
2616     }
2617     // Remove added 1 dimensions
2618     for (int i = 0; i < reshape_dims_added; i++) {
2619       int value = input_dims[1];
2620       if (value != 1) {
2621         return errors::Internal("StridedSlice error when reshaping, at ",
2622                                 node_def.name());
2623       }
2624       input_dims.erase(input_dims.begin() + 1);
2625     }
2626 
2627     nvinfer1::Dims new_dims;
2628     TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims,
2629                                                  /*ignore_first_dim=*/true));
2630     TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
2631         TRT_TensorOrWeights(tensor), new_dims, /*validation_only=*/false,
2632         &tensor));
2633   }
2634 
2635   params->outputs->push_back(TRT_TensorOrWeights(tensor));
2636   return Status::OK();
2637 #endif
2638 }
2639 
2640 Status ConvertSlice(OpConverterParams* params) {
2641   const auto& inputs = params->inputs;
2642   const auto& node_def = params->node_def;
2643   TF_RETURN_IF_ERROR(CheckInputsWeights(
2644       *params, {{"input", false}, {"begin", true}, {"size", true}}));
2645   TF_RETURN_IF_ERROR(AllowDataTypes(
2646       *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
2647   std::vector<int> begin = inputs.at(1).weights().ToVector<int>();
2648   std::vector<int> size = inputs.at(2).weights().ToVector<int>();
2649   // Get input dims.
2650   nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
2651   std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
2652   // Add batch dimension so that indexes line up properly.
2653   input_dims.insert(input_dims.begin(), inputs.at(0).batch_size());
2654   if (!AllLengthsEqual({input_dims, begin, size})) {
2655     return errors::InvalidArgument(
2656         "Length of begin and size arguments must equal rank of input for "
2657         "Slice, at ",
2658         node_def.name());
2659   }
2660   // Check that batch dimension is unmodified.
2661   const bool begin_is_modified = begin[0] != 0;
2662   // If size[0] is not -1, we can only know if the batch dimension is
2663   // unmodified when the batch size is defined. When the batch size is
2664   // undefined, we don't convert to be safe.
2665   const bool batch_size_is_defined = input_dims[0] > 0;
2666   const bool size_is_modified =
2667       size[0] != -1 && (!batch_size_is_defined ||
2668                         (batch_size_is_defined && size[0] != input_dims[0]));
2669   if (begin_is_modified || size_is_modified) {
2670     return errors::Unimplemented(
2671         "TensorRT does not allow modifications to the batch dimension, at ",
2672         node_def.name());
2673   }
2674   // Size of -1 signifies to take all remaining elements.
2675   for (int i = 1; i < input_dims.size(); i++) {
2676     if (size[i] == -1) {
2677       size[i] = input_dims[i] - begin[i];
2678     }
2679   }
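  // Example (illustrative): input_dims[i] == 10 with begin[i] == 4 and
  // size[i] == -1 expands to size[i] = 10 - 4 = 6.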
2680   // Stride is 1 for all dims.
2681   std::vector<int> stride(begin.size(), 1);
2682   return ConvertStridedSliceHelper(params, inputs.at(0), begin, size, stride);
2683 }
2684 
2685 Status ConvertStridedSlice(OpConverterParams* params) {
2686   const auto& inputs = params->inputs;
2687   const auto& node_def = params->node_def;
2688   TF_RETURN_IF_ERROR(CheckInputsWeights(
2689       *params,
2690       {{"input", false}, {"begin", true}, {"end", true}, {"strides", true}}));
2691   TF_RETURN_IF_ERROR(AllowDataTypes(
2692       *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
2693 
2694   TFAttrs attrs(node_def);
2695   // new_axis_mask is not supported.
2696   const int32 new_axis_mask = attrs.get<int64>("new_axis_mask");
2697   if (new_axis_mask != 0) {
2698     return errors::Unimplemented(
2699         "new_axis_mask is not supported for StridedSlice, at ",
2700         node_def.name());
2701   }
2702   const int32 begin_mask = attrs.get<int64>("begin_mask");
2703   const int32 end_mask = attrs.get<int64>("end_mask");
2704   const int32 ellipsis_mask = attrs.get<int64>("ellipsis_mask");
2705   const int32 shrink_axis_mask = attrs.get<int64>("shrink_axis_mask");
2706 
2707   // Get input dims.
2708   nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
2709   std::vector<int64> input_dims(dims.d, dims.d + dims.nbDims);
2710   // Add batch dimension so that indexes line up properly. Set it to -1 if it's
2711   // unknown, so ValidateStridedSliceOp() can handle it correctly below.
2712   input_dims.insert(input_dims.begin(),
2713                     std::max(-1, inputs.at(0).batch_size()));
2714 
2715   const TRT_ShapedWeights& begin_weights = inputs.at(1).weights();
2716   const TRT_ShapedWeights& end_weights = inputs.at(2).weights();
2717   const TRT_ShapedWeights& stride_weights = inputs.at(3).weights();
2718   if (!AllLengthsEqual({begin_weights.ToVector<int>(),
2719                         end_weights.ToVector<int>(),
2720                         stride_weights.ToVector<int>()})) {
2721     return errors::InvalidArgument(
2722         "Length of begin, end, and stride must be equal, at ", node_def.name());
2723   }
2724 
2725   PartialTensorShape input_shape(input_dims);
2726   PartialTensorShape processing_shape;
2727   PartialTensorShape final_shape;
2728   bool is_identity;
2729   bool is_simple_slice;
2730   bool slice_dim0;
2731   absl::InlinedVector<int64, 4> begin;
2732   absl::InlinedVector<int64, 4> end;
2733   absl::InlinedVector<int64, 4> strides;
2734   TF_RETURN_IF_ERROR(ValidateStridedSliceOp(
2735       &begin_weights.GetTensor(), &end_weights.GetTensor(),
2736       stride_weights.GetTensor(), input_shape, begin_mask, end_mask,
2737       ellipsis_mask, new_axis_mask, shrink_axis_mask, &processing_shape,
2738       &final_shape, &is_identity, &is_simple_slice, &slice_dim0, &begin, &end,
2739       &strides));
2740 
2741   // Negative or zero strides currently not supported.
2742   for (int stride : strides) {
2743     if (stride <= 0) {
2744       return errors::Unimplemented(
2745           "Negative or zero stride values are not supported for StridedSlice, "
2746           "at ",
2747           node_def.name());
2748     }
2749   }
2750 
2751   // If the batch dimension is covered by the ellipsis mask, it is left
2752   // untouched. Otherwise we check here whether the slice modifies it.
2753   if (!(ellipsis_mask & 1) ||
2754       begin_weights.shape_.nbDims >= input_dims.size()) {
2755     // Check that batch dimension is unmodified. We need to use the expanded
2756     // begin/end/strides array since the original array may be incorrect when
2757     // (ellipsis_mask&1)==1.
2758     const bool begin_is_modified = !(begin_mask & 1) && (begin[0] != 0);
2759     const bool stride_is_modified = (strides[0] != 1);
2760     // If the end mask is not set, we can only know whether the batch dimension
2761     // is unmodified when the batch size is defined. When the batch size is
2762     // undefined, we don't convert, to be safe.
2763     const bool batch_size_is_defined = (input_dims[0] > 0);
2764     const bool end_is_modified =
2765         !(end_mask & 1) && (!batch_size_is_defined ||
2766                             (batch_size_is_defined && end[0] != input_dims[0]));
2767     if (begin_is_modified || stride_is_modified || end_is_modified) {
2768       return errors::Unimplemented(
2769           "TensorRT does not allow modifications to the batch dimension, at ",
2770           node_def.name());
2771     }
2772   }
2773   // Can't shrink axis on batch dimension.
2774   if (shrink_axis_mask & 1) {
2775     return errors::Unimplemented(
2776         "TensorRT does not allow modifications to the batch dimension, at ",
2777         node_def.name());
2778   }
2779   // TRT Slice layer uses (begin, size) instead of (begin, end)
2780   absl::InlinedVector<int64, 4> size(input_dims.size());
2781   for (int i = 0; i < input_dims.size(); i++) {
2782     // Divide by stride (round up)
2783     size[i] = (end[i] - begin[i] + strides[i] - 1) / strides[i];
2784   }
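  // Example (illustrative): begin == 2, end == 9, stride == 3 selects
  // indices {2, 5, 8}, and (9 - 2 + 3 - 1) / 3 == 3.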
2785 
2786   // shrink_axis_mask requires a reshape after the slice.
2787   nvinfer1::Dims final_shape_dims;
2788   nvinfer1::Dims* final_shape_dims_ptr = nullptr;
2789   if (shrink_axis_mask) {
2790     final_shape_dims =
2791         TensorShapeToTrtDims(final_shape, /*ignore_first_dim=*/true);
2792     final_shape_dims_ptr = &final_shape_dims;
2793   }
2794   return ConvertStridedSliceHelper(params, inputs.at(0), begin, size, strides,
2795                                    final_shape_dims_ptr);
2796 }
2797 
2798 Status ConvertConv2D(OpConverterParams* params) {
2799   return ConvertConv2DHelper(params, 1, /*is_conv2d_backprop_input=*/false);
2800 }
2801 
2802 Status ConvertConv2DDepthwise(OpConverterParams* params) {
2803   return ConvertConv2DHelper(params, 0, /*is_conv2d_backprop_input=*/false);
2804 }
2805 
2806 Status ConvertConv2DBackpropInput(OpConverterParams* params) {
2807   return ConvertConv2DHelper(params, 1, /*is_conv2d_backprop_input=*/true);
2808 }
2809 
2810 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
2811 Status ConvertConv3DHelper(OpConverterParams* params, int group,
2812                            bool is_conv3d_backprop_input = false) {
2813   const int kNumDims = 5;
2814   const auto& inputs = params->inputs;
2815   const auto& node_def = params->node_def;
2816   TRT_TensorOrWeights backprop_output_size;
2817   nvinfer1::ITensor* tensor = nullptr;
2818   if (is_conv3d_backprop_input) {
2819     // In the case when Conv3dBackpropInput is used for conv3d_transpose, these
2820     // inputs correspond to: output size, filter, and input.
2821     TF_RETURN_IF_ERROR(CheckInputsWeights(
2822         *params,
2823         {{"input_sizes", true}, {"filter", true}, {"out_backprop", false}}));
2824     backprop_output_size = inputs.at(0);
2825     tensor = inputs.at(2).tensor();
2826   } else {
2827     TF_RETURN_IF_ERROR(
2828         CheckInputsWeights(*params, {{"input", false}, {"filter", true}}));
2829     tensor = inputs.at(0).tensor();
2830   }
2831   TF_RETURN_IF_ERROR(
2832       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
2833   const TRT_ShapedWeights weights_drsck = inputs.at(1).weights();
2834   if (weights_drsck.shape_.nbDims != kNumDims) {
2835     return errors::InvalidArgument("Conv3D expects kernel of dimension 5, at ",
2836                                    node_def.name());
2837   }
2838   TFAttrs attrs(node_def);
2839   auto data_format = attrs.get<string>("data_format");
2840   const bool is_ndhwc = (data_format == "NDHWC");  // Or NCDHW 01234 -> 02341
2841   const int d_index = is_ndhwc ? 1 : 2;
2842   const int h_index = is_ndhwc ? 2 : 3;
2843   const int w_index = is_ndhwc ? 3 : 4;
2844   const int c_index = is_ndhwc ? 4 : 1;
2845   auto tf_dilations = attrs.get<std::vector<int64>>("dilations");
2846   if (tf_dilations.size() != kNumDims) {
2847     return errors::InvalidArgument(
2848         "Convolution dilations field must specify 5 dimensions, at ",
2849         node_def.name());
2850   }
2851   if (tf_dilations[0] != 1 || tf_dilations[c_index] != 1) {
2852     return errors::Unimplemented(
2853         "Dilation rate must be 1 for batch and channel dimensions, at ",
2854         node_def.name());
2855   }
2856 
2857   const nvinfer1::Dims3 dilation_dhw(
2858       tf_dilations[d_index], tf_dilations[h_index], tf_dilations[w_index]);
2859   if (is_conv3d_backprop_input &&
2860       (dilation_dhw.d[0] != 1 || dilation_dhw.d[1] != 1 ||
2861        dilation_dhw.d[2] != 1)) {
2862     return errors::Unimplemented(
2863         "Dilation with Conv3DBackpropInputV2 (conv3d_transpose) is not "
2864         "supported",
2865         ", at ", node_def.name());
2866   }
2867 
2868   const auto tf_stride = attrs.get<std::vector<int64>>("strides");
2869   if (tf_stride.size() != kNumDims) {
2870     return errors::InvalidArgument(
2871         "Convolution strides field must specify 5 dimensions, at ",
2872         node_def.name());
2873   }
2874   if (tf_stride[0] != 1 || tf_stride[c_index] != 1) {
2875     return errors::Unimplemented(
2876         "Stride must be 1 for batch and channel dimensions, at ",
2877         node_def.name());
2878   }
2879 
2880   const nvinfer1::Dims3 stride_dhw(tf_stride[d_index], tf_stride[h_index],
2881                                    tf_stride[w_index]);
2882   const auto tensor_dim = tensor->getDimensions();
2883 
2884   // Asymmetric padding on Deconv not supported for now
2885   if (is_conv3d_backprop_input && attrs.get<string>("padding") == "SAME") {
2886     TRT_ShapedWeights weights =
2887         params->weight_store->GetTempWeights(weights_drsck);
2888 
2889     nvinfer1::Dims3 effective_kernel_size(
2890         weights.shape_.d[0] +
2891             (weights.shape_.d[0] - 1) * (dilation_dhw.d[0] - 1),  // D
2892         weights.shape_.d[1] +
2893             (weights.shape_.d[1] - 1) * (dilation_dhw.d[1] - 1),  // R
2894         weights.shape_.d[2] +
2895             (weights.shape_.d[2] - 1) * (dilation_dhw.d[2] - 1)  // S
2896     );
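    // Example (illustrative): a kernel extent of 3 with dilation 2 has an
    // effective extent of 3 + (3 - 1) * (2 - 1) == 5 input elements.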
2897 
2898     const auto output_size_weights =
2899         static_cast<int*>(backprop_output_size.weights().GetValues());
2900     const std::vector<int64_t> input_dims = {output_size_weights[d_index],
2901                                              output_size_weights[h_index],
2902                                              output_size_weights[w_index]};
2903 
2904     const std::vector<std::pair<int, int>> padding =
2905         CreateSamePadding(stride_dhw, effective_kernel_size, input_dims);
2906 
2907     if (padding[0].first != padding[0].second ||
2908         padding[1].first != padding[1].second ||
2909         padding[2].first != padding[2].second) {
2910       return errors::Unimplemented(
2911           "Asymmetric padding with Conv3DBackpropInputV2 (conv3d_transpose) is "
2912           "not supported, at ",
2913           node_def.name());
2914     }
2915   }
2916 
2917   // Finished validation checks
2918   if (params->validation_only) return Status::OK();
2919 
2920   // Transpose to NCDHW (NCDHW is required for IConvLayer).
2921   const bool need_transpose = is_ndhwc;
2922   if (need_transpose) {
2923     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2924         tensor, {0, 4, 1, 2, 3}, StrCat(node_def.name(), "_to_NCDHW"),
2925         &tensor));
2926   }
2927 
2928   // group == 0 signifies that this is a depthwise convolution, so set
2929   // num_groups to size of input's channel dim. For a non-depthwise conv,
2930   // num_groups will be 1.
2931   const int num_groups = (group == 0) ? tensor_dim.d[0] : group;
2932 
2933   // For conv, TF weights are DRSCK, and TRT expects KCDRS.
2934   // For backprop, TF weights are DRSKC, and TRT expects KCDRS.
2935   // Therefore, this reorder will work for both cases.
2936   TRT_ShapedWeights weights =
2937       params->weight_store->GetTempWeights(weights_drsck);
2938   ReorderDRSCKToKCDRS(weights_drsck, &weights, num_groups);
2939   TRT_ShapedWeights biases(weights.TrtDType());
2940   const int output_axis = is_conv3d_backprop_input ? 1 : 0;
2941   const int noutput = weights.shape_.d[output_axis] * num_groups;
2942   nvinfer1::Dims3 kernel_size_drs(weights.shape_.d[2],  // D
2943                                   weights.shape_.d[3],  // R
2944                                   weights.shape_.d[4]   // S
2945   );
2946 
2947   // Add convolution.
2948   nvinfer1::ILayer* conv_layer = nullptr;
2949   if (is_conv3d_backprop_input) {
2950     nvinfer1::IDeconvolutionLayer* layer =
2951         params->converter->network()->addDeconvolutionNd(
2952             *tensor, noutput, kernel_size_drs, weights.GetTrtWeights(),
2953             biases.GetTrtWeights());
2954     TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
2955     layer->setStrideNd(stride_dhw);  // change to nd set stride
2956 
2957     // TensorRT 5.1.3 added support for padding modes.
2958     if (attrs.get<string>("padding") == "SAME") {
2959       VLOG(2) << "Using SAME padding";
2960       // SAME_UPPER means that post padding is preferred.
2961       layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
2962     }
2963 
2964     layer->setName(node_def.name().c_str());
2965     layer->setNbGroups(num_groups);
2966     conv_layer = layer;
2967   } else {
2968     nvinfer1::IConvolutionLayer* layer =
2969         params->converter->network()->addConvolutionNd(
2970             *tensor, noutput, kernel_size_drs, weights.GetTrtWeights(),
2971             biases.GetTrtWeights());
2972     TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
2973     layer->setStrideNd(stride_dhw);
2974 
2975     if (attrs.get<string>("padding") == "SAME") {
2976       VLOG(2) << "Using SAME padding";
2977       layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
2978     }
2979 
2980     layer->setName(node_def.name().c_str());
2981     layer->setNbGroups(num_groups);
2982     layer->setDilationNd(dilation_dhw);
2983     conv_layer = layer;
2984   }
2985   nvinfer1::ITensor* output_tensor = conv_layer->getOutput(0);
2986 
2987   // Restore transpose.
2988   if (need_transpose) {
2989     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
2990         output_tensor, {0, 2, 3, 4, 1}, StrCat(node_def.name(), "_to_NDHWC"),
2991         &output_tensor));
2992   }
2993   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
2994   return Status::OK();
2995 }
2996 
2997 Status ConvertConv3D(OpConverterParams* params) {
2998   return ConvertConv3DHelper(params, 1, /*is_conv3d_backprop_input=*/false);
2999 }
3000 
3001 Status ConvertConv3DBackpropInputV2(OpConverterParams* params) {
3002   return ConvertConv3DHelper(params, 1, /*is_conv3d_backprop_input=*/true);
3003 }
3004 
3005 Status ConvertPool3D(OpConverterParams* params) {
3006   const int kNumDims = 5;
3007   const auto& inputs = params->inputs;
3008   const auto& node_def = params->node_def;
3009   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
3010   TF_RETURN_IF_ERROR(
3011       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3012   nvinfer1::PoolingType type;
3013   if (node_def.op() == "MaxPool3D") {
3014     type = nvinfer1::PoolingType::kMAX;
3015   } else if (node_def.op() == "AvgPool3D") {
3016     type = nvinfer1::PoolingType::kAVERAGE;
3017   } else {
3018     return errors::Unimplemented("Unsupported pooling type: ", node_def.op(),
3019                                  ", at ", node_def.name());
3020   }
3021   TFAttrs attrs(node_def);
3022   const string padding_type = attrs.get<string>("padding");
3023   if ((padding_type != "SAME") && (padding_type != "VALID")) {
3024     return errors::Unimplemented("Unsupported padding type: ", padding_type,
3025                                  ", at ", node_def.name());
3026   }
3027   const auto data_format = attrs.get<string>("data_format");
3028   const bool is_ndhwc = (data_format == "NDHWC");
3029   const int c_index = is_ndhwc ? 4 : 1;
3030   const int d_index = is_ndhwc ? 1 : 2;
3031   const int h_index = is_ndhwc ? 2 : 3;
3032   const int w_index = is_ndhwc ? 3 : 4;
3033   const auto tf_stride = attrs.get<std::vector<int64>>("strides");
3034   if (tf_stride.size() != kNumDims) {
3035     return errors::InvalidArgument(
3036         "Pooling strides field must specify 5 dimensions, at ",
3037         node_def.name());
3038   }
3039   if (tf_stride[0] != 1 || tf_stride[c_index] != 1) {
3040     return errors::Unimplemented(
3041         "stride must be 1 for batch and channel dimensions, at ",
3042         node_def.name());
3043   }
3044   const auto tf_kernel = attrs.get<std::vector<int64>>("ksize");
3045   if (tf_kernel.size() != kNumDims) {
3046     return errors::InvalidArgument(
3047         "Pooling ksize field must specify 5 dimensions, at ", node_def.name());
3048   }
3049   if (tf_kernel[0] != 1 || tf_kernel[c_index] != 1) {
3050     return errors::Unimplemented(
3051         "ksize must be 1 for batch and channel dimensions, at ",
3052         node_def.name());
3053   }
3054   if (params->validation_only) return Status::OK();
3055 
3056   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
3057   if (data_format == "NDHWC") {
3058     // NDHWC => NCDHW
3059     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
3060         tensor, {0, 4, 1, 2, 3}, StrCat(node_def.name(), "_to_NCDHW"),
3061         &tensor));
3062   }
3063 
3064   const nvinfer1::Dims3 stride(tf_stride[d_index], tf_stride[h_index],
3065                                tf_stride[w_index]);
3066   const nvinfer1::Dims3 ksize(tf_kernel[d_index], tf_kernel[h_index],
3067                               tf_kernel[w_index]);
3068 
3069   nvinfer1::IPoolingLayer* layer =
3070       params->converter->network()->addPoolingNd(*tensor, type, ksize);
3071   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3072 
3073   params->converter->MarkQuantizationRangesAsInferrable(tensor,
3074                                                         layer->getOutput(0));
3075 
3076   layer->setStrideNd(stride);
3077   // VALID padding is the default TRT behavior.
3078   if (padding_type == "SAME") {
3079     // SAME_UPPER means that post padding is preferred.
3080     layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
3081   }
3082   layer->setName(node_def.name().c_str());
3083   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3084 
3085   if (data_format == "NDHWC") {
3086     // NCDHW => NDHWC
3087     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
3088         output_tensor, {0, 2, 3, 4, 1}, StrCat(node_def.name(), "_to_NDHWC"),
3089         &output_tensor));
3090   }
3091 
3092   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3093   return Status::OK();
3094 }
3095 #endif  // #if IS_TRT_VERSION_GE(6, 0, 0, 0)
3096 
3097 Status ConvertFusedConv2DBiasActivation(OpConverterParams* params) {
3098   const auto& inputs = params->inputs;
3099   const auto& node_def = params->node_def;
3100 
3101   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false},
3102                                                   {"filter", true},
3103                                                   {"bias", true},
3104                                                   {"side_input", true},
3105                                                   {"conv_input_scale", true},
3106                                                   {"side_input_scale", true}}));
3107   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
3108   TF_RETURN_IF_ERROR(
3109       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3110   TRT_ShapedWeights weights = inputs.at(1).weights();
3111   if (weights.shape_.nbDims != 4) {
3112     return errors::InvalidArgument(
3113         "FusedConv2DBiasActivation expects kernel of dimension 4, at " +
3114         node_def.name());
3115   }
3116   TFAttrs attrs(node_def);
3117   auto data_format = attrs.get<string>("data_format");
3118   if (data_format != "NHWC" && data_format != "NCHW") {
3119     return errors::InvalidArgument("Unsupported data_format:", data_format,
3120                                    " at ", node_def.name());
3121   }
3122 
3123   int c_index = (data_format == "NHWC") ? 3 : 1;
3124   int h_index = (data_format == "NHWC") ? 1 : 2;
3125   int w_index = (data_format == "NHWC") ? 2 : 3;
3126   auto tf_dilations = attrs.get<std::vector<int64>>("dilations");
3127   if (tf_dilations.size() != 4) {
3128     return errors::InvalidArgument(
3129         "Convolution dilations field must specify 4 dimensions, at ",
3130         node_def.name());
3131   }
3132   if (tf_dilations[0] != 1 || tf_dilations[c_index] != 1) {
3133     return errors::Unimplemented(
3134         "Dilation rate must be 1 for batch and channel dimensions, at ",
3135         node_def.name());
3136   }
3137   const nvinfer1::DimsHW dilation(tf_dilations[h_index], tf_dilations[w_index]);
3138 
3139   const auto tf_stride = attrs.get<std::vector<int64>>("strides");
3140   if (tf_stride.size() != 4) {
3141     return errors::InvalidArgument(
3142         "Convolution strides field must specify 4 dimensions, at ",
3143         node_def.name());
3144   }
3145   if (tf_stride[0] != 1 || tf_stride[c_index] != 1) {
3146     return errors::Unimplemented(
3147         "Stride must be 1 for batch and channel dimensions, at ",
3148         node_def.name());
3149   }
3150   const nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]);
3151   const auto activation_mode = attrs.get<string>("activation_mode");
3152   auto op_pair = ActivationTypeMap()->find(activation_mode);
3153   if (op_pair == ActivationTypeMap()->end() && activation_mode != "None") {
3154     return errors::Unimplemented("Activation mode: ", activation_mode,
3155                                  " not supported at: ", node_def.name());
3156   }
3157 
3158   const auto filter_format = attrs.get<string>("filter_format");
3159   if (filter_format != "HWIO" && filter_format != "OIHW") {
3160     return errors::InvalidArgument("Unsupported filter_format:", filter_format,
3161                                    " at ", node_def.name());
3162   }
3163   // Check that there is no side_input and that conv_input_scale is 1.
3164   TRT_ShapedWeights side_input = inputs.at(3).weights();
3165   if (side_input.count() != 0) {
3166     return errors::InvalidArgument(
3167         "FusedConv2DBiasActivation doesn't yet support side_input, at " +
3168         node_def.name());
3169   }
3170   TRT_ShapedWeights conv_input_scale = inputs.at(4).weights();
3171   if (conv_input_scale.count() != 1 ||
3172       conv_input_scale.TrtDType() != nvinfer1::DataType::kFLOAT ||
3173       conv_input_scale.GetSpan<float>()[0] != 1.0) {
3174     return errors::InvalidArgument(
3175         "FusedConv2DBiasActivation doesn't yet support conv_input_scale, at " +
3176         node_def.name());
3177   }
3178   if (params->validation_only) return Status::OK();
3179 
3180   // Transpose to NCHW (NCHW is required for IConvLayer).
3181   const bool need_transpose = (data_format == "NHWC");
3182   if (need_transpose) {
3183     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
3184         tensor, {0, 3, 1, 2}, StrCat(node_def.name(), "_to_NCHW"), &tensor));
3185   }
3186 
3187   nvinfer1::DimsHW kernel_size;
3188   if (filter_format == "OIHW") {
3189     kernel_size.h() = weights.shape_.d[2];
3190     kernel_size.w() = weights.shape_.d[3];
3191   } else {
3192     // HWIO.
3193     DCHECK_EQ(filter_format, "HWIO");
3194     kernel_size.h() = weights.shape_.d[0];
3195     kernel_size.w() = weights.shape_.d[1];
3196   }
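  // Example (illustrative): an HWIO filter of shape [3, 3, 64, 128] yields
  // kernel_size.h() == 3 and kernel_size.w() == 3.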
3197 // Before TRT 5.1.3, we have to calculate padding ourselves.
3198 #if !IS_TRT_VERSION_GE(5, 1, 3, 0)
3199   const auto tensor_dim = tensor->getDimensions();
3200   std::vector<int64_t> input_dims;
3201   // Use 1 and 2 because tensor_dim has the dimensions of the transposed
3202   // input.
3203   input_dims = {static_cast<int>(tensor_dim.d[1]),
3204                 static_cast<int>(tensor_dim.d[2])};
3205   std::vector<std::pair<int, int>> padding;
3206   nvinfer1::ITensor* padded_tensor = nullptr;
3207   TF_RETURN_IF_ERROR(Conv2DPaddingHelper(params, attrs, kernel_size, dilation,
3208                                          stride, input_dims, tensor, &padding,
3209                                          &padded_tensor));
3210   tensor = padded_tensor;
3211 #endif
3212 
3213   // Add convolution.
3214   TRT_ShapedWeights biases = inputs.at(2).weights();
3215   nvinfer1::IConvolutionLayer* conv_layer = nullptr;
3216   if (filter_format == "OIHW") {
3217     // Weights are already in the right order.
3218     conv_layer = params->converter->network()->addConvolution(
3219         *tensor, weights.shape_.d[0], kernel_size, weights.GetTrtWeights(),
3220         biases.GetTrtWeights());
3221   } else {
3222     // For conv, TF weights are RSCK, and TRT expects KCRS.
3223     DCHECK_EQ(filter_format, "HWIO");
3224     TRT_ShapedWeights weights_kcrs =
3225         params->weight_store->GetTempWeights(weights);
3226     ReorderRSCKToKCRS(weights, &weights_kcrs, 1);
3227     conv_layer = params->converter->network()->addConvolution(
3228         *tensor, weights.shape_.d[3], kernel_size, weights_kcrs.GetTrtWeights(),
3229         biases.GetTrtWeights());
3230   }
3231   TFTRT_RETURN_ERROR_IF_NULLPTR(conv_layer, node_def.name());
3232   conv_layer->setStride(stride);
3233 #if IS_TRT_VERSION_GE(5, 1, 3, 0)
3234   if (attrs.get<string>("padding") == "SAME") {
3235     conv_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
3236   }
3237 #else
3238   conv_layer->setPadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
3239 #endif
3240   conv_layer->setName(node_def.name().c_str());
3241   conv_layer->setNbGroups(1);
3242   conv_layer->setDilation(dilation);
3243   nvinfer1::ITensor* output_tensor = conv_layer->getOutput(0);
3244 
3245   // Add activation if there is one.
3246   if (op_pair != ActivationTypeMap()->end()) {
3247     nvinfer1::IActivationLayer* activation_layer =
3248         params->converter->network()->addActivation(*output_tensor,
3249                                                     op_pair->second);
3250     TFTRT_RETURN_ERROR_IF_NULLPTR(activation_layer, node_def.name());
3251     output_tensor = activation_layer->getOutput(0);
3252   }
3253   // Restore transpose.
3254   if (need_transpose) {
3255     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
3256         output_tensor, {0, 2, 3, 1}, StrCat(node_def.name(), "_to_NHWC"),
3257         &output_tensor));
3258   }
3259   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3260   return Status::OK();
3261 }
3262 
3263 Status ConvertPool(OpConverterParams* params) {
3264   const auto& inputs = params->inputs;
3265   const auto& node_def = params->node_def;
3266   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
3267   TF_RETURN_IF_ERROR(
3268       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3269   nvinfer1::PoolingType type;
3270   if (node_def.op() == "MaxPool") {
3271     type = nvinfer1::PoolingType::kMAX;
3272   } else if (node_def.op() == "AvgPool") {
3273     type = nvinfer1::PoolingType::kAVERAGE;
3274   } else {
3275     return errors::Unimplemented("Unsupported pooling type: ", node_def.op(),
3276                                  ", at ", node_def.name());
3277   }
3278   TFAttrs attrs(node_def);
3279   const string padding_type = attrs.get<string>("padding");
3280   if ((padding_type != "SAME") && (padding_type != "VALID")) {
3281     return errors::Unimplemented("Unsupported padding type: ", padding_type,
3282                                  ", at ", node_def.name());
3283   }
3284   if (params->validation_only) return Status::OK();
3285 
3286   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
3287   int h_index = 2;
3288   int w_index = 3;
3289   const auto data_format = attrs.get<string>("data_format");
3290   if (data_format == "NHWC") {
3291     h_index = 1;
3292     w_index = 2;
3293     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
3294         tensor, {0, 3, 1, 2}, StrCat(node_def.name(), "_to_NCHW"), &tensor));
3295   }
3296 
3297   const auto tf_stride = attrs.get<std::vector<int64>>("strides");
3298   const nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]);
3299 
3300   const auto tf_kernel = attrs.get<std::vector<int64>>("ksize");
3301   const nvinfer1::DimsHW ksize(tf_kernel[h_index], tf_kernel[w_index]);
3302 
3303 // Before TRT 5.1.3, we have to calculate padding ourselves.
3304 #if !IS_TRT_VERSION_GE(5, 1, 3, 0)
3305   auto tensor_dim = tensor->getDimensions();
3306   std::vector<std::pair<int, int>> padding;
3307   if (padding_type == "SAME") {
3308     // This is NCHW tensor with no batch dimension.
3309     //  1 -> h
3310     //  2 -> w
3311     padding = CreateSamePadding(
3312         stride, ksize,
3313         {static_cast<int>(tensor_dim.d[1]), static_cast<int>(tensor_dim.d[2])});
3314   } else if (padding_type == "VALID") {
3315     padding = {{0, 0}, {0, 0}};
3316   }
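  // Example (illustrative, assuming the usual SAME-padding formula): with
  // stride 2, ksize 3 and an input extent of 7, the total padding is
  // (ceil(7 / 2) - 1) * 2 + 3 - 7 == 2, split across the pre/post pairs.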
3317 #endif
3318 // TensorRT 5.1 added support for asymmetric padding. Before that, we need an
3319 // extra padding layer.
3320 #if !IS_TRT_VERSION_GE(5, 1, 0, 0)
3321   // Asymmetric padding case.
3322   if (padding[0].first != padding[0].second ||
3323       padding[1].first != padding[1].second) {
3324     auto pad_layer = params->converter->network()->addPadding(
3325         *tensor, nvinfer1::DimsHW(padding[0].first, padding[1].first),
3326         nvinfer1::DimsHW(padding[0].second, padding[1].second));
3327     TFTRT_RETURN_ERROR_IF_NULLPTR(pad_layer, node_def.name());
3328     params->converter->MarkQuantizationRangesAsInferrable(
3329         tensor, pad_layer->getOutput(0));
3330     padding = {{0, 0}, {0, 0}};
3331     tensor = pad_layer->getOutput(0);
3332   }
3333 #endif
3334 
3335   nvinfer1::IPoolingLayer* layer =
3336       params->converter->network()->addPooling(*tensor, type, ksize);
3337   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3338   // TODO(tmorris): Average pooling may not be entirely safe to infer
3339   // quantization range through (at least forwards - backwards should be fine).
3340   // Max pooling is okay.
3341   params->converter->MarkQuantizationRangesAsInferrable(tensor,
3342                                                         layer->getOutput(0));
3343 
3344   layer->setStride(stride);
3345 #if IS_TRT_VERSION_GE(5, 1, 3, 0)
3346   // VALID padding is the default TRT behavior.
3347   if (attrs.get<string>("padding") == "SAME") {
3348     // SAME_UPPER means that post padding is preferred.
3349     layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
3350   }
3351 #elif IS_TRT_VERSION_GE(5, 1, 0, 0)
3352   layer->setPrePadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
3353   layer->setPostPadding(nvinfer1::DimsHW{padding[0].second, padding[1].second});
3354 #else
3355   layer->setPadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
3356 #endif
3357   layer->setName(node_def.name().c_str());
3358   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3359 
3360   if (data_format == "NHWC") {
3361     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
3362         output_tensor, {0, 2, 3, 1}, StrCat(node_def.name(), "_to_NHWC"),
3363         &output_tensor));
3364   }
3365   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3366   return Status::OK();
3367 }
3368 
3369 Status ConvertLeakyRelu(OpConverterParams* params) {
3370   const auto& inputs = params->inputs;
3371   const auto& node_def = params->node_def;
3372   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
3373   TF_RETURN_IF_ERROR(
3374       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3375   TFAttrs attrs(node_def);
3376   const float alpha = attrs.get<float>("alpha");
3377 
3378 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
3379   // Use IActivationLayer when available.
3380   if (params->validation_only) return Status::OK();
3381 
3382   nvinfer1::IActivationLayer* layer =
3383       params->converter->network()->addActivation(
3384           *inputs.at(0).tensor(), nvinfer1::ActivationType::kLEAKY_RELU);
3385   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3386   layer->setAlpha(alpha);
3387   params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
3388   return Status::OK();
3389 #else
3390   // Use elementwise ops when IActivationLayer is not available.
3391   if (alpha < 0.0f || alpha > 1.0f) {
3392     return errors::Unimplemented(
3393         "Alpha value for LeakyRelu must be between 0 and 1, at ",
3394         node_def.name());
3395   }
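  // With 0 <= alpha <= 1, LeakyRelu(x) (x for x >= 0, alpha * x otherwise)
  // is equivalent to max(x, alpha * x), which is what the layers below build.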
3396   if (params->validation_only) return Status::OK();
3397 
3398   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
3399   // Create const for alpha.
3400   nvinfer1::ITensor* const_alpha_tensor = nullptr;
3401   TF_RETURN_IF_ERROR(CreateBroadcastableScalarConstant(
3402       params, alpha, tensor->getDimensions(), &const_alpha_tensor));
3403   // alpha * x
3404   nvinfer1::IElementWiseLayer* mul_layer =
3405       params->converter->network()->addElementWise(
3406           *tensor, *const_alpha_tensor, nvinfer1::ElementWiseOperation::kPROD);
3407   TFTRT_RETURN_ERROR_IF_NULLPTR(mul_layer, node_def.name());
3408   // max(x, alpha * x)
3409   nvinfer1::IElementWiseLayer* max_layer =
3410       params->converter->network()->addElementWise(
3411           *tensor, *mul_layer->getOutput(0),
3412           nvinfer1::ElementWiseOperation::kMAX);
3413   TFTRT_RETURN_ERROR_IF_NULLPTR(max_layer, node_def.name());
3414   nvinfer1::ITensor* output_tensor = max_layer->getOutput(0);
3415   params->converter->MarkQuantizationRangesAsInferrable(
3416       output_tensor, mul_layer->getOutput(0));
3417 
3418   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3419   return Status::OK();
3420 #endif
3421 }
3422 
3423 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
3424 Status ConvertClipByValue(OpConverterParams* params) {
3425   const auto& inputs = params->inputs;
3426   const auto& node_def = params->node_def;
3427   // TODO(tmorris): We can also allow the case where min and max are tensors by
3428   // using elementwise min and max layers.
3429   TF_RETURN_IF_ERROR(CheckInputsWeights(
3430       *params,
3431       {{"t", false}, {"clip_value_min", true}, {"clip_value_max", true}}));
3432   TF_RETURN_IF_ERROR(
3433       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3434   if (params->validation_only) return Status::OK();
3435 
3436   TFAttrs attrs(node_def);
3437   const DataType dtype = attrs.get<DataType>("T");
3438   float clip_value_min = 0.0f;
3439   float clip_value_max = 0.0f;
3440   // TODO(tmorris): Add a templated helper function to get scalar weights of
3441   // InType casted to OutType.
3442   if (dtype == DataType::DT_FLOAT) {
3443     clip_value_min = inputs.at(1).weights().GetSpan<float>()[0];
3444     clip_value_max = inputs.at(2).weights().GetSpan<float>()[0];
3445   } else if (dtype == DataType::DT_HALF) {
3446     clip_value_min = Eigen::half_impl::half_to_float(
3447         inputs.at(1).weights().GetSpan<Eigen::half>()[0]);
3448     clip_value_max = Eigen::half_impl::half_to_float(
3449         inputs.at(2).weights().GetSpan<Eigen::half>()[0]);
3450   }
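  // Note (illustrative): the kCLIP activation below clamps each element to
  // [alpha, beta], so alpha and beta carry clip_value_min and clip_value_max.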
3451 
3452   nvinfer1::IActivationLayer* layer =
3453       params->converter->network()->addActivation(
3454           *inputs.at(0).tensor(), nvinfer1::ActivationType::kCLIP);
3455   layer->setAlpha(clip_value_min);
3456   layer->setBeta(clip_value_max);
3457   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3458   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3459   params->converter->ProvideQuantizationRange(output_tensor, clip_value_min,
3460                                               clip_value_max);
3461   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3462   return Status::OK();
3463 }
3464 #endif
3465 
3466 const std::unordered_map<string, nvinfer1::ActivationType>*
3467 ActivationTypeMap() {
3468   static auto* const m =
3469       new std::unordered_map<string, nvinfer1::ActivationType>({
3470         {"Relu", nvinfer1::ActivationType::kRELU},
3471             {"Sigmoid", nvinfer1::ActivationType::kSIGMOID},
3472             {"Tanh", nvinfer1::ActivationType::kTANH},
3473 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
3474             {"Elu", nvinfer1::ActivationType::kELU},
3475             {"Selu", nvinfer1::ActivationType::kSELU},
3476             {"Softsign", nvinfer1::ActivationType::kSOFTSIGN},
3477             {"Softplus", nvinfer1::ActivationType::kSOFTPLUS},
3478 #endif
3479       });
3480   return m;
3481 }
3482 
3483 Status ConvertActivation(OpConverterParams* params) {
3484   const auto& inputs = params->inputs;
3485   const auto& node_def = params->node_def;
3486   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
3487   TF_RETURN_IF_ERROR(
3488       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3489   auto op_pair = ActivationTypeMap()->find(node_def.op());
3490   if (op_pair == ActivationTypeMap()->end()) {
3491     return errors::Unimplemented("Activation op: ", node_def.op(),
3492                                  " not supported at: ", node_def.name());
3493   }
3494   if (params->validation_only) return Status::OK();
3495 
3496   // Start conversion.
3497   nvinfer1::IActivationLayer* layer =
3498       params->converter->network()->addActivation(*inputs.at(0).tensor(),
3499                                                   op_pair->second);
3500   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3501   // Set parameters.
3502 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
3503   if (node_def.op() == "Elu") {
3504     layer->setAlpha(1.0f);
3505   } else if (node_def.op() == "Selu") {
3506     // From tensorflow/core/kernels/relu_op_functor.h
3507     layer->setAlpha(1.7580993408473768599402175208123f);
3508     layer->setBeta(1.0507009873554804934193349852946f);
3509   } else if (node_def.op() == "Softplus") {
3510     layer->setAlpha(1.0f);
3511     layer->setBeta(1.0f);
3512   }
3513 #endif
3514   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3515   // Set quantization range for output when known.
3516   if (node_def.op() == "Sigmoid") {
3517     params->converter->ProvideQuantizationRange(output_tensor, 0.0f, 1.0f);
3518   } else if (node_def.op() == "Tanh") {
3519     params->converter->ProvideQuantizationRange(output_tensor, -1.0f, 1.0f);
3520   } else if (node_def.op() == "Softsign") {
3521     params->converter->ProvideQuantizationRange(output_tensor, -1.0f, 1.0f);
3522   }
3523   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3524   return Status::OK();
3525 }
3526 
3527 Status ConvertQuantize(OpConverterParams* params) {
3528   const auto& inputs = params->inputs;
3529   const auto& node_def = params->node_def;
3530   if (node_def.op() == "FakeQuantWithMinMaxArgs") {
3531     TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
3532   } else if (node_def.op() == "FakeQuantWithMinMaxVars") {
3533     TF_RETURN_IF_ERROR(CheckInputsWeights(
3534         *params, {{"input", false}, {"min", true}, {"max", true}}));
3535   } else if (node_def.op() == "QuantizeAndDequantizeV2") {
3536     TF_RETURN_IF_ERROR(CheckInputsWeights(
3537         *params, {{"input", false}, {"input_min", true}, {"input_max", true}}));
3538   } else if (node_def.op() == "QuantizeAndDequantizeV3") {
3539     TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false},
3540                                                     {"input_min", true},
3541                                                     {"input_max", true},
3542                                                     {"num_bits", true}}));
3543   }
3544   float min_range = 0.0f;
3545   float max_range = 0.0f;
3546   if (node_def.op() == "FakeQuantWithMinMaxArgs") {
3547     // Get ranges via node attributes.
3548     TFAttrs attrs(node_def);
3549     if (attrs.count("min") == 0 || attrs.count("max") == 0) {
3550       return errors::InvalidArgument("Min or max attribute not found for ",
3551                                      node_def.op(), " at ", node_def.name());
3552     }
3553     min_range = attrs.get<float>("min");
3554     max_range = attrs.get<float>("max");
3555   } else if (node_def.op() == "FakeQuantWithMinMaxVars" ||
3556              node_def.op() == "QuantizeAndDequantizeV2" ||
3557              node_def.op() == "QuantizeAndDequantizeV3") {
3558     // Get ranges via inputs.
3559     auto get_weights_value = [&inputs](int index) {
3560       auto raw_weights =
3561           static_cast<float*>(inputs.at(index).weights().GetValues());
3562       return raw_weights[0];
3563     };
3564     min_range = get_weights_value(1);
3565     max_range = get_weights_value(2);
3566   } else {
3567     return errors::InvalidArgument("Unknown quantization op ", node_def.op(),
3568                                    ", at ", node_def.name());
3569   }
3570   if (params->validation_only) return Status::OK();
3571 
3572   // Store ranges for tensor
3573   params->converter->ProvideQuantizationRange(inputs.at(0).tensor(), min_range,
3574                                               max_range);
3575   // Sometimes, TRT may not quantize a tensor, either because it chooses to
3576   // execute a higher precision kernel or because of op fusion. In these cases,
3577   // accuracy will suffer if the model was trained to expect quantization at
3578   // that tensor. We should consider adding a clip(tensor, min_range, max_range)
3579   // operation here to ensure that any arbitrarily placed quantize node will
3580   // execute as expected. However, this will negatively affect performance. If
3581   // users train their models in a way which models inference as close as
3582   // possible (i.e. not quantizing in place where fusion will occur), then there
3583   // is no problem with the current implementation.
3584   params->outputs->push_back(inputs.at(0));
3585   return Status::OK();
3586 }
3587 
3588 Status ConvertRelu6(OpConverterParams* params) {
3589   const auto& inputs = params->inputs;
3590   const auto& node_def = params->node_def;
3591   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
3592   TF_RETURN_IF_ERROR(
3593       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3594   if (params->validation_only) return Status::OK();
3595 
3596 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
3597   // Use IActivationLayer for TRT >= 5.1
3598   nvinfer1::IActivationLayer* layer =
3599       params->converter->network()->addActivation(
3600           *inputs.at(0).tensor(), nvinfer1::ActivationType::kCLIP);
3601   layer->setAlpha(0.0f);
3602   layer->setBeta(6.0f);
3603   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3604   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3605   params->converter->ProvideQuantizationRange(output_tensor, 0.0f, 6.0f);
3606   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3607   return Status::OK();
3608 #else
3609   // Convert using min(Relu(x), 6) before TRT 5.1
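  // (Equivalently, Relu6(x) = min(max(x, 0), 6); the element-wise min with a
  // broadcast constant 6 below provides the upper clamp.)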
3610   // Input Tensor
3611   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
3612 
3613   // Relu operation i.e. Relu(x) = max(0, x)
3614   nvinfer1::IActivationLayer* relu_layer =
3615       params->converter->network()->addActivation(
3616           *tensor, nvinfer1::ActivationType::kRELU);
3617   TFTRT_RETURN_ERROR_IF_NULLPTR(relu_layer, node_def.name());
3618 
3619   // Large range of relu is problematic during quantization in INT8 precision
3620   // mode. Setting dynamic range of relu = [0.f, 6.0f] helps with quantization.
3621   // TRT only uses dynamic ranges in INT8 precision mode,
3622   // and this does not affect the FP32 path.
3623   params->converter->ProvideQuantizationRange(relu_layer->getOutput(0), 0.0f,
3624                                               6.0f);
3625 
3626   // Create a constant layer to store the floating point weight i.e. 6.0f
3627   nvinfer1::ITensor* const6_tensor = nullptr;
3628   TF_RETURN_IF_ERROR(CreateBroadcastableScalarConstant(
3629       params, 6.0f, relu_layer->getOutput(0)->getDimensions(), &const6_tensor));
3630 
3631   // ElementWise Min Operation
3632   // Min op is a nop for INT8 execution path, as the input tensor
3633   // to this layer will only have values in range [0.f, 6.0f].
3634   nvinfer1::IElementWiseLayer* relu6_layer =
3635       params->converter->network()->addElementWise(
3636           *relu_layer->getOutput(0), *const6_tensor,
3637           nvinfer1::ElementWiseOperation::kMIN);
3638   TFTRT_RETURN_ERROR_IF_NULLPTR(relu6_layer, node_def.name());
3639   nvinfer1::ITensor* output_tensor = relu6_layer->getOutput(0);
3640   params->converter->ProvideQuantizationRange(output_tensor, 0.0f, 6.0f);
3641 
3642   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3643   return Status::OK();
3644 #endif
3645 }
3646 
3647 Status ConvertBiasAddInt8WithoutCalibration(OpConverterParams* params) {
3648   const auto& inputs = params->inputs;
3649   const auto& node_def = params->node_def;
3650   TF_RETURN_IF_ERROR(
3651       CheckInputsWeights(*params, {{"value", false}, {"bias", true}}));
3652   TF_RETURN_IF_ERROR(
3653       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3654   if (params->validation_only) return Status::OK();
3655 
3656   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
3657   const nvinfer1::Dims original_dims = tensor->getDimensions();
3658   TFAttrs attrs(node_def);
3659   const string data_format = attrs.get<string>("data_format");
3660   const int channel_index =
3661       (data_format == "NHWC" ? original_dims.nbDims - 1 : 0);
3662 
3663   nvinfer1::Permutation permutation;
3664   if (channel_index != 0) {
3665     // Permute the dimensions so that the channel dimension is the first
3666     // dimension.
3667     for (int i = 0; i < original_dims.nbDims; ++i) {
3668       permutation.order[i] = i;
3669     }
3670     permutation.order[0] = channel_index;
3671     permutation.order[channel_index] = 0;
3672     VLOG(1) << "ConvertBiasAdd permutation: "
3673             << DebugString(permutation, original_dims.nbDims);
3674   }
3675 
3676   // TensorRT addScale requires the input to be of rank 3, so we need to
3677   // apply a transpose as well as a reshape.
3678   // TODO(laigd): this doesn't match what the TRT doc says, fix the doc?
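  // Worked example (illustration, not from the original comments): for an NHWC
  // input whose TRT dims (excluding batch) are [H, W, C], channel_index is 2,
  // so the permutation above is {2, 1, 0} and the first transpose yields
  // [C, W, H]. The reshape dims {0, 0, -1} then keep a rank-3 tensor [C, W, H],
  // letting addScale apply the bias along dim 0 (the channel dim).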
3679   if (channel_index != 0 || original_dims.nbDims != 3) {
3680     nvinfer1::IShuffleLayer* shuffle_layer =
3681         params->converter->network()->addShuffle(*tensor);
3682     TFTRT_RETURN_ERROR_IF_NULLPTR(shuffle_layer, node_def.name());
3683     params->converter->MarkQuantizationRangesAsInferrable(
3684         tensor, shuffle_layer->getOutput(0));
3685 
3686     // NOTE(laigd): for some reason we need to apply the reshape
3687     // unconditionally. The default shape has nbDims==-1 and it seems the
3688     // behavior is undefined in some cases.
3689     nvinfer1::Dims reshape_dims;
3690     reshape_dims.nbDims = 3;
3691     // 0 means copying from input; -1 means inferring from the rest.
3692     reshape_dims.d[0] = 0;
3693     reshape_dims.d[1] = original_dims.nbDims >= 2 ? 0 : 1;
3694     reshape_dims.d[2] = original_dims.nbDims >= 3 ? -1 : 1;
3695     shuffle_layer->setReshapeDimensions(reshape_dims);
3696 
3697     if (channel_index != 0) {
3698       shuffle_layer->setFirstTranspose(permutation);
3699     }
3700     tensor = shuffle_layer->getOutput(0);
3701   }
3702 
3703   TRT_ShapedWeights weights = inputs.at(1).weights();
3704   nvinfer1::ScaleMode mode = nvinfer1::ScaleMode::kCHANNEL;
3705   if (weights.shape_.d[0] == 1) {
3706     mode = nvinfer1::ScaleMode::kUNIFORM;
3707   }
3708 
3709   TRT_ShapedWeights empty_weights(weights.TrtDType());
3710   nvinfer1::IScaleLayer* layer = params->converter->network()->addScale(
3711       *tensor, mode, weights.GetTrtWeights(), empty_weights.GetTrtWeights(),
3712       empty_weights.GetTrtWeights());
3713   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3714 
3715   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3716 
3717   // Restore transpose & reshape.
3718   if (channel_index != 0 || original_dims.nbDims != 3) {
3719     nvinfer1::IShuffleLayer* shuffle_layer =
3720         params->converter->network()->addShuffle(*output_tensor);
3721     TFTRT_RETURN_ERROR_IF_NULLPTR(shuffle_layer, node_def.name());
3722     // NOTE: for same reason as mentioned above we need to apply the reshape
3723     // unconditionally.
3724     nvinfer1::Dims reshape_dims = original_dims;
3725     if (channel_index != 0) {
3726       // NOTE: according to NVIDIA dimension types are deprecated, so we don't
3727       // need to copy them back.
3728       reshape_dims.d[channel_index] = original_dims.d[0];
3729       reshape_dims.d[0] = original_dims.d[channel_index];
3730     }
3731     shuffle_layer->setReshapeDimensions(reshape_dims);
3732 
3733     if (channel_index != 0) {
3734       shuffle_layer->setSecondTranspose(permutation);
3735     }
3736     params->converter->MarkQuantizationRangesAsInferrable(
3737         output_tensor, shuffle_layer->getOutput(0));
3738     output_tensor = shuffle_layer->getOutput(0);
3739   }
3740 
3741   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3742   return Status::OK();
3743 }
3744 
3745 Status ConvertBiasAdd(OpConverterParams* params) {
3746   if (params->precision_mode == TrtPrecisionMode::INT8 &&
3747       !params->use_calibration) {
3748     // NOTE(laigd): based on some observation, it seems TensorRT cannot fuse
3749     // IConvolutionLayer and IElementwiseLayer and will require range
3750     // information for the output of Conv2D. Using IScaleLayer will fix the
3751     // problem.
3752     return ConvertBiasAddInt8WithoutCalibration(params);
3753   }
3754   const auto& inputs = params->inputs;
3755   const auto& node_def = params->node_def;
3756 
3757   if (inputs.size() != 2) {
3758     return errors::InvalidArgument(
3759         "BiasAdd expects exactly 2 inputs, but received ", inputs.size());
3760   }
3761 
3762   if (inputs[0].is_weights() && inputs[1].is_weights()) {
3763     return errors::InvalidArgument(
3764         "All inputs are weights, but Grappler is expected to fold them.");
3765   }
3766 
3767   TF_RETURN_IF_ERROR(
3768       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
3769 
3770   TFAttrs attrs(node_def);
3771   const string& data_format = attrs.get<string>("data_format");
3772 
3773   nvinfer1::Dims input_shape = inputs.at(0).GetTrtDims();
3774   nvinfer1::Dims bias_shape = inputs.at(1).GetTrtDims();
3775   // If the input is NCHW, then we need to unsqueeze the bias such that its last
3776   // dimensions are 1s (and the first dimension is C).
3777   if (data_format == "NCHW") {
3778     bias_shape.nbDims = input_shape.nbDims;
3779     std::fill(bias_shape.d + 1, bias_shape.d + bias_shape.nbDims, 1);
3780   } else {
3781     // Next, broadcast the bias across the input.
3782     TF_RETURN_IF_ERROR(GetTrtBroadcastShape(inputs.at(0), inputs.at(1),
3783                                             /*check_feasibility=*/true,
3784                                             params->use_implicit_batch,
3785                                             &input_shape, &bias_shape));
3786   }
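  // For example (sketch of the effect): with an NCHW input of TRT dims
  // [C, H, W] and a bias of TRT dims [C], bias_shape becomes [C, 1, 1], so the
  // element-wise add below broadcasts the bias across H and W.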
3787 
3788   // Convert input to a TRT tensor
3789   nvinfer1::ITensor* input_tensor{nullptr};
3790   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
3791       inputs.at(0), input_shape, params->validation_only, &input_tensor));
3792 
3793   // Finally, reshape bias. Since the bias is usually a constant, this will
3794   // normally happen at conversion-time.
3795   nvinfer1::ITensor* bias_tensor{nullptr};
3796   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
3797       inputs.at(1), bias_shape, params->validation_only, &bias_tensor));
3798   VLOG(2) << "Bias shape adjusted to " << DebugString(bias_shape);
3799 
3800   if (params->validation_only) return Status::OK();
3801 
3802   nvinfer1::IElementWiseLayer* layer =
3803       params->converter->network()->addElementWise(
3804           *input_tensor, *bias_tensor, nvinfer1::ElementWiseOperation::kSUM);
3805   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
3806   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
3807 
3808   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
3809   return Status::OK();
3810 }
3811 
3812 void GetTensorDimsWithProtoShape(const Tensor& tensor, nvinfer1::Dims* dims) {
3813   if (tensor.dims() > 0) {
3814     *dims = GetTrtDimsForTensor(tensor);
3815   } else {
3816     dims->nbDims = 1;
3817     // No dimension provided. Flatten it.
3818     dims->d[0] = tensor.NumElements();
3819     dims->type[0] = nvinfer1::DimensionType::kSPATIAL;
3820     for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; ++i) {
3821       dims->d[i] = 0;
3822     }
3823   }
3824 }
3825 
3826 template <typename Input>
3827 inline bool IsIntegerInInt32Bounds(const Input& inp) {
3828   static_assert(std::is_integral<Input>::value,
3829                 "This function is only implemented for integral types.");
3830   // If Input is always within the range of int32, return true.
3831   if (sizeof(Input) < sizeof(int32) || std::is_same<Input, int32>::value) {
3832     return true;
3833   }
3834   // Otherwise, we need to check the value of the input. If the input is
3835   // unsigned, we only check the upper bound.
3836   if (!std::numeric_limits<Input>::is_signed) {
3837     return inp <= static_cast<Input>(std::numeric_limits<int32>::max());
3838   }
3839   // We can safely cast lowest() here since we now know that Input is signed and
3840   // sizeof(Input) >= sizeof(int32)
3841   return (inp >= static_cast<Input>(std::numeric_limits<int32>::lowest()) &&
3842           inp <= static_cast<Input>(std::numeric_limits<int32>::max()));
3843 }
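// A few concrete cases (illustrative, derived from the logic above):
//   IsIntegerInInt32Bounds(int16{42})            -> true  (narrower than int32)
//   IsIntegerInInt32Bounds(int64{3000000000LL})  -> false (exceeds int32 max)
//   IsIntegerInInt32Bounds(uint32{4000000000u})  -> false (exceeds int32 max)
//   IsIntegerInInt32Bounds(int64{-5})            -> true  (within [min, max])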
3844 
3845 template <DataType dtype>
3846 Status CopyToTrtInt32Array(const Tensor& tensor, int32* dst) {
3847   typedef typename EnumToDataType<dtype>::Type CType;
3848   const CType* src = tensor.flat<CType>().data();
3849   for (int i = 0; i < tensor.NumElements(); ++i) {
3850     // This becomes a no-op if CType is within bounds of int32
3851     if (!IsIntegerInInt32Bounds(src[i])) {
3852       return errors::InvalidArgument("Value at index ", i,
3853                                      " is outside the range of int32");
3854     }
3855     dst[i] = static_cast<int32>(src[i]);
3856   }
3857   return Status::OK();
3858 }
3859 
3860 Status TfTensorToTrtWeights(const Tensor& tensor, TrtWeightStore* weight_store,
3861                             TRT_ShapedWeights* weights) {
3862   const DataType dtype = tensor.dtype();
3863 
3864   // We always convert the integer constants to INT32.
3865   //
3866   // TODO(aaroey): FP16 will remain in half format and is not converted to
3867   // FP32, but the converter currently uses all float weights as FP32. Fix
3868   // this.
3869   DataType converted_dtype = DataTypeIsInteger(dtype) ? DT_INT32 : dtype;
3870 
3871   // Verify that the dtype is supported by TensorRT. Otherwise, return an error.
3872   nvinfer1::DataType trt_dtype;
3873   TF_RETURN_IF_ERROR(TfDataTypeToTrt(converted_dtype, &trt_dtype));
3874 
3875   if (tensor.NumElements() == 0) {
3876     // Return empty weights.
3877     *weights = TRT_ShapedWeights(trt_dtype);
3878     return Status::OK();
3879   }
3880 
3881   nvinfer1::Dims weight_dims;
3882   GetTensorDimsWithProtoShape(tensor, &weight_dims);
3883   *weights = weight_store->GetTempWeights(trt_dtype, weight_dims);
3884 
3885   // Copy the tensor directly if the tensor does not require cast to the
3886   // supported type.
3887   if (converted_dtype == dtype) {
3888     char* dst = static_cast<char*>(weights->GetValues());
3889     memcpy(dst, tensor.tensor_data().data(), tensor.TotalBytes());
3890     return Status::OK();
3891   }
3892 
3893   Status status = Status::OK();
3894   // Copy tensor elements after casting them to the converted DataType.
3895   int32* dst = static_cast<int32*>(weights->GetValues());
3896   switch (dtype) {
3897     case DT_INT8:
3898       status = CopyToTrtInt32Array<DT_INT8>(tensor, dst);
3899       break;
3900     case DT_UINT8:
3901       status = CopyToTrtInt32Array<DT_UINT8>(tensor, dst);
3902       break;
3903     case DT_INT16:
3904       status = CopyToTrtInt32Array<DT_INT16>(tensor, dst);
3905       break;
3906     case DT_UINT16:
3907       status = CopyToTrtInt32Array<DT_UINT16>(tensor, dst);
3908       break;
3909     case DT_UINT32:
3910       status = CopyToTrtInt32Array<DT_UINT32>(tensor, dst);
3911       break;
3912     case DT_INT64:
3913       status = CopyToTrtInt32Array<DT_INT64>(tensor, dst);
3914       break;
3915     case DT_UINT64:
3916       status = CopyToTrtInt32Array<DT_UINT64>(tensor, dst);
3917       break;
3918     default:
3919       return errors::Internal("Unexpected DataType: ", DataTypeString(dtype));
3920   }
3921   return status;
3922 }
3923 
3924 // Convert a Const NodeDef to TRT_ShapedWeights. This is a special converter: it
3925 // always ignores the params->validation_only parameter but adds the converted
3926 // weights to params->outputs. We do this because TrtNodeValidator needs the
3927 // weights as inputs to other nodes, and uses them to determine whether those
3928 // nodes are supported by TRT.
3929 Status ConvertConst(OpConverterParams* params) {
3930   const auto& inputs = params->inputs;
3931   const auto& node_def = params->node_def;
3932   if (!inputs.empty()) {
3933     return errors::InvalidArgument(
3934         "Constant node is expected to have empty input list: ",
3935         node_def.name());
3936   }
3937 
3938   // Create shaped weights as output
3939   const auto& tensor_proto = node_def.attr().at("value").tensor();
3940   Tensor tensor;
3941   if (!tensor.FromProto(tensor_proto)) {
3942     return errors::Internal("Cannot parse weight tensor proto: ",
3943                             node_def.name());
3944   }
3945 
3946   TFAttrs attrs(node_def);
3947   const DataType dtype = attrs.get<DataType>("dtype");
3948   if (dtype != tensor.dtype()) {
3949     return errors::InvalidArgument("DataType mismatch between attr (",
3950                                    DataTypeString(dtype), ") and tensor (",
3951                                    DataTypeString(tensor.dtype()), ")");
3952   }
3953 
3954   TRT_ShapedWeights weights;
3955   TF_RETURN_IF_ERROR(
3956       TfTensorToTrtWeights(tensor, params->weight_store, &weights));
3957 
3958   if (params->outputs != nullptr) {
3959     params->outputs->push_back(TRT_TensorOrWeights(weights));
3960   }
3961   return Status::OK();
3962 }
3963 
3964 Status ConvertIdentity(OpConverterParams* params) {
3965   // TODO(tmorris): TRT's Identity layer does not get optimized away as of TRT
3966   // 5.0, however once we know that it does it would be nice to use that
3967   // instead.
3968   if (params->validation_only) return Status::OK();
3969   params->outputs->push_back(params->inputs.at(0));
3970   return Status::OK();
3971 }
3972 
3973 const std::unordered_map<string, nvinfer1::ElementWiseOperation>*
3974 BinaryOperationMap() {
3975   static auto* const m =
3976       new std::unordered_map<string, nvinfer1::ElementWiseOperation> {
3977     {"Add", nvinfer1::ElementWiseOperation::kSUM},
3978         {"AddV2", nvinfer1::ElementWiseOperation::kSUM},
3979         {"Mul", nvinfer1::ElementWiseOperation::kPROD},
3980         {"Sub", nvinfer1::ElementWiseOperation::kSUB},
3981         {"Div", nvinfer1::ElementWiseOperation::kDIV},
3982 #if IS_TRT_VERSION_GE(5, 1, 0, 0)
3983         // This op applies Floor after Div.
3984         {"FloorDiv", nvinfer1::ElementWiseOperation::kDIV},
3985 #endif
3986         {"RealDiv", nvinfer1::ElementWiseOperation::kDIV},
3987         {"Minimum", nvinfer1::ElementWiseOperation::kMIN},
3988         {"Maximum", nvinfer1::ElementWiseOperation::kMAX},
3989         {"Pow", nvinfer1::ElementWiseOperation::kPOW},
3990   };
3991   return m;
3992 }
3993 
3994 Status ConvertBinary(OpConverterParams* params) {
3995   const auto& inputs = params->inputs;
3996   const auto& node_def = params->node_def;
3997   if (inputs.size() != 2) {
3998     return errors::InvalidArgument(node_def.op(), " got ", inputs.size(),
3999                                    " inputs but expected 2, at ",
4000                                    node_def.name());
4001   }
4002   TF_RETURN_IF_ERROR(
4003       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4004 
4005   // Constant folding should have been done by TensorFlow
4006   if (inputs.at(0).is_weights() && inputs.at(1).is_weights()) {
4007     return errors::Unimplemented(
4008         "Constant folding is falled back to TensorFlow, binary op received "
4009         "both input as constant at: ",
4010         node_def.name());
4011   }
4012   const TRT_TensorOrWeights& operand_l = inputs.at(0);
4013   const TRT_TensorOrWeights& operand_r = inputs.at(1);
4014 
4015   auto op_pair = BinaryOperationMap()->find(node_def.op());
4016   if (op_pair == BinaryOperationMap()->end()) {
4017     return errors::Unimplemented("Binary op ", node_def.op(),
4018                                  " not supported at: ", node_def.name());
4019   }
4020 
4021   nvinfer1::Dims broadcasted_dims_l, broadcasted_dims_r;
4022   TF_RETURN_IF_ERROR(GetTrtBroadcastShape(
4023       operand_l, operand_r, /*check_feasibility=*/true,
4024       params->use_implicit_batch, &broadcasted_dims_l, &broadcasted_dims_r));
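  // For example (assuming the usual right-aligned broadcasting in implicit
  // batch mode): TRT dims l = [2, 3, 4] and r = [4] would come back as
  // broadcasted_dims_l = [2, 3, 4] and broadcasted_dims_r = [1, 1, 4].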
4025   nvinfer1::ITensor* tensor_l = nullptr;
4026   nvinfer1::ITensor* tensor_r = nullptr;
4027   // This will also convert constants to tensors, and set quantization ranges.
4028   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
4029       operand_l, broadcasted_dims_l, params->validation_only, &tensor_l));
4030   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
4031       operand_r, broadcasted_dims_r, params->validation_only, &tensor_r));
4032   if (params->validation_only) return Status::OK();
4033 
4034   // Add ElementWise layer.
4035   nvinfer1::ILayer* layer = params->converter->network()->addElementWise(
4036       *tensor_l, *tensor_r, op_pair->second);
4037   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4038   nvinfer1::ITensor* trt_tensor = layer->getOutput(0);
4039 
4040 #if IS_TRT_VERSION_GE(5, 1, 0, 0)
4041   if (node_def.op() == "FloorDiv") {
4042     layer = params->converter->network()->addUnary(
4043         *trt_tensor, nvinfer1::UnaryOperation::kFLOOR);
4044     TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4045     trt_tensor = layer->getOutput(0);
4046   }
4047 #endif
4048   params->outputs->push_back(TRT_TensorOrWeights(trt_tensor));
4049   return Status::OK();
4050 }
4051 
4052 Status ConvertRsqrt(OpConverterParams* params) {
4053   const auto& inputs = params->inputs;
4054   const auto& node_def = params->node_def;
4055   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}}));
4056   TF_RETURN_IF_ERROR(
4057       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4058   if (params->validation_only) return Status::OK();
4059 
4060   // TODO(tmorris): params->converter is null during validation. Allow
4061   // precision_mode and use_calibration to be accessed during validation and
4062   // include this check in validation.
4063   // We will need a quantization range for intermediate tensor if not using
4064   // calibration.
4065   //
4066   //   x -> [Sqrt] -> sqrt(x) -> [Recip] -> 1/sqrt(x)
4067   //                     ^
4068   //               need range here
4069   if (params->converter->precision_mode() == TrtPrecisionMode::INT8 &&
4070       !params->converter->use_calibration()) {
4071     return errors::Unimplemented(
4072         "Intermediate quantization range cannot be determined without"
4073         " calibration for Rsqrt, consider replacing with "
4074         "Sqrt -> FakeQuant -> Reciprocal ops, at ",
4075         node_def.name());
4076   }
4077   // Start conversion.
4078   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
4079   // Sqrt
4080   nvinfer1::IUnaryLayer* sqrt_layer = params->converter->network()->addUnary(
4081       *tensor, nvinfer1::UnaryOperation::kSQRT);
4082   TFTRT_RETURN_ERROR_IF_NULLPTR(sqrt_layer, node_def.name());
4083   // Recip
4084   nvinfer1::IUnaryLayer* recip_layer = params->converter->network()->addUnary(
4085       *sqrt_layer->getOutput(0), nvinfer1::UnaryOperation::kRECIP);
4086   TFTRT_RETURN_ERROR_IF_NULLPTR(recip_layer, node_def.name());
4087   params->outputs->push_back(TRT_TensorOrWeights(recip_layer->getOutput(0)));
4088   return Status::OK();
4089 }
4090 
4091 const std::unordered_map<string, nvinfer1::UnaryOperation>*
4092 UnaryOperationMap() {
4093   static auto* const m =
4094       new std::unordered_map<string, nvinfer1::UnaryOperation>({
4095         {"Neg", nvinfer1::UnaryOperation::kNEG},
4096             {"Exp", nvinfer1::UnaryOperation::kEXP},
4097             {"Log", nvinfer1::UnaryOperation::kLOG},
4098             {"Sqrt", nvinfer1::UnaryOperation::kSQRT},
4099             {"Abs", nvinfer1::UnaryOperation::kABS},
4100             {"Reciprocal", nvinfer1::UnaryOperation::kRECIP},
4101 #if IS_TRT_VERSION_GE(5, 1, 0, 0)
4102             {"Sin", nvinfer1::UnaryOperation::kSIN},
4103             {"Cos", nvinfer1::UnaryOperation::kCOS},
4104             {"Tan", nvinfer1::UnaryOperation::kTAN},
4105             {"Sinh", nvinfer1::UnaryOperation::kSINH},
4106             {"Cosh", nvinfer1::UnaryOperation::kCOSH},
4107             {"Asin", nvinfer1::UnaryOperation::kASIN},
4108             {"Acos", nvinfer1::UnaryOperation::kACOS},
4109             {"Atan", nvinfer1::UnaryOperation::kATAN},
4110             {"Asinh", nvinfer1::UnaryOperation::kASINH},
4111             {"Acosh", nvinfer1::UnaryOperation::kACOSH},
4112             {"Atanh", nvinfer1::UnaryOperation::kATANH},
4113             {"Ceil", nvinfer1::UnaryOperation::kCEIL},
4114             {"Floor", nvinfer1::UnaryOperation::kFLOOR},
4115 #endif
4116       });
4117   return m;
4118 }
4119 
4120 Status ConvertUnary(OpConverterParams* params) {
4121   const auto& inputs = params->inputs;
4122   const auto& node_def = params->node_def;
4123   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}}));
4124   TF_RETURN_IF_ERROR(
4125       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4126   auto op_pair = UnaryOperationMap()->find(node_def.op());
4127   if (op_pair == UnaryOperationMap()->end()) {
4128     return errors::Unimplemented("Unary op: ", node_def.op(),
4129                                  " not supported at: ", node_def.name());
4130   }
4131   if (params->validation_only) return Status::OK();
4132 
4133   // Start conversion.
4134   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
4135   nvinfer1::IUnaryLayer* layer =
4136       params->converter->network()->addUnary(*tensor, op_pair->second);
4137   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4138   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4139 
4140   // Set quantization ranges.
4141   if (node_def.op() == "Sin" || node_def.op() == "Cos") {
4142     params->converter->ProvideQuantizationRange(output_tensor, -1.0f, 1.0f);
4143   } else if (node_def.op() == "Asin" || node_def.op() == "Atan") {
4144     params->converter->ProvideQuantizationRange(output_tensor, -M_PI_2, M_PI_2);
4145   } else if (node_def.op() == "Acos") {
4146     params->converter->ProvideQuantizationRange(output_tensor, 0.0f, M_PI);
4147   } else if (node_def.op() == "Neg" || node_def.op() == "Abs") {
4148     // Neg and Abs will have same range as input since TRT uses symmetric
4149     // quantization.
4150     // TODO(tmorris): Should we infer ranges for Ceil and Floor as well?
4151     params->converter->MarkQuantizationRangesAsInferrable(tensor,
4152                                                           output_tensor);
4153   }
4154   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4155   return Status::OK();
4156 }
4157 
4158 Status ConvertSquare(OpConverterParams* params) {
4159   const auto& inputs = params->inputs;
4160   const auto& node_def = params->node_def;
4161   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}}));
4162   TF_RETURN_IF_ERROR(
4163       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4164   if (params->validation_only) return Status::OK();
4165 
4166   // Constant 2 with same rank as input
4167   nvinfer1::ITensor* const2_tensor = nullptr;
4168   TF_RETURN_IF_ERROR(CreateBroadcastableScalarConstant(
4169       params, 2.0f, inputs.at(0).GetTrtDims(), &const2_tensor));
4170 
4171   // ElementWise Pow Operation
4172   nvinfer1::IElementWiseLayer* layer =
4173       params->converter->network()->addElementWise(
4174           *inputs.at(0).tensor(), *const2_tensor,
4175           nvinfer1::ElementWiseOperation::kPOW);
4176   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4177   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4178 
4179   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4180   return Status::OK();
4181 }
4182 
4183 Status ConvertReduce(OpConverterParams* params) {
4184   const auto& inputs = params->inputs;
4185   const auto& node_def = params->node_def;
4186   TF_RETURN_IF_ERROR(
4187       CheckInputsWeights(*params, {{"input", false}, {"axis", true}}));
4188   TF_RETURN_IF_ERROR(
4189       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4190 
4191   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
4192   auto tf_axes_list = inputs.at(1).weights().GetSpan<int>();
4193 
4194   TFAttrs attrs(node_def);
4195   // Only expect to handle INT32 as attributes for now
4196   if (attrs.get<DataType>("Tidx") != DataType::DT_INT32) {
4197     return errors::Unimplemented("Tidx supports only DT_INT32");
4198   }
4199 
4200   int axes = 0;
4201   if (tf_axes_list.size() == 0) {
4202     return errors::InvalidArgument(
4203         "TRT cannot support reduce on all (batch) dimensions, at",
4204         node_def.name());
4205   }
4206   for (int i = 0; i < tf_axes_list.size(); i++) {
4207     int trt_axis;
4208     TF_RETURN_IF_ERROR(
4209         ConvertAxis(tf_axes_list[i], tensor->getDimensions().nbDims,
4210                     node_def.name(), /*use_implicit_batch=*/true, &trt_axis));
4211     axes |= (1 << trt_axis);
4212   }
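  // For example: reducing TF axes {1, 2} on a 4-D NCHW input (implicit batch)
  // maps to TRT axes {0, 1}, so the bitmask passed to addReduce below is
  // (1 << 0) | (1 << 1) = 0b11.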
4213 
4214   nvinfer1::ReduceOperation reduce_operation;
4215   if (node_def.op() == "Sum") {
4216     reduce_operation = nvinfer1::ReduceOperation::kSUM;
4217   } else if (node_def.op() == "Prod") {
4218     reduce_operation = nvinfer1::ReduceOperation::kPROD;
4219   } else if (node_def.op() == "Max") {
4220     reduce_operation = nvinfer1::ReduceOperation::kMAX;
4221   } else if (node_def.op() == "Min") {
4222     reduce_operation = nvinfer1::ReduceOperation::kMIN;
4223   } else if (node_def.op() == "Mean") {
4224     reduce_operation = nvinfer1::ReduceOperation::kAVG;
4225   } else {
4226     return errors::Unimplemented("Op not supported ", node_def.op(), ", at ",
4227                                  node_def.name());
4228   }
4229   if (params->validation_only) return Status::OK();
4230 
4231   const auto keep_dims = attrs.get<bool>("keep_dims");
4232   nvinfer1::ILayer* layer = params->converter->network()->addReduce(
4233       *tensor, reduce_operation, axes, keep_dims);
4234   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4235 
4236   params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
4237   return Status::OK();
4238 }
4239 
4240 // TensorRT does not support the Pack op natively. Therefore, Pack op is
4241 // converted by first expanding input tensors by adding a new dimension of size
4242 // one at the specified axis and then concatenating the tensors at the same
4243 // axis.
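// For example (illustration): packing two tensors of TRT dims [2, 3] on TF
// axis 1 (TRT axis 0) first reshapes each input to [1, 2, 3] and then
// concatenates along that axis, producing TRT dims [2, 2, 3] (TF shape
// [N, 2, 2, 3] once the implicit batch dim is added back).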
4244 Status ConvertPack(OpConverterParams* params) {
4245   const auto& inputs = params->inputs;
4246   const auto& node_def = params->node_def;
4247 
4248   TFAttrs attrs(node_def);
4249   const int num_inputs = attrs.get<int64>("N");
4250   if (num_inputs != inputs.size()) {
4251     return errors::InvalidArgument(
4252         "Number of inputs for Pack is inconsistent with N attribute, at ",
4253         node_def.name());
4254   }
4255 
4256   // Validate inputs. Values must be tensors for now.
4257   std::vector<std::pair<string, bool>> inputs_is_weight;
4258   for (int i = 0; i < num_inputs; ++i) {
4259     inputs_is_weight.push_back({StrCat("values_", i), false});
4260   }
4261   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, inputs_is_weight));
4262 
4263   // TODO(hinsu): Enable INT32 with TensorRT version 5.1.3 after testing.
4264   TF_RETURN_IF_ERROR(
4265       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4266 
4267   if (num_inputs > 1) {
4268     // Verify that inputs are compatible for concatenation after the expansion.
4269     TF_RETURN_IF_ERROR(
4270         VerifyShapesMatch(inputs, /*masked_dim=*/-1, node_def.name()));
4271   }
4272 
4273   // Convert axis from the TensorFlow format to TensorRT format.
4274   const nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
4275   const int64 tf_axis = attrs.get<int64>("axis");
4276   int trt_axis;
4277   TF_RETURN_IF_ERROR(ConvertAxis(tf_axis, dims.nbDims + 1, node_def.name(),
4278                                  /*use_implicit_batch=*/true, &trt_axis));
4279 
4280   // Compute expanded dimensions and then reshape input tensors.
4281   std::vector<int> tensor_dims(dims.d, dims.d + dims.nbDims);
4282   tensor_dims.insert(tensor_dims.begin() + trt_axis, 1);
4283   nvinfer1::Dims expanded_dims;
4284   TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(tensor_dims, &expanded_dims));
4285   std::vector<nvinfer1::ITensor*> expanded_tensors;
4286   for (const TRT_TensorOrWeights& tensor : inputs) {
4287     nvinfer1::ITensor* expanded_tensor = nullptr;
4288     TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
4289         tensor, expanded_dims, params->validation_only, &expanded_tensor));
4290     if (!params->validation_only) {
4291       expanded_tensors.push_back(expanded_tensor);
4292     }
4293   }
4294   if (params->validation_only) return Status::OK();
4295 
4296   // If there is only one tensor in the input, return the expanded tensor.
4297   if (num_inputs == 1) {
4298     params->outputs->push_back(TRT_TensorOrWeights(expanded_tensors[0]));
4299     return Status::OK();
4300   }
4301 
4302   // Otherwise, concatenate expanded tensors.
4303   nvinfer1::IConcatenationLayer* layer =
4304       params->converter->network()->addConcatenation(
4305           const_cast<nvinfer1::ITensor**>(expanded_tensors.data()),
4306           expanded_tensors.size());
4307   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4308   // Note that trt_axis stays the same even after expanding tensors at the axis.
4309   layer->setAxis(trt_axis);
4310   params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
4311   return Status::OK();
4312 }
4313 
4314 Status ConvertPad(OpConverterParams* params) {
4315   const auto& inputs = params->inputs;
4316   const auto& node_def = params->node_def;
4317   TF_RETURN_IF_ERROR(
4318       CheckInputsWeights(*params, {{"tensor", false}, {"paddings", true}}));
4319   TF_RETURN_IF_ERROR(
4320       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4321 
4322   // Implement tensor binaryOp weight [channel wise] for now;
4323   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
4324   const auto dims = tensor->getDimensions();
4325   // Restore implicit batch dimension
4326   const int nb_dims = dims.nbDims + 1;
4327 
4328   TRT_ShapedWeights pads = inputs.at(1).weights();
4329 
4330   TFAttrs attrs(node_def);
4331   // Padding type here is done through TF type
4332   //   so I can leverage their EnumToDataType for my cast
4333   auto padding_type = attrs.get<DataType>("Tpaddings");
4334   // TODO(jie): handle data type conversion for TRT?
4335 
4336   if (pads.shape_.d[0] != nb_dims || pads.shape_.d[1] != 2) {
4337     return errors::InvalidArgument(
4338         "Pad only supports explicit padding on 4 dimensional tensor, at ",
4339         node_def.name());
4340   }
4341 
4342   // Only expect to handle INT32 as attributes for now
4343   if (padding_type != DataType::DT_INT32) {
4344     return errors::Unimplemented("Tpaddings supports only DT_INT32");
4345   }
4346   auto pad_data = static_cast<int*>(pads.GetValues());
4347 
4348   std::vector<int32_t> pad_index;
4349   for (int i = 0; i < nb_dims; i++) {
4350     if (pad_data[2 * i] != 0 || pad_data[2 * i + 1] != 0) {
4351       pad_index.push_back(i);
4352     }
4353   }
4354 
4355   // No padding at all, we should exit
4356   if (pad_index.empty()) {
4357     params->outputs->push_back(inputs.at(0));
4358     return Status::OK();
4359   }
4360 
4361   // Only supports padding on at most 2 axes (GIE-2579).
4362   if (pad_index.size() > 2) {
4363     return errors::InvalidArgument(
4364         "Padding layer does not support padding on > 2");
4365   }
4366 
4367   // Padding on batch dimension is not supported
4368   if (pad_index[0] == 0) {
4369     return errors::InvalidArgument(
4370         "Padding layer does not support padding on batch dimension");
4371   }
4372 
4373   // Not doing the legit thing here: ignoring padding on dims 1 and 3.
4374   // TODO(jie): implement pad as uff parser
4375   if (pad_index.size() == 2 && pad_index[0] == 1 && pad_index[1] == 3) {
4376     return errors::Unimplemented(
4377         "Padding layer does not support padding on dimension 1 and 3 yet");
4378   }
4379   if (params->validation_only) return Status::OK();
4380 
4381   bool legit_pad = true;
4382   nvinfer1::DimsHW pre_padding(0, 0);
4383   nvinfer1::DimsHW post_padding(0, 0);
4384 
4385   std::vector<int32_t> permuted_pad_index(pad_index);
4386   if (pad_index[0] == 1) {
4387     legit_pad = false;
4388     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
4389         tensor, {0, 3, 2, 1}, StrCat(node_def.name(), "_to_pad"), &tensor));
4390     permuted_pad_index[0] = 3;
4391   }
4392 
4393   for (size_t i = 0; i < pad_index.size(); i++) {
4394     int index = pad_index[i];
4395     if (permuted_pad_index[i] == 2) {
4396       pre_padding.h() = pad_data[index * 2];
4397       post_padding.h() = pad_data[index * 2 + 1];
4398     } else if (permuted_pad_index[i] == 3) {
4399       pre_padding.w() = pad_data[index * 2];
4400       post_padding.w() = pad_data[index * 2 + 1];
4401     }
4402   }
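  // For example (illustration): for an NCHW input with
  // paddings = [[0, 0], [0, 0], [1, 1], [2, 2]], pad_index is {2, 3} and the
  // loop above yields pre_padding = (1, 2) and post_padding = (1, 2), i.e.
  // symmetric padding of 1 on H and 2 on W.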
4403 
4404   nvinfer1::IPaddingLayer* layer = params->converter->network()->addPadding(
4405       *tensor, pre_padding, post_padding);
4406   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4407   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4408   params->converter->MarkQuantizationRangesAsInferrable(tensor, output_tensor);
4409 
4410   if (!legit_pad) {
4411     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
4412         output_tensor, {0, 3, 2, 1}, StrCat(node_def.name(), "_from_pad"),
4413         &output_tensor));
4414   }
4415 
4416   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4417   return Status::OK();
4418 }
4419 
4420 Status ConvertSplitHelper(OpConverterParams* params,
4421                           const TRT_TensorOrWeights& input, int tf_axis,
4422                           int num_splits, bool squeeze_after) {
4423   const auto& node_def = params->node_def;
4424   const nvinfer1::Dims dims = input.GetTrtDims();
4425   // Convert axis.
4426   int trt_axis;
4427   TF_RETURN_IF_ERROR(ConvertAxis(tf_axis, dims.nbDims, node_def.name(),
4428                                  /*use_implicit_batch=*/true, &trt_axis));
4429   // Dimension must equal num_splits for Unstack (when squeeze_after is true)
4430   if (squeeze_after && dims.d[trt_axis] != num_splits) {
4431     return errors::InvalidArgument(
4432         "Dimension ", tf_axis, " has size ", dims.d[trt_axis],
4433         " which is not equal to num of ", num_splits, ", at ", node_def.name());
4434   }
4435   // Dimension must be evenly divisible by num_splits.
4436   if (dims.d[trt_axis] % num_splits != 0) {
4437     return errors::InvalidArgument(
4438         "Dimension ", tf_axis, " of size ", dims.d[trt_axis],
4439         " is not evenly divisble by ", num_splits, ", at ", node_def.name());
4440   }
4441 
4442   // Create parameters for StridedSliceHelper.
4443   // Slice will begin on zero for all dims, except the one being split which
4444   // will change.
4445   std::vector<int> begin(dims.nbDims, 0);
4446   // Determine size of split. Slice will get the full length of all dims, except
4447   // the one being split.
4448   std::vector<int> size(dims.d, dims.d + dims.nbDims);
4449   const int split_size_on_axis = dims.d[trt_axis] / num_splits;
4450   size[trt_axis] = split_size_on_axis;
4451   // Stride will always be 1
4452   std::vector<int> stride(dims.nbDims, 1);
4453   // Add dummy batch dimension
4454   begin.insert(begin.begin(), 0);
4455   size.insert(size.begin(), 1);
4456   stride.insert(stride.begin(), 1);
4457   // Create final shape for Unpack/Unstack, where split axis is squeezed.
4458   nvinfer1::Dims final_shape_for_unpack;
4459   nvinfer1::Dims* final_shape_for_unpack_ptr = nullptr;
4460   if (squeeze_after) {
4461     std::vector<int> size_after_squeeze(size);
4462     size_after_squeeze.erase(size_after_squeeze.begin() + trt_axis + 1);
4463     TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(
4464         size_after_squeeze, &final_shape_for_unpack, /*ignore_first_dim=*/true));
4465     final_shape_for_unpack_ptr = &final_shape_for_unpack;
4466   }
4467 
4468   // Slice the input. ConvertStridedSliceHelper will push the outputs onto
4469   // params->outputs.
4470   for (int i = 0; i < num_splits; ++i) {
4471     begin[trt_axis + 1] = i * split_size_on_axis;
4472     TF_RETURN_IF_ERROR(ConvertStridedSliceHelper(
4473         params, input, begin, size, stride, final_shape_for_unpack_ptr));
4474   }
4475   return Status::OK();
4476 }
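// For example (illustration of ConvertSplitHelper): splitting a tensor of TRT
// dims [6, 4] into num_splits = 3 along trt_axis 0 uses split_size_on_axis = 2
// and, with the dummy batch dim prepended, slices with begin = [0, 0, 0],
// [0, 2, 0], [0, 4, 0], size = [1, 2, 4], and stride = [1, 1, 1].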
4477 
4478 Status ConvertSplit(OpConverterParams* params) {
4479   const auto& inputs = params->inputs;
4480   const auto& node_def = params->node_def;
4481   TF_RETURN_IF_ERROR(
4482       CheckInputsWeights(*params, {{"axis", true}, {"value", false}}));
4483   TF_RETURN_IF_ERROR(AllowDataTypes(*params, {
4484     DataType::DT_FLOAT, DataType::DT_HALF,
4485 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
4486         DataType::DT_INT32,
4487 #endif
4488   }));
4489   int tf_axis = inputs.at(0).weights().GetSpan<int>()[0];
4490   TFAttrs attrs(node_def);
4491   const int num_split = attrs.get<int64>("num_split");
4492 
4493   return ConvertSplitHelper(params, inputs.at(1), tf_axis, num_split, false);
4494 }
4495 
4496 Status ConvertUnpack(OpConverterParams* params) {
4497   const auto& inputs = params->inputs;
4498   const auto& node_def = params->node_def;
4499   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"value", false}}));
4500   TF_RETURN_IF_ERROR(AllowDataTypes(*params, {
4501     DataType::DT_FLOAT, DataType::DT_HALF,
4502 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
4503         DataType::DT_INT32,
4504 #endif
4505   }));
4506   // Input must be rank 1 or higher in TRT dims (TF rank 2+), since we can't unpack on the batch axis.
4507   if (inputs.at(0).GetTrtDims().nbDims == 0) {
4508     return errors::Unimplemented(
4509         "Input \"value\" for Unpack must be rank 2 or greater, at ",
4510         node_def.name());
4511   }
4512   TFAttrs attrs(node_def);
4513   const int tf_axis = attrs.get<int64>("axis");
4514   const int num = attrs.get<int64>("num");
4515 
4516   return ConvertSplitHelper(params, inputs.at(0), tf_axis, num, true);
4517 }
4518 
4519 Status ConvertConcat(OpConverterParams* params) {
4520   const auto& inputs = params->inputs;
4521   const auto& node_def = params->node_def;
4522   TFAttrs attrs(node_def);
4523   // Get number of tensor inputs.
4524   const int num_inputs = attrs.get<int64>("N");
4525   if (num_inputs != static_cast<int>(inputs.size()) - 1) {
4526     return errors::InvalidArgument(
4527         "Number of inputs for ConcatV2 is inconsistent with N attribute, at ",
4528         node_def.name());
4529   }
4530   // Validate inputs. Values must be tensors for now.
4531   std::vector<std::pair<string, bool>> inputs_is_weight;
4532   for (int i = 0; i < num_inputs; ++i) {
4533     inputs_is_weight.push_back({StrCat("values_", i), false});
4534   }
4535   inputs_is_weight.push_back({"axis", true});
4536   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, inputs_is_weight));
4537   // TODO(tmorris): There is a bug with Concat and INT32 in TRT - it is supposed
4538   // to be supported.
4539   TF_RETURN_IF_ERROR(
4540       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4541   const auto axis = inputs.at(num_inputs).weights().GetSpan<int>();
4542   if (axis.size() != 1) {
4543     return errors::InvalidArgument("Axis for ConcatV2 must be a scalar, at ",
4544                                    node_def.name());
4545   }
4546   int trt_axis = 0;
4547   const auto dim = inputs.at(0).GetTrtDims();
4548   TF_RETURN_IF_ERROR(ConvertAxis(axis[0], dim.nbDims, node_def.name(),
4549                                  /*use_implicit_batch=*/true, &trt_axis));
4550   // Check that dimensions match on non-concatenate axis.
4551   TF_RETURN_IF_ERROR(VerifyShapesMatch(
4552       absl::Span<const TRT_TensorOrWeights>(inputs).first(num_inputs), trt_axis,
4553       node_def.name()));
4554   if (params->validation_only) return Status::OK();
4555 
4556   // Gather inputs as tensors
4557   std::vector<nvinfer1::ITensor const*> input_tensors;
4558   for (int i = 0; i < num_inputs; i++) {
4559     input_tensors.push_back(inputs.at(i).tensor());
4560   }
4561   nvinfer1::IConcatenationLayer* layer =
4562       params->converter->network()->addConcatenation(
4563           const_cast<nvinfer1::ITensor* const*>(input_tensors.data()),
4564           input_tensors.size());
4565   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4566   layer->setAxis(trt_axis);
4567   params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
4568   return Status::OK();
4569 }
4570 
4571 Status ConvertFusedBatchNorm(OpConverterParams* params) {
4572   const auto& inputs = params->inputs;
4573   const auto& node_def = params->node_def;
4574   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false},
4575                                                   {"scale", true},
4576                                                   {"offset", true},
4577                                                   {"mean", true},
4578                                                   {"variance", true}}));
4579   TF_RETURN_IF_ERROR(
4580       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4581   TFAttrs attrs(node_def);
4582   float epsilon = attrs.get<float>("epsilon");
4583   auto data_format = attrs.get<string>("data_format");
4584   if (data_format != "NCHW") {
4585     return errors::Unimplemented(
4586         node_def.op(), " only supports data_format=NCHW, at ", node_def.name());
4587   }
4588   bool is_training = attrs.get<bool>("is_training");
4589   if (is_training) {
4590     // Trying to use batchnorm in training mode is a very common problem.
4591     // Because the error message will only be printed in VLOG(1) by the
4592     // segmenter, we issue a special warning so that users will actually see it.
4593     LOG(WARNING) << node_def.op() << " only supports is_training=false. If you "
4594                  << "are using Keras, please call "
4595                  << "keras.backend.set_learning_phase(0) before constructing "
4596                  << "your model. At " << node_def.name();
4597     return errors::Unimplemented(node_def.op(),
4598                                  " only supports is_training=false, at ",
4599                                  node_def.name());
4600   }
4601   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
4602 
4603   //  Check parameter types
4604   auto parameter_type = inputs.at(1).weights().TrtDType();
4605   if ((parameter_type != nvinfer1::DataType::kFLOAT) &&
4606       (parameter_type != nvinfer1::DataType::kHALF)) {
4607     return errors::Unimplemented(
4608         "Only float32 or float16 weight data type is supported, for node ",
4609         node_def.name(), " got ", DebugString(parameter_type));
4610   }
4611   for (int i = 1; i < 5; i++) {
4612     if (inputs.at(i).weights().TrtDType() != parameter_type) {
4613       return errors::Unimplemented(
4614           "Inconsistent parameter type for batchnorm is not supported, at: " +
4615           node_def.name());
4616     }
4617   }
4618 
4619   TRT_ShapedWeights dummy_power_weights(parameter_type);
4620   size_t nweight = 0;
4621   for (int i = 1; i < 5; i++) {
4622     nweight = std::max<size_t>(nweight, inputs.at(i).weights().count());
4623   }
4624   const TRT_ShapedWeights* ptr_shape_weights = nullptr;
4625   for (int i = 1; i < 5; i++) {
4626     if (inputs.at(i).weights().count() == nweight) {
4627       ptr_shape_weights = &(inputs.at(i).weights());
4628     } else if (inputs.at(i).weights().count() != 1) {
4629       return errors::InvalidArgument(
4630           "Inconsistent batchnorm parameter count, at: " + node_def.name());
4631     }
4632   }
4633   if (params->validation_only) return Status::OK();
4634 
4635   //  We could technically have two weights with different shapes; that would
4636   //  require two addScale ops, which is arguably less performant.
4637   TRT_ShapedWeights combined_scale_weights =
4638       params->weight_store->GetTempWeights(*ptr_shape_weights);
4639   TRT_ShapedWeights combined_offset_weights =
4640       params->weight_store->GetTempWeights(*ptr_shape_weights);
4641 
4642   const Eigen::half* cast_vals_array[4];
4643   const float* vals_array[4];
4644   for (int j = 0; j < 4; j++) {
4645     cast_vals_array[j] =
4646         static_cast<Eigen::half const*>(inputs.at(j + 1).weights().GetValues());
4647     vals_array[j] =
4648         static_cast<float const*>(inputs.at(j + 1).weights().GetValues());
4649   }
4650   Eigen::half* cast_combined_scale_vals =
4651       static_cast<Eigen::half*>(combined_scale_weights.GetValues());
4652   Eigen::half* cast_combined_offset_vals =
4653       static_cast<Eigen::half*>(combined_offset_weights.GetValues());
4654   float* combined_scale_vals =
4655       static_cast<float*>(combined_scale_weights.GetValues());
4656   float* combined_offset_vals =
4657       static_cast<float*>(combined_offset_weights.GetValues());
4658 
4659   for (size_t i = 0; i < nweight; ++i) {
4660     float batchnorm_data[4];
4661     for (int j = 0; j < 4; j++) {
4662       if (inputs.at(j + 1).weights().count() != 1) {
4663         if (parameter_type == nvinfer1::DataType::kFLOAT) {
4664           batchnorm_data[j] = vals_array[j][i];
4665         } else if (parameter_type == nvinfer1::DataType::kHALF) {
4666           batchnorm_data[j] =
4667               Eigen::half_impl::half_to_float(cast_vals_array[j][i]);
4668         }
4669       } else {
4670         if (parameter_type == nvinfer1::DataType::kFLOAT) {
4671           batchnorm_data[j] = vals_array[j][0];
4672         } else if (parameter_type == nvinfer1::DataType::kHALF) {
4673           batchnorm_data[j] =
4674               Eigen::half_impl::half_to_float(cast_vals_array[j][0]);
4675         }
4676       }
4677     }
4678     float scale = batchnorm_data[0];
4679     float offset = batchnorm_data[1];
4680     float mean = batchnorm_data[2];
4681     float variance = batchnorm_data[3];
4682     float combined_scale_val = scale / sqrtf(variance + epsilon);
4683     float combined_offset_val = offset - mean * combined_scale_val;
4684     if (parameter_type == nvinfer1::DataType::kFLOAT) {
4685       combined_scale_vals[i] = combined_scale_val;
4686       combined_offset_vals[i] = combined_offset_val;
4687     } else if (parameter_type == nvinfer1::DataType::kHALF) {
4688       cast_combined_scale_vals[i] = Eigen::half(combined_scale_val);
4689       cast_combined_offset_vals[i] = Eigen::half(combined_offset_val);
4690     }
4691   }
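  // Derivation (for reference): y = scale * (x - mean) / sqrt(variance +
  // epsilon) + offset can be rewritten as y = combined_scale * x +
  // combined_offset, with combined_scale = scale / sqrt(variance + epsilon)
  // and combined_offset = offset - mean * combined_scale, which is exactly
  // what the loop above computes and what the single IScaleLayer below applies.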
4692 
4693   nvinfer1::ScaleMode mode = nweight == 1 ? nvinfer1::ScaleMode::kUNIFORM
4694                                           : nvinfer1::ScaleMode::kCHANNEL;
4695   nvinfer1::IScaleLayer* layer = params->converter->network()->addScale(
4696       *tensor, mode, combined_offset_weights.GetTrtWeights(),
4697       combined_scale_weights.GetTrtWeights(),
4698       dummy_power_weights.GetTrtWeights());
4699   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4700   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4701   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4702   return Status::OK();
4703 }
4704 
4705 Status ConvertGather(OpConverterParams* params) {
4706   const auto& inputs = params->inputs;
4707   const auto& node_def = params->node_def;
4708   // TODO(tmorris): Use CheckInputsWeights by changing bool to enum with an
4709   // option for an input to be either tensor or weight.
4710   if (inputs.size() != 3) {
4711     return errors::InvalidArgument("GatherV2 got ", inputs.size(),
4712                                    " inputs but expected 3, at ",
4713                                    node_def.name());
4714   }
4715   const auto& params_input = inputs.at(0);
4716   const auto& indices_input = inputs.at(1);
4717   const auto& axis_input = inputs.at(2);
4718   if (!axis_input.is_weights()) {
4719     return errors::Unimplemented(
4720         "The input \"axis\" for GatherV2 must be a constant, at ",
4721         node_def.name());
4722   }
4723   if (!indices_input.is_tensor()) {
4724     return errors::Unimplemented(
4725         "The input \"indices\" for GatherV2 must be a tensor, at ",
4726         node_def.name());
4727   }
4728 
4729   TF_RETURN_IF_ERROR(AllowDataTypes(
4730       *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32},
4731       /*dtype_attr_name=*/"Tparams"));
4732   TF_RETURN_IF_ERROR(AllowDataTypes(*params, {DataType::DT_INT32},
4733                                     /*dtype_attr_name=*/"Tindices"));
4734 
4735   absl::Span<const int> axis = axis_input.weights().GetSpan<int>();
4736   if (axis.size() != 1) {
4737     return errors::InvalidArgument("Axis for GatherV2 must be a scalar, at ",
4738                                    node_def.name());
4739   }
4740   int trt_axis = 0;
4741   TF_RETURN_IF_ERROR(ConvertAxis(axis[0], params_input.GetTrtDims().nbDims,
4742                                  node_def.name(), params_input.is_tensor(),
4743                                  &trt_axis));
4744   if (params_input.is_weights() && trt_axis != 0) {
4745     return errors::Unimplemented(
4746         "The input axis must be zero when params is a weight.");
4747   }
4748   if (params_input.is_tensor() && indices_input.batch_size() != 1) {
4749     return errors::Unimplemented(
4750         "Indices must have a batch size of 1 when params is a tensor.");
4751   }
4752   // When both inputs are tensors, the TF gather result will have rank:
4753   // (params.nbDims + 1) + (indices.nbDims + 1) - 1,
4754   // where "+ 1" adds the batch dim. If params is a weight, the TRT rank matches
4755   // the TF rank so we don't have to add + 1.
4756   const int params_tf_rank =
4757       params_input.GetTrtDims().nbDims + (params_input.is_tensor() ? 1 : 0);
4758   const int indices_tf_rank = indices_input.GetTrtDims().nbDims + 1;
4759   const int tf_gather_output_rank = params_tf_rank + indices_tf_rank - 1;
4760   if (tf_gather_output_rank > nvinfer1::Dims::MAX_DIMS + 1) {
4761     return errors::InvalidArgument(
4762         "Result of gather has dimension greater than ",
4763         nvinfer1::Dims::MAX_DIMS + 1);
4764   }
4765   if (params->validation_only) return Status::OK();
4766 
4767   // Convert params to a tensor if it is a weight.
4768   nvinfer1::ITensor* params_tensor = nullptr;
4769   if (params_input.is_weights()) {
4770     params_tensor = params->converter->CreateConstantLayer(
4771         params_input.weights(), params_input.GetTrtDims());
4772   } else {
4773     params_tensor = params_input.tensor();
4774   }
4775 
4776   // Note on how IGatherLayer works: if both the data and indices tensors have
4777   // a batch size dimension of size N, it performs:
4778   // for batchid in xrange(N):
4779   //   output[batchid, a0, ..., an, i, ..., j, b0, ..., bn] = (
4780   //       data[batchid, a0, ..., an, indices[batchid, i, ..., j] b0, ..., bn])
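  // For example (illustration of the rank bookkeeping above): with params of
  // TF shape [N, 10, 5] (params_tf_rank 3) and indices of TF shape [1, 4]
  // (indices_tf_rank 2), tf_gather_output_rank is 3 + 2 - 1 = 4, and the
  // IGatherLayer output below is expected to have 4 - 2 = 2 TRT dims before
  // the reshape that re-inserts the squeezed dim.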
4781   nvinfer1::IGatherLayer* layer = params->converter->network()->addGather(
4782       *params_tensor, *indices_input.tensor(), trt_axis);
4783   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
4784 
4785   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4786   nvinfer1::Dims trt_gather_output_dims = output_tensor->getDimensions();
4787   // Note for the "- 2": one is for the output batch dim encapsulated by TF-TRT,
4788   // and the other is for the output dimension that is squeezed by IGatherLayer
4789   // because of the implicit batch dim in the indices (see the above note).
4790   const int expected_trt_output_rank =
4791       tf_gather_output_rank - (params_input.is_tensor() ? 2 : 1);
4792   if (trt_gather_output_dims.nbDims != expected_trt_output_rank) {
4793     return errors::Internal(
4794         "Get unexpected output dimensions of IGatherLayer. Expect nbDims: ",
4795         expected_trt_output_rank,
4796         ", actual nbDims: ", trt_gather_output_dims.nbDims);
4797   }
4798   // Reshape the output so after adding the implicit batch dim it'll match the
4799   // output shape of TF GatherV2.
4800   if (params_input.is_tensor()) {
4801     for (int i = trt_gather_output_dims.nbDims; i > trt_axis; --i) {
4802       trt_gather_output_dims.d[i] = trt_gather_output_dims.d[i - 1];
4803     }
4804     trt_gather_output_dims.d[trt_axis] = 1;
4805     ++trt_gather_output_dims.nbDims;
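    // For illustration: IGatherLayer output dims [5, 20] with trt_axis = 0
    // become [1, 5, 20] here, so that prepending the implicit batch dim yields
    // the TF GatherV2 output shape.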
4806 
4807     TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
4808         TRT_TensorOrWeights(output_tensor), trt_gather_output_dims,
4809         /*validation_only=*/false, &output_tensor));
4810   }
4811 
4812   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4813   return Status::OK();
4814 }
4815 
4816 Status ConvertFullyConnectedHelper(OpConverterParams* params,
4817                                    nvinfer1::ITensor* tensor_a,
4818                                    TRT_ShapedWeights weights_b,
4819                                    bool transpose_b, const string& node_name) {
4820   // Reshape input to 3D - this will be a no-op unless using int8 precision.
4821   auto input_dim = tensor_a->getDimensions();
4822   while (input_dim.nbDims < 3) {
4823     input_dim.d[input_dim.nbDims++] = 1;
4824   }
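  // For illustration: input dims [256] become [256, 1, 1]; dims that are
  // already 3-D or higher are left unchanged by the loop above.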
4825   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
4826       TRT_TensorOrWeights(tensor_a), input_dim, /*validation_only=*/false,
4827       &tensor_a));
4828 
4829   // FC layer will transpose weights, so we need to pre-transpose.
4830   TRT_ShapedWeights weights(weights_b.TrtDType());
4831   if (!transpose_b) {
4832     weights = params->weight_store->GetTempWeights(weights_b);
4833     ReorderCKtoKC(weights_b, &weights);
4834   } else {
4835     weights = weights_b;
4836   }
4837   TRT_ShapedWeights biases(weights.TrtDType());
4838   const int noutput = weights.shape_.d[0];
4839   nvinfer1::IFullyConnectedLayer* layer =
4840       params->converter->network()->addFullyConnected(
4841           *tensor_a, noutput, weights.GetTrtWeights(), biases.GetTrtWeights());
4842 
4843   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_name);
4844   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4845 
4846   // Reshape output to 1D - this will be a no-op unless using int8 precision.
4847   auto output_dim = output_tensor->getDimensions();
4848   output_dim.nbDims = 1;
4849   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
4850       TRT_TensorOrWeights(output_tensor), output_dim, /*validation_only=*/false,
4851       &output_tensor));
4852 
4853   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4854   return Status::OK();
4855 }
4856 
4857 Status ConvertMatMulHelper(OpConverterParams* params,
4858                            TRT_TensorOrWeights input_a,
4859                            TRT_TensorOrWeights input_b, bool transpose_a,
4860                            bool transpose_b, string node_name) {
4861   // TODO: ReorderCKtoKC is currently not general enough to transpose weights
4862   // that are not 2D.
4863   if ((transpose_a && input_a.is_weights() &&
4864        input_a.GetTrtDims().nbDims != 2) ||
4865       (transpose_b && input_b.is_weights() &&
4866        input_b.GetTrtDims().nbDims != 2)) {
4867     return errors::InvalidArgument(
4868         "Cannot currently transpose constant input if it is not 2 dimensional");
4869   }
4870 
4871   // If A is a tensor, we can only transpose if it is at least 3D in TF,
4872   // or TRT will not do the correct transposition.
4873   if (transpose_a && input_a.is_tensor() && input_a.GetTrtDims().nbDims < 2) {
4874     return errors::InvalidArgument(
4875         "Cannot transpose first input if it is a tensor with fewer than 2 "
4876         "non-batch dimensions.");
4877   }
4878 
4879   // If B is a tensor, then it must be at least 3D in TF,
4880   // or TRT won't be able to handle the multiply correctly.
4881   if (input_b.is_tensor() && input_b.GetTrtDims().nbDims < 2) {
4882     return errors::InvalidArgument(
4883         "Second input must either be a constant, or contain at least 2 "
4884         "non-batch dimensions.");
4885   }
4886   if (params->validation_only) return Status::OK();
4887 
4888   // If an FC layer can be used and would be faster, use that instead.
4889   const bool can_use_fc =
4890       !transpose_a && input_a.is_tensor() && input_b.is_weights();
4891   const bool should_use_fc = can_use_fc && input_a.GetTrtDims().nbDims >= 3 &&
4892                              input_b.GetTrtDims().nbDims == 2;
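  // For illustration: with transpose_a == false, A a tensor with TRT dims
  // [C, 1, 1] and B a 2-D weight of shape [C, K], should_use_fc is true and
  // the FC path below is taken.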
4893   // If int8 is specified, FC must be used unless it is not compatible, as MM
4894   // does not support int8 at this time.
4895   if (should_use_fc || (can_use_fc && params->converter->precision_mode() ==
4896                                           TrtPrecisionMode::INT8)) {
4897     return ConvertFullyConnectedHelper(
4898         params, input_a.tensor(), input_b.weights(), transpose_b, node_name);
4899   }
4900 
4901   const auto get_matrix_op = [](nvinfer1::ITensor* in,
4902                                 bool transpose) -> nvinfer1::MatrixOperation {
4903     return (in->getDimensions().nbDims < 2)
4904                ? nvinfer1::MatrixOperation::kVECTOR
4905                : (transpose) ? nvinfer1::MatrixOperation::kTRANSPOSE
4906                              : nvinfer1::MatrixOperation::kNONE;
4907   };
4908 
4909   // If the MatMul operand is a constant, applies transposes at conversion-time
4910   // as necessary. If the operand is a tensor, does nothing. If required
4911   // transposes were applied, sets transpose to false.
4912   const auto prepare_matmul_operand =
4913       [&params](TRT_TensorOrWeights operand,
4914                 bool* transpose) -> nvinfer1::ITensor* {
4915     if (operand.is_tensor()) {
4916       return operand.tensor();
4917     } else {
4918       TRT_ShapedWeights weights(operand.weights().TrtDType());
4919       if (*transpose) {
4920         weights = params->weight_store->GetTempWeights(operand.weights());
4921         ReorderCKtoKC(operand.weights(), &weights);
4922         // Weights have been transposed, can set transpose to false
4923         *transpose = false;
4924       } else {
4925         weights = operand.weights();
4926       }
4927       return params->converter->CreateConstantLayer(weights, weights.shape_);
4928     }
4929   };
4930 
4931   nvinfer1::ITensor* tensor_a = prepare_matmul_operand(input_a, &transpose_a);
4932   nvinfer1::ITensor* tensor_b = prepare_matmul_operand(input_b, &transpose_b);
4933 
4934   nvinfer1::IMatrixMultiplyLayer* layer =
4935       params->converter->network()->addMatrixMultiply(
4936           *tensor_a, get_matrix_op(tensor_a, transpose_a), *tensor_b,
4937           get_matrix_op(tensor_b, transpose_b));
4938 
4939   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_name);
4940   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
4941   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
4942   return Status::OK();
4943 }
4944 
4945 // Inputs are both two-dimensional (ops::MatMul)
4946 Status ConvertMatMul(OpConverterParams* params) {
4947   const auto& inputs = params->inputs;
4948   const auto& node_def = params->node_def;
4949   if (inputs.size() != 2) {
4950     return errors::InvalidArgument(node_def.op(), " got ", inputs.size(),
4951                                    " inputs but expected 2, at ",
4952                                    node_def.name());
4953   }
4954   TF_RETURN_IF_ERROR(
4955       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4956 
4957   TFAttrs attrs(node_def);
4958   bool transpose_a = attrs.get<bool>("transpose_a");
4959   bool transpose_b = attrs.get<bool>("transpose_b");
4960 
4961   return ConvertMatMulHelper(params, inputs.at(0), inputs.at(1), transpose_a,
4962                              transpose_b, node_def.name());
4963 }
4964 
4965 Status ConvertBatchMatMul(OpConverterParams* params) {
4966   const auto& inputs = params->inputs;
4967   const auto& node_def = params->node_def;
4968   if (inputs.size() != 2) {
4969     return errors::InvalidArgument(node_def.op(), " got ", inputs.size(),
4970                                    " inputs but expected 2, at ",
4971                                    node_def.name());
4972   }
4973   // TODO(tmorris): Enable once false is updated to mean either tensor or weight
4974   // TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}, {"y",
4975   // false}}));
4976   TF_RETURN_IF_ERROR(
4977       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
4978   if (inputs.at(0).is_weights() && inputs.at(1).is_weights()) {
4979     return errors::InvalidArgument(
4980         "All inputs are weights, but Grappler is expected to fold them.");
4981   }
4982   if (inputs.at(0).is_tensor() && inputs.at(1).is_tensor() &&
4983       inputs.at(0).GetTrtDims().nbDims != inputs.at(1).GetTrtDims().nbDims) {
4984     return errors::Unimplemented(
4985         "Inputs must have the same rank if they are both tensors.");
4986   }
4987 
4988   TFAttrs attrs(node_def);
4989   const bool transpose_a = attrs.get<bool>("adj_x");
4990   const bool transpose_b = attrs.get<bool>("adj_y");
4991 
4992   // There is no way to batch constants in TRT. Example:
4993   // Tensor with TF Dims: 12 5 3 -> TRT Dims: 5 3
4994   // Weight with TF Dims: 12 3 6 -> TRT Dims: 12 3 6
4995   // It is not possible to treat the weight input as a batched [3, 6] tensor.
4996   const auto check_weight_is_not_batched =
4997       [](const TRT_TensorOrWeights& input_l,
4998          const TRT_TensorOrWeights& input_r) {
4999         // If input_l is a weight, then input_r must be a tensor because
5000         // otherwise the op would be handled by Grappler.
5001         if (input_l.is_weights() &&
5002             input_l.GetTrtDims().nbDims > input_r.GetTrtDims().nbDims &&
5003             input_l.GetTrtDims().d[0] != 1) {
5004           return errors::Unimplemented(
5005               "TensorRT does not support batched constants.");
5006         }
5007         return Status::OK();
5008       };
5009   TF_RETURN_IF_ERROR(check_weight_is_not_batched(inputs.at(0), inputs.at(1)));
5010   TF_RETURN_IF_ERROR(check_weight_is_not_batched(inputs.at(1), inputs.at(0)));
5011 
5012   // Broadcast inputs. We don't check feasibility since the dimensions in a
5013   // MatMul don't need to match. For example, consider a valid set of inputs
5014   // which would produce an output of shape [N, T, K]:
5015   // input 0: [N, T, C]
5016   // input 1: [1, C, K]
5017   // Since C != K and T != C, the feasibility check would fail.
5018   nvinfer1::Dims broadcasted_dims_l, broadcasted_dims_r;
5019   TF_RETURN_IF_ERROR(GetTrtBroadcastShape(
5020       inputs.at(0), inputs.at(1), /*check_feasibility=*/false,
5021       params->use_implicit_batch, &broadcasted_dims_l, &broadcasted_dims_r));
5022   nvinfer1::ITensor* tensor_l = nullptr;
5023   nvinfer1::ITensor* tensor_r = nullptr;
5024   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
5025       inputs.at(0), broadcasted_dims_l, params->validation_only, &tensor_l));
5026   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
5027       inputs.at(1), broadcasted_dims_r, params->validation_only, &tensor_r));
5028   if (params->validation_only) return Status::OK();
5029 
5030   return ConvertMatMulHelper(params, TRT_TensorOrWeights(tensor_l),
5031                              TRT_TensorOrWeights(tensor_r), transpose_a,
5032                              transpose_b, node_def.name());
5033 }
5034 
5035 Status ConvertSoftmax(OpConverterParams* params) {
5036   const auto& inputs = params->inputs;
5037   const auto& node_def = params->node_def;
5038   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"logits", false}}));
5039   TF_RETURN_IF_ERROR(
5040       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
5041   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
5042 
5043   const int num_trt_dims = tensor->getDimensions().nbDims;
5044   if (num_trt_dims == 0) {
5045     return errors::InvalidArgument(
5046         "TensorRT Softmax cannot be applied to the batch dimension, at ",
5047         node_def.name());
5048   }
5049   if (params->validation_only) return Status::OK();
5050 
5051   nvinfer1::ISoftMaxLayer* layer =
5052       params->converter->network()->addSoftMax(*tensor);
5053   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
5054   // TensorFlow SoftMax is applied along the last dimension.
5055   layer->setAxes(1 << (num_trt_dims - 1));
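  // For illustration: a TF input of shape [N, 3, 5] has TRT dims [3, 5], so
  // num_trt_dims = 2 and the axes bitmask is 1 << 1, selecting the innermost
  // dimension (size 5).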
5056 
5057   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
5058   // Quantization range for SoftMax is always (0, 1)
5059   params->converter->ProvideQuantizationRange(output_tensor, 0.0f, 1.0f);
5060   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
5061   return Status::OK();
5062 }
5063 
5064 Status ConvertArgMinMax(OpConverterParams* params) {
5065   const auto& inputs = params->inputs;
5066   const auto& node_def = params->node_def;
5067   TF_RETURN_IF_ERROR(
5068       CheckInputsWeights(*params, {{"input", false}, {"dimension", true}}));
5069   TF_RETURN_IF_ERROR(
5070       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
5071   // INT64 outputs are not supported by TRT.
5072   TFAttrs attrs(node_def);
5073   DataType output_dtype = attrs.get<DataType>("output_type");
5074   if (output_dtype != DataType::DT_INT32) {
5075     return errors::Unimplemented("Output type ", DataTypeString(output_dtype),
5076                                  " is not supported, at ", node_def.name());
5077   }
5078   int tf_axis = inputs.at(1).weights().GetSpan<int>()[0];
5079   int trt_axis;
5080   nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
5081   TF_RETURN_IF_ERROR(ConvertAxis(tf_axis, dims.nbDims, node_def.name(),
5082                                  /*use_implicit_batch=*/true, &trt_axis));
5083   nvinfer1::TopKOperation topk_op;
5084   if (node_def.op() == "ArgMin") {
5085     topk_op = nvinfer1::TopKOperation::kMIN;
5086   } else if (node_def.op() == "ArgMax") {
5087     topk_op = nvinfer1::TopKOperation::kMAX;
5088   } else {
5089     return errors::InvalidArgument("Unsupported ArgMin/Max operation");
5090   }
5091   if (params->validation_only) return Status::OK();
5092 
5093   // Use TopK with k = 1. Only indices output is needed (output 1).
5094   const uint32_t reduce_axes = 1 << trt_axis;
5095   nvinfer1::ITopKLayer* layer = params->converter->network()->addTopK(
5096       *inputs.at(0).tensor(), topk_op, 1, reduce_axes);
5097   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
5098   nvinfer1::ITensor* output_indices_tensor = layer->getOutput(1);
5099 
5100   // Squeeze on axis.
5101   std::vector<int> size(dims.d, dims.d + dims.nbDims);
5102   size.erase(size.begin() + trt_axis);
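  // For illustration: input TRT dims [10, 20] with tf_axis = 2 give
  // trt_axis = 1; TopK emits indices of shape [10, 1], and erasing that axis
  // leaves [10], matching the TF ArgMax/ArgMin output minus the batch dim.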
5103   nvinfer1::Dims new_dims;
5104   TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(size, &new_dims));
5105   nvinfer1::ITensor* output_tensor = nullptr;
5106   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
5107       TRT_TensorOrWeights(output_indices_tensor), new_dims,
5108       /*validation_only=*/false, &output_tensor));
5109 
5110   params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
5111   return Status::OK();
5112 }
5113 
5114 Status ConvertTopK(OpConverterParams* params) {
5115   const auto& inputs = params->inputs;
5116   const auto& node_def = params->node_def;
5117   TF_RETURN_IF_ERROR(
5118       CheckInputsWeights(*params, {{"input", false}, {"k", true}}));
5119   TF_RETURN_IF_ERROR(
5120       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
5121   TFAttrs attrs(node_def);
5122   const bool sorted = attrs.get<bool>("sorted");
5123   if (!sorted) {
5124     // TensorRT only supports sorted output. Although the TensorFlow API
5125     // doesn't specify the order of output elements when sorted=false, it's
5126     // safer not to convert, because the TensorRT output might differ from
5127     // TensorFlow's and cause confusion.
5128     return errors::InvalidArgument("Only sorted=True is supported, at ",
5129                                    node_def.name());
5130   }
5131 
5132   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
5133   const int num_dims = tensor->getDimensions().nbDims;
5134   if (num_dims == 0) {
5135     return errors::InvalidArgument(
5136         "TensorRT TopK cannot be applied to the batch dimension, at ", node_def.name());
5137   }
5138 
5139   TRT_ShapedWeights k_w = inputs.at(1).weights();
5140   if (k_w.count() != 1) {
5141     return errors::InvalidArgument("k value of TopK should be a scalar, at ",
5142                                    node_def.name());
5143   }
5144   // Note that ITopKLayer always has sorted outputs, so we don't need to handle
5145   // the 'sorted' attribute of the node.
5146   if (params->validation_only) return Status::OK();
5147 
5148   const nvinfer1::TopKOperation op = nvinfer1::TopKOperation::kMAX;
5149   const int k = *(static_cast<int*>(k_w.GetValues()));
5150   const uint32_t reduce_axes = 1 << (num_dims - 1);
5151   nvinfer1::ITopKLayer* layer =
5152       params->converter->network()->addTopK(*tensor, op, k, reduce_axes);
5153   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
5154 
5155   nvinfer1::ITensor* output_value_tensor = layer->getOutput(0);
5156   nvinfer1::ITensor* output_indices_tensor = layer->getOutput(1);
5157   params->outputs->push_back(TRT_TensorOrWeights(output_value_tensor));
5158   params->outputs->push_back(TRT_TensorOrWeights(output_indices_tensor));
5159   return Status::OK();
5160 }
5161 
5162 Status ConvertDepthSpaceShuffle(OpConverterParams* params) {
5163   const auto& inputs = params->inputs;
5164   const auto& node_def = params->node_def;
5165   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}}));
5166   TF_RETURN_IF_ERROR(AllowDataTypes(
5167       *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
5168   TFAttrs attrs(node_def);
5169   const int block_size = attrs.get<int64>("block_size");
5170   if (block_size < 2) {
5171     return errors::InvalidArgument("Block size must be 2 or greater, at ",
5172                                    node_def.name());
5173   }
5174   const string data_format = attrs.get<string>("data_format");
5175   if (data_format != "NCHW" && data_format != "NHWC") {
5176     return errors::Unimplemented("Data format ", data_format,
5177                                  " is not supported, at ", node_def.name());
5178   }
5179   nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
5180   if (dims.nbDims != 3) {
5181     return errors::InvalidArgument("The input to ", node_def.op(),
5182                                    " must be rank 4, at ", node_def.name());
5183   }
5184   const int num_channels = data_format == "NCHW" ? dims.d[0] : dims.d[2];
5185   const int h = data_format == "NCHW" ? dims.d[1] : dims.d[0];
5186   const int w = data_format == "NCHW" ? dims.d[2] : dims.d[1];
5187   // Get shuffle parameters.
5188   nvinfer1::Dims first_shuffle_shape;
5189   nvinfer1::Permutation transpose_perm;
5190   nvinfer1::Dims second_shuffle_shape;
5191   if (node_def.op() == "DepthToSpace") {
5192     if (num_channels % (block_size * block_size) != 0) {
5193       return errors::InvalidArgument(
5194           "Number of channels must be divisible by block_size*block_size, at ",
5195           node_def.name());
5196     }
5197     // First Reshape [C, H, W] -> [r, r, C/(r*r), H, W]
5198     first_shuffle_shape = {
5199         /*nbDims=*/5,
5200         /*d=*/{block_size, block_size, num_channels / (block_size * block_size),
5201                h, w}};
5202     // Transpose [r, r, C/(r*r), H, W] -> [C/(r*r), H, r, W, r]
5203     transpose_perm = {2, 3, 0, 4, 1};
5204     // Second Reshape [C/(r*r), H, r, W, r] -> [C/(r*r), H * r, W * r]
5205     second_shuffle_shape =
5206         nvinfer1::DimsCHW(num_channels / (block_size * block_size),
5207                           h * block_size, w * block_size);
5208   } else if (node_def.op() == "SpaceToDepth") {
5209     if (h % block_size != 0 || w % block_size != 0) {
5210       return errors::InvalidArgument(
5211           "Width and height must be divisible by block_size, at ",
5212           node_def.name());
5213     }
5214     // First Reshape [C, H, W] -> [C, H/r, r, W/r, r]
5215     first_shuffle_shape = {/*nbDims=*/5,
5216                            /*d=*/{num_channels, h / block_size, block_size,
5217                                   w / block_size, block_size}};
5218     // Transpose [C, H/r, r, W/r, r] -> [r, r, C, H/r, W/r]
5219     transpose_perm = {2, 4, 0, 1, 3};
5220     // Second Reshape  [r, r, C, H/r, W/r] -> [C*r*r, H/r, W/r]
5221     second_shuffle_shape = nvinfer1::DimsCHW(
5222         num_channels * block_size * block_size, h / block_size, w / block_size);
5223   }
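  // For illustration, DepthToSpace with block_size = 2 on NCHW input
  // [C=8, H=4, W=6]: first reshape -> [2, 2, 2, 4, 6], transpose ->
  // [2, 4, 2, 6, 2], second reshape -> [2, 8, 12].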
5224   if (params->validation_only) return Status::OK();
5225 
5226   nvinfer1::IShuffleLayer* first_shuffle =
5227       params->converter->network()->addShuffle(*inputs.at(0).tensor());
5228   TFTRT_RETURN_ERROR_IF_NULLPTR(first_shuffle, node_def.name());
5229   if (data_format == "NHWC") {
5230     first_shuffle->setFirstTranspose({2, 0, 1});
5231   }
5232   first_shuffle->setReshapeDimensions(first_shuffle_shape);
5233   first_shuffle->setSecondTranspose(transpose_perm);
5234 
5235   nvinfer1::IShuffleLayer* second_shuffle =
5236       params->converter->network()->addShuffle(*first_shuffle->getOutput(0));
5237   TFTRT_RETURN_ERROR_IF_NULLPTR(second_shuffle, node_def.name());
5238   second_shuffle->setReshapeDimensions(second_shuffle_shape);
5239   if (data_format == "NHWC") {
5240     second_shuffle->setSecondTranspose({1, 2, 0});
5241   }
5242 
5243   params->converter->MarkQuantizationRangesAsInferrable(
5244       inputs.at(0).tensor(), first_shuffle->getOutput(0));
5245   params->converter->MarkQuantizationRangesAsInferrable(
5246       first_shuffle->getOutput(0), second_shuffle->getOutput(0));
5247   params->outputs->push_back(TRT_TensorOrWeights(second_shuffle->getOutput(0)));
5248   return Status::OK();
5249 }
5250 
5251 Status ConvertSquaredDifference(OpConverterParams* params) {
5252   TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}, {"y", false}}));
5253   TF_RETURN_IF_ERROR(
5254       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
5255   const auto& inputs = params->inputs;
5256   const auto& node_def = params->node_def;
5257   // Broadcast inputs.
5258   nvinfer1::Dims broadcasted_dims_l, broadcasted_dims_r;
5259   TF_RETURN_IF_ERROR(GetTrtBroadcastShape(
5260       inputs.at(0), inputs.at(1), /*check_feasibility=*/true,
5261       params->use_implicit_batch, &broadcasted_dims_l, &broadcasted_dims_r));
5262   nvinfer1::ITensor* tensor_l = nullptr;
5263   nvinfer1::ITensor* tensor_r = nullptr;
5264   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
5265       inputs.at(0), broadcasted_dims_l, params->validation_only, &tensor_l));
5266   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
5267       inputs.at(1), broadcasted_dims_r, params->validation_only, &tensor_r));
5268   if (params->validation_only) return Status::OK();
5269 
5270   // Subtract x - y.
5271   nvinfer1::IElementWiseLayer* sub =
5272       params->converter->network()->addElementWise(
5273           *tensor_l, *tensor_r, nvinfer1::ElementWiseOperation::kSUB);
5274   TFTRT_RETURN_ERROR_IF_NULLPTR(sub, node_def.name());
5275   // Multiply (x - y) * (x - y).
5276   nvinfer1::IElementWiseLayer* mul =
5277       params->converter->network()->addElementWise(
5278           *sub->getOutput(0), *sub->getOutput(0),
5279           nvinfer1::ElementWiseOperation::kPROD);
5280   TFTRT_RETURN_ERROR_IF_NULLPTR(mul, node_def.name());
5281 
5282   params->outputs->push_back(TRT_TensorOrWeights(mul->getOutput(0)));
5283   return Status::OK();
5284 }
5285 
5286 #if IS_TRT_VERSION_GE(5, 1, 0, 0)
5287 Status ConvertCombinedNMS(OpConverterParams* params) {
5288   TF_RETURN_IF_ERROR(
5289       CheckInputsWeights(*params, {{"boxes", false},
5290                                    {"scores", false},
5291                                    {"max_output_size_per_class", true},
5292                                    {"max_total_size", true},
5293                                    {"iou_threshold", true},
5294                                    {"score_threshold", true}}));
5295   const auto& inputs = params->inputs;
5296   const auto& node_def = params->node_def;
5297 
5298   nvinfer1::ITensor* boxes_tensor = inputs.at(0).tensor();
5299   nvinfer1::ITensor* scores_tensor = inputs.at(1).tensor();
5300   TRT_ShapedWeights output_size_per_class = inputs.at(2).weights();
5301   TRT_ShapedWeights total_size = inputs.at(3).weights();
5302   TRT_ShapedWeights iou_threshold = inputs.at(4).weights();
5303   TRT_ShapedWeights score_threshold = inputs.at(5).weights();
5304 
5305   // Validate tensors and weights (also set some of the needed plugin fields)
5306   const auto boxes_dims = boxes_tensor->getDimensions();
5307   const auto scores_dims = scores_tensor->getDimensions();
5308   if (boxes_dims.nbDims != 3) {
5309     return errors::InvalidArgument(
5310         "TensorRT BatchedNMS Plugin input boxes must be 3-D excluding batch ",
5311         node_def.name());
5312   }
5313   const int num_classes = scores_dims.d[1];
5314   bool box_check = boxes_dims.d[1] == 1 || boxes_dims.d[1] == num_classes;
5315   if (!box_check) {
5316     return errors::InvalidArgument(
5317         "TensorRT BatchedNMS Plugin third dimension of boxes must be either 1 "
5318         "or num_classes ",
5319         node_def.name());
5320   }
5321   if (output_size_per_class.shape_.nbDims != 1) {
5322     return errors::InvalidArgument(
5323         "TensorRT BatchedNMS Plugin max_output_size_per_class must be 0-D ",
5324         node_def.name());
5325   }
5326   int max_size_per_class =
5327       *(static_cast<int*>(output_size_per_class.GetValues()));
5328   if (max_size_per_class <= 0) {
5329     return errors::InvalidArgument(
5330         "TensorRT BatchedNMS Plugin max_output_size_per_class should be > 0",
5331         node_def.name());
5332   }
5333   if (total_size.shape_.nbDims != 1) {
5334     return errors::InvalidArgument(
5335         "TensorRT BatchedNMS Plugin max_total_size must be 0-D ",
5336         node_def.name());
5337   }
5338   int max_total_size = *(static_cast<int*>(total_size.GetValues()));
5339   if (max_total_size <= 0) {
5340     return errors::InvalidArgument(
5341         "TensorRT BatchedNMS Plugin max_total_size should be > 0",
5342         node_def.name());
5343   }
5344   if (iou_threshold.shape_.nbDims != 1) {
5345     return errors::InvalidArgument(
5346         "TensorRT BatchedNMS Plugin iou_threshold must be 0-D ",
5347         node_def.name());
5348   }
5349   float iou_thresh = *(static_cast<float*>(iou_threshold.GetValues()));
5350   if (iou_thresh < 0.0 || iou_thresh > 1.0) {
5351     return errors::InvalidArgument(
5352         "TensorRT BatchedNMS Plugin iou_threshold must be in [0, 1]",
5353         node_def.name());
5354   }
5355   if (score_threshold.shape_.nbDims != 1) {
5356     return errors::InvalidArgument(
5357         "TensorRT BatchedNMS Plugin score_threshold must be 0-D ",
5358         node_def.name());
5359   }
5360 
5361   if (params->validation_only) return Status::OK();
5362 
5363   // TF op CombinedNonMaxSuppression doesn't have the option of
5364   // not normalizing coordinates.
5365   const bool is_normalized = true;
5366   // Set plugin fields and the field collection
5367   TFAttrs attrs(node_def);
5368   bool share_location = (boxes_dims.d[1] == 1);
5369   const bool pad_per_class = attrs.get<bool>("pad_per_class");
5370   int top_k;
5371   if (pad_per_class) {
5372     top_k = std::min(max_size_per_class * num_classes, max_total_size);
5373   } else {
5374     top_k = max_total_size;
5375   }
5376   const int keep_top_k = top_k;
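  // For illustration: with num_classes = 4, max_size_per_class = 5, and
  // max_total_size = 100, pad_per_class=true gives top_k = min(20, 100) = 20;
  // otherwise top_k = 100. keep_top_k mirrors top_k in both cases.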
5377   float score_thresh = *(static_cast<float*>(score_threshold.GetValues()));
5378   const int background_id = -1;
5379   nvinfer1::PluginField fields[8] = {
5380       nvinfer1::PluginField{"shareLocation", &share_location,
5381                             nvinfer1::PluginFieldType::kINT32, 1},
5382       nvinfer1::PluginField{"backgroundLabelId", &background_id,
5383                             nvinfer1::PluginFieldType::kINT32, 1},
5384       nvinfer1::PluginField{"numClasses", &num_classes,
5385                             nvinfer1::PluginFieldType::kINT32, 1},
5386       nvinfer1::PluginField{"topK", &top_k, nvinfer1::PluginFieldType::kINT32,
5387                             1},
5388       nvinfer1::PluginField{"keepTopK", &keep_top_k,
5389                             nvinfer1::PluginFieldType::kINT32, 1},
5390       nvinfer1::PluginField{"scoreThreshold", &score_thresh,
5391                             nvinfer1::PluginFieldType::kFLOAT32, 1},
5392       nvinfer1::PluginField{"iouThreshold", &iou_thresh,
5393                             nvinfer1::PluginFieldType::kFLOAT32, 1},
5394       nvinfer1::PluginField{"isNormalized", &is_normalized,
5395                             nvinfer1::PluginFieldType::kINT32, 1},
5396   };
5397   nvinfer1::PluginFieldCollection fc{8, fields};
5398 
5399   // Get plugin creator
5400   auto creator =
5401       getPluginRegistry()->getPluginCreator("BatchedNMS_TRT", "1", "");
5402   TFTRT_RETURN_ERROR_IF_NULLPTR(creator, node_def.name());
5403 
5404   // Create plugin
5405   TrtUniquePtrType<nvinfer1::IPluginV2> plugin(
5406       creator->createPlugin(node_def.name().c_str(), &fc));
5407   TFTRT_RETURN_ERROR_IF_NULLPTR(plugin, node_def.name());
5408 
5409   // Set plugin inputs
5410   std::vector<nvinfer1::ITensor*> plugin_inputs;
5411   plugin_inputs.push_back(boxes_tensor);
5412   plugin_inputs.push_back(scores_tensor);
5413 
5414   // Add plugin to network
5415   nvinfer1::IPluginV2Layer* layer = params->converter->network()->addPluginV2(
5416       &plugin_inputs[0], static_cast<int>(plugin_inputs.size()), *plugin);
5417   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
5418 
5419   // Set plugin outputs
5420   nvinfer1::ITensor* output_nmsed_boxes = layer->getOutput(1);
5421 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
5422   // TRT6 fixes (removes) the extra last dimension in CombinedNMS outputs
5423   nvinfer1::ITensor* output_num_detections = layer->getOutput(0);
5424   nvinfer1::ITensor* output_nmsed_scores = layer->getOutput(2);
5425   nvinfer1::ITensor* output_nmsed_classes = layer->getOutput(3);
5426 #else
5427   nvinfer1::ITensor* output_num_detections = nullptr;
5428   nvinfer1::ITensor* output_nmsed_scores = nullptr;
5429   nvinfer1::ITensor* output_nmsed_classes = nullptr;
5430 
5431   auto shrink_last_dim = [params](nvinfer1::ITensor* in_tensor,
5432                                   nvinfer1::ITensor** out_tensor) {
5433     nvinfer1::Dims dims = in_tensor->getDimensions();
5434     if (dims.d[dims.nbDims - 1] != 1) {
5435       return errors::Internal("Expect last dims to be 1, for tensor ",
5436                               DebugString(*in_tensor));
5437     }
5438     --dims.nbDims;
5439     TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
5440         TRT_TensorOrWeights(in_tensor), dims,
5441         /*validation_only=*/false, out_tensor));
5442     return Status::OK();
5443   };
5444   TF_RETURN_IF_ERROR(
5445       shrink_last_dim(layer->getOutput(2), &output_nmsed_scores));
5446   TF_RETURN_IF_ERROR(
5447       shrink_last_dim(layer->getOutput(3), &output_nmsed_classes));
5448   TF_RETURN_IF_ERROR(
5449       shrink_last_dim(layer->getOutput(0), &output_num_detections));
5450 #endif  // IS_TRT_VERSION_GE(6, 0, 0, 0)
5451 
5452   params->outputs->push_back(TRT_TensorOrWeights(output_nmsed_boxes));
5453   params->outputs->push_back(TRT_TensorOrWeights(output_nmsed_scores));
5454   params->outputs->push_back(TRT_TensorOrWeights(output_nmsed_classes));
5455   params->outputs->push_back(TRT_TensorOrWeights(output_num_detections));
5456 
5457   return Status::OK();
5458 }
5459 #endif  // IS_TRT_VERSION_GE(5, 1, 0, 0)
5460 
5461 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
5462 Status ConvertResize(OpConverterParams* params) {
5463   const auto& inputs = params->inputs;
5464   const auto& node_def = params->node_def;
5465   TF_RETURN_IF_ERROR(
5466       CheckInputsWeights(*params, {{"input", false}, {"size", true}}));
5467   TF_RETURN_IF_ERROR(AllowDataTypes(
5468       *params, {DataType::DT_FLOAT, DataType::DT_HALF, DataType::DT_INT32}));
5469 
5470   // Get input tensor. Transpose it from NHWC to NCHW.
5471   nvinfer1::ITensor* tensor = inputs.at(0).tensor();
5472   TFTRT_RETURN_ERROR_IF_NULLPTR(tensor, params->node_def.name());
5473 
5474   // Get output size. It must contain two values, i.e. [H_out, W_out]
5475   TRT_ShapedWeights weights = inputs.at(1).weights();
5476   if (weights.count() != 2) {
5477     return errors::Unimplemented("Resize to shape=[] is not supported, at ",
5478                                  node_def.name());
5479   }
5480   const int* weights_ptr = static_cast<int*>(weights.GetValues());
5481 
5482   // Verify and consume node attributes.
5483   TFAttrs attrs(node_def);
5484   bool align_corners = attrs.get<bool>("align_corners");
5485   TF_RETURN_IF_ERROR(
5486       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
5487 
5488   // Verify resize mode. Initialize resize mode if supported.
5489   nvinfer1::ResizeMode resize_mode;
5490   if (node_def.op() == "ResizeBilinear") {
5491     resize_mode = nvinfer1::ResizeMode::kLINEAR;
5492   } else if (node_def.op() == "ResizeNearestNeighbor") {
5493     resize_mode = nvinfer1::ResizeMode::kNEAREST;
5494   } else {
5495     return errors::Unimplemented(node_def.op(), " is not yet implemented at ",
5496                                  node_def.name());
5497   }
5498 
5499   // Return after validation if only validation is requested.
5500   if (params->validation_only) return Status::OK();
5501 
5502   // Transpose tensor from NHWC to NCHW format.
5503   TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
5504       tensor, {0, 3, 1, 2}, StrCat(node_def.name(), "_to_NCHW"), &tensor));
5505 
5506   // Calculate output dimensions.
5507   // Given input dimensions [N, C, H, W] and output size [H_out, W_out],
5508   // the output dimensions equal [N, C, H_out, W_out]
5509   nvinfer1::Dims output_dimensions;
5510   output_dimensions.nbDims = tensor->getDimensions().nbDims;
5511   for (int i = 0; i < output_dimensions.nbDims; ++i) {
5512     output_dimensions.d[i] = tensor->getDimensions().d[i];
5513   }
5514   output_dimensions.d[output_dimensions.nbDims - 2] = weights_ptr[0];
5515   output_dimensions.d[output_dimensions.nbDims - 1] = weights_ptr[1];
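  // For illustration: an input with (post-transpose) TRT dims [3, 32, 48] and
  // size = [64, 96] produces output dims [3, 64, 96]; the implicit batch dim
  // is untouched.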
5516 
5517   // Add resize layer.
5518   nvinfer1::IResizeLayer* layer =
5519       params->converter->network()->addResize(*tensor);
5520   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
5521 
5522   // Set layer parameters.
5523   layer->setResizeMode(resize_mode);
5524   layer->setOutputDimensions(output_dimensions);
5525   layer->setAlignCorners(align_corners);
5526 
5527   // Get output tensor. Transpose it from NCHW to NHWC.
5528   nvinfer1::ITensor* output = layer->getOutput(0);
5529 
5530   TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
5531       output, {0, 2, 3, 1}, StrCat(node_def.name(), "_to_NHWC"), &output));
5532   params->outputs->push_back(TRT_TensorOrWeights(output));
5533   // Success
5534   return Status::OK();
5535 }  // ConvertResize
5536 #endif  // IS_TRT_VERSION_GE(6, 0, 0, 0)
5537 
5538 Status ConvertAddN(OpConverterParams* params) {
5539   const auto& inputs = params->inputs;
5540   const auto& node_def = params->node_def;
5541   TF_RETURN_IF_ERROR(
5542       AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF}));
5543   TFAttrs attrs(node_def);
5544   const int num_inputs = attrs.get<int64>("N");
5545   if (num_inputs < 2) {
5546     return errors::InvalidArgument("AddN requires at least two inputs, at ",
5547                                    node_def.name());
5548   }
5549   if (inputs.size() != num_inputs) {
5550     return errors::InvalidArgument("Got ", inputs.size(),
5551                                    " inputs but expected ", num_inputs, ", at ",
5552                                    node_def.name());
5553   }
5554   for (const auto& input : inputs) {
5555     if (!input.is_tensor() && input.weights().shape_.d[0] != 1) {
5556       return errors::InvalidArgument(
5557           "Weights input to AddN is required to have batch dimension 1.");
5558     }
5559   }
5560   if (params->validation_only) return Status::OK();
5561 
5562   // AddN doesn't support broadcasting.
5563   std::vector<nvinfer1::ITensor*> tensor_inputs;
5564   for (const auto& input : inputs) {
5565     if (input.is_tensor()) {
5566       tensor_inputs.push_back(input.tensor());
5567     } else {
5568       auto dims = input.weights().shape_;
5569       TF_RETURN_IF_ERROR(RemoveBatchDimension(&dims));
5570       tensor_inputs.push_back(
5571           params->converter->CreateConstantLayer(input.weights(), dims));
5572     }
5573   }
5574   nvinfer1::ITensor* lhs = tensor_inputs[0];
5575   for (int i = 1; i < num_inputs; ++i) {
5576     nvinfer1::ITensor* rhs = tensor_inputs[i];
5577     nvinfer1::ILayer* layer = params->converter->network()->addElementWise(
5578         *lhs, *rhs, nvinfer1::ElementWiseOperation::kSUM);
5579     TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
5580     lhs = layer->getOutput(0);
5581   }
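  // For illustration: N = 3 builds two chained kSUM layers computing
  // ((in0 + in1) + in2).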
5582   params->outputs->push_back(TRT_TensorOrWeights(lhs));
5583   return Status::OK();
5584 }
5585 
5586 static void RegisterValidatableOpConverters(
5587     std::unordered_map<string, OpConverter>* registration) {
5588   (*registration)["BiasAdd"] = ConvertBiasAdd;
5589 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
5590   (*registration)["ClipByValue"] = ConvertClipByValue;
5591 #endif
5592 #if IS_TRT_VERSION_GE(5, 1, 0, 0)
5593   (*registration)["CombinedNonMaxSuppression"] = ConvertCombinedNMS;
5594 #endif
5595   (*registration)["AddN"] = ConvertAddN;
5596   (*registration)["ConcatV2"] = ConvertConcat;
5597   (*registration)["Const"] = ConvertConst;
5598   (*registration)["Conv2D"] = ConvertConv2D;
5599   (*registration)["Conv2DBackpropInput"] = ConvertConv2DBackpropInput;
5600   (*registration)["DepthToSpace"] = ConvertDepthSpaceShuffle;
5601   (*registration)["DepthwiseConv2dNative"] = ConvertConv2DDepthwise;
5602   (*registration)["ExpandDims"] = ConvertExpandDims;
5603   (*registration)["FusedConv2DBiasActivation"] =
5604       ConvertFusedConv2DBiasActivation;
5605   (*registration)["GatherV2"] = ConvertGather;
5606   (*registration)["LeakyRelu"] = ConvertLeakyRelu;
5607   (*registration)["MatMul"] = ConvertMatMul;
5608   (*registration)["Pack"] = ConvertPack;
5609   (*registration)["Pad"] = ConvertPad;
5610   (*registration)["Relu6"] = ConvertRelu6;
5611   (*registration)["Reshape"] = ConvertReshape;
5612 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
5613   (*registration)["Conv3D"] = ConvertConv3D;
5614   (*registration)["Conv3DBackpropInputV2"] = ConvertConv3DBackpropInputV2;
5615   for (auto resize_mode : {"ResizeBilinear", "ResizeNearestNeighbor"}) {
5616     (*registration)[resize_mode] = ConvertResize;
5617   }
5618   for (auto pool_op_type : {"AvgPool3D", "MaxPool3D"}) {
5619     (*registration)[pool_op_type] = ConvertPool3D;
5620   }
5621 #endif
5622   (*registration)["Rsqrt"] = ConvertRsqrt;
5623   (*registration)["Slice"] = ConvertSlice;
5624   (*registration)["Softmax"] = ConvertSoftmax;
5625   (*registration)["SpaceToDepth"] = ConvertDepthSpaceShuffle;
5626   (*registration)["Split"] = ConvertSplit;
5627   (*registration)["Square"] = ConvertSquare;
5628   (*registration)["SquaredDifference"] = ConvertSquaredDifference;
5629   (*registration)["Squeeze"] = ConvertSqueeze;
5630   (*registration)["StridedSlice"] = ConvertStridedSlice;
5631   (*registration)["TopKV2"] = ConvertTopK;
5632   (*registration)["Transpose"] = ConvertTranspose;
5633   (*registration)["Unpack"] = ConvertUnpack;
5634 
5635   for (auto quantization_op_type :
5636        {"QuantizeAndDequantizeV2", "QuantizeAndDequantizeV3",
5637         "FakeQuantWithMinMaxVars", "FakeQuantWithMinMaxArgs"}) {
5638     (*registration)[quantization_op_type] = ConvertQuantize;
5639   }
5640   for (const auto& binary_op_pair : *BinaryOperationMap()) {
5641     (*registration)[binary_op_pair.first] = ConvertBinary;
5642   }
5643   for (const auto& activation_op_pair : *ActivationTypeMap()) {
5644     (*registration)[activation_op_pair.first] = ConvertActivation;
5645   }
5646   for (auto pool_op_type : {"AvgPool", "MaxPool"}) {
5647     (*registration)[pool_op_type] = ConvertPool;
5648   }
5649   for (auto normalization_op_type :
5650        {"FusedBatchNorm", "FusedBatchNormV2", "FusedBatchNormV3"}) {
5651     (*registration)[normalization_op_type] = ConvertFusedBatchNorm;
5652   }
5653   for (auto unary_op_pair : *UnaryOperationMap()) {
5654     (*registration)[unary_op_pair.first] = ConvertUnary;
5655   }
5656   for (auto reduce_op_type : {"Sum", "Prod", "Max", "Min", "Mean"}) {
5657     (*registration)[reduce_op_type] = ConvertReduce;
5658   }
5659   for (auto arg_minmax_type : {"ArgMin", "ArgMax"}) {
5660     (*registration)[arg_minmax_type] = ConvertArgMinMax;
5661   }
5662   // The following are no-ops during inference and will not be mapped to any TRT
5663   // layer.
5664   for (auto identity_op_type : {"Identity", "Snapshot", "StopGradient"}) {
5665     (*registration)[identity_op_type] = ConvertIdentity;
5666   }
5667   for (auto batch_matmul_type : {"BatchMatMul", "BatchMatMulV2"}) {
5668     (*registration)[batch_matmul_type] = ConvertBatchMatMul;
5669   }
5670 }
5671 
5672 void TrtNodeValidator::RegisterOpValidators() {
5673   RegisterValidatableOpConverters(&op_validators_);
5674 }
5675 
5676 void Converter::RegisterOpConverters() {
5677   RegisterValidatableOpConverters(&op_registry_);
5678 }
5679 
5680 Status ConvertGraphDefToEngine(
5681     const GraphDef& gdef, TrtPrecisionMode precision_mode, int max_batch_size,
5682     size_t max_workspace_size_bytes,
5683     const std::vector<PartialTensorShape>& input_shapes,
5684     nvinfer1::ILogger* trt_logger, nvinfer1::IGpuAllocator* allocator,
5685     TRTInt8Calibrator* calibrator,
5686     TrtUniquePtrType<nvinfer1::ICudaEngine>* engine, bool use_calibration,
5687     const bool use_implicit_batch, bool* convert_successfully) {
5688   engine->reset();
5689   if (convert_successfully) *convert_successfully = false;
5690 
5691   // Create the converter, TensorRT builder, and network
5692   auto statusor = Converter::Create(precision_mode, use_calibration, trt_logger,
5693                                     use_implicit_batch);
5694   TF_RETURN_IF_ERROR(statusor.status());
5695   auto converter = std::move(statusor.ValueOrDie());
5696 
5697   VLOG(1) << "Starting to convert TensorFlow ops to TensorRT layers";
5698   std::vector<Converter::EngineOutputInfo> output_tensors;
5699   // Graph nodes are already topologically sorted during construction
5700   for (const auto& node_def : gdef.node()) {
5701     const string& node_name = node_def.name();
5702     VLOG(2) << "Converting node " << node_name << ", op=" << node_def.op();
5703     if (IsEngineInput(node_name)) {
5704       int32 slot_number = -1;
5705       string type_key;
5706       if (node_def.op() == "Placeholder") {
5707         if (!strings::safe_strto32(  // non-absl ok
5708                 node_name.c_str() + strlen(IONamePrefixes::kInputPHName),
5709                 &slot_number)) {
5710           return errors::InvalidArgument("Failed to parse slot number from ",
5711                                          node_name);
5712         }
5713         type_key = "dtype";
5714       } else if (tensorflow::grappler::IsArg(node_def)) {
5715         // Maybe remove the dependence on grappler and re-implement IsArg,
5716         // which is pretty simple (but could change if new Arg nodes are added)
5717         slot_number = node_def.attr().at("index").i();
5718         type_key = "T";
5719       } else {
5720         return errors::InvalidArgument(
5721             "Node ", node_name,
5722             " is neither Placeholder nor Arg, instead ", node_def.op());
5723       }
5724       nvinfer1::DataType trt_dtype;
5725       nvinfer1::Dims trt_dims;
5726       int batch_size = -1;
5727       auto shape = input_shapes.at(slot_number);
5728       auto status = ValidateTensorProperties(
5729           node_def.op(), node_def.attr().at(type_key).type(), shape,
5730           use_implicit_batch, /*validation_only=*/false, &trt_dtype, &trt_dims,
5731           &batch_size);
5732       if (!status.ok()) {
5733         const string error_message =
5734             StrCat("Validation failed for ", node_name, " and input slot ",
5735                    slot_number, ": ", status.error_message());
5736         LOG(WARNING) << error_message;
5737         return Status(status.code(), error_message);
5738       }
5739       VLOG(2) << "Adding engine input tensor " << node_name << " with shape "
5740               << DebugString(trt_dims);
5741       // TODO(laigd): the conversion should always happen at runtime where all
5742       // the shapes are known, and we can provide a mode to generate the
5743       // engines offline, by calling sess.run() and caching/serializing the engines.
5744       TF_RETURN_IF_ERROR(converter->AddInputTensor(node_name, trt_dtype,
5745                                                    trt_dims, batch_size));
5746     } else if (IsEngineOutput(node_name)) {
5747       int32 slot_number = -1;
5748       if (node_def.op() == "Identity") {
5749         if (!strings::safe_strto32(  // non-absl ok
5750                 node_name.c_str() + strlen(IONamePrefixes::kOutputPHName),
5751                 &slot_number)) {
5752           return errors::InvalidArgument("Failed to parse slot number from ",
5753                                          node_name);
5754         }
5755       } else if (tensorflow::grappler::IsRetval(node_def)) {
5756         slot_number = node_def.attr().at("index").i();
5757       } else {
5758         return errors::InvalidArgument(
5759             "Node with name ", node_name,
5760             " starting with IONamePrefixes::kOutputPHName is "
5761             "neither Identity nor Retval, instead ",
5762             node_def.op());
5763       }
5764       // Get output type that TensorFlow expects
5765       TFAttrs attrs(node_def);
5766       DataType tf_dtype = attrs.get<DataType>("T");
5767       nvinfer1::DataType trt_dtype;
5768       TF_RETURN_IF_ERROR(TfDataTypeToTrt(tf_dtype, &trt_dtype));
5769       if (output_tensors.size() <= slot_number) {
5770         output_tensors.resize(slot_number + 1);
5771       }
5772       output_tensors.at(slot_number) = {node_def.input(0), node_name,
5773                                         trt_dtype};
5774     } else {
5775       TF_RETURN_IF_ERROR(converter->ConvertNode(node_def));
5776     }
5777   }
5778   TF_RETURN_IF_ERROR(converter->RenameAndMarkOutputTensors(output_tensors));
5779   if (convert_successfully) *convert_successfully = true;
5780 
5781   // Apply user-provided quantization ranges to tensors
5782   converter->MaybeApplyQuantizationRanges();
5783 
5784   // Build the engine.
5785   TF_RETURN_IF_ERROR(converter->BuildCudaEngine(
5786       engine, max_batch_size, max_workspace_size_bytes, allocator, calibrator));
5787 
5788   VLOG(1) << "Finished conversion";
5789   return Status::OK();
5790 }
5791 
5792 Status ConvertSegmentToGraphDef(
5793     const Graph* graph, const grappler::GraphProperties& graph_properties,
5794     const std::vector<const Node*>& subgraph_nodes,  // In topological order
5795     std::vector<EngineConnection>* connections, GraphDef* segment_def,
5796     string* scope_name) {
5797   std::set<string> marker_nodes;
5798   // Update connection shapes/data types and add corresponding input/output
5799   // nodes in the segment graphdef.
5800   for (size_t i = 0; i < connections->size(); ++i) {
5801     auto& connection = connections->at(i);
5802     if (connection.is_control_edge()) continue;
5803     auto outside_node = graph->FindNodeId(connection.outside_id);
5804     if (!outside_node) {
5805       // This should never happen, unless the original graph is problematic.
5806       return errors::NotFound("Cannot find node with id ",
5807                               connection.outside_id, " in the graph.");
5808     }
5809     // Updates the shape and data types of input/output connections.
5810     DataType dtype;
5811     PartialTensorShape partial_shape;
5812     if (connection.is_input_edge) {
5813       GetOutputProperties(graph_properties,
5814                           graph->FindNodeId(connection.outside_id),
5815                           connection.outside_port, &partial_shape, &dtype);
5816       connection.outside_shape = partial_shape;
5817     } else {
5818       GetInputProperties(graph_properties,
5819                          graph->FindNodeId(connection.outside_id),
5820                          connection.outside_port, &partial_shape, &dtype);
5821       connection.inside_shape = partial_shape;
5822     }
5823     connection.connection_type = dtype;
5824 
5825     // Add dummy input/output nodes to the segment graphdef.
5826     if (connection.is_input_edge) {
5827       const string node_name =
5828           StrCat(IONamePrefixes::kInputPHName, connection.port_number);
5829       if (marker_nodes.count(node_name)) {
5830         VLOG(1) << "Reusing input " << node_name << " for the edge "
5831                 << connection.outside_node_name << ":"
5832                 << connection.outside_port << " -> "
5833                 << connection.inside_node_name << ":" << connection.inside_port;
5834         continue;
5835       }
5836       marker_nodes.insert(node_name);
5837       auto seg_node = segment_def->add_node();
5838       NodeDefBuilder builder(node_name, "_Arg");
5839       auto status = builder.Attr("shape", partial_shape)
5840                         .Attr("T", dtype)
5841                         .Attr("index", connection.port_number)
5842                         .Finalize(seg_node);
5843       VLOG(1) << "Constructing input " << node_name << " for the edge "
5844               << connection.outside_node_name << ":" << connection.outside_port
5845               << " -> " << connection.inside_node_name << ":"
5846               << connection.inside_port;
5847     } else {
5848       const string node_name =
5849           StrCat(IONamePrefixes::kOutputPHName, connection.port_number);
5850       if (marker_nodes.count(node_name)) {
5851         VLOG(1) << "Reusing output " << node_name << " for the edge "
5852                 << connection.inside_node_name << ":" << connection.inside_port
5853                 << " -> " << connection.outside_node_name << ":"
5854                 << connection.outside_port;
5855         continue;
5856       }
5857       marker_nodes.insert(node_name);
5858       auto seg_node = segment_def->add_node();
5859       NodeDefBuilder builder(node_name, "_Retval");
5860       auto status =
5861           builder.Attr("T", dtype)
5862               .Attr("index", connection.port_number)
5863               .Input(connection.inside_node_name, connection.inside_port, dtype)
5864               .Finalize(seg_node);
5865       VLOG(1) << "Constructing output " << node_name << " for the edge "
5866               << connection.inside_node_name << ":" << connection.inside_port
5867               << " -> " << connection.outside_node_name << ":"
5868               << connection.outside_port;
5869     }
5870   }  // for each connection.
5871 
5872   std::unordered_map<int, int> old_to_new_id_map;
5873   // Copy internal nodes to new graphdef
5874   string local_scope = subgraph_nodes.front()->name();
5875   for (const Node* node : subgraph_nodes) {
5876     local_scope = GetCommonNameScope(local_scope, node->name());
5877     old_to_new_id_map[node->id()] = segment_def->node_size();
5878     auto snode = segment_def->add_node();
5879     *snode = node->def();
5880     VLOG(2) << "Copying " << snode->name() << " to subgraph";
5881   }
5882   // Update the inputs of the new input nodes to point to placeholder nodes.
5883   for (int i = 0; i < connections->size(); ++i) {
5884     auto& connection = connections->at(i);
5885     if (connection.is_control_edge() || !connection.is_input_edge) continue;
5886     auto snode =
5887         segment_def->mutable_node(old_to_new_id_map[connection.inside_id]);
5888     const string arg_name =
5889         StrCat(IONamePrefixes::kInputPHName, connection.port_number);
5890     VLOG(1) << "Updating " << snode->name() << ":" << connection.inside_port
5891             << " from " << snode->input(connection.inside_port) << " to "
5892             << arg_name;
5893     snode->set_input(connection.inside_port, arg_name);
5894   }
5895   std::set<string> subgraph_node_names;
5896   for (const Node* node : subgraph_nodes) {
5897     subgraph_node_names.insert(node->name());
5898   }
5899 
5900   // Remove control inputs that are not inside the segment.
5901   for (int i = 0; i < segment_def->node_size(); ++i) {
5902     auto snode = segment_def->mutable_node(i);
5903     const int input_size = snode->input_size();
5904     int input_idx = 0;
5905     int actual_input_idx = 0;
5906     while (input_idx < input_size) {
5907       TensorId input = ParseTensorName(snode->input(input_idx));
5908       if (!subgraph_node_names.count(
5909               string(input.first.data(), input.first.size())) &&
5910           !IsEngineInput(input.first)) {
5911         if (input.second == Graph::kControlSlot) {
5912           VLOG(1) << "... removing control inputs " << input.first
5913                   << " from subgraph.";
5914           ++input_idx;
5915           continue;
5916         } else {
5917           return errors::InvalidArgument(
5918               "Found non control input outside the segment that is not an "
5919               "engine connection to ",
5920               snode->name(), ": ", input.first);
5921         }
5922       }
5923       if (actual_input_idx != input_idx) {
5924         snode->set_input(actual_input_idx, snode->input(input_idx));
5925       }
5926       ++input_idx;
5927       ++actual_input_idx;
5928     }
5929     for (int remove = input_size - actual_input_idx; remove > 0; --remove) {
5930       snode->mutable_input()->RemoveLast();
5931     }
5932   }
5933   *scope_name = local_scope;
5934   return Status::OK();
5935 }
5936 
5937 bool OutputEdgeValidator::operator()(const Edge* out_edge) const {
5938   if (out_edge->IsControlEdge()) return true;
5939   if (out_edge->src()->type_string() == "Const") {
5940     VLOG(1) << "--> Need to remove output node " << out_edge->src()->name()
5941             << " which is a Const.";
5942     return false;
5943   }
5944   return true;
5945 }
5946 
5947 }  // namespace convert
5948 }  // namespace tensorrt
5949 }  // namespace tensorflow
5950 
5951 #endif  // GOOGLE_TENSORRT
5952 #endif  // GOOGLE_CUDA
5953