• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h"
17 
18 #include <algorithm>
19 #include <functional>
20 #include <memory>
21 #include <type_traits>
22 #include <unordered_map>
23 #include <vector>
24 
25 #if GOOGLE_CUDA && GOOGLE_TENSORRT
26 
27 #include <gmock/gmock.h>
28 #include <gtest/gtest.h>
29 #include "absl/algorithm/container.h"
30 #include "absl/strings/match.h"
31 #include "absl/strings/numbers.h"
32 #include "absl/strings/str_cat.h"
33 #include "absl/strings/string_view.h"
34 #include "absl/types/span.h"
35 #include "third_party/gpus/cuda/include/cuda.h"
36 #include "third_party/gpus/cuda/include/cuda_runtime_api.h"
37 #include "tensorflow/cc/framework/ops.h"
38 #include "tensorflow/cc/framework/scope.h"
39 #include "tensorflow/cc/ops/nn_ops_internal.h"
40 #include "tensorflow/cc/ops/standard_ops.h"
41 #include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
42 #include "tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h"
43 #include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
44 #include "tensorflow/core/common_runtime/gpu/gpu_managed_allocator.h"
45 #include "tensorflow/core/framework/allocator.h"
46 #include "tensorflow/core/framework/node_def.pb.h"  // NOLINT
47 #include "tensorflow/core/framework/tensor.h"
48 #include "tensorflow/core/framework/tensor.pb.h"  // NOLINT
49 #include "tensorflow/core/framework/tensor_shape.h"
50 #include "tensorflow/core/framework/tensor_testutil.h"
51 #include "tensorflow/core/framework/types.h"
52 #include "tensorflow/core/grappler/costs/graph_properties.h"
53 #include "tensorflow/core/lib/core/status.h"
54 #include "tensorflow/core/lib/core/status_test_util.h"
55 #include "tensorflow/core/lib/strings/str_util.h"
56 #include "tensorflow/core/lib/strings/strcat.h"
57 #include "tensorflow/core/platform/protobuf.h"
58 #include "tensorflow/core/platform/test.h"
59 #include "tensorflow/core/protobuf/config.pb.h"  // NOLINT
60 #include "tensorflow/core/public/session.h"
61 #include "third_party/tensorrt/NvInfer.h"
62 
63 namespace tensorflow {
64 namespace tensorrt {
65 
66 // TensorRT modes for testing. We define the following three modes:
67 // 1. Implicit batch mode: The tensors have static (known) input shape and
68 //    the batch dimension (first dim) is removed from the TRT tensor shape. In
69 //    a loose notation: trt_shape = tf_shape[1:]. This is the standard mode of
70 //    a TensorRT network definition  before TensorRT 6.
71 // 2. Explicit batch mode: static (known) input shape, but the batch dimension
72 //    is part of the trt tensor shape. (trt_shape = tf_shape)
73 // 3. Dynamic shape mode allows unknown input shapes, and requires explicit
74 //    batch size definition (trt_shape = tf_shape).
75 //
76 // Note that the Converter only distinguishes between two modes:
77 // - use_implicit_batch == true, this corresponds to kImplicitBatch,
78 // - use_implicit_batch == false which includes both kExplicitBatch and
79 //   kDynamicShape.
80 //
81 // For the converter, the distinction between explicit batch or dynamic shape
82 // mode follows from the input tensors of the network: dynamic shape input
83 // implies dynamic shape mode, while static shape input tensors imply explicit
84 // batch mode. We want to test all these modes, therefore we define the
85 // TrtTestMode with the following three options.
enum class TrtTestMode {
  // Static input shapes; the batch dimension is not part of the TRT shape.
  kImplicitBatch,  // == 0
  // Static input shapes; the batch dimension is part of the TRT shape.
  kExplicitBatch,  // == 1
  // Unknown input shapes are allowed; the batch dimension is explicit.
  kDynamicShape,  // == 2
};
91 
DebugString(const TrtTestMode mode)92 string DebugString(const TrtTestMode mode) {
93   switch (mode) {
94     case TrtTestMode::kImplicitBatch:
95       return "kImplicitBatch";
96     case TrtTestMode::kExplicitBatch:
97       return "kExplicitBatch";
98     case TrtTestMode::kDynamicShape:
99       return "kDynamicShape";
100     default:
101       return "Invalid TrtTestMode";
102   }
103 }
104 
105 namespace convert {
106 
107 using absl::StrCat;
108 using ::testing::ElementsAre;
109 using ::testing::ElementsAreArray;
110 using ::testing::Matcher;
111 
112 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
113 constexpr std::array<TrtTestMode, 3> ValidTrtModes = {
114     TrtTestMode::kImplicitBatch, TrtTestMode::kExplicitBatch,
115     TrtTestMode::kDynamicShape};
116 #else
117 constexpr std::array<TrtTestMode, 1> ValidTrtModes = {
118     TrtTestMode::kImplicitBatch};
119 #endif
120 
121 // TODO(laigd): put this into some test utils file.
ExpectStatus(Status status,error::Code code=error::OK,const char * substr=nullptr)122 void ExpectStatus(Status status, error::Code code = error::OK,
123                   const char* substr = nullptr) {
124   EXPECT_EQ(code, status.code())
125       << status << " vs expected error code \"" << error::Code_Name(code)
126       << "\" and message \"" << substr << "\"";
127   if (substr) {
128     EXPECT_THAT(status.error_message(), ::testing::HasSubstr(substr)) << status;
129   }
130 }
131 
GetTestDims(const std::vector<int> & d)132 nvinfer1::Dims GetTestDims(const std::vector<int>& d) {
133   nvinfer1::Dims dims;
134   dims.nbDims = d.size();
135   for (int i = 0; i < d.size(); ++i) {
136     dims.d[i] = d[i];
137   }
138   return dims;
139 }
140 
// Prints the vector to the output stream as "[a, b, c]".
//
// An empty vector prints nothing. The separator is written only between
// elements, so no control characters (such as backspaces) are emitted to
// erase a trailing separator; the output is therefore clean when captured to
// a file or a test log.
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v) {
  if (v.empty()) return os;
  os << '[';
  const char* separator = "";
  for (const T& element : v) {
    os << separator << element;
    separator = ", ";
  }
  os << ']';
  return os;
}
151 
MakeNodeDef(const string & name,const string & op,const std::vector<string> & inputs,const std::map<string,AttrValue> attrs={})152 NodeDef MakeNodeDef(const string& name, const string& op,
153                     const std::vector<string>& inputs,
154                     const std::map<string, AttrValue> attrs = {}) {
155   NodeDef node_def;
156   node_def.set_name(name);
157   node_def.set_op(op);
158   for (const string& input : inputs) {
159     node_def.add_input(input);
160   }
161   for (const auto& attr : attrs) {
162     (*node_def.mutable_attr())[attr.first] = attr.second;
163   }
164   return node_def;
165 }
166 
167 template <typename T>
MakeConstNodeDef(const string & name,const std::vector<T> & vals,const TensorShape & shape)168 NodeDef MakeConstNodeDef(const string& name, const std::vector<T>& vals,
169                          const TensorShape& shape) {
170   Scope s = Scope::NewRootScope();
171   Tensor t = test::AsTensor<T>(vals, shape);
172   auto const_op = ops::Const(s.WithOpName(name), t);
173   return const_op.node()->def();
174 }
175 
176 template <typename T>
MakeConstNodeDef(const string & name,const std::vector<T> & vals)177 NodeDef MakeConstNodeDef(const string& name, const std::vector<T>& vals) {
178   TensorShape shape;
179   const std::vector<int32> shape_dims = {static_cast<int32>(vals.size())};
180   TF_EXPECT_OK(TensorShapeUtils::MakeShape(shape_dims, &shape));
181   return MakeConstNodeDef(name, vals, shape);
182 }
183 
TrtDimsEquals(const nvinfer1::Dims & lhs,const nvinfer1::Dims & rhs)184 bool TrtDimsEquals(const nvinfer1::Dims& lhs, const nvinfer1::Dims& rhs) {
185   if (lhs.nbDims != rhs.nbDims) return false;
186   for (int i = 0; i < lhs.nbDims; ++i) {
187     if (lhs.d[i] != rhs.d[i]) return false;
188     // We don't check the types in the tests.
189   }
190   return true;
191 }
192 
TrtDimsEqualsArray(const std::vector<int> & lhs,const nvinfer1::Dims & rhs)193 bool TrtDimsEqualsArray(const std::vector<int>& lhs,
194                         const nvinfer1::Dims& rhs) {
195   return TrtDimsEquals(GetTestDims(lhs), rhs);
196 }
197 
198 // TODO(laigd): define a parameterized matcher that can compare against the
199 // vector.
ExpectTrtDimsEqualsArray(const std::vector<int> & lhs,const nvinfer1::Dims & rhs)200 void ExpectTrtDimsEqualsArray(const std::vector<int>& lhs,
201                               const nvinfer1::Dims& rhs) {
202   EXPECT_TRUE(TrtDimsEqualsArray(lhs, rhs))
203       << "expected: " << DebugString(GetTestDims(lhs)) << "\n"
204       << "  actual: " << DebugString(rhs);
205 }
206 
ExpectTrtLayerNames(absl::Span<const std::string> names,nvinfer1::INetworkDefinition * network)207 void ExpectTrtLayerNames(absl::Span<const std::string> names,
208                          nvinfer1::INetworkDefinition* network) {
209   EXPECT_EQ(network->getNbLayers(), names.size());
210 
211   for (int i = 0; i < network->getNbLayers(); i++) {
212     auto layer = network->getLayer(i);
213     EXPECT_EQ(layer->getName(), names[i]);
214   }
215 }
216 
VerifyTrtLayerNameNotEmpty(nvinfer1::INetworkDefinition * network)217 void VerifyTrtLayerNameNotEmpty(nvinfer1::INetworkDefinition* network) {
218   for (int i = 0; i < network->getNbLayers(); i++) {
219     auto layer = network->getLayer(i);
220     EXPECT_NE(layer->getName(), nullptr);
221   }
222 }
223 
ArrayFloatNear(const std::vector<float> & values,float max_abs_error=1e-5,bool nan_sensitive=false)224 Matcher<std::vector<float>> ArrayFloatNear(const std::vector<float>& values,
225                                            float max_abs_error = 1e-5,
226                                            bool nan_sensitive = false) {
227   std::vector<Matcher<float>> matchers;
228   matchers.reserve(values.size());
229   for (const float& v : values) {
230     if (nan_sensitive) {
231       matchers.emplace_back(::testing::NanSensitiveFloatNear(v, max_abs_error));
232     } else if (max_abs_error == 0) {
233       matchers.emplace_back(::testing::FloatEq(v));
234     } else {
235       EXPECT_GE(max_abs_error, 0);
236       matchers.emplace_back(::testing::FloatNear(v, max_abs_error));
237     }
238   }
239   return ElementsAreArray(matchers);
240 }
241 
242 template <typename T>
ExpectArrayNear(const std::vector<T> & lhs,absl::Span<const T> rhs)243 void ExpectArrayNear(const std::vector<T>& lhs, absl::Span<const T> rhs) {
244   ASSERT_EQ(lhs.size(), rhs.size());
245   for (int i = 0; i < lhs.size(); i++) {
246     EXPECT_FLOAT_EQ(lhs[i], rhs[i]);
247   }
248 }
249 
250 // Eigen::half cannot implicitly convert to float which is required for
251 // EXPECT_FLOAT_EQ.
252 template <>
ExpectArrayNear(const std::vector<Eigen::half> & lhs,absl::Span<const Eigen::half> rhs)253 void ExpectArrayNear(const std::vector<Eigen::half>& lhs,
254                      absl::Span<const Eigen::half> rhs) {
255   ASSERT_EQ(lhs.size(), rhs.size());
256   for (int i = 0; i < lhs.size(); i++) {
257     EXPECT_FLOAT_EQ(Eigen::half_impl::half_to_float(lhs[i]),
258                     Eigen::half_impl::half_to_float(rhs[i]));
259   }
260 }
261 
262 template <typename T>
ExpectArrayAlmostEqual(const std::vector<T> & lhs,absl::Span<const T> rhs,T tolerance)263 void ExpectArrayAlmostEqual(const std::vector<T>& lhs, absl::Span<const T> rhs,
264                             T tolerance) {
265   ASSERT_EQ(lhs.size(), rhs.size());
266   for (int i = 0; i < lhs.size(); i++) {
267     EXPECT_NEAR(lhs[i], rhs[i], tolerance);
268   }
269 }
270 
271 // Eigen::half cannot implicitly convert to float which is required for
272 // EXPECT_NEAR.
273 template <>
ExpectArrayAlmostEqual(const std::vector<Eigen::half> & lhs,absl::Span<const Eigen::half> rhs,Eigen::half tolerance)274 void ExpectArrayAlmostEqual(const std::vector<Eigen::half>& lhs,
275                             absl::Span<const Eigen::half> rhs,
276                             Eigen::half tolerance) {
277   ASSERT_EQ(lhs.size(), rhs.size());
278   for (int i = 0; i < lhs.size(); i++) {
279     EXPECT_NEAR(Eigen::half_impl::half_to_float(lhs[i]),
280                 Eigen::half_impl::half_to_float(rhs[i]),
281                 Eigen::half_impl::half_to_float(tolerance));
282   }
283 }
284 
TrtShapedWeightsEquals(const TRT_ShapedWeights & lhs,const TRT_ShapedWeights & rhs)285 bool TrtShapedWeightsEquals(const TRT_ShapedWeights& lhs,
286                             const TRT_ShapedWeights& rhs) {
287   return TrtDimsEquals(lhs.shape_, rhs.shape_) &&
288          lhs.TrtDType() == rhs.TrtDType() && lhs.GetValues() == rhs.GetValues();
289 }
290 
291 template <typename T>
ValidateWeights(const TRT_ShapedWeights & weights,const std::vector<int> & expected_dims,const std::vector<T> & expected_value)292 void ValidateWeights(const TRT_ShapedWeights& weights,
293                      const std::vector<int>& expected_dims,
294                      const std::vector<T>& expected_value) {
295   ExpectTrtDimsEqualsArray(expected_dims, weights.shape_);
296   ASSERT_EQ(expected_value.size(), weights.count()) << weights.DebugString();
297   const T* actual_values = static_cast<const T*>(weights.GetValues());
298   for (int i = 0; i < expected_value.size(); ++i) {
299     EXPECT_EQ(expected_value[i], actual_values[i]);
300   }
301 }
302 
// Returns a vector of `size` elements where element i equals
// start_value + CType(i).
template <typename CType>
std::vector<CType> InitTestVector(int size, CType start_value = CType(0)) {
  std::vector<CType> values;
  values.reserve(size);
  int offset = 0;
  while (offset < size) {
    values.push_back(start_value + CType(offset));
    ++offset;
  }
  return values;
}
312 
// Function object that converts an InCType value to OutCType using
// static_cast.
template <typename InCType, typename OutCType>
struct StaticCaster {
  OutCType operator()(InCType in) const {
    OutCType converted = static_cast<OutCType>(in);
    return converted;
  }
};
317 
318 template <typename InCType, typename OutCType>
CastTestVector(const gtl::ArraySlice<InCType> & vals)319 std::vector<OutCType> CastTestVector(
320     const gtl::ArraySlice<InCType>& vals) {  // non-absl ok
321   std::vector<OutCType> res(vals.size());
322   std::transform(vals.begin(), vals.end(), res.begin(),
323                  StaticCaster<InCType, OutCType>());
324   return res;
325 }
326 
327 // Fake ITensor implementation for testing purposes.
328 class FakeITensor : public nvinfer1::ITensor {
329  public:
FakeITensor()330   FakeITensor() : dynamic_range_(0.0f) {}
331 
FakeITensor(const nvinfer1::Dims & dims)332   FakeITensor(const nvinfer1::Dims& dims) : dims_(dims), dynamic_range_(0.0f) {}
333 
FakeITensor(const std::vector<int> & dims)334   FakeITensor(const std::vector<int>& dims)
335       : dims_(GetTestDims(dims)), dynamic_range_(0.0f) {}
336 
setName(const char * name)337   void setName(const char* name) override { name_ = name; }
338 
getName() const339   const char* getName() const override { return name_.c_str(); }
340 
setDimensions(nvinfer1::Dims dimensions)341   void setDimensions(nvinfer1::Dims dimensions) override { dims_ = dimensions; }
342 
getDimensions() const343   nvinfer1::Dims getDimensions() const override { return dims_; }
344 
setType(nvinfer1::DataType type)345   void setType(nvinfer1::DataType type) override { type_ = type; }
346 
getType() const347   nvinfer1::DataType getType() const override { return type_; }
348 
isNetworkInput() const349   bool isNetworkInput() const override { return false; }
350 
isNetworkOutput() const351   bool isNetworkOutput() const override { return false; }
352 
setBroadcastAcrossBatch(bool broadcastAcrossBatch)353   void setBroadcastAcrossBatch(bool broadcastAcrossBatch) override {}
354 
getBroadcastAcrossBatch() const355   bool getBroadcastAcrossBatch() const override { return false; }
356 
getLocation() const357   nvinfer1::TensorLocation getLocation() const override { return location_; }
358 
setLocation(nvinfer1::TensorLocation location)359   void setLocation(nvinfer1::TensorLocation location) override {
360     location_ = location;
361   }
362 
363 #if IS_TRT_VERSION_GE(5, 0, 0, 0)
setDynamicRange(float min,float max)364   bool setDynamicRange(float min, float max) override {
365     dynamic_range_ = std::max(std::abs(min), std::abs(max));
366     return true;
367   }
368 
getDynamicRange() const369   float getDynamicRange() const override { return dynamic_range_; }
370 #endif
371 
372 #if IS_TRT_VERSION_GE(5, 1, 0, 0)
dynamicRangeIsSet() const373   bool dynamicRangeIsSet() const override { return true; }
374 
resetDynamicRange()375   void resetDynamicRange() override {}
376 
getDynamicRangeMin() const377   float getDynamicRangeMin() const override { return 0.f; }
378 
getDynamicRangeMax() const379   float getDynamicRangeMax() const override { return 0.f; }
380 #endif
381 
382 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
setAllowedFormats(nvinfer1::TensorFormats formats)383   void setAllowedFormats(nvinfer1::TensorFormats formats) override {}
384 
getAllowedFormats() const385   nvinfer1::TensorFormats getAllowedFormats() const override { return 1; }
386 
isShapeTensor() const387   bool isShapeTensor() const override { return false; }
isExecutionTensor() const388   bool isExecutionTensor() const override { return true; }
389 
390 #endif
391 
392  private:
393   string name_;
394   nvinfer1::Dims dims_;
395   nvinfer1::DataType type_;
396   nvinfer1::TensorLocation location_;
397   float dynamic_range_;
398 };
399 
// Checks TRT_ShapedWeights construction and copy construction: objects built
// without a shape must be empty float weights, and weights obtained from a
// TrtWeightStore must expose a 2x5 float buffer that copies share.
TEST(TRT_ShapedWeights_Test, Basic) {
  // Expectations for a weights object that has no buffer.
  const auto expect_empty_float_weights = [](TRT_ShapedWeights* ptr) {
    nvinfer1::Weights trt_weights = ptr->GetTrtWeights();
    EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
    EXPECT_EQ(nullptr, trt_weights.values);
    EXPECT_EQ(0, trt_weights.count);

    EXPECT_EQ(nullptr, ptr->GetValues());
    EXPECT_EQ(0, ptr->count());
    EXPECT_EQ(0, ptr->size_bytes());
  };

  // Constructor with no arguments.
  {
    TRT_ShapedWeights weights;
    TRT_ShapedWeights copy(weights);
    expect_empty_float_weights(&weights);
    expect_empty_float_weights(&copy);
  }
  // Constructor with a DataType argument.
  {
    TRT_ShapedWeights weights(nvinfer1::DataType::kFLOAT);
    TRT_ShapedWeights copy(weights);
    expect_empty_float_weights(&weights);
    expect_empty_float_weights(&copy);
  }
  // Weights created with a DataType and nvinfer1::Dims through the store.
  {
    TrtWeightStore store;
    const nvinfer1::Dims two_by_five = GetTestDims({2, 5});
    TRT_ShapedWeights weights =
        store.GetTempWeights(nvinfer1::DataType::kFLOAT, two_by_five);
    TRT_ShapedWeights copy(weights);
    for (TRT_ShapedWeights* ptr : {&weights, &copy}) {
      nvinfer1::Weights trt_weights = ptr->GetTrtWeights();
      EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
      EXPECT_NE(nullptr, trt_weights.values);
      EXPECT_EQ(10, trt_weights.count);

      EXPECT_EQ(trt_weights.values, ptr->GetValues());
      EXPECT_EQ(10, ptr->count());
      EXPECT_EQ(40, ptr->size_bytes());
    }
    // Copying must not duplicate the underlying buffer.
    EXPECT_EQ(weights.GetValues(), copy.GetValues());
  }
}
451 
TEST(TRT_TensorOrWeights_Test,Basic)452 TEST(TRT_TensorOrWeights_Test, Basic) {
453   // Test constructor with no arguments.
454   {
455     TRT_TensorOrWeights tw;
456     TRT_TensorOrWeights copy(tw);
457     TRT_TensorOrWeights assigned;
458     assigned = tw;
459     for (auto ptr : {&tw, &copy, &assigned}) {
460       EXPECT_EQ(false, ptr->is_tensor());
461       EXPECT_EQ(false, ptr->is_weights());
462       EXPECT_EQ(-1, ptr->batch_size());
463     }
464   }
465 
466   // Test constructor with ITensor and batch size argument.
467   {
468     nvinfer1::Dims dims;
469     dims.nbDims = 1;
470     dims.d[0] = 1;
471     FakeITensor itensor(dims);
472     TRT_TensorOrWeights tw(&itensor);
473     TRT_TensorOrWeights tw1(&itensor, /*batch_size=*/1);
474 
475     for (auto original_ptr : {&tw, &tw1}) {
476       TRT_TensorOrWeights copy(*original_ptr);
477       TRT_TensorOrWeights assigned;
478       assigned = *original_ptr;
479 
480       for (auto ptr : {original_ptr, &copy, &assigned}) {
481         ASSERT_TRUE(ptr->is_tensor());
482         EXPECT_EQ(false, ptr->is_weights());
483         if (original_ptr == &tw) {
484           EXPECT_EQ(-1, ptr->batch_size());
485         } else {
486           EXPECT_EQ(1, ptr->batch_size());
487         }
488         EXPECT_EQ(&itensor, ptr->tensor());
489         ExpectTrtDimsEqualsArray({1}, ptr->GetTrtDims());
490       }
491     }
492   }
493   // Test constructor which creates and owns an ITensor.
494   {
495     nvinfer1::Dims dims;
496     dims.nbDims = 1;
497     dims.d[0] = 1;
498     TRT_TensorOrWeights tw(nvinfer1::DataType::kFLOAT, dims, /*batch_size=*/1);
499     TRT_TensorOrWeights copy(tw);
500     TRT_TensorOrWeights assigned;
501     assigned = tw;
502 
503     for (auto ptr : {&tw, &copy, &assigned}) {
504       ASSERT_TRUE(ptr->is_tensor());
505       EXPECT_EQ(false, ptr->is_weights());
506       EXPECT_EQ(1, ptr->batch_size());
507       EXPECT_NE(nullptr, ptr->tensor());
508       ExpectTrtDimsEqualsArray({1}, ptr->GetTrtDims());
509     }
510   }
511   // Test constructor with TRT_ShapedWeights argument.
512   {
513     TRT_ShapedWeights weights;
514     TRT_TensorOrWeights tw(weights);
515     TRT_TensorOrWeights copy(tw);
516     TRT_TensorOrWeights assigned;
517     assigned = tw;
518     for (auto ptr : {&tw, &copy, &assigned}) {
519       EXPECT_EQ(false, ptr->is_tensor());
520       EXPECT_EQ(true, ptr->is_weights());
521       EXPECT_TRUE(TrtShapedWeightsEquals(weights, ptr->weights()));
522       ExpectTrtDimsEqualsArray({}, ptr->GetTrtDims());
523     }
524   }
525 }
526 
527 class ValidatorTest : public ::testing::Test {
528  public:
op_validators(TrtNodeValidator * validator)529   std::unordered_map<string, OpConverter>& op_validators(
530       TrtNodeValidator* validator) {
531     return validator->op_validators_;
532   }
533 
ConvertToTensorOrWeights(const Scope & scope,const Node * node,int output_port,TRT_TensorOrWeights * tensor_or_weights)534   Status ConvertToTensorOrWeights(const Scope& scope, const Node* node,
535                                   int output_port,
536                                   TRT_TensorOrWeights* tensor_or_weights) {
537     grappler::GrapplerItem item;
538     TF_EXPECT_OK(scope.ToGraphDef(&item.graph));
539     grappler::GraphProperties graph_properties(item);
540     TF_EXPECT_OK(graph_properties.InferStatically(true));
541 
542     TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32,
543                                /*use_calibration=*/false,
544                                /*use_implicit_batch=*/true);
545     return validator.ConvertToTensorOrWeights(node->def(), output_port,
546                                               tensor_or_weights);
547   }
548 
GetQuantizeOps(TrtNodeValidator * validator)549   const std::set<string>* GetQuantizeOps(TrtNodeValidator* validator) {
550     return validator->quantize_ops;
551   }
552 };
553 
// Every op listed in the validator's quantize-op set must have an entry in
// the op validator registry.
//
// Failures are reported through gtest expectations (one per missing op, with
// the op name) rather than a fatal check, so a missing registration fails
// this test without aborting the whole test binary.
TEST_F(ValidatorTest, QuantizeOpsAreRegistered) {
  grappler::GrapplerItem item;
  grappler::GraphProperties graph_properties(item);
  TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32,
                             /*use_calibration=*/false,
                             /*use_implicit_batch=*/true);
  const std::set<string>* quantize_ops = GetQuantizeOps(&validator);
  ASSERT_NE(nullptr, quantize_ops);
  const auto& registered = op_validators(&validator);
  for (const string& quantize_op : *quantize_ops) {
    EXPECT_TRUE(registered.count(quantize_op) > 0)
        << "No validator registered for quantize op " << quantize_op;
  }
}
564 
// Exercises the fixture's ConvertToTensorOrWeights() on a Const node and on
// an Add node fed by a placeholder of various shapes.
TEST_F(ValidatorTest, ConvertToTensorOrWeights) {
  // A Const node converts to weights with the constant's shape and values.
  {
    Scope s = Scope::NewRootScope();
    auto node =
        ops::Const(s.WithOpName("my_const"), {1.0f, 2.0f}, TensorShape({2}));
    TRT_TensorOrWeights output;
    const Status status = ConvertToTensorOrWeights(s, node.op().node(),
                                                   /*output_port=*/0, &output);
    ExpectStatus(status);
    ValidateWeights<float>(output.weights(), {2}, {1.0, 2.0});
  }

  // Builds feed -> add(feed, feed) with a float placeholder of shape `dims`
  // and converts output 0 of the add node.
  auto convert_add_of_placeholder = [this](const std::vector<int64>& dims,
                                           TRT_TensorOrWeights* output) {
    Scope s = Scope::NewRootScope();
    const auto attrs = ops::Placeholder::Shape(PartialTensorShape{dims});
    auto feed = ops::Placeholder(s.WithOpName("feed"), DT_FLOAT, attrs);
    auto add = ops::Add(s.WithOpName("add"), feed, feed);
    return this->ConvertToTensorOrWeights(s, add.operation.node(),
                                          /*output_port=*/0, output);
  };

  // Non-Const input with #dims > nvinfer1::Dims::MAX_DIMS+1 is rejected.
  {
    const std::vector<int64> too_many_dims(nvinfer1::Dims::MAX_DIMS + 2, 1);
    TRT_TensorOrWeights output;
    ExpectStatus(convert_add_of_placeholder(too_many_dims, &output),
                 error::OUT_OF_RANGE, "Input tensor rank is greater than 9");
  }
  // Non-Const input with #dims < 1 (a scalar) is rejected.
  {
    TRT_TensorOrWeights output;
    ExpectStatus(
        convert_add_of_placeholder({}, &output), error::INVALID_ARGUMENT,
        "Scalar input tensor is not supported since the first dimension "
        "is treated as batch dimension by TRT");
  }
  // Valid non-Const input. The non-batch dimension is also tried as unknown
  // (-1) to make sure the validator allows that.
  for (const int32 non_batch_dim : {-1, 2}) {
    const int32 batch_size = 12;
    TRT_TensorOrWeights output;
    const Status status =
        convert_add_of_placeholder({batch_size, non_batch_dim}, &output);
    ExpectStatus(status);
    ASSERT_TRUE(output.is_tensor());
    EXPECT_EQ(batch_size, output.batch_size());
    EXPECT_NE(nullptr, output.tensor());
    ExpectTrtDimsEqualsArray({non_batch_dim}, output.GetTrtDims());
  }
}
616 
// Checks how IsTensorRTCandidate() reacts to the op validator registry: an op
// without a validator is unimplemented, a registered validator is invoked in
// validation-only mode, and an error returned by the validator is propagated.
TEST_F(ValidatorTest, IsTensorRTCandidate_Basics) {
  Scope s = Scope::NewRootScope();
  auto input =
      ops::Const(s.WithOpName("const"), {1.0f, 2.0f}, TensorShape({2}));
  auto add = ops::Add(s.WithOpName("add"), input, input);
  const Node* add_node = add.operation.node();

  grappler::GrapplerItem item;
  TF_EXPECT_OK(s.ToGraphDef(&item.graph));
  grappler::GraphProperties graph_properties(item);
  TF_EXPECT_OK(graph_properties.InferStatically(true));
  TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32,
                             /*use_calibration=*/false,
                             /*use_implicit_batch=*/true);

  // Stand-in converter: fails on demand, and records whether it was ever
  // called with validation_only unset.
  bool should_fail = false;
  bool start_conversion = false;
  auto op_converter = [&](OpConverterParams* params) -> Status {
    if (should_fail) return errors::InvalidArgument("");
    start_conversion = start_conversion || !params->validation_only;
    return Status::OK();
  };

  // With the built-in "Add" validator removed, the op is unsupported.
  ASSERT_EQ(1, op_validators(&validator).erase("Add"));
  ExpectStatus(validator.IsTensorRTCandidate(add_node), error::UNIMPLEMENTED,
               "Op type Add is not supported.");

  // With the stand-in registered, validation succeeds and no real conversion
  // is started.
  op_validators(&validator)["Add"] = op_converter;
  TF_EXPECT_OK(validator.IsTensorRTCandidate(add_node));
  EXPECT_EQ(false, start_conversion);

  // An error from the converter is returned by the validator.
  should_fail = true;
  const Status failed = validator.IsTensorRTCandidate(add_node);
  ExpectStatus(failed, error::INVALID_ARGUMENT);
}
656 
TEST(TrtNodeValidator,IsTensorRTCandidate)657 TEST(TrtNodeValidator, IsTensorRTCandidate) {
658   // Create a graph containing both TRT-compatible and TRT-incompatible nodes
659   // and use it to test TrtNodeValidator::IsTensorRTCandidate().
660   const std::vector<int32> input_shape_array{2, 2};
661   TensorShape input_shape;
662   TF_EXPECT_OK(TensorShapeUtils::MakeShape(input_shape_array, &input_shape));
663 
664   Scope s = Scope::NewRootScope();
665   ops::Placeholder::Attrs feed_attrs;
666   TF_EXPECT_OK(
667       TensorShapeUtils::MakeShape(input_shape_array, &feed_attrs.shape_));
668 
669   // Compatible input.
670   auto feed = ops::Placeholder(s.WithOpName("feed"), DT_FLOAT, feed_attrs);
671   auto const_1 = ops::Const(s.WithOpName("const_1"), 1.0f, input_shape);
672 
673   // Compatible MatMul.
674   auto matmul = ops::MatMul(s.WithOpName("matmul"), feed, const_1);
675 
676   // Incompatible MatMul.
677   ops::MatMul::Attrs matmul_attrs;
678   matmul_attrs.transpose_a_ = true;
679   auto incompatible_matmul = ops::MatMul(s.WithOpName("incompatible_matmul"),
680                                          feed, const_1, matmul_attrs);
681 
682   // Unsupported op.
683   auto unsupported_op = ops::Erf(s.WithOpName("sin"), feed);
684 
685   // Incompatible input.
686   auto incompatible_feed = ops::Placeholder(s.WithOpName("feed"), DT_DOUBLE);
687   auto const_2 = ops::Const(s.WithOpName("const_2"), 1.0, input_shape);
688   // Compatible op with incompatible input.
689   auto matmul_with_incompatible_input =
690       ops::MatMul(s.WithOpName("matmul_with_incompatible_input"),
691                   incompatible_feed, const_2);
692 
693   // Quantize ops.
694   auto quantize_attrs = ops::FakeQuantWithMinMaxArgs::Min(-6.0f).Max(6.0f);
695   auto quantize = ops::FakeQuantWithMinMaxArgs(s.WithOpName("quantize"), feed,
696                                                quantize_attrs);
697 
698   // Get GrapplerItem and GraphProperties.
699   grappler::GrapplerItem item;
700   TF_EXPECT_OK(s.ToGraphDef(&item.graph));
701   Tensor feed_tensor(DT_FLOAT, input_shape);
702   item.feed.push_back(std::make_pair("feed", feed_tensor));
703   grappler::GraphProperties graph_properties(item);
704   TF_EXPECT_OK(graph_properties.InferStatically(true));
705 
706   for (const TrtPrecisionMode precision_mode :
707        {TrtPrecisionMode::FP32, TrtPrecisionMode::INT8}) {
708     TrtNodeValidator validator(graph_properties, precision_mode,
709                                /*use_calibration=*/false,
710                                /*use_implicit_batch=*/true);
711     TF_EXPECT_OK(validator.IsTensorRTCandidate(matmul.operation.node()));
712     ExpectStatus(
713         validator.IsTensorRTCandidate(incompatible_matmul.operation.node()),
714         error::INVALID_ARGUMENT,
715         "Cannot transpose first input if it is a tensor with fewer than 2 "
716         "non-batch dimensions.");
717     ExpectStatus(validator.IsTensorRTCandidate(unsupported_op.operation.node()),
718                  error::UNIMPLEMENTED, "Op type Erf is not supported");
719     ExpectStatus(validator.IsTensorRTCandidate(
720                      matmul_with_incompatible_input.operation.node()),
721                  error::INTERNAL,
722                  "Failed to convert input feed_1 to a TRT_TensorOrWeights");
723     if (precision_mode == TrtPrecisionMode::INT8) {
724       TF_EXPECT_OK(validator.IsTensorRTCandidate(quantize.operation.node()));
725     } else {
726       ExpectStatus(validator.IsTensorRTCandidate(quantize.operation.node()),
727                    error::UNIMPLEMENTED,
728                    "Op type FakeQuantWithMinMaxArgs is not supported");
729     }
730   }
731 }
732 
733 class ConverterTest : public ::testing::Test {
734  public:
ConverterTest()735   ConverterTest() { Reset(); }
736 
Reset()737   void Reset() {
738     converter_ =
739         std::move(Converter::Create(TrtPrecisionMode::FP32,
740                                     /*use_calibration=*/false, &logger_,
741                                     /*use_implicit_batch=*/true,
742                                     /*engine_name=*/"TRTEngineOp_0_0")
743                       .ValueOrDie());
744     weight_store_ = &converter_->weight_store_;
745   }
746 
AddOpConverter(const string & op_name,OpConverter op_converter)747   void AddOpConverter(const string& op_name, OpConverter op_converter) {
748     converter_->op_registry_[op_name] = op_converter;
749   }
750 
751   // Below we expose private methods of Converter for testing.
752 
MaybeUpdateBatchSize(int batch_size)753   Status MaybeUpdateBatchSize(int batch_size) {
754     return converter_->MaybeUpdateBatchSize(batch_size);
755   }
756 
AddTensorOrWeights(const string & name,TRT_TensorOrWeights input)757   Status AddTensorOrWeights(const string& name, TRT_TensorOrWeights input) {
758     return converter_->AddTensorOrWeights(name, input);
759   }
760 
GetTensorOrWeights(const string & name,TRT_TensorOrWeights * output)761   Status GetTensorOrWeights(const string& name, TRT_TensorOrWeights* output) {
762     return converter_->GetTensorOrWeights(name, output);
763   }
764 
GetInputs(const NodeDef & node_def,std::vector<TRT_TensorOrWeights> * inputs) const765   Status GetInputs(const NodeDef& node_def,
766                    std::vector<TRT_TensorOrWeights>* inputs) const {
767     return converter_->GetInputs(node_def, inputs);
768   }
769 
GetWeightRange(const TRT_ShapedWeights & weights,float * out_min,float * out_max) const770   Status GetWeightRange(const TRT_ShapedWeights& weights, float* out_min,
771                         float* out_max) const {
772     return converter_->GetWeightRange(weights, out_min, out_max);
773   }
774 
PropagateQuantizationRanges()775   void PropagateQuantizationRanges() {
776     converter_->PropagateQuantizationRanges();
777   }
778 
batch_size() const779   int batch_size() const { return converter_->batch_size_; }
780 
quantization_ranges()781   std::unordered_map<nvinfer1::ITensor*, float>& quantization_ranges() {
782     return converter_->quantization_ranges_;
783   }
784 
785  private:
786   Logger logger_;
787 
788  protected:
789   std::unique_ptr<Converter> converter_;
790   TrtWeightStore* weight_store_;
791 };
792 
// Checks that ConvertNode() fails for an op without a registered converter,
// and that once a converter is registered its two outputs can be looked up as
// "my_op" and "my_op:1".
TEST_F(ConverterTest, ConvertNode) {
  FakeITensor fake_outputs[2];
  // Produces one output per fake tensor. The leading dimension is bumped by
  // one before each output is emitted, so successive outputs differ.
  auto two_output_converter =
      [&fake_outputs](OpConverterParams* params) -> Status {
    nvinfer1::Dims dims = params->inputs[0].tensor()->getDimensions();
    for (FakeITensor& fake_output : fake_outputs) {
      dims.d[0] += 1;
      fake_output.setDimensions(dims);
      params->outputs->push_back(TRT_TensorOrWeights(&fake_output));
    }
    return Status::OK();
  };
  NodeDef node_def = MakeNodeDef("my_op", "MyOp", {"my_input"});
  TF_EXPECT_OK(converter_->AddInputTensor(
      "my_input", nvinfer1::DataType::kFLOAT, GetTestDims({123}), 1));

  // Nothing is registered for MyOp yet, so conversion must be rejected.
  ExpectStatus(converter_->ConvertNode(node_def), error::UNIMPLEMENTED,
               "No converter registered for op: MyOp");

  // With the converter registered the same node converts successfully.
  AddOpConverter("MyOp", two_output_converter);
  TF_EXPECT_OK(converter_->ConvertNode(node_def));

  // First output: registered under the bare node name.
  TRT_TensorOrWeights first_output;
  TF_EXPECT_OK(GetTensorOrWeights("my_op", &first_output));
  EXPECT_EQ(&fake_outputs[0], first_output.tensor());
  EXPECT_EQ(124, first_output.tensor()->getDimensions().d[0]);

  // Second output: registered with the ":1" port suffix.
  TRT_TensorOrWeights second_output;
  TF_EXPECT_OK(GetTensorOrWeights("my_op:1", &second_output));
  EXPECT_EQ(&fake_outputs[1], second_output.tensor());
  EXPECT_EQ(125, second_output.tensor()->getDimensions().d[0]);

  VerifyTrtLayerNameNotEmpty(converter_->network());
}
828 
// Adds three input tensors to the converter and checks that GetInputs()
// resolves a node's input names (control input, bare name, and names with port
// suffixes) to four entries with the expected types and dimensions.
TEST_F(ConverterTest, AddAndGetInputs) {
  NodeDef node_def;
  for (const char* input_name :
       {"^control_input", "input", "input:0", "input:1",
        "weird_input:2:3:4:0"}) {
    node_def.add_input(input_name);
  }

  TF_EXPECT_OK(converter_->AddInputTensor("input", nvinfer1::DataType::kFLOAT,
                                          GetTestDims({1}), 1));
  TF_EXPECT_OK(converter_->AddInputTensor("input:1", nvinfer1::DataType::kINT32,
                                          GetTestDims({2, 3}), 1));
  TF_EXPECT_OK(converter_->AddInputTensor(
      "weird_input:2:3:4", nvinfer1::DataType::kHALF, GetTestDims({5, 3}), 1));

  std::vector<TRT_TensorOrWeights> resolved;
  TF_EXPECT_OK(GetInputs(node_def, &resolved));

  // Five names were added but only four entries are expected back.
  EXPECT_EQ(4, resolved.size());
  // "input" and "input:0" must resolve to the same tensor.
  EXPECT_EQ(resolved[0].tensor(), resolved[1].tensor());

  nvinfer1::ITensor* const float_input = resolved[0].tensor();
  nvinfer1::ITensor* const int32_input = resolved[2].tensor();
  nvinfer1::ITensor* const half_input = resolved[3].tensor();

  EXPECT_EQ(nvinfer1::DataType::kFLOAT, float_input->getType());
  EXPECT_EQ(nvinfer1::DataType::kINT32, int32_input->getType());
  EXPECT_EQ(nvinfer1::DataType::kHALF, half_input->getType());
  ExpectTrtDimsEqualsArray({1}, float_input->getDimensions());
  ExpectTrtDimsEqualsArray({2, 3}, int32_input->getDimensions());
  ExpectTrtDimsEqualsArray({5, 3}, half_input->getDimensions());

  VerifyTrtLayerNameNotEmpty(converter_->network());
}
859 
// Verifies that Converter::RenameAndMarkOutputTensors() renames the requested
// tensors and refuses to mark a weights output.
TEST_F(ConverterTest, RenameAndMarkOutputTensors) {
  // Custom converter used to build a small TRT network: it emits two shuffle
  // (transpose) outputs of the input followed by one weights output. The
  // tensor outputs are recorded so they can be inspected afterwards.
  std::vector<nvinfer1::ITensor*> shuffled_outputs;
  auto shuffle_converter =
      [&shuffled_outputs](OpConverterParams* params) -> Status {
    nvinfer1::Permutation perm;
    perm.order[0] = 1;
    perm.order[1] = 0;
    nvinfer1::ITensor* const source = params->inputs[0].tensor();
    for (int output_idx = 0; output_idx < 2; ++output_idx) {
      nvinfer1::IShuffleLayer* shuffle =
          params->converter->network()->addShuffle(*source);
      shuffle->setFirstTranspose(perm);
      nvinfer1::ITensor* shuffled = shuffle->getOutput(0);
      params->outputs->emplace_back(shuffled);
      shuffled_outputs.push_back(shuffled);
    }
    TRT_ShapedWeights weights_output(nvinfer1::DataType::kFLOAT);
    params->outputs->emplace_back(weights_output);
    return Status::OK();
  };
  AddOpConverter("MyOp", shuffle_converter);

  // Convert a single MyOp node fed by a {1, 2} input.
  NodeDef node_def = MakeNodeDef("my_op", "MyOp", {"my_input"});
  TF_EXPECT_OK(converter_->AddInputTensor(
      "my_input", nvinfer1::DataType::kFLOAT, GetTestDims({1, 2}), 1));
  TF_EXPECT_OK(converter_->ConvertNode(node_def));

  // Output #2 is weights, so marking it as an output must fail.
  ExpectStatus(
      converter_->RenameAndMarkOutputTensors({{"my_op:2", "my_output"}}),
      error::INVALID_ARGUMENT, "Output my_op:2 is weights not tensor");

  // Outputs #0 and #1 are tensors, so marking them must succeed.
  TF_EXPECT_OK(converter_->RenameAndMarkOutputTensors(
      {{"my_op", "my_output"}, {"my_op:1", "my_output_1"}}));
  EXPECT_EQ(2, shuffled_outputs.size());
  for (nvinfer1::ITensor* shuffled : shuffled_outputs) {
    ExpectTrtDimsEqualsArray({2, 1}, shuffled->getDimensions());
  }
  EXPECT_EQ("my_output", string(shuffled_outputs[0]->getName()));
  EXPECT_EQ("my_output_1", string(shuffled_outputs[1]->getName()));

  VerifyTrtLayerNameNotEmpty(converter_->network());
}
909 
// Exercises Converter::TransposeTensor() with a permutation of the wrong rank,
// a permutation that touches the batch dimension, and a valid permutation.
TEST_F(ConverterTest, TransposeTensor) {
  nvinfer1::ITensor* source = converter_->network()->addInput(
      "", nvinfer1::DataType::kFLOAT, GetTestDims({2, 3, 5}));
  nvinfer1::ITensor* transposed = nullptr;
  NodeDef dummy_node_def = MakeNodeDef("dummy_op", "DummyOp", {});

  // The permutation's rank does not match the input's.
  ExpectStatus(
      converter_->TransposeTensor(source, {0, 1}, &transposed, dummy_node_def,
                                  "sub1"),
      error::INVALID_ARGUMENT,
      "Rank of perm for transpose does not match with that of the input");

  // The permutation moves the batch dimension.
  ExpectStatus(
      converter_->TransposeTensor(source, {1, 0, 2, 3}, &transposed,
                                  dummy_node_def, "sub2"),
      error::UNIMPLEMENTED, "Transpose at batch dimension is not supported.");

  // Valid permutation: succeeds and produces exactly one named shuffle layer.
  TF_EXPECT_OK(converter_->TransposeTensor(source, {0, 3, 1, 2}, &transposed,
                                           dummy_node_def, "sub3"));
  ExpectTrtDimsEqualsArray({5, 2, 3}, transposed->getDimensions());
  ExpectTrtLayerNames({"TRTEngineOp_0_0/dummy_op-sub3:SHUFFLE"},
                      converter_->network());
}
935 
TestPrepareTensorForShape(const std::vector<int> & input_dims,const std::vector<int> & reshape_dims,const std::vector<int> & expected_tensor_dims,bool input_is_tensor,Converter * converter,TrtWeightStore * weight_store,error::Code expected_code=error::OK,const char * expected_error_msg_substr=nullptr)936 void TestPrepareTensorForShape(
937     const std::vector<int>& input_dims, const std::vector<int>& reshape_dims,
938     const std::vector<int>& expected_tensor_dims, bool input_is_tensor,
939     Converter* converter, TrtWeightStore* weight_store,
940     error::Code expected_code = error::OK,
941     const char* expected_error_msg_substr = nullptr) {
942   TRT_TensorOrWeights input;
943   if (input_is_tensor) {
944     input = TRT_TensorOrWeights(converter->network()->addInput(
945         "", nvinfer1::DataType::kFLOAT, GetTestDims(input_dims)));
946   } else {
947     input = TRT_TensorOrWeights(weight_store->GetTempWeights(
948         nvinfer1::DataType::kFLOAT, GetTestDims(input_dims)));
949   }
950   nvinfer1::ITensor* output_tensor = nullptr;
951 
952   NodeDef dummy_node_def = MakeNodeDef("dummy_op", "DummyOp", {});
953   for (bool validation_only : {false, true}) {
954     const Status status =
955         PrepareTensorForShape(converter, input, GetTestDims(reshape_dims),
956                               validation_only, &output_tensor, dummy_node_def);
957     if (expected_code == error::OK) {
958       TF_EXPECT_OK(status);
959       if (validation_only) {
960         EXPECT_EQ(nullptr, output_tensor);
961       } else {
962         ExpectTrtDimsEqualsArray(expected_tensor_dims,
963                                  output_tensor->getDimensions());
964       }
965     } else {
966       ExpectStatus(status, expected_code, expected_error_msg_substr);
967     }
968   }
969 }
970 
// Drives TestPrepareTensorForShape() through mismatched, regular, zero-rank
// and inferred (-1) reshape targets, for tensor and weights inputs.
TEST_F(ConverterTest, PrepareTensorForShape) {
  // Every case runs against a freshly reset converter.
  const auto run_case = [this](const std::vector<int>& input_dims,
                               const std::vector<int>& reshape_dims,
                               const std::vector<int>& expected_tensor_dims,
                               bool input_is_tensor,
                               error::Code expected_code = error::OK,
                               const char* expected_error_msg_substr =
                                   nullptr) {
    Reset();
    TestPrepareTensorForShape(input_dims, reshape_dims, expected_tensor_dims,
                              input_is_tensor, converter_.get(), weight_store_,
                              expected_code, expected_error_msg_substr);
  };

  for (const bool input_is_tensor : {true, false}) {
    // Element counts of the input and the target shape differ.
    run_case({2, 3, 5}, {2, 3, 6}, {}, input_is_tensor,
             error::INVALID_ARGUMENT, "Incompatible shapes");

    // Regular reshape.
    run_case({2, 3, 5}, {10, 3}, {10, 3}, input_is_tensor);

    // Reshape down to zero rank.
    run_case({1, 1}, {}, {}, input_is_tensor);
  }

  // Zero-rank tensor input.
  run_case({}, {1, 1}, {1, 1}, /*input_is_tensor=*/true);

  // TODO(aaroey): we should check the case where uninferred dimensions are
  // not an exact divisor of input dimensions, e.g. for dims {-1, 7}.

  // A -1 dimension is inferred for a tensor input.
  run_case({2, 3, 5}, {-1, 2}, {15, 2}, /*input_is_tensor=*/true);

  // A -1 dimension is rejected for a weights input.
  run_case({2, 3, 5}, {-1, 2}, {15, 2}, /*input_is_tensor=*/false,
           error::INVALID_ARGUMENT, "Shape is not fully defined");

  VerifyTrtLayerNameNotEmpty(converter_->network());
}
1013 
// Walks the converter's batch size through a sequence of updates and checks
// the stored value after each one, then checks that a conflicting size is
// rejected.
TEST_F(ConverterTest, MaybeUpdateBatchSize) {
  // A new converter starts with batch size -1.
  EXPECT_EQ(-1, batch_size());

  struct Step {
    int requested;  // Value passed to MaybeUpdateBatchSize().
    int expected;   // Value batch_size() must report afterwards.
  };
  const Step kAcceptedSteps[] = {
      {-1, -1},    // -1 leaves the initial -1 in place.
      {123, 123},  // The first concrete size is stored.
      {123, 123},  // The same size again is accepted.
      {-1, 123},   // -1 does not overwrite a stored size.
  };
  for (const Step& step : kAcceptedSteps) {
    TF_EXPECT_OK(MaybeUpdateBatchSize(step.requested));
    EXPECT_EQ(step.expected, batch_size());
  }

  // A different concrete size conflicts with the stored one.
  ExpectStatus(MaybeUpdateBatchSize(124), error::INVALID_ARGUMENT,
               "Provided batch size does not match converter batch size");
}
1032 
// Checks that a tensor added to the converter is retrievable by name with the
// converter's batch size, and that adding the same name twice fails.
TEST_F(ConverterTest, AddAndGetTensorOrWeights) {
  // A tensor wrapper built directly from an ITensor reports batch size -1.
  FakeITensor raw_tensor;
  TRT_TensorOrWeights to_add(&raw_tensor);
  EXPECT_EQ(-1, to_add.batch_size());

  // Fix the converter's batch size, then register the tensor.
  TF_EXPECT_OK(MaybeUpdateBatchSize(123));
  TF_EXPECT_OK(AddTensorOrWeights("my_tensor", to_add));

  // The stored entry carries the converter's batch size.
  TRT_TensorOrWeights fetched;
  TF_EXPECT_OK(GetTensorOrWeights("my_tensor", &fetched));
  EXPECT_EQ(123, fetched.batch_size());

  // Registering the same name a second time is an error.
  ExpectStatus(AddTensorOrWeights("my_tensor", to_add), error::ALREADY_EXISTS,
               "tensor/weights my_tensor already exist");
}
1050 
1051 template <typename T>
TestGetWeightRange(ConverterTest * test,TrtWeightStore * weight_store)1052 void TestGetWeightRange(ConverterTest* test, TrtWeightStore* weight_store) {
1053   nvinfer1::DataType trt_type;
1054   TF_ASSERT_OK(TfTypeToTrtType(DataTypeToEnum<T>::v(), &trt_type));
1055   TRT_ShapedWeights weights =
1056       weight_store->GetTempWeights(trt_type, GetTestDims({2, 3}));
1057   const std::vector<T> values = {T(3), T(1), T(2), T(6), T(5), T(4)};
1058   memcpy(weights.GetValues(), values.data(), weights.size_bytes());
1059 
1060   float out_min = 0.0f;
1061   float out_max = 0.0f;
1062   TF_EXPECT_OK(test->GetWeightRange(weights, &out_min, &out_max));
1063   EXPECT_EQ(1.0f, out_min);
1064   EXPECT_EQ(6.0f, out_max);
1065 }
1066 
// Runs the weight-range check once per weight element type.
TEST_F(ConverterTest, GetWeightRange) {
  TrtWeightStore* const store = weight_store_;
  TestGetWeightRange<float>(this, store);
  TestGetWeightRange<Eigen::half>(this, store);
  TestGetWeightRange<int32>(this, store);
}
1072 
// Checks the value stored for a tensor after ProvideQuantizationRange() is
// called with several asymmetric ranges and one symmetric range.
TEST_F(ConverterTest, ProvideQuantizationRange) {
  FakeITensor fake_tensor;
  // Provides [min_range, max_range] for fake_tensor and checks what the
  // converter recorded for it.
  const auto expect_stored_range = [&](float min_range, float max_range,
                                       float expected) {
    converter_->ProvideQuantizationRange(&fake_tensor, min_range, max_range);
    EXPECT_EQ(expected, quantization_ranges()[&fake_tensor]);
  };

  // Asymmetric range
  expect_stored_range(0.0f, 6.0f, 6.0f);
  expect_stored_range(1.0f, 6.0f, 6.0f);
  expect_stored_range(-8.0f, 6.0f, 8.0f);
  expect_stored_range(-8.123f, -6.123f, 8.123f);
  // Symmetric range
  expect_stored_range(-6.123f, 6.123f, 6.123f);

  VerifyTrtLayerNameNotEmpty(converter_->network());
}
1090 
TEST_F(ConverterTest,MaybeApplyQuantizationRanges)1091 TEST_F(ConverterTest, MaybeApplyQuantizationRanges) {
1092   // input -> infer1 -> infer2 -> infer3
1093   FakeITensor input, infer_1, infer_2, infer_3;
1094   FakeITensor not_infer;
1095   Logger logger;
1096   auto int8_converter = Converter::Create(TrtPrecisionMode::INT8,
1097                                           /*use_calibration=*/true, &logger,
1098                                           /*use_implicit_batch=*/true,
1099                                           /*engine_name=*/"")
1100                             .ValueOrDie();
1101   int8_converter->ProvideQuantizationRange(&input, -5.0f, 5.0f);
1102   int8_converter->ProvideQuantizationRange(&not_infer, -100.0f, 100.0f);
1103   int8_converter->MarkQuantizationRangesAsInferrable(&input, &infer_1);
1104   int8_converter->MarkQuantizationRangesAsInferrable(&infer_1, &infer_2);
1105   int8_converter->MarkQuantizationRangesAsInferrable(&infer_2, &infer_3);
1106 
1107   // Input range should be inferred along the chain and applied to tensors.
1108   int8_converter->MaybeApplyQuantizationRanges();
1109 #if IS_TRT_VERSION_GE(5, 0, 0, 0)
1110   EXPECT_EQ(input.getDynamicRange(), 5.0f);
1111   EXPECT_EQ(infer_1.getDynamicRange(), 5.0f);
1112   EXPECT_EQ(infer_2.getDynamicRange(), 5.0f);
1113   EXPECT_EQ(infer_3.getDynamicRange(), 5.0f);
1114   EXPECT_EQ(not_infer.getDynamicRange(), 100.0f);
1115 #endif
1116 
1117   VerifyTrtLayerNameNotEmpty(int8_converter->network());
1118 }
1119 
// Provides a range for a single tensor in a graph of inferrable links and
// checks that PropagateQuantizationRanges() assigns it to every linked tensor
// and to no unlinked one.
TEST_F(ConverterTest, PropagateQuantizationRanges) {
  // infer0 <-> infer1 <-> infer2 <-> infer3
  //              |
  //            infer4 <-> infer5
  FakeITensor infer[6];
  FakeITensor not_infer;
  converter_->ProvideQuantizationRange(&infer[4], -5.0f, 5.0f);

  // Each row is one (first argument, second argument) pair of indices into
  // `infer`, passed to MarkQuantizationRangesAsInferrable() in this order.
  const int kLinks[][2] = {{0, 1}, {1, 2}, {3, 2}, {4, 1}, {4, 5}};
  for (const auto& link : kLinks) {
    converter_->MarkQuantizationRangesAsInferrable(&infer[link[0]],
                                                   &infer[link[1]]);
  }

  // The single provided range should reach every linked tensor.
  PropagateQuantizationRanges();
  auto ranges = quantization_ranges();
  for (FakeITensor& linked : infer) {
    EXPECT_EQ(5.0f, ranges[&linked]);
  }
  EXPECT_EQ(ranges.count(&not_infer), 0);

  VerifyTrtLayerNameNotEmpty(converter_->network());
}
1143 
TEST_F(ConverterTest,GetTrtBroadcastShape)1144 TEST_F(ConverterTest, GetTrtBroadcastShape) {
1145   const bool kIsTensor = true;
1146   const bool kIsNotTensor = false;
1147   auto symmetric_test = [this](const std::vector<int>& operand_1_shape,
1148                                const std::vector<int>& operand_2_shape,
1149                                const bool operand_1_is_tensor,
1150                                const bool operand_2_is_tensor,
1151                                const std::vector<int>& expected_operand_1_shape,
1152                                const std::vector<int>& expected_operand_2_shape,
1153                                error::Code expected_code = error::OK,
1154                                const char* expected_error_msg_substr = nullptr,
1155                                const int operand_1_batch_size = -1,
1156                                const int operand_2_batch_size = -1) {
1157     auto create_tensor_or_weights = [](const std::vector<int>& shape,
1158                                        bool is_tensor, int batch_size = -1) {
1159       if (is_tensor) {
1160         return TRT_TensorOrWeights{nvinfer1::DataType::kFLOAT,
1161                                    GetTestDims(shape), batch_size};
1162       }
1163       TRT_ShapedWeights weights;
1164       weights.shape_ = GetTestDims(shape);
1165       return TRT_TensorOrWeights(weights);
1166     };
1167 
1168     nvinfer1::Dims operand_1_new_dims, operand_2_new_dims;
1169     TRT_TensorOrWeights operand_1 = create_tensor_or_weights(
1170         operand_1_shape, operand_1_is_tensor, operand_1_batch_size);
1171     TRT_TensorOrWeights operand_2 = create_tensor_or_weights(
1172         operand_2_shape, operand_2_is_tensor, operand_2_batch_size);
1173 
1174     // operand_1 broadcast operand_2
1175     ExpectStatus(
1176         GetTrtBroadcastShape(operand_1, operand_2, /*check_feasibility=*/true,
1177                              /*use_implicit_batch=*/true, &operand_1_new_dims,
1178                              &operand_2_new_dims),
1179         expected_code, expected_error_msg_substr);
1180     if (expected_code == error::OK) {
1181       ExpectTrtDimsEqualsArray(expected_operand_1_shape, operand_1_new_dims);
1182       ExpectTrtDimsEqualsArray(expected_operand_2_shape, operand_2_new_dims);
1183     }
1184     // operand_2 broadcast operand_1
1185     ExpectStatus(
1186         GetTrtBroadcastShape(operand_2, operand_1, /*check_feasibility=*/true,
1187                              /*use_implicit_batch=*/true, &operand_2_new_dims,
1188                              &operand_1_new_dims),
1189         expected_code, expected_error_msg_substr);
1190     if (expected_code == error::OK) {
1191       ExpectTrtDimsEqualsArray(expected_operand_1_shape, operand_1_new_dims);
1192       ExpectTrtDimsEqualsArray(expected_operand_2_shape, operand_2_new_dims);
1193     }
1194   };
1195 
1196   // Both inputs are weights.
1197   symmetric_test(
1198       {1}, {1}, kIsNotTensor, kIsNotTensor, {}, {}, error::INVALID_ARGUMENT,
1199       "Broadcasting requires at least one of the operands be tensors");
1200 
1201   // One tensor and one weights.
1202   symmetric_test({1, 1, 1}, {2}, kIsTensor, kIsNotTensor, {1, 1, 1}, {1, 1, 2});
1203   symmetric_test({1, 1, 2}, {2}, kIsTensor, kIsNotTensor, {1, 1, 2}, {1, 1, 2});
1204   symmetric_test({1, 3, 2}, {1}, kIsTensor, kIsNotTensor, {1, 3, 2}, {1, 1, 1});
1205   symmetric_test({1, 1, 1}, {2, 3}, kIsTensor, kIsNotTensor, {1, 1, 1},
1206                  {1, 2, 3});
1207   symmetric_test({1, 1, 1}, {2, 3, 4}, kIsTensor, kIsNotTensor, {1, 1, 1},
1208                  {2, 3, 4});
1209   symmetric_test({1, 1, 1}, {1, 2, 3, 4}, kIsTensor, kIsNotTensor, {1, 1, 1},
1210                  {2, 3, 4});
1211   symmetric_test({1, 3, 4}, {1, 2, 1, 4}, kIsTensor, kIsNotTensor, {1, 3, 4},
1212                  {2, 1, 4});
1213   symmetric_test({1, 1, 1}, {2, 1, 1, 1}, kIsTensor, kIsNotTensor, {}, {},
1214                  error::INVALID_ARGUMENT, "Infeasible broadcast scheme");
1215   symmetric_test({1, 1, 1}, {2, 1, 1, 1}, kIsTensor, kIsNotTensor, {}, {},
1216                  error::INVALID_ARGUMENT, "Infeasible broadcast scheme",
1217                  /*operand_1_batch_size=*/2);
1218   symmetric_test({1, 1, 1}, {1, 1, 1, 1, 1}, kIsTensor, kIsNotTensor, {}, {},
1219                  error::INVALID_ARGUMENT,
1220                  "Broadcasting beyond batch dimension is not supported "
1221                  "(tensor #dims 4 vs broadcast #dims 5)");
1222   symmetric_test({3}, {1, 1, 3}, kIsTensor, kIsNotTensor, {}, {},
1223                  error::INVALID_ARGUMENT,
1224                  "Broadcasting beyond batch dimension is not supported "
1225                  "(tensor #dims 2 vs broadcast #dims 3)",
1226                  /*operand_1_batch_size=*/2);
1227 
1228   // Both inputs are tensors.
1229   symmetric_test({1, 1, 1}, {1, 1}, kIsTensor, kIsTensor, {}, {},
1230                  error::INVALID_ARGUMENT,
1231                  "Broadcasting beyond batch dimension is not supported "
1232                  "(tensor #dims 3 vs broadcast #dims 4)");
1233   symmetric_test({1, 3}, {3}, kIsTensor, kIsTensor, {}, {},
1234                  error::INVALID_ARGUMENT,
1235                  "Broadcasting beyond batch dimension is not supported "
1236                  "(tensor #dims 2 vs broadcast #dims 3)");
1237   symmetric_test({1, 3, 4}, {2, 1, 4}, kIsTensor, kIsTensor, {1, 3, 4},
1238                  {2, 1, 4});
1239   symmetric_test({1, 1, 1}, {1, 1, 1, 1}, kIsTensor, kIsTensor, {}, {},
1240                  error::INVALID_ARGUMENT,
1241                  "Broadcasting beyond batch dimension is not supported "
1242                  "(tensor #dims 4 vs broadcast #dims 5)");
1243   symmetric_test({2, 3}, {7, 5}, kIsTensor, kIsTensor, {}, {},
1244                  error::INVALID_ARGUMENT, "Infeasible broadcast scheme");
1245 
1246   VerifyTrtLayerNameNotEmpty(converter_->network());
1247 }
1248 
// Checks that CreateConstantLayer() turns {2, 3, 5} weights into a non-null
// tensor of the requested {3, 10} dims with the same data type, for float and
// int32 weights.
TEST_F(ConverterTest, CreateConstantLayer) {
  for (const nvinfer1::DataType weights_dtype :
       {nvinfer1::DataType::kFLOAT, nvinfer1::DataType::kINT32}) {
    TRT_ShapedWeights source_weights =
        weight_store_->GetTempWeights(weights_dtype, GetTestDims({2, 3, 5}));
    nvinfer1::ITensor* constant =
        converter_->CreateConstantLayer(source_weights, GetTestDims({3, 10}));
    // Stop here on null; the checks below dereference the tensor.
    ASSERT_NE(nullptr, constant);
    EXPECT_EQ(weights_dtype, constant->getType())
        << "Expected " << DebugString(weights_dtype) << " vs. actual "
        << DebugString(constant->getType());
    ExpectTrtDimsEqualsArray({3, 10}, constant->getDimensions());
  }

  VerifyTrtLayerNameNotEmpty(converter_->network());
}
1264 
1265 class ConvertGraphDefToEngineTest : public ::testing::Test {
1266  public:
RunConvertGraphDefToEngine(Scope * s)1267   Status RunConvertGraphDefToEngine(Scope* s) {
1268     GraphDef gdef;
1269     TF_EXPECT_OK(s->ToGraphDef(&gdef));
1270     std::vector<PartialTensorShape> input_shapes;
1271     int batch_size = -1;
1272     for (const NodeDef& node : gdef.node()) {
1273       absl::string_view node_name(node.name());
1274       if (absl::ConsumePrefix(&node_name, IONamePrefixes::kInputPHName)) {
1275         int port = -1;
1276         EXPECT_TRUE(absl::SimpleAtoi(node_name, &port)) << node.name();
1277         if (input_shapes.size() < port + 1) input_shapes.resize(port + 1);
1278         input_shapes[port] =
1279             PartialTensorShape(node.attr().at("shape").shape());
1280         if (batch_size == -1) {
1281           batch_size = input_shapes[port].dim_size(0);
1282         } else {
1283           EXPECT_EQ(batch_size, input_shapes[port].dim_size(0));
1284         }
1285       }
1286     }
1287     // TODO(laigd): execute the engine and get outputs.
1288     return ConvertGraphDefToEngine(
1289         gdef, TrtPrecisionMode::FP32, /*max_batch_size=*/1,
1290         /*max_workspace_size_bytes=*/64 << 20, input_shapes, &logger_,
1291         /*allocator=*/nullptr, /*calibrator=*/nullptr, &engine_,
1292         /*use_calibration=*/false, /*use_implicit_batch=*/true,
1293         /*convert_successfully=*/nullptr, /*profiles=*/nullptr,
1294         "TRTEngineOp_0_0");
1295   }
1296 
1297  protected:
1298   TrtUniquePtrType<nvinfer1::ICudaEngine> engine_;
1299 
1300  private:
1301   Logger logger_;
1302 };
1303 
// Converts a graph made of an input placeholder followed by three chained
// Identity ops, the last of which carries the output placeholder name.
TEST_F(ConvertGraphDefToEngineTest, IdentityGraph) {
  Scope s = Scope::NewRootScope();
  auto graph_input =
      ops::Placeholder(s.WithOpName(StrCat(IONamePrefixes::kInputPHName, 0)),
                       DT_FLOAT, ops::Placeholder::Shape({1, 1}));
  auto identity1 = ops::Identity(s.WithOpName("identity1"), graph_input);
  auto identity2 = ops::Identity(s.WithOpName("identity2"), identity1);
  auto graph_output = ops::Identity(
      s.WithOpName(StrCat(IONamePrefixes::kOutputPHName, 0)), identity2);
  // If the converter marks the input tensor as output tensor, the conversion
  // below will fail with:
  // > TensorRTOutputPH_0 cannot be both input and output
  // > Network must have at least one output
  TF_EXPECT_OK(RunConvertGraphDefToEngine(&s));
}
1319 
1320 // Returns a vector of shapes from a vector of input tensors. This can be used
1321 // to create optimization profiles.
GetShapeFromDataVec(DataVec input_data,std::vector<TensorShape> * shape_vec)1322 Status GetShapeFromDataVec(DataVec input_data,
1323                            std::vector<TensorShape>* shape_vec) {
1324   shape_vec->reserve(input_data.size());
1325   std::transform(input_data.begin(), input_data.end(),
1326                  std::back_inserter(*shape_vec),
1327                  [](InputOutputData x) { return x.tensor.shape(); });
1328   return Status::OK();
1329 }
1330 
1331 template <typename T>
GetSpanForData(const InputOutputData & data)1332 inline absl::Span<const T> GetSpanForData(const InputOutputData& data) {
1333   const auto& tensor_map = data.tensor.flat<T>();
1334   return absl::Span<const T>(tensor_map.data(), tensor_map.size());
1335 }
1336 
GetDataAsFloat(InputOutputData & data)1337 std::vector<float> GetDataAsFloat(InputOutputData& data) {
1338   if (data.tensor.dtype() == DT_FLOAT) {
1339     auto span = GetSpanForData<float>(data);
1340     return std::vector<float>(span.begin(), span.end());
1341   }
1342   if (data.tensor.dtype() == DT_HALF) {
1343     return CastTestVector<Eigen::half, float>(
1344         GetSpanForData<Eigen::half>(data));
1345   }
1346   if (data.tensor.dtype() == DT_INT32) {
1347     return CastTestVector<int32, float>(GetSpanForData<int32>(data));
1348   }
1349   LOG(FATAL) << "DataType not supported for testing "
1350              << DataTypeString(data.tensor.dtype());
1351 }
1352 // Class to test various op converters, using both a TrtNodeValidator and
1353 // Converter.
1354 class OpConverterTest : public ::testing::Test {
1355  public:
OpConverterTest()1356   OpConverterTest()
1357       : tensor_buffer_allocator_(new GpuManagedAllocator()),
1358         scope_(Scope::NewRootScope()) {
1359     QCHECK_EQ(0, cudaStreamCreate(&stream_));
1360     Reset();
1361   }
1362 
~OpConverterTest()1363   ~OpConverterTest() override { QCHECK_EQ(0, cudaStreamDestroy(stream_)); }
1364 
GetTensorOrWeights(const string & name,TRT_TensorOrWeights * output)1365   Status GetTensorOrWeights(const string& name, TRT_TensorOrWeights* output) {
1366     return converter_->GetTensorOrWeights(name, output);
1367   }
1368 
Reset(TrtPrecisionMode precision_mode_to_test=TrtPrecisionMode::FP32,TrtTestMode trt_mode=TrtTestMode::kImplicitBatch)1369   void Reset(TrtPrecisionMode precision_mode_to_test = TrtPrecisionMode::FP32,
1370              TrtTestMode trt_mode = TrtTestMode::kImplicitBatch) {
1371     // Destroy existing TRT objects in a proper order.
1372     converter_.reset(nullptr);
1373     engine_.reset(nullptr);
1374 
1375     // Re-create them in proper order.
1376     converter_ =
1377         std::move(Converter::Create(precision_mode_to_test,
1378                                     /*use_calibration=*/false, &logger_,
1379                                     /*use_implicit_batch=*/trt_mode ==
1380                                         TrtTestMode::kImplicitBatch,
1381                                     /*engine_name=*/"")
1382                       .ValueOrDie());
1383 
1384     // Reset other related artifacts.
1385     scope_ = Scope::NewRootScope();
1386   }
1387 
1388   // Constructs a flat tensor with 'vals' in Unified Memory.
1389   template <typename T>
AsTensor(gtl::ArraySlice<T> vals)1390   Tensor AsTensor(gtl::ArraySlice<T> vals) {  // non-absl ok
1391     Tensor ret(tensor_buffer_allocator_.get(), DataTypeToEnum<T>::value,
1392                {static_cast<int64>(vals.size())});
1393     std::copy_n(vals.data(), vals.size(), ret.flat<T>().data());
1394     return ret;
1395   }
1396 
1397   // Constructs a tensor of "shape" with values "vals" in Unified Memory.
1398   template <typename T>
AsTensor(gtl::ArraySlice<T> vals,const TensorShape & shape)1399   Tensor AsTensor(gtl::ArraySlice<T> vals,  // non-absl ok
1400                   const TensorShape& shape) {
1401     Tensor ret(tensor_buffer_allocator_.get(), DataTypeToEnum<T>::value,
1402                {static_cast<int64>(vals.size())});
1403     CHECK(ret.CopyFrom(AsTensor(vals), shape));
1404     return ret;
1405   }
1406 
1407   // Constructs a tensor with given values (vals). The tensor type is defined by
1408   // the tf_type argument, its shape is given by input_dims. The tensor is
1409   // constructed using the allocator of OpConverterTest in Unified Memory.
1410   template <typename T>
AsTensor(std::vector<T> vals,const std::vector<int> input_dims,DataType tf_type)1411   Tensor AsTensor(std::vector<T> vals, const std::vector<int> input_dims,
1412                   DataType tf_type) {
1413     Tensor ret(tensor_buffer_allocator_.get(), tf_type,
1414                {static_cast<int64>(vals.size())});
1415     if (tf_type == DT_FLOAT) {
1416       auto conv_vals = CastTestVector<T, float>(vals);
1417       std::copy_n(conv_vals.data(), conv_vals.size(), ret.flat<float>().data());
1418     } else if (tf_type == DT_HALF) {
1419       auto conv_vals = CastTestVector<T, Eigen::half>(vals);
1420       std::copy_n(conv_vals.data(), conv_vals.size(),
1421                   ret.flat<Eigen::half>().data());
1422     } else if (tf_type == DT_INT32) {
1423       auto conv_vals = CastTestVector<T, int32>(vals);
1424       std::copy_n(conv_vals.data(), conv_vals.size(), ret.flat<int32>().data());
1425     } else {
1426       LOG(FATAL) << "Cannot create tensor with type "
1427                  << DataTypeString(tf_type);
1428     }
1429     TensorShape shape;
1430     TF_EXPECT_OK(TensorShapeUtils::MakeShape(input_dims, &shape));
1431     CHECK(ret.CopyFrom(ret, shape));
1432     return ret;
1433   }
1434 
1435   // Constructs a flat tensor in Unified Memory.
1436   template <typename T>
ConstructTensor(int data_size,const T & value=T ())1437   Tensor ConstructTensor(int data_size, const T& value = T()) {
1438     std::vector<T> values(data_size, value);
1439     return AsTensor<T>(values);
1440   }
1441 
1442   // Constructs a flat tensor in Unified Memory.
1443   template <typename T>
ConstructTensor(int data_size,const T & value,DataType tf_type)1444   Tensor ConstructTensor(int data_size, const T& value, DataType tf_type) {
1445     std::vector<T> values(data_size, value);
1446     return AsTensor<T>(values, {data_size}, tf_type);
1447   }
1448 
CheckDataTypeMatches(const DataVec & datas)1449   void CheckDataTypeMatches(const DataVec& datas) {
1450     for (const auto& data : datas) {
1451       const int input_index = engine_->getBindingIndex(data.name.c_str());
1452       ASSERT_NE(-1, input_index);
1453       const nvinfer1::DataType trt_dtype =
1454           engine_->getBindingDataType(input_index);
1455       DataType tf_type;
1456       TF_ASSERT_OK(TrtTypeToTfType(trt_dtype, &tf_type));
1457       ASSERT_EQ(data.tensor.dtype(), tf_type)
1458           << DataTypeString(data.tensor.dtype()) << " vs. "
1459           << DataTypeString(tf_type);
1460     }
1461   }
1462 
BuildAndRun(const DataVec & input_data,DataVec * output_data,const int batch_size=1)1463   Status BuildAndRun(const DataVec& input_data, DataVec* output_data,
1464                      const int batch_size = 1) {
1465     // Mark the output tensor as TRT engine output.
1466     std::vector<Converter::EngineOutputInfo> output_info;
1467     for (const auto& data : *output_data) {
1468       nvinfer1::DataType trt_type;
1469       TF_RETURN_IF_ERROR(TfTypeToTrtType(data.tensor.dtype(), &trt_type));
1470       output_info.push_back({data.name, data.name, trt_type});
1471     }
1472     TF_RETURN_IF_ERROR(converter_->RenameAndMarkOutputTensors(output_info));
1473 
1474     // Build the TRT engine.
1475     if (engine_.get() != nullptr) {
1476       return errors::Internal("Engine already exists");
1477     }
1478     TrtShapeOptimizationProfile profiles(
1479         ProfileStrategy::kImplicitBatchModeCompatible);
1480     if (!converter_->use_implicit_batch()) {
1481       // Create a single optimization profile for explicit batch mode
1482       std::vector<TensorShape> input_shapes;
1483       TF_RETURN_IF_ERROR(GetShapeFromDataVec(input_data, &input_shapes));
1484       profiles.AddShape(input_shapes);
1485       std::vector<PartialTensorShape> input_partial_shapes;
1486       TF_RETURN_IF_ERROR(
1487           GetNetworkInputShapes(converter_->network(), &input_partial_shapes));
1488       profiles.InitProfiles(input_partial_shapes);
1489     }
1490     TF_RETURN_IF_ERROR(
1491         converter_->BuildCudaEngine(&engine_,
1492                                     /*max_batch_size=*/batch_size,
1493                                     /*max_workspace_size_bytes=*/1 << 26,
1494                                     /*allocator=*/nullptr,
1495                                     /*calibrator=*/nullptr,
1496                                     /*profiles=*/&profiles));
1497     CHECK_NOTNULL(engine_.get());
1498     CheckDataTypeMatches(input_data);
1499     CheckDataTypeMatches(*output_data);
1500 
1501     const int num_bindings = input_data.size() + output_data->size();
1502     std::vector<void*> buffers(num_bindings);
1503 
1504     if (engine_->getNbBindings() != num_bindings) {
1505       return errors::Internal("Number of bindings do not match");
1506     }
1507     // Since we have only 1 optimization profile (which is enabled by default)
1508     // it is fine to create execution context directly, instead of calling
1509     // profiles.CreateExecutionContexts()
1510     TrtUniquePtrType<nvinfer1::IExecutionContext> execution_context(
1511         engine_->createExecutionContext());
1512 
1513     // Prepare input bindings.
1514     TF_RETURN_IF_ERROR(SetTrtEngineInputs(
1515         engine_.get(), execution_context.get(), 0, buffers,
1516         converter_->use_implicit_batch(), batch_size, nullptr, &input_data));
1517     // Prepare output bindings.
1518     TF_RETURN_IF_ERROR(SetTrtEngineOutputs(
1519         engine_.get(), execution_context.get(), 0, buffers,
1520         converter_->use_implicit_batch(), batch_size, nullptr, output_data));
1521     // Execute the TRT engine.
1522     TF_RETURN_IF_ERROR(TrtEnqueue(execution_context.get(), buffers, stream_,
1523                                   converter_->use_implicit_batch(),
1524                                   batch_size));
1525     cudaStreamSynchronize(stream_);
1526     return Status::OK();
1527   }
1528 
1529   // Adds ITensor for both validation and conversion, assuming explicit batch
1530   // dimension is included in dims (ie for an NCHW tensor dims = {N, C, H, W}).
AddTestTensorWithTFDims(const string & name,const std::vector<int32> & dims,nvinfer1::DataType trt_type=nvinfer1::DataType::kFLOAT,Status add_input_status=Status::OK ())1531   void AddTestTensorWithTFDims(
1532       const string& name, const std::vector<int32>& dims,
1533       nvinfer1::DataType trt_type = nvinfer1::DataType::kFLOAT,
1534       Status add_input_status = Status::OK()) {
1535     DataType tf_type;
1536     TF_ASSERT_OK(TrtTypeToTfType(trt_type, &tf_type));
1537     ops::Placeholder::Attrs attrs;
1538     TF_EXPECT_OK(TensorShapeUtils::MakeShape(dims, &attrs.shape_));
1539 
1540     auto input = ops::Placeholder(scope_.WithOpName(name), tf_type, attrs);
1541     node_inputs_[name] = input.output;
1542 
1543     // Add a real ITensor for conversion conditionally.
1544     const nvinfer1::Dims trt_dims =
1545         TensorShapeToTrtDims(attrs.shape_, converter_->use_implicit_batch());
1546     if (!converter_->use_implicit_batch() || HasStaticShape(trt_dims)) {
1547       int batch_size = dims[0];
1548       Status status =
1549           converter_->AddInputTensor(name, trt_type, trt_dims, batch_size);
1550       ASSERT_EQ(add_input_status, status);
1551     }
1552   }
1553 
1554   // Adds ITensor for both validation and conversion. The difference compared to
1555   // AddTestTensorWithTFDims is in the meaning of the dims parameter. To define
1556   // a tensor with NCHW shape, here we set dims = {C,H,W} and batch_size = N.
1557   // TODO(tfeher) remove this function once all test are updated to use the
1558   // other version of AddTestTensor (defined by
1559   // ParameterizedOpConverterTestBase).
AddTestTensor(const string & name,const std::vector<int32> & dims,int batch_size=1,nvinfer1::DataType trt_dtype=nvinfer1::DataType::kFLOAT)1560   void AddTestTensor(
1561       const string& name, const std::vector<int32>& dims, int batch_size = 1,
1562       nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT) {
1563     std::vector<int32> dims_with_batch(dims.size() + 1);
1564     dims_with_batch[0] = batch_size;
1565     std::copy(dims.begin(), dims.end(), dims_with_batch.begin() + 1);
1566     AddTestTensorWithTFDims(name, dims_with_batch, trt_dtype);
1567     if (HasStaticShape(dims)) {
1568       ASSERT_EQ(batch_size, converter_->batch_size_);
1569     }
1570   }
1571 
1572   // Add weights for both validation and conversion.
1573   template <typename T>
AddTestWeights(const string & name,const std::vector<int> & dims,const std::vector<T> & values)1574   void AddTestWeights(const string& name, const std::vector<int>& dims,
1575                       const std::vector<T>& values) {
1576     // Add weights for validation.
1577     TensorShape shape;
1578     TF_EXPECT_OK(TensorShapeUtils::MakeShape(dims, &shape));
1579     Tensor t = AsTensor<T>(values, shape);
1580     node_inputs_[name] = ops::Const(scope_.WithOpName(name), t);
1581 
1582     // Add weights for conversion.
1583     nvinfer1::DataType dtype;
1584     TF_ASSERT_OK(TfTypeToTrtType(DataTypeToEnum<T>::v(), &dtype));
1585     const nvinfer1::Dims trt_dims = GetTestDims(dims);
1586     const int64_t num_elements = TrtWeightDimsNumElements(trt_dims);
1587     QCHECK_EQ(num_elements, values.size())
1588         << num_elements << " vs " << values.size();
1589     TRT_ShapedWeights weights(dtype);
1590     if (num_elements) {
1591       weights = converter_->weight_store_.GetTempWeights(dtype, trt_dims);
1592       QCHECK_EQ(weights.size_bytes(), sizeof(T) * values.size())
1593           << weights.size_bytes() << " vs " << sizeof(T) * values.size();
1594       memcpy(weights.GetValues(), values.data(), weights.size_bytes());
1595     }
1596     TF_EXPECT_OK(
1597         converter_->AddTensorOrWeights(name, TRT_TensorOrWeights{weights}));
1598   }
1599 
1600   template <typename T = int32>
AddTestWeights(const string & name,const std::vector<int> & dims,const std::vector<T> & values,DataType tf_type)1601   void AddTestWeights(const string& name, const std::vector<int>& dims,
1602                       const std::vector<T>& values, DataType tf_type) {
1603     if (tf_type == DT_FLOAT) {
1604       AddTestWeights(name, dims, CastTestVector<T, float>(values));
1605     } else if (tf_type == DT_HALF) {
1606       AddTestWeights(name, dims, CastTestVector<T, Eigen::half>(values));
1607     } else if (tf_type == DT_INT32) {
1608       AddTestWeights(name, dims, CastTestVector<T, int32>(values));
1609     } else {
1610       FAIL() << "Cannot create test weights with type "
1611              << DataTypeString(tf_type);
1612     }
1613   }
1614 
1615   // Test validation in validation-only mode.
RunValidation(const Node * node)1616   Status RunValidation(const Node* node) {
1617     grappler::GrapplerItem item;
1618     TF_EXPECT_OK(scope_.ToGraphDef(&item.graph));
1619     grappler::GraphProperties graph_properties(item);
1620     TF_EXPECT_OK(graph_properties.InferStatically(true));
1621 
1622     TrtNodeValidator validator(graph_properties, converter_->precision_mode(),
1623                                /*use_calibration=*/false,
1624                                converter_->use_implicit_batch());
1625     return validator.IsTensorRTCandidate(node);
1626   }
1627 
RunConversion(const Node * node,error::Code expected_code=error::OK,const char * expected_msg_substr=nullptr)1628   void RunConversion(const Node* node, error::Code expected_code = error::OK,
1629                      const char* expected_msg_substr = nullptr) {
1630     ExpectStatus(converter_->ConvertNode(node->def()), expected_code,
1631                  expected_msg_substr);
1632     if (expected_code == error::OK) {
1633       VerifyTrtLayerNameNotEmpty(converter_->network());
1634     }
1635   }
1636 
1637   // Helper method to run both validation and conversion, when the expected
1638   // output are same.
RunValidationAndConversion(const NodeDef & node_def,error::Code expected_code=error::OK,const char * expected_msg_substr=nullptr,bool should_run_conversion=true)1639   void RunValidationAndConversion(const NodeDef& node_def,
1640                                   error::Code expected_code = error::OK,
1641                                   const char* expected_msg_substr = nullptr,
1642                                   bool should_run_conversion = true) {
1643     // Add the node to the graph.
1644     // TODO(laigd): we should accept a function that adds the node using
1645     // `scope_`, so individual test case can reuse the scope object and we don't
1646     // need to add the edges here by ourselves.
1647     Graph* graph = scope_.graph();
1648     Status status;
1649     Node* node = graph->AddNode(std::move(node_def), &status);
1650     TF_EXPECT_OK(status);
1651     for (int i = 0; i < node_def.input().size(); ++i) {
1652       const string& input_name = node_def.input(i);
1653       const auto& itr = node_inputs_.find(input_name);
1654       QCHECK(itr != node_inputs_.end());
1655       const Output& input = itr->second;
1656       graph->AddEdge(input.node(), input.index(), node, i);
1657     }
1658 
1659     status = RunValidation(node);
1660     if (should_run_conversion && status.ok()) {
1661       RunConversion(node, expected_code, expected_msg_substr);
1662     } else {
1663       ExpectStatus(status, expected_code, expected_msg_substr);
1664     }
1665   }
1666 
1667   // Helper method to run both validation and conversion, and check the output
1668   // shapes.
RunValidationAndConversion(const NodeDef & node_def,const Status & status,const char * output_name,const std::vector<std::vector<int>> & exp_out_dims)1669   void RunValidationAndConversion(
1670       const NodeDef& node_def, const Status& status, const char* output_name,
1671       const std::vector<std::vector<int>>& exp_out_dims) {
1672     RunValidationAndConversion(node_def, status.code(),
1673                                status.error_message().c_str(), true);
1674     if (status.ok()) {
1675       // TODO(tfeher): Enable this check in explicit_batch_mode.
1676       // In dynamic shape mode the output dims cannot be tested here. In that
1677       // case we need to wait for the concrate input shapes to be defined (by
1678       // setBindingDimensions before enqueue) before we can check the output
1679       // dims.
1680       if (converter_->use_implicit_batch()) {
1681         for (int i = 0; i < exp_out_dims.size(); i++) {
1682           TRT_TensorOrWeights output;
1683           string name = i == 0 ? output_name : StrCat(output_name, ":", i);
1684           TF_EXPECT_OK(GetTensorOrWeights(name.c_str(), &output));
1685           ASSERT_TRUE(output.is_tensor());
1686           if (!exp_out_dims[i].empty()) {
1687             // Removing batch dim.
1688             auto out_dims = std::vector<int>(exp_out_dims[i].begin() + 1,
1689                                              exp_out_dims[i].end());
1690             VLOG(2) << "Testing output shape for tensor " << name;
1691             ExpectTrtDimsEqualsArray(out_dims,
1692                                      output.tensor()->getDimensions());
1693           }
1694         }
1695       }
1696     }
1697   }
1698 
1699   // Expose quantization_ranges_ for tests
quantization_ranges()1700   std::unordered_map<nvinfer1::ITensor*, float>& quantization_ranges() {
1701     return converter_->quantization_ranges_;
1702   }
1703 
PropagateQuantizationRanges()1704   void PropagateQuantizationRanges() {
1705     converter_->PropagateQuantizationRanges();
1706   }
1707   std::unique_ptr<Converter> converter_;
1708 
1709  private:
1710   Logger logger_;
1711   TrtUniquePtrType<nvinfer1::ICudaEngine> engine_;
1712   cudaStream_t stream_;
1713   std::unique_ptr<Allocator> tensor_buffer_allocator_;
1714   // The scope that contains the graph being converted. Because
1715   // tensor_buffer_allocator_ provides the storage for tensor contents that are
1716   // represented as attributes for graph nodes within scope_,
1717   // tensor_buffer_allocator_ needs to be available when destructing scope_.
1718   // Therefore, scope_ comes after tensor_buffer_allocator_ in the class member
1719   // field list.
1720   Scope scope_;
1721   std::unordered_map<string, Output> node_inputs_;
1722 };
1723 
1724 // General test parameters to be used with ops that take a single input tensor.
1725 struct TestParamBase {
1726   // Concrete input dimensions for the test (including the batch dim)
1727   std::vector<int> input_dims;
1728 
1729   // Dimensions to define an input with PartialTensorShape. This can be used to
1730   // define networks with dynamic input shape. It can be left empty, in that
1731   // case AddTestTensor sets partial shapes that are appropriate to TrtTestMode.
1732   std::vector<int> partial_input_dims;
1733 
1734   // Concrete (static) output dimensions, including batch size as first dim
1735   std::vector<int> expected_output_dims;
1736 
1737   // Parameter vector, has converter specific meaning.
1738   std::vector<int> param;
1739 
1740   // Expected status of conversion (with concrete error message)
1741   Status status;
1742 
1743   // Expected status of BuildAndRun
1744   Status runtime_status;
1745 };
1746 
operator <<(std::ostream & os,const TestParamBase & p)1747 std::ostream& operator<<(std::ostream& os, const TestParamBase& p) {
1748   os << "input_dims" << p.input_dims;
1749   if (!p.partial_input_dims.empty()) {
1750     os << ", partial_input_dims" << p.partial_input_dims;
1751   }
1752   if (!p.expected_output_dims.empty()) {
1753     os << ", exp_out_dims" << p.expected_output_dims;
1754   }
1755   if (!p.param.empty()) {
1756     os << ", param" << p.param;
1757   }
1758   os << ", " << p.status;
1759   return os;
1760 }
1761 
1762 // Parameterized version of OpConverterTest. We have the following parameters:
1763 // 1. TrtTestMode: implicit batch, explicit batch, dynamic shape modes
1764 // 2. DataType of the input TF tensors: DT_FLOAT, DT_HALF, DT_INT32
1765 // 3. TrtPrecisionMode argument for the Converter: FP32, FP16, INT8
1766 // We will introduce subclasses that will be instantiated using different
1767 // combinations of the DataType and TrtPrecisionMode parameters.
1768 class ParameterizedOpConverterTestBase
1769     : public OpConverterTest,
1770       public ::testing::WithParamInterface<
1771           std::tuple<TrtTestMode, DataType, TrtPrecisionMode>> {
1772  public:
ParameterizedOpConverterTestBase()1773   ParameterizedOpConverterTestBase()
1774       : trt_mode_(std::get<0>(GetParam())),
1775         tf_type_(std::get<1>(GetParam())),
1776         converter_precision_(std::get<2>(GetParam())) {
1777     LOG(INFO) << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%";
1778     LOG(INFO) << "tf_type_: " << DebugString(tf_type_);
1779     LOG(INFO) << "trt_mode_: " << DebugString(trt_mode_);
1780     LOG(INFO) << "converter_precision_: " << DebugString(converter_precision_);
1781     LOG(INFO) << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%";
1782   }
1783 
Reset()1784   void Reset() {
1785     OpConverterTest::Reset(converter_precision_, trt_mode_);
1786     input_data_.clear();
1787   }
1788 
1789   // Getters of protected attributes
get_tf_type()1790   DataType get_tf_type() { return tf_type_; }
get_trt_mode()1791   TrtTestMode get_trt_mode() { return trt_mode_; }
get_converter_precision()1792   TrtPrecisionMode get_converter_precision() { return converter_precision_; }
1793 
1794   // Adds an input ITensor for TRT network. Also creates the corresponding TF
1795   // tensor, and stores it in the list of inputs (input_data_).
1796   //
1797   // The TF tensor is always created with concrete static input shape given by
1798   // dims. The ITensor can have static or dynamic shape based on the trt_mode
1799   // attribute. The ITensor shape is set automatically according to the trt_mode
1800   // parameter, unless the user overrides it with an explicit
1801   // partial_input_shape_dims argument.
1802   //
1803   // Parameters:
1804   // - name of the input node
1805   // - dims actual dimensions of the tensor that we will use during the test
1806   //   (including explicit batch dim)
1807   // - values initial values for the TF tensor
1808   // - dtype data type of the tensor
1809   // - partial_input_shape dimensions which can include unknown shapes. This can
1810   //   be empty, in that case the partial_input_shape will be set automatically
1811   //   depending on the trt_mode argument. (This argument also includes explicit
1812   //   batch dim).
1813   // - add_input_status adding ITensor to the network can fail in implicit batch
1814   //   mode if the batch size is inconsistent. Using the add_input_status arg we
1815   //   can test such errors.
1816   //
1817   template <typename T = int>
AddTestTensor(const string & name,const std::vector<int32> & dims,DataType tf_type,const std::vector<T> & values,const std::vector<int32> & partial_input_shape_dims={},Status add_input_status=Status::OK ())1818   void AddTestTensor(const string& name, const std::vector<int32>& dims,
1819                      DataType tf_type, const std::vector<T>& values,
1820                      const std::vector<int32>& partial_input_shape_dims = {},
1821                      Status add_input_status = Status::OK()) {
1822     if (!dims.empty()) {
1823       const auto num_elements = std::accumulate(
1824           std::begin(dims), std::end(dims), 1, std::multiplies<double>());
1825       if (num_elements != values.size()) {
1826         LOG(WARNING) << "Expected Test Tensor Shape: " << DebugString(dims)
1827                      << ", Received Input Tensor: " << DebugString(values);
1828       }
1829     }
1830 
1831     std::vector<int32> partial_shape;
1832     if (!partial_input_shape_dims.empty()) {
1833       partial_shape = partial_input_shape_dims;
1834     } else {
1835       if (trt_mode_ == TrtTestMode::kDynamicShape) {
1836         // In dynamic shape mode we make all dims unknown.
1837         partial_shape = std::vector<int32>(dims.size(), -1);
1838       } else {
1839         // Use static (known) input shapes.
1840         partial_shape = dims;
1841       }
1842     }
1843     nvinfer1::DataType trt_type;
1844     TF_ASSERT_OK(TfTypeToTrtType(tf_type, &trt_type));
1845     AddTestTensorWithTFDims(name, partial_shape, trt_type, add_input_status);
1846     if (!values.empty()) {
1847       VLOG(2) << "Adding test tensor: " << name << " "
1848               << DataTypeString(tf_type);
1849       InputOutputData data{name, AsTensor(values, dims, tf_type)};
1850       VLOG(2) << "Added tensor: " << data.name
1851               << DataTypeString(data.tensor.dtype());
1852       input_data_.push_back(data);
1853     }
1854   }
1855 
1856   // Adds test tensor (same as above) but with the default tf_type defined by
1857   // the test params.
1858   template <typename T = int>
AddTestTensor(const string & name,const std::vector<int32> & dims,const std::vector<T> & values={},const std::vector<int32> & partial_input_shape_dims={})1859   void AddTestTensor(const string& name, const std::vector<int32>& dims,
1860                      const std::vector<T>& values = {},
1861                      const std::vector<int32>& partial_input_shape_dims = {}) {
1862     AddTestTensor<T>(name, dims, tf_type_, values, partial_input_shape_dims);
1863   }
1864 
1865   // Builds and runs the converted network. Checks output tensor shape. Tests
1866   // output values using a matcher. The network can have multiple input and
1867   // output tensors. The inputs are defined by the input_data_ member variable.
BuildAndRun(const string & name,const std::vector<std::vector<int>> & expected_output_dims,const Status & expected_runtime_status,const std::vector<Matcher<std::vector<float>>> & matcher,const std::vector<DataType> & out_tf_types={})1868   void BuildAndRun(const string& name,
1869                    const std::vector<std::vector<int>>& expected_output_dims,
1870                    const Status& expected_runtime_status,
1871                    const std::vector<Matcher<std::vector<float>>>& matcher,
1872                    const std::vector<DataType>& out_tf_types = {}) {
1873     TensorShape shape;
1874     const int n_output = expected_output_dims.size();
1875     ASSERT_EQ(n_output, matcher.size());
1876     DataVec output_data;
1877     for (int i = 0; i < n_output; i++) {
1878       TF_EXPECT_OK(
1879           TensorShapeUtils::MakeShape(expected_output_dims[i], &shape));
1880       string out_name = (i == 0) ? name : StrCat(name, ":", i);
1881       DataType out_tf_type =
1882           out_tf_types.size() > i ? out_tf_types[i] : tf_type_;
1883       InputOutputData data{
1884           out_name, ConstructTensor(shape.num_elements(), 0, out_tf_type)};
1885       output_data.push_back(data);
1886     }
1887     const int batch_size =
1888         input_data_.empty() ? 1 : input_data_[0].tensor.shape().dim_size(0);
1889     Status stat =
1890         OpConverterTest::BuildAndRun(input_data_, &output_data, batch_size);
1891     ASSERT_EQ(expected_runtime_status.ok(), stat.ok())
1892         << "expected status: " << expected_runtime_status
1893         << ", actual status: " << stat;
1894     if (expected_runtime_status.ok() && stat.ok()) {
1895       for (int i = 0; i < n_output; i++) {
1896         // Check the shape of the actual output tensors
1897         TF_EXPECT_OK(
1898             TensorShapeUtils::MakeShape(expected_output_dims[i], &shape));
1899         EXPECT_TRUE(output_data[i].tensor.shape() == shape)
1900             << "Expected shape: " << shape.DebugString() << ", actual shape"
1901             << output_data[i].tensor.shape().DebugString();
1902         EXPECT_THAT(GetDataAsFloat(output_data[i]), matcher[i]);
1903       }
1904     }
1905   }
1906 
1907   // Runs validation and conversion. If conversion is successfull then builds
1908   // the TRT network, executes it and checks the output. Handles multiple output
1909   // tensors.
TestOpConverterMultiOut(const string & name,const NodeDef node_def,const std::vector<std::vector<int>> & expected_output_dims,const Status & expected_conversion_status,const Status & expected_runtime_status,const std::vector<Matcher<std::vector<float>>> & matcher,const std::vector<DataType> & out_tf_type={})1910   void TestOpConverterMultiOut(
1911       const string& name, const NodeDef node_def,
1912       const std::vector<std::vector<int>>& expected_output_dims,
1913       const Status& expected_conversion_status,
1914       const Status& expected_runtime_status,
1915       const std::vector<Matcher<std::vector<float>>>& matcher,
1916       const std::vector<DataType>& out_tf_type = {}) {
1917     RunValidationAndConversion(node_def, expected_conversion_status,
1918                                name.c_str(), expected_output_dims);
1919     if (expected_conversion_status.ok()) {
1920       BuildAndRun(name, expected_output_dims, expected_runtime_status, matcher,
1921                   out_tf_type);
1922     }
1923   }
1924 
1925   // Runs validation and conversion. If conversion is successfull then builds
1926   // the TRT network, executes it and checks the output.
TestOpConverter(const string & name,const NodeDef node_def,const std::vector<int> & expected_output_dims,const Status & expected_conversion_status,const Status & expected_runtime_status,const Matcher<std::vector<float>> & matcher,const std::vector<DataType> & out_tf_types={})1927   void TestOpConverter(const string& name, const NodeDef node_def,
1928                        const std::vector<int>& expected_output_dims,
1929                        const Status& expected_conversion_status,
1930                        const Status& expected_runtime_status,
1931                        const Matcher<std::vector<float>>& matcher,
1932                        const std::vector<DataType>& out_tf_types = {}) {
1933     RunValidationAndConversion(
1934         node_def, expected_conversion_status, name.c_str(),
1935         std::vector<std::vector<int>>({expected_output_dims}));
1936     if (expected_conversion_status.ok()) {
1937       BuildAndRun(name, std::vector<std::vector<int>>({expected_output_dims}),
1938                   expected_runtime_status,
1939                   std::vector<Matcher<std::vector<float>>>({matcher}),
1940                   out_tf_types);
1941     }
1942   }
1943 
1944  protected:
1945   const TrtTestMode trt_mode_;
1946   const DataType tf_type_;
1947   const TrtPrecisionMode converter_precision_;
1948   DataVec input_data_;
1949 };
1950 
1951 // Op converter test in FP32 mode. While for debugging purposes it might make
1952 // sense to run over all possible combinations, normally a subset of them
1953 // would be sufficient:
1954 // - All valid options to TrtTestMode (implicit, explicit, dynamic shape)
1955 // - DataType: is the TF data type of the input tensors. This usually only
1956 //   influences the data type added by Converter::AddInputTensor. We test the
1957 //   valid combinations of input data types in AddAndGetInputs, therefore
1958 //   for most of the OpConverterTest its is sufficient to test for DT_FLOAT.
1959 // - TrtPrecisionMode: valid options are FP32, FP16 and INT8. This influences
1960 //   how TRT handles the precision inside the TRT network, but should not matter
1961 //   for the TF -> TRT conversion. Therefore it should be sufficient to test
1962 //   for FP32.
1963 class OpConverter_FP32_Test : public ParameterizedOpConverterTestBase {};
1964 // Base class for tests that need to be tested for both FP32 and FP16.
1965 class OpConverter_FP32_FP16_Test : public ParameterizedOpConverterTestBase {};
1966 // Base class for tests that need to be tested for FP32, FP16, and INT32
1967 class OpConverter_FP32_FP16_INT32_Test
1968     : public ParameterizedOpConverterTestBase {};
1969 
1970 // Instantiate parameter combinations to OpConverter_<DT_X...>_Test
1971 INSTANTIATE_TEST_CASE_P(
1972     OpConvTestInstantiation, OpConverter_FP32_Test,
1973     ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
1974                        ::testing::Values(DT_FLOAT),
1975                        ::testing::Values(TrtPrecisionMode::FP32)));
1976 
1977 INSTANTIATE_TEST_CASE_P(
1978     OpConvTestInstantiation, OpConverter_FP32_FP16_Test,
1979     ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
1980                        ::testing::Values(DT_FLOAT, DT_HALF),
1981                        ::testing::Values(TrtPrecisionMode::FP32)));
1982 
1983 INSTANTIATE_TEST_CASE_P(
1984     OpConvTestInstantiation, OpConverter_FP32_FP16_INT32_Test,
1985     ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
1986                        ::testing::Values(DT_FLOAT, DT_HALF, DT_INT32),
1987                        ::testing::Values(TrtPrecisionMode::FP32)));
1988 
1989 template <typename T>
CopyTensorElements(const Tensor & tensor,protobuf::RepeatedField<T> * out)1990 void CopyTensorElements(const Tensor& tensor, protobuf::RepeatedField<T>* out) {
1991   out->Clear();
1992   if (tensor.NumElements() == 0) return;
1993 
1994   // TensorProto does not need to have all the elements present and can truncate
1995   // trailing elements with the same value for compressed representation. Such
1996   // elements are derived based on the tensor shape.
1997   const auto flat = tensor.flat<T>();
1998   int64 last_index = 0;
1999   for (int64 i = 0; i < tensor.NumElements(); ++i) {
2000     if (flat(i) != flat(last_index)) {
2001       last_index = i;
2002     }
2003   }
2004 
2005   int num_out_elements = last_index + 1;
2006   out->Reserve(num_out_elements);
2007   out->AddNAlreadyReserved(num_out_elements);
2008   const T* src = flat.data();
2009   T* dst = out->mutable_data();
2010   std::copy(src, src + num_out_elements, dst);
2011 }
2012 
2013 template <DataType dtype, typename InputCType, typename OutputCType>
TestConvertConst(OpConverterTest * test)2014 void TestConvertConst(OpConverterTest* test) {
2015   NodeDef node_def;
2016   node_def.set_name("my_const");
2017   node_def.set_op("Const");
2018 
2019   auto reset_and_test = [&node_def, test](
2020                             const Tensor& tensor, const bool as_tensor_content,
2021                             const std::vector<int>& expected_dims,
2022                             const std::vector<OutputCType>& expected_value) {
2023     test->Reset();
2024 
2025     TensorProto* tensor_attr =
2026         (*node_def.mutable_attr())["value"].mutable_tensor();
2027     tensor_attr->Clear();
2028 
2029     if (as_tensor_content) {
2030       tensor.AsProtoTensorContent(tensor_attr);
2031     } else {
2032       tensor.shape().AsProto(tensor_attr->mutable_tensor_shape());
2033       tensor_attr->set_dtype(tensor.dtype());
2034 
2035       if (tensor.dtype() == DT_FLOAT) {
2036         CopyTensorElements<float>(tensor, tensor_attr->mutable_float_val());
2037       } else if (tensor.dtype() == DT_INT32) {
2038         CopyTensorElements<int32>(tensor, tensor_attr->mutable_int_val());
2039       } else {
2040         tensor.AsProtoField(tensor_attr);
2041       }
2042     }
2043     test->RunValidationAndConversion(node_def);
2044     TRT_TensorOrWeights output;
2045     TF_EXPECT_OK(test->GetTensorOrWeights("my_const", &output));
2046     ValidateWeights(output.weights(), expected_dims, expected_value);
2047   };
2048 
2049   auto& attr = *node_def.mutable_attr();
2050   attr["dtype"].set_type(dtype);
2051   {
2052     // By default empty tensor will pick DT_FLOAT as data type and we fix it
2053     // here.
2054     Tensor t(dtype);  // Empty tensor.
2055     reset_and_test(t, false, {}, {});
2056   }
2057   {
2058     Tensor t = test::AsScalar<InputCType>(12);
2059     reset_and_test(t, false, {1}, {12});
2060     reset_and_test(t, true, {1}, {12});
2061   }
2062   {
2063     Tensor t = test->AsTensor<InputCType>({1, 2});
2064     reset_and_test(t, false, {2}, {1, 2});
2065     reset_and_test(t, true, {2}, {1, 2});
2066   }
2067   {
2068     Tensor t =
2069         test->AsTensor<InputCType>({1, 2, 3, 4, 5, 6}, TensorShape({2, 3}));
2070     reset_and_test(t, false, {2, 3}, {1, 2, 3, 4, 5, 6});
2071     reset_and_test(t, true, {2, 3}, {1, 2, 3, 4, 5, 6});
2072   }
2073   {
2074     // Set all tensor elements to the same value. Such tensors are encoded
2075     // using a single element list in tensor proto.
2076     Tensor t =
2077         test->AsTensor<InputCType>({1, 1, 1, 1, 1, 1}, TensorShape({2, 3}));
2078     reset_and_test(t, false, {2, 3}, {1, 1, 1, 1, 1, 1});
2079     reset_and_test(t, true, {2, 3}, {1, 1, 1, 1, 1, 1});
2080   }
2081   {
2082     // Set trailing tensor elements to the same value. Such tensors are
2083     // encoded by truncating all equal elements except the first one.
2084     Tensor t =
2085         test->AsTensor<InputCType>({2, 2, 1, 1, 1, 1}, TensorShape({2, 3}));
2086     reset_and_test(t, false, {2, 3}, {2, 2, 1, 1, 1, 1});
2087     reset_and_test(t, true, {2, 3}, {2, 2, 1, 1, 1, 1});
2088   }
2089 }
2090 
TEST_F(OpConverterTest, ConvertConst) {
  // A double constant is rejected as an unsupported data type.
  {
    Reset();
    const NodeDef double_const = MakeConstNodeDef<double>("my_const", {});
    RunValidationAndConversion(double_const, error::INVALID_ARGUMENT,
                               "Unsupported tensorflow data type double");
  }
  // An int64 constant holding values that do not fit into int32 is rejected.
  {
    Reset();
    NodeDef int64_const;
    int64_const.set_name("my_const");
    int64_const.set_op("Const");
    auto& attrs = *int64_const.mutable_attr();
    attrs["dtype"].set_type(DT_INT64);

    const Tensor out_of_range =
        AsTensor<int64>({1, std::numeric_limits<int64>::max(), 1, 1, 1,
                         std::numeric_limits<int64>::lowest()},
                        TensorShape({2, 3}));
    TensorProto* value_proto = attrs["value"].mutable_tensor();
    value_proto->Clear();
    out_of_range.AsProtoTensorContent(value_proto);
    RunValidationAndConversion(int64_const, error::INVALID_ARGUMENT,
                               "outside the range of int32");
  }

  // Supported types. The template arguments are <TF dtype, C type used to
  // build the tensor, C type used to compare the converted weights>.
  TestConvertConst<DT_FLOAT, float, float>(this);
  TestConvertConst<DT_INT8, int8, int32>(this);
  TestConvertConst<DT_UINT8, uint8, int32>(this);
  TestConvertConst<DT_INT16, int16, int32>(this);
  TestConvertConst<DT_UINT16, uint16, int32>(this);
  TestConvertConst<DT_INT32, int32, int32>(this);
  TestConvertConst<DT_UINT32, uint32, int32>(this);
  TestConvertConst<DT_INT64, int64, int32>(this);
  TestConvertConst<DT_UINT64, uint64, int32>(this);
}
2125 
2126 template <typename T>
CreateFusedBatchNormOp(DataType tf_type,std::string data_format,bool is_training,float epsilon)2127 NodeDef CreateFusedBatchNormOp(DataType tf_type, std::string data_format,
2128                                bool is_training, float epsilon) {
2129   Scope s = Scope::NewRootScope();
2130   auto x = ops::Placeholder(s.WithOpName("x"), tf_type);
2131   auto scale = ops::Placeholder(s.WithOpName("scale"), tf_type);
2132   auto offset = ops::Placeholder(s.WithOpName("offset"), tf_type);
2133   auto mean = ops::Placeholder(s.WithOpName("mean"), tf_type);
2134   auto variance = ops::Placeholder(s.WithOpName("variance"), tf_type);
2135   typename T::Attrs attrs;
2136   attrs.data_format_ = data_format;
2137   attrs.is_training_ = is_training;
2138   if (epsilon > 0) {
2139     attrs.epsilon_ = epsilon;
2140   } else {
2141     EXPECT_GE(epsilon, 0);
2142   }
2143   return T(s.WithOpName("my_batchnorm"), x, scale, offset, mean, variance,
2144            attrs)
2145       .operation.node()
2146       ->def();
2147 }
2148 
TEST_P(OpConverter_FP32_Test,ConvertFusedBatchNorm)2149 TEST_P(OpConverter_FP32_Test, ConvertFusedBatchNorm) {
2150   using OpFunc = std::function<NodeDef(DataType, std::string, bool, float)>;
2151   std::vector<OpFunc> get_node_def_vec{
2152       CreateFusedBatchNormOp<ops::FusedBatchNorm>,
2153       CreateFusedBatchNormOp<ops::FusedBatchNormV2>,
2154       CreateFusedBatchNormOp<ops::FusedBatchNormV3>};
2155 
2156   struct TestParam {
2157     std::string data_format;
2158     int tensor_input_idx;  // Index of an input that will be provided as tensor.
2159     bool is_training;
2160     float epsilon;
2161     Status conversion_status;
2162     bool keep_channel_unknown;
2163   };
2164 
2165   struct NodeInput {
2166     std::string name;
2167     std::vector<int> dims;
2168     std::vector<float> val;
2169   };
2170   std::vector<NodeInput> node_input{
2171       {"x", {2, 3, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}},
2172       {"scale", {3}, {7, 8, 9}},
2173       {"offset", {3}, {10, 20, 30}},
2174       {"mean", {3}, {1, 2, 3}},
2175       {"variance", {3}, {4, 5, 6}}};
2176 
2177   std::vector<float> expected_output{10.0,      13.495633, 23.574135, 27.148273,
2178                                      37.342354, 41.013527, 30.9738,   34.469433,
2179                                      45.018955, 48.59309,  59.369415, 63.04059};
2180   for (auto get_node_def : get_node_def_vec) {
2181     NodeDef tmp_node_def = get_node_def(tf_type_, "NCHW", true, 0);
2182     std::string op_name = tmp_node_def.op();
2183     std::vector<TestParam> test_param{
2184         {"NHWC", 0, false, 0,
2185          errors::Unimplemented(StrCat(
2186              op_name, " only supports data_format=NCHW, at my_batchnorm"))},
2187         {"NCHW", 0, true, 0,
2188          errors::Unimplemented(StrCat(
2189              op_name, " only supports is_training=false, at my_batchnorm"))},
2190         {"NCHW", 1, false, 0,
2191          errors::Unimplemented(StrCat("The input \"scale\" for ", op_name,
2192                                       " must be a constant, at my_batchnorm"))},
2193         {"NCHW", 2, false, 0,
2194          errors::Unimplemented(StrCat("The input \"offset\" for ", op_name,
2195                                       " must be a constant, at my_batchnorm"))},
2196         {"NCHW", 3, false, 0,
2197          errors::Unimplemented(StrCat("The input \"mean\" for ", op_name,
2198                                       " must be a constant, at my_batchnorm"))},
2199         {"NCHW", 4, false, 0,
2200          errors::Unimplemented(StrCat("The input \"variance\" for ", op_name,
2201                                       " must be a constant, at my_batchnorm"))},
2202         {"NCHW", 0, false, 0.01}};  // The last one is the only test that runs.
2203     if (trt_mode_ == TrtTestMode::kDynamicShape) {
2204       test_param.push_back(
2205           {"NCHW", 0, false, 0.01,
2206            errors::InvalidArgument(
2207                "Channel dimension must be static, at my_batchnorm"),
2208            true});
2209     }
2210     for (auto p : test_param) {
2211       Reset();
2212       NodeDef node_def =
2213           get_node_def(tf_type_, p.data_format, p.is_training, p.epsilon);
2214       for (int i = 0; i < node_input.size(); i++) {
2215         if (i == 0 || i == p.tensor_input_idx) {
2216           // The first input (x) is always added as a tensor, and it hase shape
2217           // NCHW. The other inputs are per channel values (1D, size C).
2218           //
2219           // In implicit batch mode, it is not possible to add any of the 1D
2220           // inputs as a tensor: the first dim is always treated as batch dim in
2221           // implicit batch mode, and that has to agree for all tensors. We have
2222           // two input tensors with shapes NCHW and C and in general N != C.
2223           // The converter already picked up N from the fist input, and reports
2224           // an error when we try to add any other tensors with not matching
2225           // first dim.
2226           //
2227           // This restriction does not apply in explicit batch mode: the tensors
2228           // can have different first dim. The converter still expects that only
2229           // the first arg is a tensor. TODO(tfeher) Check if one can relax this
2230           // restriction.
2231           Status expected_status =
2232               (i != 0 && trt_mode_ == TrtTestMode::kImplicitBatch)
2233                   ? errors::InvalidArgument(
2234                         StrCat("Batch size doesn't match for tensor ",
2235                                node_input[i].name,
2236                                ": Provided batch size does not match "
2237                                "converter batch size: 3 vs 2"))
2238                   : Status::OK();
2239           std::vector<int> partial_input_shape;
2240           if (i == 0 && trt_mode_ == TrtTestMode::kDynamicShape &&
2241               !p.keep_channel_unknown) {
2242             // keep channel dim static (known)
2243             partial_input_shape.resize(4, -1);
2244             partial_input_shape[1] = node_input[i].dims[1];
2245           }
2246           AddTestTensor(node_input[i].name, node_input[i].dims, tf_type_,
2247                         node_input[i].val, partial_input_shape,
2248                         expected_status);
2249 
2250         } else {
2251           AddTestWeights(node_input[i].name, node_input[i].dims,
2252                          node_input[i].val, tf_type_);
2253         }
2254       }
2255       TestOpConverter("my_batchnorm", node_def, node_input[0].dims,
2256                       p.conversion_status, Status::OK(),
2257                       ArrayFloatNear(expected_output));
2258     }
2259   }
2260 }
2261 
TEST_P(OpConverter_FP32_Test,ConvertTranspose)2262 TEST_P(OpConverter_FP32_Test, ConvertTranspose) {
2263   // Get the NodeDef for Transpose.
2264   Scope s = Scope::NewRootScope();
2265   auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
2266   auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
2267   auto transpose = ops::Transpose(s.WithOpName("my_transpose"), input, weights);
2268   const NodeDef& node_def = transpose.operation.node()->def();
2269 
2270   std::vector<TestParamBase> test_params = {
2271       // For the first test we leave param empty. This signals to use a
2272       // input as weight which will be invalid
2273       TestParamBase{{3, 1, 2, 1},
2274                     {},
2275                     {},
2276                     {},
2277                     Status(error::UNIMPLEMENTED,
2278                            "The input \"perm\" for Transpose must be a "
2279                            "constant, at my_transpose")},
2280       TestParamBase{{1, 1, 2, 3},
2281                     {},
2282                     {},
2283                     {0, 1, 2},
2284                     Status(error::INVALID_ARGUMENT,
2285                            "Rank of perm for transpose does not match with "
2286                            "that of the input.")},
2287       // Transpose batch dim
2288       TestParamBase{
2289           {1, 1, 2, 3},
2290           {},
2291           {3, 2, 1, 1},
2292           {3, 2, 1, 0},
2293           (trt_mode_ == TrtTestMode::kImplicitBatch)
2294               ? Status(error::UNIMPLEMENTED,
2295                        "Transpose at batch dimension is not supported")
2296               : Status::OK()},
2297       TestParamBase{{1, 1, 2, 3}, {}, {1, 3, 1, 2}, {0, 3, 1, 2}},
2298   };
2299   if (trt_mode_ == TrtTestMode::kDynamicShape) {
2300     // Dynamic shape tests where some shapes are known
2301     test_params.push_back(TestParamBase{
2302         {1, 1, 2, 3}, {-1, 1, 2, -1}, {1, 3, 1, 2}, {0, 3, 1, 2}});
2303   }
2304   std::vector<float> expected_values{1, 4, 2, 5, 3, 6};
2305   for (auto p : test_params) {
2306     SCOPED_TRACE(p);
2307     Reset();
2308     AddTestTensor("input", p.input_dims, {1, 2, 3, 4, 5, 6},
2309                   p.partial_input_dims);
2310     if (p.param.empty()) {
2311       AddTestTensor("weights", {3});
2312     } else {
2313       AddTestWeights<int32>("weights", {static_cast<int>(p.param.size())},
2314                             p.param);
2315     }
2316     TestOpConverter("my_transpose", node_def, p.expected_output_dims, p.status,
2317                     p.runtime_status, ElementsAreArray(expected_values));
2318   }
2319 }
2320 
TEST_F(OpConverterTest,ConvertReshape)2321 TEST_F(OpConverterTest, ConvertReshape) {
2322   // Get the NodeDef for Reshape.
2323   Scope s = Scope::NewRootScope();
2324   auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
2325   auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
2326   auto reshape = ops::Reshape(s.WithOpName("my_reshape"), input, weights);
2327   const NodeDef& node_def = reshape.operation.node()->def();
2328 
2329   {
2330     // Shape is a tensor, should fail.
2331     Reset();
2332     AddTestTensor("input", {1, 2, 3});
2333     AddTestTensor("weights", {3});
2334     RunValidationAndConversion(
2335         node_def, error::UNIMPLEMENTED,
2336         "The input \"shape\" for Reshape must be a constant, at my_reshape");
2337   }
2338   {
2339     // Reshape to scalar, should fail.
2340     Reset();
2341     AddTestTensor("input", {1, 2, 3});
2342     AddTestWeights<int32>("weights", {0}, {});
2343     RunValidationAndConversion(
2344         node_def, error::UNIMPLEMENTED,
2345         "Reshape to shape=[] is not supported, at my_reshape");
2346   }
2347   {
2348     // Reshape tensor with zero rank to empty tensor, should fail.
2349     Reset();
2350     AddTestTensor("input", {});
2351     AddTestWeights<int32>("weights", {1, 0, 1}, {});
2352     RunValidationAndConversion(
2353         node_def, error::UNIMPLEMENTED,
2354         "Reshape to shape=[] is not supported, at my_reshape");
2355   }
2356 
2357   struct TestParams {
2358     int batch_size;
2359     std::vector<int> tensor_dims;
2360     std::vector<int> shape;
2361   };
2362 
2363   // Reshape at batch dimension, should fail.
2364   std::vector<TestParams> params = {
2365       TestParams{1, {1, 2, 3}, {3, 1, 1, 2}},
2366       TestParams{1, {1, 2, -1}, {-1, 1, 1, 2}},
2367       TestParams{1, {1, 2, 3}, {-1, 1, 1, 2}},
2368       TestParams{-1, {1, 2, 3}, {1, 1, 1, 2}},
2369       TestParams{-1, {-1, 2, 3}, {1, 1, 1, 6}},  // TODO(laigd): it should pass.
2370   };
2371   for (int i = 0; i < params.size(); ++i) {
2372     Reset();
2373     const std::vector<int>& dims = params[i].tensor_dims;
2374     AddTestTensor("input", dims, params[i].batch_size);
2375     AddTestWeights<int32>("weights", {4}, params[i].shape);
2376     RunValidationAndConversion(
2377         node_def, error::UNIMPLEMENTED,
2378         "Reshape on batch dimension is not supported, at my_reshape",
2379         /*should_run_conversion=*/(dims[0] > 0 && dims[1] > 0 && dims[2] > 0));
2380   }
2381 
2382   // Reshape on non batch dimensions, ok.
2383   std::vector<TestParams> ok_params = {
2384       TestParams{-1, {1, 2, 3}, {-1, 1, 3, 2}},
2385       TestParams{1, {1, 2, 3}, {-1, 1, 3, 2}},
2386       TestParams{1, {1, 2, 3}, {1, 1, 3, 2}},
2387       TestParams{2, {1, 2, 3}, {2, 1, 3, 2}},
2388       TestParams{1, {1, 1}, {1}},
2389       TestParams{1, {}, {1, 1}},
2390       TestParams{2, {1, 1}, {2}},
2391       TestParams{2, {}, {2, 1}},
2392   };
2393   for (int i = 0; i < ok_params.size(); ++i) {
2394     const int batch_size = std::max(1, ok_params[i].batch_size);
2395     const auto& shape = ok_params[i].shape;
2396     Reset();
2397     AddTestTensor("input", ok_params[i].tensor_dims, batch_size);
2398     AddTestWeights<int32>("weights", {static_cast<int>(shape.size())}, shape);
2399     RunValidationAndConversion(node_def);
2400 
2401     TRT_TensorOrWeights output;
2402     TF_EXPECT_OK(GetTensorOrWeights("my_reshape", &output));
2403     ASSERT_TRUE(output.is_tensor());
2404     const std::vector<int> expected_output_dims(shape.begin() + 1, shape.end());
2405     const nvinfer1::Dims actual_output_dims = output.tensor()->getDimensions();
2406     ExpectTrtDimsEqualsArray(expected_output_dims, actual_output_dims);
2407 
2408     std::vector<float> input_vec(TrtTensorDimsNumElements(actual_output_dims) *
2409                                  batch_size);
2410     std::iota(input_vec.begin(), input_vec.end(), 1);
2411     const DataVec input_data{{"input", AsTensor<float>(input_vec)}};
2412     DataVec output_data{
2413         {"my_reshape", ConstructTensor<float>(input_vec.size())}};
2414     TF_EXPECT_OK(BuildAndRun(input_data, &output_data, batch_size));
2415     EXPECT_THAT(GetSpanForData<float>(output_data[0]),
2416                 ElementsAreArray(input_vec));
2417   }
2418 }
2419 
TEST_P(OpConverter_FP32_Test,ConvertShape)2420 TEST_P(OpConverter_FP32_Test, ConvertShape) {
2421   // Get the NodeDef for Shape op.
2422   Scope s = Scope::NewRootScope();
2423   auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
2424   auto shape = ops::Shape(s.WithOpName("my_shape"), input);
2425   const NodeDef& node_def = shape.operation.node()->def();
2426 
2427   Status conversion_status =
2428       (trt_mode_ == TrtTestMode::kImplicitBatch)
2429           ? errors::Unimplemented(
2430                 "Shape is only supported for explicit batch mode.")
2431           : Status::OK();
2432   std::vector<TestParamBase> test_params = {
2433 // TODO(b/166274212): Enable the test parameter for TensorRT 7.1.3.
2434 #if !IS_TRT_VERSION_GE(7, 1, 3, 0)
2435     TestParamBase{{1, 2, 3}, {}, {3}, {}, conversion_status},
2436 #endif
2437     // Add input as weight (we use non empty param ({1}) to trigger this).
2438     TestParamBase{{1, 2, 3}, {}, {3}, {1}, conversion_status},
2439   };
2440 
2441   auto input_is_weight = [](const TestParamBase p) { return !p.param.empty(); };
2442   for (auto p : test_params) {
2443     SCOPED_TRACE(p);
2444     Reset();
2445     // The number of elements of the input tensor. We leave it 0 in case we do
2446     // not need to add an input tensor. This happens in explicit batch mode: the
2447     // shape is known at conversion time and therefore the shape is added to the
2448     // network as a constant layer. In this case the single node network that
2449     // we use for the unit test have no actual input tensor when it is converted
2450     // to a TensorRT network.
2451     int n_elements = 0;
2452     if (input_is_weight(p) || trt_mode_ != TrtTestMode::kExplicitBatch) {
2453       // Calculate the number of elements for adding input data.
2454       n_elements = std::accumulate(p.input_dims.begin(), p.input_dims.end(), 1,
2455                                    std::multiplies<int>());
2456     }
2457     std::vector<float> input_val(n_elements, 1);
2458     if (!input_is_weight(p)) {
2459       AddTestTensor("input", p.input_dims, input_val);
2460     } else {
2461       AddTestWeights("input", p.input_dims, input_val, tf_type_);
2462     }
2463     TestOpConverter("my_shape", node_def, p.expected_output_dims, p.status,
2464                     p.runtime_status, ElementsAreArray(p.input_dims),
2465                     {DT_INT32});
2466   }
2467 }
2468 
2469 // Helper function for testing MatMul and BatchMatMul
2470 // get_matmul corresponds to the function used to generate the node. It should
2471 // accept (DataType, transpose_a, transpose_b) as parameters.
TestMatMulHelper(OpConverterTest * test,const std::function<NodeDef (DataType,bool,bool)> & get_matmul,const std::string & op_name)2472 void TestMatMulHelper(
2473     OpConverterTest* test,
2474     const std::function<NodeDef(DataType, bool, bool)>& get_matmul,
2475     const std::string& op_name) {
2476   // HACK: This needs to be done in a better way.
2477   const bool is_batch_matmul = op_name == "BatchMatMul";
2478   {
2479     // Unsupported data type.
2480     test->Reset();
2481     NodeDef node_def = get_matmul(DT_INT32, false, false);
2482     test->AddTestTensor("input", {2}, /*batch_size=*/1,
2483                         nvinfer1::DataType::kINT32);
2484     test->AddTestWeights<int32>("weights", {2, 1}, {3, 5});
2485     test->RunValidationAndConversion(
2486         node_def, error::UNIMPLEMENTED,
2487         StrCat("Data type int32 is not supported for ", op_name,
2488                ", must be one of [float, half], at my_matmul")
2489             .c_str());
2490   }
2491   // OK.
2492   for (bool transpose_a : {false, true}) {
2493     for (bool transpose_b : {false, true}) {
2494       test->Reset();
2495       NodeDef node_def = get_matmul(DT_FLOAT, transpose_a, transpose_b);
2496       test->AddTestTensor("input", {2}, /*batch_size=*/1);
2497       test->AddTestWeights<float>("weights", {2, 2}, {0, 1, 2, 3});
2498       if (is_batch_matmul) {
2499         test->RunValidationAndConversion(
2500             node_def, error::UNIMPLEMENTED,
2501             "TensorRT does not support batched constants.");
2502         continue;
2503       } else if (transpose_a) {
2504         test->RunValidationAndConversion(
2505             node_def, error::INVALID_ARGUMENT,
2506             "Cannot transpose first input if it is a tensor with fewer than 2 "
2507             "non-batch dimensions");
2508         continue;
2509       }
2510       test->RunValidationAndConversion(node_def);
2511       TRT_TensorOrWeights output;
2512       TF_EXPECT_OK(test->GetTensorOrWeights("my_matmul", &output));
2513       ASSERT_TRUE(output.is_tensor());
2514       ExpectTrtDimsEqualsArray({2}, output.tensor()->getDimensions());
2515 
2516       const DataVec input_data{{"input", test->AsTensor<float>({0, 1})}};
2517       DataVec output_data{{"my_matmul", test->ConstructTensor<float>(2)}};
2518       TF_EXPECT_OK(test->BuildAndRun(input_data, &output_data));
2519       if (transpose_b) {
2520         EXPECT_THAT(GetSpanForData<float>(output_data[0]), ElementsAre(1, 3));
2521       } else {
2522         EXPECT_THAT(GetSpanForData<float>(output_data[0]), ElementsAre(2, 3));
2523       }
2524     }
2525   }
2526   // OK, 3D inputs
2527   for (bool transpose_b : {false, true}) {
2528     test->Reset();
2529     NodeDef node_def = get_matmul(DT_FLOAT, /*transpose_a=*/false, transpose_b);
2530     test->AddTestTensor("input", {2}, /*batch_size=*/1);
2531     test->AddTestWeights<float>("weights", {2, 2}, {0, 1, 2, 3});
2532     if (is_batch_matmul) {
2533       test->RunValidationAndConversion(
2534           node_def, error::UNIMPLEMENTED,
2535           "TensorRT does not support batched constants.");
2536       continue;
2537     }
2538     test->RunValidationAndConversion(node_def);
2539     TRT_TensorOrWeights output;
2540     TF_EXPECT_OK(test->GetTensorOrWeights("my_matmul", &output));
2541     ASSERT_TRUE(output.is_tensor());
2542     ExpectTrtDimsEqualsArray({2}, output.tensor()->getDimensions());
2543     const DataVec input_data{{"input", test->AsTensor<float>({0, 1})}};
2544     DataVec output_data{{"my_matmul", test->ConstructTensor<float>(2)}};
2545     TF_EXPECT_OK(test->BuildAndRun(input_data, &output_data));
2546     if (transpose_b) {
2547       EXPECT_THAT(GetSpanForData<float>(output_data[0]), ElementsAre(1, 3));
2548     } else {
2549       EXPECT_THAT(GetSpanForData<float>(output_data[0]), ElementsAre(2, 3));
2550     }
2551   }
2552 }
2553 
2554 template <typename LayerType>
CheckAddedLayers(OpConverterTest * test,bool expect_found)2555 void CheckAddedLayers(OpConverterTest* test, bool expect_found) {
2556   bool layer_found = false;
2557   for (int i = 0; i < test->converter_->network()->getNbLayers(); i++) {
2558     nvinfer1::ILayer* layer = test->converter_->network()->getLayer(i);
2559     if (dynamic_cast<LayerType*>(layer)) {
2560       layer_found = true;
2561     }
2562   }
2563   EXPECT_EQ(expect_found, layer_found);
2564 }
2565 
TEST_F(OpConverterTest,ConvertMatMul)2566 TEST_F(OpConverterTest, ConvertMatMul) {
2567   // Get the NodeDef for MatMul.
2568   auto get_matmul_nodedef = [](DataType dtype, bool transpose_a,
2569                                bool transpose_b) -> NodeDef {
2570     Scope s = Scope::NewRootScope();
2571     auto input = ops::Placeholder(s.WithOpName("input"), dtype);
2572     auto weights = ops::Placeholder(s.WithOpName("weights"), dtype);
2573     const auto matmul_attrs =
2574         ops::MatMul::TransposeA(transpose_a).TransposeB(transpose_b);
2575     auto matmul =
2576         ops::MatMul(s.WithOpName("my_matmul"), input, weights, matmul_attrs);
2577     return matmul.operation.node()->def();
2578   };
2579 
2580   // Additional test cases specific to MatMul
2581   {
2582     // Can only transpose A if it is 2D in TRT
2583     Reset();
2584     NodeDef node_def = get_matmul_nodedef(DT_FLOAT, true, false);
2585     AddTestTensor("input", {2}, /*batch_size=*/1);
2586     AddTestWeights<float>("weights", {2, 2}, {0, 1, 2, 3});
2587     RunValidationAndConversion(
2588         node_def, error::INVALID_ARGUMENT,
2589         "Cannot transpose first input if it is a tensor with fewer than 2 "
2590         "non-batch dimensions.");
2591   }
2592   {
2593     // B must always have 2 non-batch dimensions
2594     Reset();
2595     NodeDef node_def = get_matmul_nodedef(DT_FLOAT, false, false);
2596     AddTestTensor("input", {2}, /*batch_size=*/1);
2597     AddTestTensor("weights", {2}, /*batch_size=*/1);
2598     RunValidationAndConversion(
2599         node_def, error::INVALID_ARGUMENT,
2600         "Second input must either be a constant, or contain at least 2 "
2601         "non-batch dimensions.");
2602   }
2603   {
2604     // We can never transpose weights that are not 2D.
2605     Reset();
2606     NodeDef node_def = get_matmul_nodedef(DT_FLOAT, true, false);
2607     AddTestWeights<float>("input", {1, 1, 2}, {0, 1});
2608     AddTestTensor("weights", {2, 2}, /*batch_size=*/1);
2609     RunValidationAndConversion(
2610         node_def, error::INVALID_ARGUMENT,
2611         "Cannot currently transpose constant input if it is not 2 dimensional");
2612   }
2613   {
2614     // Make sure that INT8 mode uses IFullyConnectedLayer when possible.
2615     Reset(TrtPrecisionMode::INT8);
2616     NodeDef node_def = get_matmul_nodedef(DT_FLOAT, false, false);
2617     AddTestTensor("input", {2, 1, 1});
2618     AddTestWeights<float>("weights", {2, 2}, {0, 1, 2, 3});
2619     RunValidationAndConversion(node_def);
2620     CheckAddedLayers<nvinfer1::IMatrixMultiplyLayer>(this, false);
2621     CheckAddedLayers<nvinfer1::IFullyConnectedLayer>(this, true);
2622   }
2623   {
2624     // Make sure that INT8 mode doesn't try to use IFullyConnectedLayer when not
2625     // compatible. In this case we can't use FC because weights is a tensor.
2626     Reset(TrtPrecisionMode::INT8);
2627     NodeDef node_def = get_matmul_nodedef(DT_FLOAT, false, false);
2628     AddTestTensor("input", {2, 1, 1});
2629     AddTestTensor("weights", {2, 2});
2630     RunValidationAndConversion(node_def);
2631     CheckAddedLayers<nvinfer1::IMatrixMultiplyLayer>(this, true);
2632     CheckAddedLayers<nvinfer1::IFullyConnectedLayer>(this, false);
2633   }
2634   TestMatMulHelper(this, get_matmul_nodedef, "MatMul");
2635 }
2636 
TEST_F(OpConverterTest,ConvertBatchMatMul)2637 TEST_F(OpConverterTest, ConvertBatchMatMul) {
2638   // Get the NodeDef for BatchMatMul.
2639   auto get_batch_matmul_nodedef = [](DataType dtype, bool transpose_a,
2640                                      bool transpose_b) -> NodeDef {
2641     Scope s = Scope::NewRootScope();
2642     auto input = ops::Placeholder(s.WithOpName("input"), dtype);
2643     auto weights = ops::Placeholder(s.WithOpName("weights"), dtype);
2644     const auto matmul_attrs =
2645         ops::BatchMatMul::AdjX(transpose_a).AdjY(transpose_b);
2646     auto matmul = ops::BatchMatMul(s.WithOpName("my_matmul"), input, weights,
2647                                    matmul_attrs);
2648     return matmul.operation.node()->def();
2649   };
2650 
2651   {
2652     // Can't broadcast two tensor inputs of different rank.
2653     Reset();
2654     NodeDef node_def = get_batch_matmul_nodedef(DT_FLOAT, false, false);
2655     AddTestTensor("input", {1, 2, 2}, /*batch_size=*/2);
2656     AddTestTensor("weights", {2}, /*batch_size=*/2);
2657     RunValidationAndConversion(
2658         node_def, error::UNIMPLEMENTED,
2659         "Inputs must have the same rank if they are both tensors.");
2660   }
2661   {
2662     // Make sure that INT8 mode doesn't try to use IFullyConnectedLayer when not
2663     // compatible. In this case we can't use FC because transpose_a is true.
2664     Reset(TrtPrecisionMode::INT8);
2665     NodeDef node_def = get_batch_matmul_nodedef(DT_FLOAT, true, false);
2666     AddTestTensor("input", {1, 2, 2});
2667     AddTestWeights<float>("weights", {2, 2}, {0, 1, 2, 3});
2668     RunValidationAndConversion(node_def);
2669     CheckAddedLayers<nvinfer1::IMatrixMultiplyLayer>(this, true);
2670     CheckAddedLayers<nvinfer1::IFullyConnectedLayer>(this, false);
2671   }
2672 
2673   for (bool transpose_a : {false, true}) {
2674     for (bool transpose_b : {false, true}) {
2675       Reset();
2676       NodeDef node_def =
2677           get_batch_matmul_nodedef(DT_FLOAT, transpose_a, transpose_b);
2678       AddTestTensor("input", {2, 2}, /*batch_size=*/1);
2679       AddTestWeights<float>("weights", {1, 2, 2}, {1, 2, 3, 4});
2680 
2681       RunValidationAndConversion(node_def);
2682       TRT_TensorOrWeights output;
2683       TF_EXPECT_OK(GetTensorOrWeights("my_matmul", &output));
2684       ASSERT_TRUE(output.is_tensor());
2685       ExpectTrtDimsEqualsArray({2, 2}, output.tensor()->getDimensions());
2686       const DataVec input_data{{"input", AsTensor<float>({0, 1, 2, 3})}};
2687       DataVec output_data{{"my_matmul", ConstructTensor<float>(4)}};
2688       TF_EXPECT_OK(BuildAndRun(input_data, &output_data));
2689       if (!transpose_a && !transpose_b) {
2690         EXPECT_THAT(GetSpanForData<float>(output_data[0]),
2691                     ElementsAre(3, 4, 11, 16));
2692       } else if (transpose_a && transpose_b) {
2693         EXPECT_THAT(GetSpanForData<float>(output_data[0]),
2694                     ElementsAre(4, 8, 7, 15));
2695       } else if (transpose_a) {
2696         EXPECT_THAT(GetSpanForData<float>(output_data[0]),
2697                     ElementsAre(6, 8, 10, 14));
2698       } else if (transpose_b) {
2699         EXPECT_THAT(GetSpanForData<float>(output_data[0]),
2700                     ElementsAre(2, 4, 8, 18));
2701       }
2702     }
2703   }
2704 
2705   TestMatMulHelper(this, get_batch_matmul_nodedef, "BatchMatMul");
2706 }
2707 
TEST_P(OpConverter_FP32_FP16_Test,ConvertBiasAdd)2708 TEST_P(OpConverter_FP32_FP16_Test, ConvertBiasAdd) {
2709   // Note that kINT32 is not supported by IScaleLayer, so we don't test
2710   // DT_INT32 type here. DT_FLOAT and DT_HALF are tested.
2711   // Get the NodeDef for BiasAdd.
2712   auto get_biasadd_nodedef = [](const string& data_format,
2713                                 DataType tf_type) -> NodeDef {
2714     Scope s = Scope::NewRootScope();
2715     auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
2716     auto weights = ops::Placeholder(s.WithOpName("weights"), tf_type);
2717     const auto biasadd_attrs = ops::BiasAdd::DataFormat(data_format);
2718     auto biasadd =
2719         ops::BiasAdd(s.WithOpName("my_biasadd"), input, weights, biasadd_attrs);
2720     return biasadd.operation.node()->def();
2721   };
2722 
2723   for (const string& data_format : {"NHWC", "NCHW"}) {
2724     for (const int trt_input_rank : {1, 2, 3, 4}) {
2725       Reset();
2726       NodeDef node_def = get_biasadd_nodedef(data_format, tf_type_);
2727 
2728       // Add input, dims_array will be like {2, 1, ..., 1, 3}
2729       std::vector<int32> dims_array(trt_input_rank + 1, 1);
2730       if (trt_input_rank == 1) {
2731         dims_array[1] = (data_format == "NHWC" ? 3 : 2);
2732       } else {
2733         dims_array[1] = 2;
2734         dims_array[trt_input_rank] = 3;
2735       }
2736       const int num_input = TrtTensorDimsNumElements(GetTestDims(dims_array));
2737       ASSERT_EQ(trt_input_rank > 1 ? 6 : (data_format == "NHWC" ? 3 : 2),
2738                 num_input);
2739       std::vector<float> input_data(num_input, 0);
2740 
2741       AddTestTensor("input", dims_array, input_data);
2742 
2743       const int channel_size = (data_format == "NHWC" ? 3 : 2);
2744       std::vector<float> bias(channel_size);
2745       for (int i = 0; i < channel_size; ++i) {
2746         bias[i] = i + 1;  // bias will be {1, 2, 3, ...}
2747       }
2748       AddTestWeights("weights", {channel_size}, bias, tf_type_);
2749 
2750       // Build and run the engine.
2751       std::vector<float> output_data;
2752 
2753       if (trt_input_rank == 1) {
2754         if (data_format == "NHWC") {
2755           output_data = {1, 2, 3};
2756         } else {
2757           output_data = {1, 2};
2758         }
2759       } else {
2760         if (data_format == "NHWC") {
2761           output_data = {1, 2, 3, 1, 2, 3};
2762         } else {
2763           output_data = {1, 1, 1, 2, 2, 2};
2764         }
2765       }
2766       TestOpConverter("my_biasadd", node_def, dims_array, Status::OK(),
2767                       Status::OK(), ElementsAreArray(output_data));
2768     }
2769   }
2770 }
2771 
2772 template <typename OpType>
GetBinaryOpNodeDef(DataType dtype)2773 NodeDef GetBinaryOpNodeDef(DataType dtype) {
2774   Scope s = Scope::NewRootScope();
2775   auto input_l = ops::Placeholder(s.WithOpName("input1"), dtype);
2776   auto input_r = ops::Placeholder(s.WithOpName("input2"), dtype);
2777   auto op = OpType(s.WithOpName("my_binary"), input_l, input_r);
2778   return op.operation.node()->def();
2779 }
2780 
TEST_P(OpConverter_FP32_FP16_Test,ConvertBinary)2781 TEST_P(OpConverter_FP32_FP16_Test, ConvertBinary) {
2782   {
2783     AttrValue dtype;
2784     dtype.set_type(tf_type_);
2785     // Both inputs are weights.
2786     Reset();
2787     NodeDef node_def =
2788         MakeNodeDef("my_add", "Add", {"weights1", "weights2"}, {{"T", dtype}});
2789     AddTestWeights<float>("weights1", {1}, {1});
2790     AddTestWeights<float>("weights2", {1}, {1});
2791     RunValidationAndConversion(
2792         node_def, error::UNIMPLEMENTED,
2793         "Constant folding is falled back to TensorFlow, binary op received "
2794         "both input as constant at: my_add");
2795   }
2796 
2797   using OpFunc = std::function<NodeDef(DataType)>;
2798   std::map<std::string, std::pair<OpFunc, std::vector<float>>> op_test_info;
2799 #define ADD_OP(name, op, v1, v2, v3, v4, v5, v6, v7, v8) \
2800   op_test_info[name] =                                   \
2801       std::make_pair(GetBinaryOpNodeDef<op>,             \
2802                      std::vector<float>(v1, v2, v3, v4, v5, v6, v7, v8))
2803   ADD_OP("Add", ops::Add, {5, 8, 6, 9, 5, 8, 6, 9});
2804   ADD_OP("AddV2", ops::AddV2, {5, 8, 6, 9, 5, 8, 6, 9});
2805   ADD_OP("Sub", ops::Sub, {1, 4, 0, 3, 1, 4, 0, 3});
2806   ADD_OP("Mul", ops::Mul, {6, 12, 9, 18, 6, 12, 9, 18});
2807   ADD_OP("Div", ops::Div, {1.5, 3, 1, 2, 1.5, 3, 1, 2});
2808   ADD_OP("RealDiv", ops::RealDiv, {1.5, 3, 1, 2, 1.5, 3, 1, 2});
2809   ADD_OP("FloorDiv", ops::FloorDiv, {1, 3, 1, 2, 1, 3, 1, 2});
2810   ADD_OP("Minimum", ops::Minimum, {2, 2, 3, 3, 2, 2, 3, 3});
2811   ADD_OP("Maximum", ops::Maximum, {3, 6, 3, 6, 3, 6, 3, 6});
2812   ADD_OP("Pow", ops::Pow, {9, 36, 27, 216, 9, 36, 27, 216});
2813 #undef ADD_OP
2814   // Add all ops supported by ConvertBinary.
2815   auto* supported_ops = BinaryOperationMap();
2816   // Test combinations of tensor vs weight inputs (except when both inputs are
2817   // weights).
2818   for (const bool operand_1_is_tensor : {true, false}) {
2819     for (const bool operand_2_is_tensor : {true, false}) {
2820       if (!operand_1_is_tensor && !operand_2_is_tensor) continue;
2821       for (auto& iter : *supported_ops) {
2822         string op_name = iter.first;
2823         SCOPED_TRACE(StrCat(op_name, "_", operand_1_is_tensor ? "T" : "W",
2824                             operand_2_is_tensor ? "T" : "W"));
2825         Reset();
2826         if (!op_test_info.count(op_name)) {
2827           FAIL() << "Binary op test map does not contain op " << op_name;
2828         }
2829         NodeDef node_def = op_test_info[op_name].first(tf_type_);
2830         std::vector<std::string> input_names;
2831         std::vector<std::vector<int>> input_dims;
2832         std::vector<std::vector<float>> input_values;
2833         if (operand_1_is_tensor) {
2834           AddTestTensor("input1", {2, 1, 2}, {3, 6, 3, 6});
2835         } else {
2836           AddTestWeights("input1", {1, 2}, std::vector<float>{3, 6}, tf_type_);
2837         }
2838         if (operand_2_is_tensor) {
2839           AddTestTensor("input2", {2, 2, 1}, {2, 3, 2, 3});
2840         } else {
2841           AddTestWeights("input2", {2, 1}, std::vector<float>{2, 3}, tf_type_);
2842         }
2843         TestOpConverter("my_binary", node_def, {2, 2, 2}, Status::OK(),
2844                         Status::OK(),
2845                         ElementsAreArray(op_test_info[op_name].second));
2846       }
2847     }
2848   }
2849 }
2850 
GetAddNNodeDef(const std::vector<string> & input_names,DataType dtype)2851 NodeDef GetAddNNodeDef(const std::vector<string>& input_names, DataType dtype) {
2852   Scope s = Scope::NewRootScope();
2853   OutputList inputs;
2854   for (const string& name : input_names) {
2855     inputs.push_back(ops::Placeholder(s.WithOpName(name), dtype));
2856   }
2857   auto op = ops::AddN(s.WithOpName("my_addn"), inputs);
2858   return op.operation.node()->def();
2859 }
2860 
2861 struct AddNTestParams {
2862   std::vector<float> input_values;
2863   std::vector<string> input_names;
2864   std::vector<int> dimensions;
2865   std::vector<float> expected_output;
2866   Status status;
2867 };
2868 
TestAddN(ParameterizedOpConverterTestBase * test,AddNTestParams & p)2869 void TestAddN(ParameterizedOpConverterTestBase* test, AddNTestParams& p) {
2870   // All inputs are tensors.
2871   test->Reset();
2872   const NodeDef node_def = GetAddNNodeDef(p.input_names, test->get_tf_type());
2873 
2874   if (p.input_values.size() % p.input_names.size() != 0) {
2875     LOG(ERROR) << "The number of input values: `" << p.input_values.size()
2876                << "` is not a multiple of the number of inputs: `"
2877                << p.input_names.size() << "`";
2878     ASSERT_TRUE(false);
2879   }
2880 
2881   DataVec input_data;
2882   int input_offset = 0;
2883   const int window_size = p.input_values.size() / p.input_names.size();
2884   for (const string& name : p.input_names) {
2885     std::vector<float>::const_iterator start_pos =
2886         p.input_values.begin() + input_offset;
2887     std::vector<float>::const_iterator end_pos = start_pos + window_size;
2888     std::vector<float> sub_input_val(start_pos, end_pos);
2889     input_offset += window_size;
2890 
2891     test->AddTestTensor(name, p.dimensions, test->get_tf_type(), sub_input_val);
2892   }
2893 
2894   test->TestOpConverter("my_addn", node_def, p.dimensions,
2895                         /*expected_conversion_status=*/p.status,
2896                         /*expected_runtime_status=*/p.status,
2897                         /*matcher=*/ElementsAreArray(p.expected_output),
2898                         /*out_tf_types=*/{test->get_tf_type()});
2899 }
2900 
TEST_P(OpConverter_FP32_FP16_Test,ConvertAddN)2901 TEST_P(OpConverter_FP32_FP16_Test, ConvertAddN) {
2902   {
2903     // Weights with batch dim that is not 1.
2904     Reset();
2905     const NodeDef node_def = GetAddNNodeDef({"tensor", "weights"}, tf_type_);
2906     AddTestTensor("tensor", /*dims=*/{1, 2});
2907     AddTestWeights<float>("weights", {2, 1, 2}, {0, 1, 2, 3});
2908     RunValidationAndConversion(
2909         node_def, error::INVALID_ARGUMENT,
2910         "Weights input to AddN is required to have batch dimension 1.");
2911   }
2912 
2913   const std::vector<float> common_input = InitTestVector<float>(6);
2914 
2915   std::vector<AddNTestParams> params = {
2916       {/*input_values=*/common_input,
2917        /*input_names=*/{"inp1", "inp2", "inp3"},
2918        /*dimensions=*/{1, 1, 2, 1, 1},
2919        /*expected_output=*/{6, 9},
2920        /*status=*/Status::OK()},
2921       {/*input_values=*/common_input,
2922        /*input_names=*/{"inp1", "inp2"},
2923        /*dimensions=*/{1, 1, 3, 1, 1},
2924        /*expected_output=*/{3, 5, 7},
2925        /*status=*/Status::OK()},
2926       {/*input_values=*/common_input,
2927        /*input_names=*/{"inp1", "inp2", "inp3"},
2928        /*dimensions=*/{1, 2, 1, 1},
2929        /*expected_output=*/{6, 9},
2930        /*status=*/Status::OK()},
2931       {/*input_values=*/common_input,
2932        /*input_names=*/{"inp1", "inp2"},
2933        /*dimensions=*/{1, 1, 3, 1},
2934        /*expected_output=*/{3, 5, 7},
2935        /*status=*/Status::OK()},
2936       {/*input_values=*/common_input,
2937        /*input_names=*/{"inp1", "inp2", "inp3"},
2938        /*dimensions=*/{1, 2, 1},
2939        /*expected_output=*/{6, 9},
2940        /*status=*/Status::OK()},
2941       {/*input_values=*/common_input,
2942        /*input_names=*/{"inp1", "inp2"},
2943        /*dimensions=*/{1, 1, 3},
2944        /*expected_output=*/{3, 5, 7},
2945        /*status=*/Status::OK()},
2946       {/*input_value=*/common_input,
2947        /*input_names=*/{"inp1", "inp2", "inp3"},
2948        /*dimensions=*/{2, 1},
2949        /*expected_output=*/{6, 9},
2950        /*status=*/Status::OK()},
2951       {/*input_values=*/common_input,
2952        /*input_names=*/{"inp1", "inp2"},
2953        /*dimensions=*/{1, 3},
2954        /*expected_output=*/{3, 5, 7},
2955        /*status=*/Status::OK()},
2956       {/*input_values=*/common_input,
2957        /*input_names=*/{"inp1", "inp2", "inp3"},
2958        /*dimensions=*/{2},
2959        /*expected_output=*/{6, 9},
2960        /*status=*/Status::OK()},
2961       {/*input_values=*/common_input,
2962        /*input_names=*/{"inp1", "inp2"},
2963        /*dimensions=*/{3},
2964        /*expected_output=*/{3, 5, 7},
2965        /*status=*/Status::OK()},
2966       {/*input_values=*/common_input,
2967        /*input_names=*/{"inp1", "inp2", "inp3", "inp4", "inp5", "inp6"},
2968        /*dimensions=*/{1},
2969        /*expected_output=*/{15},
2970        /*status=*/Status::OK()},
2971   };
2972 
2973   for (auto p : params) {
2974     TestAddN(this, p);
2975   }
2976 }
2977 
TEST_F(OpConverterTest,ConvertQuantize)2978 TEST_F(OpConverterTest, ConvertQuantize) {
2979   {
2980     // FakeQuantWithMinMaxArgs attributes are empty, should fail.
2981     Reset(TrtPrecisionMode::INT8);
2982     NodeDef node_def =
2983         MakeNodeDef("my_quantize", "FakeQuantWithMinMaxArgs", {"input"});
2984     AddTestTensor("input", {1, 2, 3});
2985     RunValidationAndConversion(
2986         node_def, error::INVALID_ARGUMENT,
2987         "Min or max attribute not found for FakeQuantWithMinMaxArgs "
2988         "at my_quantize");
2989   }
2990   {
2991     // FakeQuantWithMinMaxArgs ranges set via attributes, ok.
2992     Reset(TrtPrecisionMode::INT8);
2993     Scope s = Scope::NewRootScope();
2994     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
2995     auto quantize_attrs = ops::FakeQuantWithMinMaxArgs::Min(-6.0f).Max(6.0f);
2996     auto quantize = ops::FakeQuantWithMinMaxArgs(s.WithOpName("my_quantize"),
2997                                                  input, quantize_attrs);
2998     const NodeDef& node_def = quantize.operation.node()->def();
2999     AddTestTensor("input", {1, 2, 3});
3000     RunValidationAndConversion(node_def);
3001     TRT_TensorOrWeights output;
3002     TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
3003     ASSERT_TRUE(output.is_tensor());
3004     auto ranges = quantization_ranges();
3005     EXPECT_EQ(1, ranges.count(output.tensor()));
3006     EXPECT_EQ(6.0f, ranges[output.tensor()]);
3007   }
3008   {
3009     // FakeQuantWithMinMaxVars ranges set via inputs, ok.
3010     Reset(TrtPrecisionMode::INT8);
3011     Scope s = Scope::NewRootScope();
3012     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3013     auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
3014     auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
3015     auto quantize = ops::FakeQuantWithMinMaxVars(
3016         s.WithOpName("my_quantize"), input, weights_min, weights_max);
3017     const NodeDef& node_def = quantize.operation.node()->def();
3018     AddTestTensor("input", {1, 2, 3});
3019     AddTestWeights<float>("weights_min", {1}, {-6.0f});
3020     AddTestWeights<float>("weights_max", {1}, {6.0f});
3021     RunValidationAndConversion(node_def);
3022     TRT_TensorOrWeights output;
3023     TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
3024     ASSERT_TRUE(output.is_tensor());
3025     auto ranges = quantization_ranges();
3026     EXPECT_EQ(1, ranges.count(output.tensor()));
3027     EXPECT_EQ(6.0f, ranges[output.tensor()]);
3028   }
3029   {
3030     // QuantizeAndDequantizeV2 ranges set via inputs, ok.
3031     Reset(TrtPrecisionMode::INT8);
3032     Scope s = Scope::NewRootScope();
3033     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3034     auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
3035     auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
3036     auto quantize = ops::QuantizeAndDequantizeV2(
3037         s.WithOpName("my_quantize"), input, weights_min, weights_max);
3038     const NodeDef& node_def = quantize.operation.node()->def();
3039     AddTestTensor("input", {1, 2, 3});
3040     AddTestWeights<float>("weights_min", {1}, {-6.0f});
3041     AddTestWeights<float>("weights_max", {1}, {6.0f});
3042     RunValidationAndConversion(node_def);
3043     TRT_TensorOrWeights output;
3044     TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
3045     ASSERT_TRUE(output.is_tensor());
3046     auto ranges = quantization_ranges();
3047     EXPECT_EQ(1, ranges.count(output.tensor()));
3048     EXPECT_EQ(6.0f, ranges[output.tensor()]);
3049   }
3050   {
3051     // QuantizeAndDequantizeV2 Range inputs are tensors, should fail.
3052     Reset(TrtPrecisionMode::INT8);
3053     Scope s = Scope::NewRootScope();
3054     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3055     auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
3056     auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
3057     auto quantize = ops::QuantizeAndDequantizeV2(
3058         s.WithOpName("my_quantize"), input, weights_min, weights_max);
3059     const NodeDef& node_def = quantize.operation.node()->def();
3060     AddTestTensor("input", {1, 2, 3});
3061     AddTestTensor("weights_min", {1});
3062     AddTestTensor("weights_max", {1});
3063     RunValidationAndConversion(
3064         node_def, error::UNIMPLEMENTED,
3065         "The input \"input_min\" for QuantizeAndDequantizeV2 must be a constant"
3066         ", at my_quantize");
3067   }
3068   {
3069     // QuantizeAndDequantizeV3 ranges set via inputs, ok.
3070     Reset(TrtPrecisionMode::INT8);
3071     Scope s = Scope::NewRootScope();
3072     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3073     auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
3074     auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
3075     auto num_bits = ops::Placeholder(s.WithOpName("num_bits"), DT_INT32);
3076     auto quantize = ops::QuantizeAndDequantizeV3(
3077         s.WithOpName("my_quantize"), input, weights_min, weights_max, num_bits);
3078     const NodeDef& node_def = quantize.operation.node()->def();
3079     AddTestTensor("input", {1, 2, 3});
3080     AddTestWeights<float>("weights_min", {1}, {-6.0f});
3081     AddTestWeights<float>("weights_max", {1}, {6.0f});
3082     AddTestWeights<int>("num_bits", {1}, {8});
3083     RunValidationAndConversion(node_def);
3084     TRT_TensorOrWeights output;
3085     TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
3086     ASSERT_TRUE(output.is_tensor());
3087     auto ranges = quantization_ranges();
3088     EXPECT_EQ(1, ranges.count(output.tensor()));
3089     EXPECT_EQ(6.0f, ranges[output.tensor()]);
3090   }
3091 }
3092 
TEST_P(OpConverter_FP32_FP16_Test, ConvertSquare) {
  // Produces a Square NodeDef named "my_square" fed by placeholder "input" of
  // the fixture's current TF type.
  auto make_square_def = [this]() -> NodeDef {
    Scope root = Scope::NewRootScope();
    auto x = ops::Placeholder(root.WithOpName("input"), tf_type_);
    auto squared = ops::Square(root.WithOpName("my_square"), x);
    return squared.operation.node()->def();
  };

  {
    // Feeding weights instead of a tensor must be rejected.
    Reset();
    const NodeDef square_def = make_square_def();
    AddTestWeights("input", {1, 2, 3}, {1, 2, 3, 4, -5, 6}, tf_type_);
    RunValidationAndConversion(
        square_def, error::UNIMPLEMENTED,
        "The input \"x\" for Square must be a tensor, at my_square");
  }

  Reset();
  const NodeDef square_def = make_square_def();

  // Inputs are the integers -9..10; the expected outputs are their squares.
  constexpr int kNumValues = 20;
  std::vector<float> values;
  std::vector<float> squares;
  values.reserve(kNumValues);
  squares.reserve(kNumValues);
  for (int i = 0; i < kNumValues; ++i) {
    const float v = (i - 9);
    values.push_back(v);
    squares.push_back(v * v);
  }
  AddTestTensor("input", {1, 1, 20}, tf_type_, values);

  TestOpConverter("my_square", square_def, {1, 1, 20}, Status::OK(),
                  Status::OK(), ArrayFloatNear(squares, 0));
}
3128 
3129 #if IS_TRT_VERSION_GE(7, 1, 3, 0)
TEST_P(OpConverter_FP32_Test,ConvertCombinedNMS)3130 TEST_P(OpConverter_FP32_Test, ConvertCombinedNMS) {
3131   // Get the NodeDef for CombinedNMS.
3132   auto get_nms_nodedef = [](DataType tf_type, bool clip_boxes = true,
3133                             bool pad_per_class = false) -> NodeDef {
3134     Scope s = Scope::NewRootScope();
3135     auto boxes_tensor = ops::Placeholder(s.WithOpName("boxes"), tf_type);
3136     auto scores_tensor = ops::Placeholder(s.WithOpName("scores"), tf_type);
3137     auto max_output_size_per_class =
3138         ops::Placeholder(s.WithOpName("max_output_size_per_class"), DT_INT32);
3139     auto max_total_size =
3140         ops::Placeholder(s.WithOpName("max_total_size"), DT_INT32);
3141     auto iou_threshold =
3142         ops::Placeholder(s.WithOpName("iou_threshold"), tf_type);
3143     auto score_threshold =
3144         ops::Placeholder(s.WithOpName("score_threshold"), tf_type);
3145     auto nms_attrs = ops::CombinedNonMaxSuppression::Attrs()
3146                          .PadPerClass(pad_per_class)
3147                          .ClipBoxes(clip_boxes);
3148 
3149     auto nms_op = ops::CombinedNonMaxSuppression(
3150         s.WithOpName("my_nms"), boxes_tensor, scores_tensor,
3151         max_output_size_per_class, max_total_size, iou_threshold,
3152         score_threshold, nms_attrs);
3153     return nms_op.operation.node()->def();
3154   };
3155 
3156   struct TestParams {
3157     const std::string description;
3158     const std::vector<int32> boxes_tensor_dims;
3159     const std::vector<int32> scores_tensor_dims;
3160     const std::vector<float> boxes_values;
3161     const std::vector<float> scores_values;
3162     const int32 max_output_size_per_class;
3163     const int32 max_total_size;
3164     const float iou_threshold;
3165     const float score_threshold;
3166     bool pad_per_class;
3167     bool clip_boxes;
3168     const std::vector<std::vector<int32>> expected_output_dims;
3169     const std::vector<float> exp_boxes;
3170     const std::vector<float> exp_scores;
3171     const std::vector<float> exp_classes;
3172     const std::vector<float> exp_num_detections;
3173     Status conversion_status;
3174     Status runtime_status;
3175   };
3176 
3177   Status conv_status =
3178       trt_mode_ == TrtTestMode::kDynamicShape
3179           ? errors::Unimplemented(
3180                 "TensorRT BatchedNMS Plugin requires input with static shape")
3181           : Status::OK();
3182 
3183   std::vector<TestParams> params = {
3184       // TODO(aaroey): there is a bug in TRT's CombinedNonMaxSuppression
3185       // implementation that, the extra output classes that are outside of the
3186       // range specified by valid_detections[i] are not zeros but -1s.
3187       TestParams{
3188           "Test 1: Original test",
3189           {1, 1, 3, 4},                                      // boxes dims
3190           {1, 1, 3},                                         // scores dims
3191           {0, 0, 0.3, 0.4, 0, 0, 0.3, 0.4, 0, 0, 0.3, 0.4},  // boxes values
3192           {0.4, 0.7, 0.3},                                   // scores values
3193           3,                                 // max_output_size_per_class
3194           2,                                 // max_total_size
3195           .5f,                               // IOU threshold
3196           0,                                 // score_threshold
3197           false,                             // pad_per_class
3198           true,                              // clip_boxes
3199           {{1, 2, 4},                        // expected_nmsed_boxes_dims
3200            {1, 2},                           // expected_nmsed_scores_dims
3201            {1, 2},                           // expected_nmsed_classes_dims
3202            {1}},                             // expected_valid_detections_dims
3203           {0, 0, 0.3, 0.4, 0, 0, 0.3, 0.4},  // exp_boxes_values
3204           {0.7, 0.4},                        // exp_scores
3205           {1, 0},                            // exp_classes
3206           {2},                               // exp_num_detections
3207           conv_status},
3208       // Test with clip_boxes = False
3209       TestParams{
3210           "Test 2: clip_boxes",
3211           {1, 5, 1, 4},  // boxes dims
3212           {1, 5, 1},     // scores dims
3213           // boxes values:
3214           {0, 0, 5, 10, 0, 4, 5, 14, 8, 0, 12, 4, 6, 2, 10, 6, 8, 9, 11, 12},
3215           {5, 4, 3, 2, 1},  // scores values
3216           4,                // max_output_size_per_class
3217           4,                // max_total_size
3218           0.1,              // IOU threshold
3219           0,                // score threshold
3220           false,            // pad_per_class
3221           false,            // clip_boxes
3222           {{1, 4, 4},       // expected nmsed_boxes_dims
3223            {1, 4},          // expected nmsed_scores_dims
3224            {1, 4},          // expected_nmsed_classes_dims
3225            {1}},            // expected_valid_detections_dims
3226                             // exp_boxes_values:
3227           {0, 0, 5, 10, 8, 0, 12, 4, 8, 9, 11, 12, 0, 0, 0, 0},
3228           {5, 3, 1, 0},   // exp_scores
3229           {0, 0, 0, -1},  // exp_classes
3230           {3},            // exp_num_detections
3231           conv_status},
3232       // Test with clip_boxes = False, and nonzero score threshold
3233       TestParams{
3234           "Test 3: score threshold",
3235           {1, 5, 1, 4},  // boxes dims
3236           {1, 5, 1},     // scores dims
3237           // boxes values:
3238           {0, 0, 5, 10, 0, 4, 5, 14, 8, 0, 12, 4, 6, 2, 10, 6, 8, 9, 11, 12},
3239           {5, 4, 3, 2, 1},  // scores values
3240           4,                // max_output_size_per_class
3241           4,                // max_total_size
3242           0.1,              // IOU threshold
3243           2,                // score threshold
3244           false,            // pad_per_class
3245           false,            // clip_boxes
3246           {{1, 4, 4},       // expected nmsed_boxes_dims
3247            {1, 4},          // expected nmsed_scores_dims
3248            {1, 4},          // expected_nmsed_classes_dims
3249            {1}},            // expected_valid_detections_dims
3250                             // exp_boxes_values:
3251           {0, 0, 5, 10, 8, 0, 12, 4, 0, 0, 0, 0, 0, 0, 0, 0},
3252           {5, 3, 0, 0},    // exp_scores
3253           {0, 0, -1, -1},  // exp_classes
3254           {2},             // exp_num_detections
3255           conv_status},
3256       // Test where the boxes are defined as with max value first for the box
3257       // coordinates. This test fails before TRT 7.1.3.
3258       TestParams{
3259           "Test 4: max coord first",
3260           {1, 5, 1, 4},  // boxes dims
3261           {1, 5, 1},     // scores dims
3262                          // boxes values:
3263           {5, 10, 0, 0, 5, 14, 0, 4, 12, 4, 8, 0, 10, 6, 6, 2, 11, 12, 8, 9},
3264           {5, 4, 3, 2, 1},  // scores values
3265           4,                // max_output_size_per_class
3266           4,                // max_total_size
3267           0.1,              // IOU threshold
3268           0,                // score threshold
3269           false,            // pad_per_class
3270           false,            // clip_boxes
3271           {{1, 4, 4},       // expected nmsed_boxes_dims
3272            {1, 4},          // expected nmsed_scores_dims
3273            {1, 4},          // expected_nmsed_classes_dims
3274            {1}},            // expected_valid_detections_dims
3275                             // exp_boxes_values:
3276           {5, 10, 0, 0, 12, 4, 8, 0, 11, 12, 8, 9, 0, 0, 0, 0},
3277           {5, 3, 1, 0},   // exp_scores
3278           {0, 0, 0, -1},  // exp_classes
3279           {3},            // exp_num_detections
3280           conv_status},
3281   };
3282 
3283   for (auto p : params) {
3284     Reset();
3285     SCOPED_TRACE(p.description);
3286     AddTestTensor("boxes", p.boxes_tensor_dims, p.boxes_values);
3287     AddTestTensor("scores", p.scores_tensor_dims, p.scores_values);
3288     AddTestWeights<int32>("max_output_size_per_class", {1},
3289                           {p.max_output_size_per_class});
3290     AddTestWeights<int32>("max_total_size", {1}, {p.max_total_size});
3291     AddTestWeights<float>("iou_threshold", {1}, {p.iou_threshold}, tf_type_);
3292     AddTestWeights<float>("score_threshold", {1}, {p.score_threshold},
3293                           tf_type_);
3294 
3295     auto node_def = get_nms_nodedef(tf_type_, p.clip_boxes, p.pad_per_class);
3296 
3297     TestOpConverterMultiOut("my_nms", node_def, p.expected_output_dims,
3298                             p.conversion_status, p.runtime_status,
3299                             {
3300                                 ElementsAreArray(p.exp_boxes),
3301                                 ElementsAreArray(p.exp_scores),
3302                                 ElementsAreArray(p.exp_classes),
3303                                 ElementsAreArray(p.exp_num_detections),
3304                             },
3305                             {tf_type_, tf_type_, tf_type_, DT_INT32});
3306   }
3307 }
3308 #endif  // IS_TRT_VERSION_GE(7, 1, 3, 0)
3309 
3310 template <typename T>
CreateUnaryOp(DataType tf_type)3311 NodeDef CreateUnaryOp(DataType tf_type) {
3312   Scope s = Scope::NewRootScope();
3313   auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
3314   return T(s.WithOpName("my_unary"), input).operation.node()->def();
3315 }
3316 
3317 constexpr float kLeakyReluAlpha = 0.2f;
3318 template <>
CreateUnaryOp(DataType tf_type)3319 NodeDef CreateUnaryOp<ops::internal::LeakyRelu>(DataType tf_type) {
3320   Scope s = Scope::NewRootScope();
3321   auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
3322   return ops::internal::LeakyRelu(
3323              s.WithOpName("my_unary"), input,
3324              ops::internal::LeakyRelu::Alpha(kLeakyReluAlpha))
3325       .operation.node()
3326       ->def();
3327 }
3328 
TEST_P(OpConverter_FP32_Test,ConvertActivation)3329 TEST_P(OpConverter_FP32_Test, ConvertActivation) {
3330   {
3331     // Input is weights, should fail.
3332     Reset();
3333     const NodeDef& node_def = CreateUnaryOp<ops::Relu>(tf_type_);
3334     AddTestWeights<int32>("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
3335     RunValidationAndConversion(
3336         node_def, error::UNIMPLEMENTED,
3337         "The input \"input\" for Relu must be a tensor, at my_unary");
3338   }
3339 
3340   constexpr float kSeluAlpha = 1.7580993408473768599402175208123f;
3341   constexpr float kSeluScale = 1.0507009873554804934193349852946f;
3342   using OpFunc = std::function<NodeDef(DataType)>;
3343   using ValFunc = float (*)(float);
3344   std::map<std::string, std::pair<OpFunc, ValFunc>> op_map;
3345 
3346 #define ADD_OP(name, op, compute) \
3347   op_map[name] = std::make_pair(CreateUnaryOp<op>, compute)
3348   ADD_OP("LeakyRelu", ops::internal::LeakyRelu,
3349          [](float x) { return (x > 0.0f) ? x : x * kLeakyReluAlpha; });
3350   ADD_OP("Relu", ops::Relu, [](float x) { return (x > 0.0f) ? x : 0.0f; });
3351   ADD_OP("Relu6", ops::Relu6,
3352          [](float x) { return std::min(std::max(x, 0.0f), 6.0f); });
3353   ADD_OP("Sigmoid", ops::Sigmoid,
3354          [](float x) { return 1.0f / (1.0f + std::exp(-x)); });
3355   ADD_OP("Tanh", ops::Tanh, static_cast<ValFunc>(std::tanh));
3356   ADD_OP("Elu", ops::Elu,
3357          [](float x) { return (x > 0.0f) ? x : std::exp(x) - 1; });
3358   ADD_OP("Selu", ops::Selu, [](float x) {
3359     return (x > 0.0f) ? kSeluScale * x
3360                       : kSeluScale * kSeluAlpha * (std::exp(x) - 1);
3361   });
3362   ADD_OP("Softsign", ops::Softsign,
3363          [](float x) { return x / (std::abs(x) + 1); });
3364   ADD_OP("Softplus", ops::Softplus,
3365          [](float x) { return std::log(std::exp(x) + 1); });
3366 #undef ADD_OP
3367 
3368   // Get list of ops to test.
3369   std::vector<string> ops_to_test;
3370   // Add all ops supported by ConvertActivation.
3371   auto* map = ActivationTypeMap();
3372   ops_to_test.reserve(map->size());
3373   for (auto& pair : *map) {
3374     ops_to_test.push_back(pair.first);
3375   }
3376   // Add other activation ops to test.
3377   ops_to_test.push_back("Relu6");
3378   ops_to_test.push_back("LeakyRelu");
3379   auto p = TestParamBase{
3380       {1, 1, 2, 3},  // input dims
3381       {},            // input partial dims
3382       {1, 1, 2, 3},  // expected output dims
3383   };
3384   // Ok.
3385   for (const string& op_name : ops_to_test) {
3386     if (!op_map.count(op_name)) {
3387       FAIL() << "Activation op test map does not contain op " << op_name;
3388     }
3389     Reset();
3390     NodeDef node_def = op_map[op_name].first(tf_type_);
3391     const std::vector<float> input = {-100, -2, -1, 0, 1, 88};
3392     AddTestTensor("input", p.input_dims, input);
3393 
3394     // std::exp in Softplus will overflow for input > 88
3395     std::vector<float> output_values;
3396     std::transform(input.begin(), input.end(),
3397                    std::back_inserter(output_values), op_map[op_name].second);
3398     TestOpConverter("my_unary", node_def, p.expected_output_dims, Status::OK(),
3399                     Status::OK(), ArrayFloatNear(output_values, 0, false));
3400 
3401     TRT_TensorOrWeights output;
3402     TF_EXPECT_OK(GetTensorOrWeights("my_unary", &output));
3403 
3404     // Certain activations should set quantization range automatically.
3405     auto ranges = quantization_ranges();
3406     if (op_name == "Relu6") {
3407       EXPECT_EQ(ranges[output.tensor()], 6.0f);
3408     } else if (op_name == "Sigmoid" || op_name == "Tanh" ||
3409                op_name == "Softsign") {
3410       EXPECT_EQ(ranges[output.tensor()], 1.0f);
3411     }
3412   }
3413 }
3414 
TEST_P(OpConverter_FP32_Test,ConvertExpandDims)3415 TEST_P(OpConverter_FP32_Test, ConvertExpandDims) {
3416   // Get the NodeDef for ExpandDims.
3417   Scope s = Scope::NewRootScope();
3418   auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
3419   auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
3420   auto expanddims =
3421       ops::ExpandDims(s.WithOpName("my_expanddims"), input, weights);
3422   const NodeDef& node_def = expanddims.operation.node()->def();
3423   {
3424     // Input is weights, should fail.
3425     Reset();
3426     AddTestWeights<int32>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
3427     AddTestWeights<int32>("weights", {1}, {1});
3428     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3429                                "The input \"input\" for ExpandDims must be a "
3430                                "tensor, at my_expanddims");
3431   }
3432   {
3433     // Axis is a tensor, should fail.
3434     Reset();
3435     AddTestTensor("input", {3, 2, 1});
3436     AddTestTensor("weights", {3});
3437     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3438                                "The input \"axis\" for ExpandDims must be a "
3439                                "constant, at my_expanddims");
3440   }
3441   std::vector<TestParamBase> test_params = {
3442       TestParamBase{{1, 1, 2, 3},
3443                     {},
3444                     {1, 1, 1, 2, 3},
3445                     {0},
3446                     trt_mode_ == TrtTestMode::kImplicitBatch
3447                         ? Status(error::UNIMPLEMENTED,
3448                                  "TensorRT does not allow manipulation of the "
3449                                  "batch dimension, at my_expanddims")
3450                         : Status::OK()},
3451       TestParamBase{{1, 1, 2, 3},
3452                     {},
3453                     {1, 1, 1, 2, 3},
3454                     {-5},
3455                     trt_mode_ == TrtTestMode::kImplicitBatch
3456                         ? Status(error::UNIMPLEMENTED,
3457                                  "TensorRT does not allow manipulation of the "
3458                                  "batch dimension, at my_expanddims")
3459                         : Status::OK()},
3460       TestParamBase{{1, 1, 2, 3},
3461                     {},
3462                     {},
3463                     {5},
3464                     Status(error::INVALID_ARGUMENT,
3465                            "Axis value of 5 is out of bounds, must be in range"
3466                            " [-5, 5), at my_expanddims")},
3467       TestParamBase{{1, 1, 2, 3},
3468                     {},
3469                     {},
3470                     {-6},
3471                     Status(error::INVALID_ARGUMENT,
3472                            "Axis value of -6 is out of bounds, must be in range"
3473                            " [-5, 5), at my_expanddims")},
3474       TestParamBase{{1, 2, 3}, {}, {1, 1, 2, 3}, {1}},
3475       TestParamBase{{1, 2, 3}, {}, {1, 1, 2, 3}, {-3}},
3476       TestParamBase{{1, 2, 3}, {}, {1, 2, 3, 1}, {3}},
3477       TestParamBase{{1, 2, 3}, {}, {1, 2, 3, 1}, {-1}},
3478       TestParamBase{{1, 2, 3}, {}, {1, 2, 1, 3}, {2}},
3479       TestParamBase{{1, 2, 3}, {}, {1, 2, 1, 3}, {-2}},
3480       TestParamBase{{1, 6}, {}, {1, 1, 6}, {1}},
3481       TestParamBase{{1, 6}, {}, {1, 6, 1}, {-1}},
3482   };
3483   for (auto p : test_params) {
3484     Reset();
3485     AddTestTensor("input", p.input_dims, {1, 2, 3, 4, 5, 6});
3486     AddTestWeights<int32>("weights", {1}, {p.param[0]});
3487     TestOpConverter("my_expanddims", node_def, p.expected_output_dims, p.status,
3488                     p.runtime_status, ElementsAreArray({1, 2, 3, 4, 5, 6}));
3489   }
3490 }
3491 
TEST_P(OpConverter_FP32_Test,ConvertSqueeze)3492 TEST_P(OpConverter_FP32_Test, ConvertSqueeze) {
3493   const bool use_implicit_batch = (trt_mode_ == TrtTestMode::kImplicitBatch);
3494   // Get the NodeDef for Squeeze.
3495   auto get_squeeze_nodedef = [](std::vector<int> axes,
3496                                 DataType tf_type) -> NodeDef {
3497     Scope s = Scope::NewRootScope();
3498     auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
3499     if (!axes.empty()) {
3500       ops::Squeeze::Attrs squeeze_attrs;
3501       squeeze_attrs.axis_ = gtl::ArraySlice<int>(axes);  // non-absl ok
3502       auto squeeze =
3503           ops::Squeeze(s.WithOpName("my_squeeze"), input, squeeze_attrs);
3504       return squeeze.operation.node()->def();
3505     } else {
3506       auto squeeze = ops::Squeeze(s.WithOpName("my_squeeze"), input);
3507       return squeeze.operation.node()->def();
3508     }
3509   };
3510   std::vector<TestParamBase> test_params = {
3511       TestParamBase{
3512           {1, 2, 1, 3},  // input dims
3513           {},            // input partial dims
3514           {2, 3},        // expected output dims
3515           {},            // axis
3516           trt_mode_ == TrtTestMode::kExplicitBatch
3517               ? Status::OK()
3518               : Status{error::UNIMPLEMENTED,
3519                        "Squeeze is not implemented for empty squeeze_dims, at "
3520                        "my_squeeze"}},
3521       TestParamBase{{1, 2, 1, 3},
3522                     {},
3523                     {2, 1, 3},
3524                     {0},
3525                     use_implicit_batch
3526                         ? Status{error::UNIMPLEMENTED,
3527                                  "TensorRT does not allow manipulation of the "
3528                                  "batch dimension, at my_squeeze"}
3529                         : Status::OK()},
3530       TestParamBase{{1, 2, 1, 3},
3531                     {},
3532                     {2, 1, 3},
3533                     {-4},
3534                     use_implicit_batch
3535                         ? Status{error::UNIMPLEMENTED,
3536                                  "TensorRT does not allow manipulation of the "
3537                                  "batch dimension, at my_squeeze"}
3538                         : Status::OK()},
3539       TestParamBase{
3540           {1, 1, 2, 3},
3541           {},
3542           {},
3543           {4},
3544           Status{error::INVALID_ARGUMENT,
3545                  "Axis value of 4 is out of bounds, must be in range [-4, 4), "
3546                  "at my_squeeze"}},
3547       TestParamBase{
3548           {1, 1, 2, 3},
3549           {},
3550           {},
3551           {-5},
3552           Status{error::INVALID_ARGUMENT,
3553                  "Axis value of -5 is out of bounds, must be in range [-4, 4), "
3554                  "at my_squeeze"}},
3555       TestParamBase{{1, 1, 2, 3}, {}, {1, 2, 3}, {1}},
3556       TestParamBase{{1, 1, 2, 3}, {}, {1, 2, 3}, {-3}},
3557       TestParamBase{{1, 2, 3, 1}, {}, {1, 2, 3}, {3}},
3558       TestParamBase{{1, 2, 3, 1}, {}, {1, 2, 3}, {-1}},
3559       TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {1, 3, 5}},
3560       TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {3, 1, 5}},
3561       TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {-1, -3, -5}},
3562       TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {1, -3, 5}},
3563       TestParamBase{{1, 1, 6}, {}, {1, 6}, {1}},
3564       TestParamBase{{1, 6, 1}, {}, {1, 6}, {2}},
3565   };
3566   auto squeeze_non_singleton = TestParamBase{
3567       {1, 1, 2, 3},
3568       {},
3569       {},
3570       {2},
3571       Status{error::INVALID_ARGUMENT,
3572              "Dimension 2 with size 2 cannot be squeezed because it must be "
3573              "size 1, at my_squeeze"}};
3574 
3575   if (trt_mode_ == TrtTestMode::kDynamicShape) {
3576     // In this test we try to squeeze axis=2 which has size > 1. In dynamic
3577     // shape mode the converter sees only -1, so it cannot catch this error.
3578     squeeze_non_singleton.status = Status::OK();  // conversion status
3579     squeeze_non_singleton.runtime_status =
3580         errors::InvalidArgument("Negative number of dimensions -1");
3581     // Dynamic shape tests with partially known input shape
3582     test_params.push_back(TestParamBase{{2, 1, 3}, {2, -1, 3}, {2, 3}, {1}});
3583     test_params.push_back(TestParamBase{{2, 1, 3}, {2, 1, -1}, {2, 3}, {1}});
3584   }
3585   test_params.push_back(squeeze_non_singleton);
3586 
3587   for (TestParamBase p : test_params) {
3588     SCOPED_TRACE(p);
3589     Reset();
3590     NodeDef node_def = get_squeeze_nodedef(p.param, tf_type_);
3591     AddTestTensor("input", p.input_dims, {1, 2, 3, 4, 5, 6},
3592                   p.partial_input_dims);
3593     TestOpConverter("my_squeeze", node_def, p.expected_output_dims, p.status,
3594                     p.runtime_status, ElementsAreArray({1, 2, 3, 4, 5, 6}));
3595   }
3596 }
3597 
TEST_F(OpConverterTest,ConvertStridedSlice)3598 TEST_F(OpConverterTest, ConvertStridedSlice) {
3599   // Get nodedef for StridedSlice layer.
3600   auto get_strided_slice_nodedef =
3601       [](int64 begin_mask = 0, int64 end_mask = 0, int64 ellipsis_mask = 0,
3602          int64 new_axis_mask = 0, int64 shrink_axis_mask = 0) -> NodeDef {
3603     Scope s = Scope::NewRootScope();
3604     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3605     auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32);
3606     auto end = ops::Placeholder(s.WithOpName("end"), DT_INT32);
3607     auto strides = ops::Placeholder(s.WithOpName("strides"), DT_INT32);
3608     ops::StridedSlice::Attrs attrs = ops::StridedSlice::Attrs()
3609                                          .BeginMask(begin_mask)
3610                                          .EndMask(end_mask)
3611                                          .EllipsisMask(ellipsis_mask)
3612                                          .NewAxisMask(new_axis_mask)
3613                                          .ShrinkAxisMask(shrink_axis_mask);
3614     auto strided_slice = ops::StridedSlice(s.WithOpName("my_strided_slice"),
3615                                            input, begin, end, strides, attrs);
3616     return strided_slice.operation.node()->def();
3617   };
3618 
3619   {
3620     // Input is weights, should fail.
3621     Reset();
3622     NodeDef node_def = get_strided_slice_nodedef();
3623     AddTestWeights<int32>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
3624     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
3625     AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
3626     AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
3627     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3628                                "The input \"input\" for StridedSlice must be a "
3629                                "tensor, at my_strided_slice");
3630   }
3631   {
3632     // Begin, end, strides are tensors, should fail.
3633     Reset();
3634     NodeDef node_def = get_strided_slice_nodedef();
3635     AddTestTensor("input", {1, 2, 3});
3636     AddTestTensor("begin", {4});
3637     AddTestTensor("end", {4});
3638     AddTestTensor("strides", {4});
3639     RunValidationAndConversion(
3640         node_def, error::UNIMPLEMENTED,
3641         "The input \"begin\" for StridedSlice must be a constant, at "
3642         "my_strided_slice");
3643   }
3644   {
3645     // Modify batch dim, should fail.
3646     Reset();
3647     NodeDef node_def = get_strided_slice_nodedef();
3648     AddTestTensor("input", {1, 2, 3});
3649     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
3650     AddTestWeights<int32>("end", {4}, {0, 1, 2, 3});
3651     AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
3652     RunValidationAndConversion(
3653         node_def, error::UNIMPLEMENTED,
3654         "TensorRT does not allow modifications to the batch dimension, at "
3655         "my_strided_slice");
3656   }
3657   {
3658     // Dynamic batch size without end_mask, should fail.
3659     Reset();
3660     NodeDef node_def = get_strided_slice_nodedef();
3661     AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1);
3662     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
3663     AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
3664     AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
3665     RunValidationAndConversion(
3666         node_def, error::UNIMPLEMENTED,
3667         "TensorRT does not allow modifications to the batch dimension, at "
3668         "my_strided_slice");
3669   }
3670   {
3671     // Dynamic batch size but using end_mask, ok.
3672     Reset();
3673     NodeDef node_def = get_strided_slice_nodedef(/*begin_mask=*/0,
3674                                                  /*end_mask=*/1);
3675     AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1);
3676     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
3677     AddTestWeights<int32>("end", {4}, {0, 1, 2, 2});
3678     AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
3679     RunValidationAndConversion(node_def);
3680   }
3681 // TRT 5.1+ supports strides (disabled until 5.1.3.1 due to bugs)
3682 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
3683   {
3684     // Negative strides, should fail.
3685     Reset();
3686     NodeDef node_def = get_strided_slice_nodedef();
3687     AddTestTensor("input", {1, 2, 3});
3688     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
3689     AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
3690     AddTestWeights<int32>("strides", {4}, {1, 1, 1, -1});
3691     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3692                                "Negative or zero stride values are not "
3693                                "supported for StridedSlice, at "
3694                                "my_strided_slice");
3695   }
3696 #else
3697   {
3698     // Stride is not 1, should fail.
3699     Reset();
3700     NodeDef node_def = get_strided_slice_nodedef();
3701     AddTestTensor("input", {1, 2, 3});
3702     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
3703     AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
3704     AddTestWeights<int32>("strides", {4}, {1, 2, 1, 3});
3705     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3706                                "Strides other than 1 are not supported with "
3707                                "this version of TRT, at my_strided_slice");
3708   }
3709 #endif
3710   {
3711     // Size of sliced dim is negative, should fail.
3712     Reset();
3713     NodeDef node_def = get_strided_slice_nodedef();
3714     AddTestTensor("input", {1, 2, 3});
3715     AddTestWeights<int32>("begin", {4}, {0, 0, 2, 0});
3716     AddTestWeights<int32>("end", {4}, {1, 1, 0, 3});
3717     AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
3718     RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
3719                                "\"size\" cannot be negative or zero for "
3720                                "StridedSlice, at my_strided_slice");
3721   }
3722 
3723   struct TestParams {
3724     std::vector<int> input_dims;
3725     std::vector<int> begin;
3726     std::vector<int> end;
3727     std::vector<int> strides;
3728     int begin_mask;
3729     int end_mask;
3730     int ellipsis_mask;
3731     int new_axis_mask;
3732     int shrink_axis_mask;
3733     std::vector<int> expected_output_dims;
3734     std::vector<float> expected_output;
3735   };
3736 
3737   auto get_mask = [](const std::vector<int>& mask) {
3738     int result = 0;
3739     for (int i = 0; i < mask.size(); i++) {
3740       if (mask[i]) result += (1 << i);
3741     }
3742     return result;
3743   };
3744 
3745   // Same input is used for all tests.
3746   const std::vector<float> ok_input = {1, 2, 3, 4, 5, 6};
3747 
3748   // Ok.
3749   std::vector<TestParams> ok_params = {
3750     // 2D Crop.
3751     TestParams{
3752         /*input_dims=*/{1, 2, 3},
3753         /*begin=*/{0, 0, 0, 0},
3754         /*end=*/{0, 0, 1, 2},
3755         /*strides=*/{1, 1, 1, 1},
3756         /*begin_mask=*/get_mask({0, 0, 0, 0}),
3757         /*end_mask=*/get_mask({1, 1, 0, 0}),
3758         /*ellipsis_mask=*/0,
3759         /*new_axis_mask=*/0,
3760         /*shrink_axis_mask=*/0,
3761         /*expected_output_dims=*/{1, 1, 2},
3762         /*expected_output=*/{1, 2},
3763     },
3764     TestParams{
3765         /*input_dims=*/{1, 2, 3},
3766         /*begin=*/{0, 0, 1, 1},
3767         /*end=*/{0, 0, 0, 0},
3768         /*strides=*/{1, 1, 1, 1},
3769         /*begin_mask=*/get_mask({0, 0, 0, 0}),
3770         /*end_mask=*/get_mask({1, 1, 1, 1}),
3771         /*ellipsis_mask=*/0,
3772         /*new_axis_mask=*/0,
3773         /*shrink_axis_mask=*/0,
3774         /*expected_output_dims=*/{1, 1, 2},
3775         /*expected_output=*/{5, 6},
3776     },
3777     TestParams{
3778         /*input_dims=*/{1, 2, 3},
3779         /*begin=*/{0, 0, 1, 1},
3780         /*end=*/{0, 1, 2, 3},
3781         /*strides=*/{1, 1, 1, 1},
3782         /*begin_mask=*/get_mask({0, 0, 0, 0}),
3783         /*end_mask=*/get_mask({1, 1, 0, 0}),
3784         /*ellipsis_mask=*/0,
3785         /*new_axis_mask=*/0,
3786         /*shrink_axis_mask=*/0,
3787         /*expected_output_dims=*/{1, 1, 2},
3788         /*expected_output=*/{5, 6},
3789     },
3790     // 2D Crop, with transpose.
3791     TestParams{
3792         /*input_dims=*/{2, 3, 1},
3793         /*begin=*/{0, 0, 0, 0},
3794         /*end=*/{0, 1, 2, 1},
3795         /*strides=*/{1, 1, 1, 1},
3796         /*begin_mask=*/get_mask({0, 0, 0, 0}),
3797         /*end_mask=*/get_mask({1, 0, 0, 0}),
3798         /*ellipsis_mask=*/0,
3799         /*new_axis_mask=*/0,
3800         /*shrink_axis_mask=*/0,
3801         /*expected_output_dims=*/{1, 2, 1},
3802         /*expected_output=*/{1, 2},
3803     },
3804     TestParams{
3805         /*input_dims=*/{2, 3, 1},
3806         /*begin=*/{0, 1, 1, 0},
3807         /*end=*/{0, 2, 3, 1},
3808         /*strides=*/{1, 1, 1, 1},
3809         /*begin_mask=*/get_mask({0, 0, 0, 0}),
3810         /*end_mask=*/get_mask({1, 0, 0, 0}),
3811         /*ellipsis_mask=*/0,
3812         /*new_axis_mask=*/0,
3813         /*shrink_axis_mask=*/0,
3814         /*expected_output_dims=*/{1, 2, 1},
3815         /*expected_output=*/{5, 6},
3816     },
3817     TestParams{
3818         /*input_dims=*/{2, 1, 3},
3819         /*begin=*/{0, 0, 0, 0},
3820         /*end=*/{0, 1, 1, 2},
3821         /*strides=*/{1, 1, 1, 1},
3822         /*begin_mask=*/get_mask({0, 0, 0, 0}),
3823         /*end_mask=*/get_mask({1, 0, 0, 0}),
3824         /*ellipsis_mask=*/0,
3825         /*new_axis_mask=*/0,
3826         /*shrink_axis_mask=*/0,
3827         /*expected_output_dims=*/{1, 1, 2},
3828         /*expected_output=*/{1, 2},
3829     },
3830     TestParams{
3831         /*input_dims=*/{2, 1, 3},
3832         /*begin=*/{0, 1, 0, 1},
3833         /*end=*/{0, 2, 1, 3},
3834         /*strides=*/{1, 1, 1, 1},
3835         /*begin_mask=*/get_mask({0, 0, 0, 0}),
3836         /*end_mask=*/get_mask({1, 0, 0, 0}),
3837         /*ellipsis_mask=*/0,
3838         /*new_axis_mask=*/0,
3839         /*shrink_axis_mask=*/0,
3840         /*expected_output_dims=*/{1, 1, 2},
3841         /*expected_output=*/{5, 6},
3842     },
3843     // 2D Crop, with reshape.
3844     TestParams{
3845         /*input_dims=*/{2, 3},
3846         /*begin=*/{0, 0, 0},
3847         /*end=*/{0, 1, 2},
3848         /*strides=*/{1, 1, 1},
3849         /*begin_mask=*/get_mask({0, 0, 0}),
3850         /*end_mask=*/get_mask({1, 0, 0}),
3851         /*ellipsis_mask=*/0,
3852         /*new_axis_mask=*/0,
3853         /*shrink_axis_mask=*/0,
3854         /*expected_output_dims=*/{1, 2},
3855         /*expected_output=*/{1, 2},
3856     },
3857     TestParams{
3858         /*input_dims=*/{2, 3},
3859         /*begin=*/{0, 1, 1},
3860         /*end=*/{0, 0, 0},
3861         /*strides=*/{1, 1, 1},
3862         /*begin_mask=*/get_mask({0, 0, 0}),
3863         /*end_mask=*/get_mask({1, 1, 1}),
3864         /*ellipsis_mask=*/0,
3865         /*new_axis_mask=*/0,
3866         /*shrink_axis_mask=*/0,
3867         /*expected_output_dims=*/{1, 2},
3868         /*expected_output=*/{5, 6},
3869     },
3870     // 1D Crop.
3871     TestParams{
3872         /*input_dims=*/{1, 2, 3},
3873         /*begin=*/{0, 0, 0, 0},
3874         /*end=*/{0, 0, 0, 2},
3875         /*strides=*/{1, 1, 1, 1},
3876         /*begin_mask=*/get_mask({0, 0, 0, 0}),
3877         /*end_mask=*/get_mask({1, 1, 1, 0}),
3878         /*ellipsis_mask=*/0,
3879         /*new_axis_mask=*/0,
3880         /*shrink_axis_mask=*/0,
3881         /*expected_output_dims=*/{1, 2, 2},
3882         /*expected_output=*/{1, 2, 4, 5},
3883     },
3884     TestParams{
3885         /*input_dims=*/{1, 2, 3},
3886         /*begin=*/{0, 0, 1, 0},
3887         /*end=*/{0, 0, 0, 0},
3888         /*strides=*/{1, 1, 1, 1},
3889         /*begin_mask=*/get_mask({0, 0, 0, 0}),
3890         /*end_mask=*/get_mask({1, 1, 1, 1}),
3891         /*ellipsis_mask=*/0,
3892         /*new_axis_mask=*/0,
3893         /*shrink_axis_mask=*/0,
3894         /*expected_output_dims=*/{1, 1, 3},
3895         /*expected_output=*/{4, 5, 6},
3896     },
3897     // 1D Crop, with transpose.
3898     TestParams{
3899         /*input_dims=*/{2, 3, 1},
3900         /*begin=*/{0, 0, 0, 0},
3901         /*end=*/{0, 1, 0, 0},
3902         /*strides=*/{1, 1, 1, 1},
3903         /*begin_mask=*/get_mask({0, 0, 0, 0}),
3904         /*end_mask=*/get_mask({1, 0, 1, 1}),
3905         /*ellipsis_mask=*/0,
3906         /*new_axis_mask=*/0,
3907         /*shrink_axis_mask=*/0,
3908         /*expected_output_dims=*/{1, 3, 1},
3909         /*expected_output=*/{1, 2, 3},
3910     },
3911     TestParams{
3912         /*input_dims=*/{2, 3, 1},
3913         /*begin=*/{0, 1, 0, 0},
3914         /*end=*/{0, 0, 0, 0},
3915         /*strides=*/{1, 1, 1, 1},
3916         /*begin_mask=*/get_mask({0, 0, 0, 0}),
3917         /*end_mask=*/get_mask({1, 1, 1, 1}),
3918         /*ellipsis_mask=*/0,
3919         /*new_axis_mask=*/0,
3920         /*shrink_axis_mask=*/0,
3921         /*expected_output_dims=*/{1, 3, 1},
3922         /*expected_output=*/{4, 5, 6},
3923     },
3924     // 1D Crop, with reshape.
3925     TestParams{
3926         /*input_dims=*/{6},
3927         /*begin=*/{0, 0},
3928         /*end=*/{0, 3},
3929         /*strides=*/{1, 1},
3930         /*begin_mask=*/get_mask({0, 0}),
3931         /*end_mask=*/get_mask({1, 0}),
3932         /*ellipsis_mask=*/0,
3933         /*new_axis_mask=*/0,
3934         /*shrink_axis_mask=*/0,
3935         /*expected_output_dims=*/{3},
3936         /*expected_output=*/{1, 2, 3},
3937     },
3938     TestParams{
3939         /*input_dims=*/{1, 6},
3940         /*begin=*/{0, 0, 2},
3941         /*end=*/{0, 0, 5},
3942         /*strides=*/{1, 1, 1},
3943         /*begin_mask=*/get_mask({0, 0, 0}),
3944         /*end_mask=*/get_mask({1, 1, 0}),
3945         /*ellipsis_mask=*/0,
3946         /*new_axis_mask=*/0,
3947         /*shrink_axis_mask=*/0,
3948         /*expected_output_dims=*/{1, 3},
3949         /*expected_output=*/{3, 4, 5},
3950     },
3951     TestParams{
3952         /*input_dims=*/{6, 1},
3953         /*begin=*/{0, 2, 0},
3954         /*end=*/{0, 5, 0},
3955         /*strides=*/{1, 1, 1},
3956         /*begin_mask=*/get_mask({0, 0, 0}),
3957         /*end_mask=*/get_mask({1, 0, 1}),
3958         /*ellipsis_mask=*/0,
3959         /*new_axis_mask=*/0,
3960         /*shrink_axis_mask=*/0,
3961         /*expected_output_dims=*/{3, 1},
3962         /*expected_output=*/{3, 4, 5},
3963     },
3964     // Negative axis.
3965     TestParams{
3966         /*input_dims=*/{6, 1},
3967         /*begin=*/{0, -6, 0},
3968         /*end=*/{0, -3, 0},
3969         /*strides=*/{1, 1, 1},
3970         /*begin_mask=*/get_mask({0, 0, 0}),
3971         /*end_mask=*/get_mask({1, 0, 1}),
3972         /*ellipsis_mask=*/0,
3973         /*new_axis_mask=*/0,
3974         /*shrink_axis_mask=*/0,
3975         /*expected_output_dims=*/{3, 1},
3976         /*expected_output=*/{1, 2, 3},
3977     },
3978     TestParams{
3979         /*input_dims=*/{6, 1},
3980         /*begin=*/{0, 0, 0},
3981         /*end=*/{0, -1, 0},
3982         /*strides=*/{1, 1, 1},
3983         /*begin_mask=*/get_mask({0, 0, 0}),
3984         /*end_mask=*/get_mask({1, 0, 1}),
3985         /*ellipsis_mask=*/0,
3986         /*new_axis_mask=*/0,
3987         /*shrink_axis_mask=*/0,
3988         /*expected_output_dims=*/{5, 1},
3989         /*expected_output=*/{1, 2, 3, 4, 5},
3990     },
3991     // Clamp out of bounds begin and end.
3992     TestParams{
3993         /*input_dims=*/{1, 2, 3},
3994         /*begin=*/{0, 0, -9999, -9},
3995         /*end=*/{0, 1, 1000, 4},
3996         /*strides=*/{1, 1, 1, 1},
3997         /*begin_mask=*/get_mask({0, 0, 0, 0}),
3998         /*end_mask=*/get_mask({1, 0, 0, 0}),
3999         /*ellipsis_mask=*/0,
4000         /*new_axis_mask=*/0,
4001         /*shrink_axis_mask=*/0,
4002         /*expected_output_dims=*/{1, 2, 3},
4003         /*expected_output=*/{1, 2, 3, 4, 5, 6},
4004     },
4005 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
4006     // Strides
4007     TestParams{
4008         /*input_dims=*/{6},
4009         /*begin=*/{0, 0},
4010         /*end=*/{0, 5},
4011         /*strides=*/{1, 2},
4012         /*begin_mask=*/get_mask({0, 0}),
4013         /*end_mask=*/get_mask({1, 0}),
4014         /*ellipsis_mask=*/0,
4015         /*new_axis_mask=*/0,
4016         /*shrink_axis_mask=*/0,
4017         /*expected_output_dims=*/{3},
4018         /*expected_output=*/{1, 3, 5},
4019     },
4020     TestParams{
4021         /*input_dims=*/{6},
4022         /*begin=*/{0, 0},
4023         /*end=*/{0, 6},
4024         /*strides=*/{1, 2},
4025         /*begin_mask=*/get_mask({0, 0}),
4026         /*end_mask=*/get_mask({1, 0}),
4027         /*ellipsis_mask=*/0,
4028         /*new_axis_mask=*/0,
4029         /*shrink_axis_mask=*/0,
4030         /*expected_output_dims=*/{3},
4031         /*expected_output=*/{1, 3, 5},
4032     },
4033     TestParams{
4034         /*input_dims=*/{6},
4035         /*begin=*/{0, 1},
4036         /*end=*/{0, 6},
4037         /*strides=*/{1, 2},
4038         /*begin_mask=*/get_mask({0, 0}),
4039         /*end_mask=*/get_mask({1, 0}),
4040         /*ellipsis_mask=*/0,
4041         /*new_axis_mask=*/0,
4042         /*shrink_axis_mask=*/0,
4043         /*expected_output_dims=*/{3},
4044         /*expected_output=*/{2, 4, 6},
4045     },
4046     TestParams{
4047         /*input_dims=*/{6},
4048         /*begin=*/{0, 2},
4049         /*end=*/{0, 6},
4050         /*strides=*/{1, 3},
4051         /*begin_mask=*/get_mask({0, 0}),
4052         /*end_mask=*/get_mask({1, 0}),
4053         /*ellipsis_mask=*/0,
4054         /*new_axis_mask=*/0,
4055         /*shrink_axis_mask=*/0,
4056         /*expected_output_dims=*/{2},
4057         /*expected_output=*/{3, 6},
4058     },
4059 #endif
4060     // ellipsis_mask
4061     TestParams{
4062         /*input_dims=*/{1, 2, 3},
4063         /*begin=*/{0, 1},
4064         /*end=*/{0, 2},
4065         /*strides=*/{1, 1},
4066         /*begin_mask=*/get_mask({0, 0, 0, 0}),
4067         /*end_mask=*/get_mask({0, 0, 0, 0}),
4068         /*ellipsis_mask=*/get_mask({1, 0, 0, 0}),
4069         /*new_axis_mask=*/0,
4070         /*shrink_axis_mask=*/0,
4071         /*expected_output_dims=*/{1, 2, 1},
4072         /*expected_output=*/{2, 5},
4073     },
4074     TestParams{
4075         /*input_dims=*/{1, 2, 3},
4076         /*begin=*/{0, 0, 1},
4077         /*end=*/{0, 0, 2},
4078         /*strides=*/{1, 1, 1},
4079         /*begin_mask=*/get_mask({1, 0, 0, 0}),
4080         /*end_mask=*/get_mask({1, 0, 0, 0}),
4081         /*ellipsis_mask=*/get_mask({0, 1, 0, 0}),
4082         /*new_axis_mask=*/0,
4083         /*shrink_axis_mask=*/0,
4084         /*expected_output_dims=*/{1, 2, 1},
4085         /*expected_output=*/{2, 5},
4086     },
4087     TestParams{
4088         /*input_dims=*/{1, 2, 3},
4089         /*begin=*/{0, 0, 0, 1},
4090         /*end=*/{0, 1, 2, 2},
4091         /*strides=*/{1, 1, 1, 1},
4092         /*begin_mask=*/get_mask({0, 0, 0, 0}),
4093         /*end_mask=*/get_mask({0, 0, 0, 0}),
4094         /*ellipsis_mask=*/get_mask({1, 0, 0, 0}),
4095         /*new_axis_mask=*/0,
4096         /*shrink_axis_mask=*/0,
4097         /*expected_output_dims=*/{1, 2, 1},
4098         /*expected_output=*/{2, 5},
4099     },
4100     TestParams{
4101         /*input_dims=*/{1, 2, 3},
4102         /*begin=*/{0, 0, 0, 1},
4103         /*end=*/{1, 1, 2, 2},
4104         /*strides=*/{1, 1, 1, 1},
4105         /*begin_mask=*/get_mask({0, 0, 0, 0}),
4106         /*end_mask=*/get_mask({0, 0, 0, 0}),
4107         /*ellipsis_mask=*/get_mask({0, 1, 0, 0}),
4108         /*new_axis_mask=*/0,
4109         /*shrink_axis_mask=*/0,
4110         /*expected_output_dims=*/{1, 2, 1},
4111         /*expected_output=*/{2, 5},
4112     },
4113 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
4114     TestParams{
4115         /*input_dims=*/{1, 2, 3},
4116         /*begin=*/{0, 0, 0, 0, 1},
4117         /*end=*/{0, 1, 1, 2, 2},
4118         /*strides=*/{1, 1, 1, 1, 1},
4119         /*begin_mask=*/get_mask({0, 0, 0, 0}),
4120         /*end_mask=*/get_mask({0, 0, 0, 0}),
4121         /*ellipsis_mask=*/get_mask({1, 0, 0, 0}),
4122         /*new_axis_mask=*/0,
4123         /*shrink_axis_mask=*/0,
4124         /*expected_output_dims=*/{1, 2, 1},
4125         /*expected_output=*/{2, 5},
4126     },
4127     // shrink_axis_mask
4128     TestParams{
4129         /*input_dims=*/{1, 2, 3},
4130         /*begin=*/{0, 0, 0, 1},
4131         /*end=*/{0, 0, 0, 2},
4132         /*strides=*/{1, 1, 1, 1},
4133         /*begin_mask=*/get_mask({1, 1, 1, 0}),
4134         /*end_mask=*/get_mask({1, 1, 1, 0}),
4135         /*ellipsis_mask=*/0,
4136         /*new_axis_mask=*/0,
4137         /*shrink_axis_mask=*/get_mask({0, 0, 0, 1}),
4138         /*expected_output_dims=*/{1, 2},
4139         /*expected_output=*/{2, 5},
4140     },
4141     TestParams{
4142         /*input_dims=*/{1, 2, 3},
4143         /*begin=*/{0, 0, 0, 1},
4144         /*end=*/{0, 1, 2, 2},
4145         /*strides=*/{1, 1, 1, 1},
4146         /*begin_mask=*/get_mask({1, 0, 0, 0}),
4147         /*end_mask=*/get_mask({1, 0, 0, 0}),
4148         /*ellipsis_mask=*/0,
4149         /*new_axis_mask=*/0,
4150         /*shrink_axis_mask=*/get_mask({0, 1, 0, 1}),
4151         /*expected_output_dims=*/{2},
4152         /*expected_output=*/{2, 5},
4153     },
4154     TestParams{
4155         /*input_dims=*/{6},
4156         /*begin=*/{0, 0},
4157         /*end=*/{0, 1},
4158         /*strides=*/{1, 1},
4159         /*begin_mask=*/get_mask({1, 0}),
4160         /*end_mask=*/get_mask({1, 0}),
4161         /*ellipsis_mask=*/0,
4162         /*new_axis_mask=*/0,
4163         /*shrink_axis_mask=*/get_mask({0, 1}),
4164         /*expected_output_dims=*/{},
4165         /*expected_output=*/{1},
4166     },
4167 #endif  // IS_TRT_VERSION_GE(5, 1, 3, 1)
4168   };
4169 
4170   for (int i = 0; i < ok_params.size(); i++) {
4171     Reset();
4172     NodeDef node_def = get_strided_slice_nodedef(
4173         ok_params[i].begin_mask, ok_params[i].end_mask,
4174         ok_params[i].ellipsis_mask, ok_params[i].new_axis_mask,
4175         ok_params[i].shrink_axis_mask);
4176     AddTestTensor("input", ok_params[i].input_dims);
4177     AddTestWeights<int32>("begin",
4178                           {static_cast<int>(ok_params[i].begin.size())},
4179                           ok_params[i].begin);
4180     AddTestWeights<int32>("end", {static_cast<int>(ok_params[i].end.size())},
4181                           ok_params[i].end);
4182     AddTestWeights<int32>("strides",
4183                           {static_cast<int>(ok_params[i].strides.size())},
4184                           ok_params[i].strides);
4185     RunValidationAndConversion(node_def);
4186 
4187     TRT_TensorOrWeights output;
4188     TF_EXPECT_OK(GetTensorOrWeights("my_strided_slice", &output));
4189     ASSERT_TRUE(output.is_tensor());
4190     ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
4191                              output.tensor()->getDimensions());
4192 
4193     const DataVec input_data{{"input", AsTensor<float>(ok_input)}};
4194     DataVec output_data{
4195         {"my_strided_slice",
4196          ConstructTensor<float>(ok_params[i].expected_output.size())}};
4197     TF_EXPECT_OK(BuildAndRun(input_data, &output_data));
4198     EXPECT_THAT(GetSpanForData<float>(output_data[0]),
4199                 ElementsAreArray(ok_params[i].expected_output));
4200   }
4201 }
4202 
TEST_F(OpConverterTest,ConvertSlice)4203 TEST_F(OpConverterTest, ConvertSlice) {
4204   // Get nodedef for Slice layer.
4205   auto get_slice_nodedef = []() -> NodeDef {
4206     Scope s = Scope::NewRootScope();
4207     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
4208     auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32);
4209     auto size = ops::Placeholder(s.WithOpName("size"), DT_INT32);
4210     auto slice = ops::Slice(s.WithOpName("my_slice"), input, begin, size);
4211     return slice.operation.node()->def();
4212   };
4213 
4214   {
4215     // Begin is below bounds, should fail.
4216     Reset();
4217     NodeDef node_def = get_slice_nodedef();
4218     AddTestTensor("input", {1, 2, 3});
4219     AddTestWeights<int32>("begin", {4}, {0, 0, -1, 0});
4220     AddTestWeights<int32>("size", {4}, {1, 1, 2, 3});
4221     RunValidationAndConversion(
4222         node_def, error::INVALID_ARGUMENT,
4223         "\"begin\" for dimension 2 in Slice is out of range, at my_slice");
4224   }
4225   {
4226     // Begin is above bounds, should fail.
4227     Reset();
4228     NodeDef node_def = get_slice_nodedef();
4229     AddTestTensor("input", {1, 2, 3});
4230     AddTestWeights<int32>("begin", {4}, {0, 0, 3, 0});
4231     AddTestWeights<int32>("size", {4}, {1, 1, 2, 3});
4232     RunValidationAndConversion(
4233         node_def, error::INVALID_ARGUMENT,
4234         "\"begin\" for dimension 2 in Slice is out of range, at my_slice");
4235   }
4236   {
4237     // Size is below bounds, should fail.
4238     Reset();
4239     NodeDef node_def = get_slice_nodedef();
4240     AddTestTensor("input", {1, 2, 3});
4241     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
4242     AddTestWeights<int32>("size", {4}, {1, 1, 2, -2});
4243     RunValidationAndConversion(
4244         node_def, error::INVALID_ARGUMENT,
4245         "\"begin\" + \"size\" for dimension 3 in Slice is out of range, at "
4246         "my_slice");
4247   }
4248   {
4249     // Size is above bounds, should fail.
4250     Reset();
4251     NodeDef node_def = get_slice_nodedef();
4252     AddTestTensor("input", {1, 2, 3});
4253     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
4254     AddTestWeights<int32>("size", {4}, {1, 1, 3, 3});
4255     RunValidationAndConversion(
4256         node_def, error::INVALID_ARGUMENT,
4257         "\"begin\" + \"size\" for dimension 2 in Slice is out of range, at "
4258         "my_slice");
4259   }
4260   {
4261     // Modify batch dim, should fail.
4262     Reset();
4263     NodeDef node_def = get_slice_nodedef();
4264     AddTestTensor("input", {1, 2, 3});
4265     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
4266     AddTestWeights<int32>("size", {4}, {0, 1, 2, 3});
4267     RunValidationAndConversion(
4268         node_def, error::UNIMPLEMENTED,
4269         "TensorRT does not allow modifications to the batch dimension, at "
4270         "my_slice");
4271   }
4272   {
4273     // Dynamic batch size with size[0] not -1, should fail.
4274     Reset();
4275     NodeDef node_def = get_slice_nodedef();
4276     AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1);
4277     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
4278     AddTestWeights<int32>("size", {4}, {1, 1, 2, 3});
4279     RunValidationAndConversion(
4280         node_def, error::UNIMPLEMENTED,
4281         "TensorRT does not allow modifications to the batch dimension, at "
4282         "my_slice");
4283   }
4284   {
4285     // Dynamic batch size but using size[0] of -1, ok.
4286     Reset();
4287     NodeDef node_def = get_slice_nodedef();
4288     AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1);
4289     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
4290     AddTestWeights<int32>("size", {4}, {-1, 1, 2, 2});
4291     RunValidationAndConversion(node_def);
4292   }
4293 
4294   struct TestParams {
4295     std::vector<int> input_dims;
4296     std::vector<int> begin;
4297     std::vector<int> size;
4298     std::vector<int> expected_output_dims;
4299     std::vector<int> expected_output;
4300   };
4301 
4302   // Ok.
4303   std::vector<TestParams> ok_params = {
4304       TestParams{{1, 2, 3},
4305                  {0, 0, 0, 0},
4306                  {-1, -1, -1, -1},
4307                  {1, 2, 3},
4308                  {1, 2, 3, 4, 5, 6}},
4309       TestParams{
4310           {1, 2, 3}, {0, 0, 0, 0}, {1, 1, 2, 3}, {1, 2, 3}, {1, 2, 3, 4, 5, 6}},
4311       TestParams{
4312           {1, 2, 3}, {0, 0, 0, 0}, {1, -1, 2, 2}, {1, 2, 2}, {1, 2, 4, 5}},
4313       TestParams{{6}, {0, 1}, {1, 5}, {5}, {2, 3, 4, 5, 6}},
4314       TestParams{{6}, {0, 1}, {-1, 3}, {3}, {2, 3, 4}},
4315   };
4316 
4317   for (int i = 0; i < ok_params.size(); i++) {
4318     Reset();
4319     NodeDef node_def = get_slice_nodedef();
4320     AddTestTensor("input", ok_params[i].input_dims);
4321     AddTestWeights<int32>("begin",
4322                           {static_cast<int>(ok_params[i].begin.size())},
4323                           ok_params[i].begin);
4324     AddTestWeights<int32>("size", {static_cast<int>(ok_params[i].size.size())},
4325                           ok_params[i].size);
4326     RunValidationAndConversion(node_def);
4327 
4328     TRT_TensorOrWeights output;
4329     TF_EXPECT_OK(GetTensorOrWeights("my_slice", &output));
4330     ASSERT_TRUE(output.is_tensor());
4331     ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
4332                              output.tensor()->getDimensions());
4333 
4334     const DataVec input_data{{"input", AsTensor<float>({1, 2, 3, 4, 5, 6})}};
4335     DataVec output_data{{"my_slice", ConstructTensor<float>(
4336                                          ok_params[i].expected_output.size())}};
4337     TF_EXPECT_OK(BuildAndRun(input_data, &output_data));
4338     EXPECT_THAT(GetSpanForData<float>(output_data[0]),
4339                 ElementsAreArray(ok_params[i].expected_output));
4340   }
4341 }
4342 
TEST_P(OpConverter_FP32_Test,ConvertConv2D)4343 TEST_P(OpConverter_FP32_Test, ConvertConv2D) {
4344   // Get nodedef for Conv2D layer.
4345   DataType tf_type = tf_type_;
4346   auto get_conv2d_nodedef =
4347       [tf_type](std::vector<int> strides = {1, 1, 1, 1},
4348                 string padding = "SAME", string data_format = "NCHW",
4349                 std::vector<int> dilations = {1, 1, 1, 1}) -> NodeDef {
4350     Scope s = Scope::NewRootScope();
4351     auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
4352     auto filter = ops::Placeholder(s.WithOpName("weights"), tf_type);
4353     ops::Conv2D::Attrs attrs =
4354         ops::Conv2D::Attrs().DataFormat(data_format).Dilations(dilations);
4355     auto conv2d = ops::Conv2D(s.WithOpName("my_conv2d"), input, filter, strides,
4356                               padding, attrs);
4357     return conv2d.operation.node()->def();
4358   };
4359 
4360   {
4361     // Input is weights, should fail.
4362     Reset();
4363     NodeDef node_def = get_conv2d_nodedef();
4364     AddTestWeights<float>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
4365     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4366     RunValidationAndConversion(
4367         node_def, error::UNIMPLEMENTED,
4368         "The input \"input\" for Conv2D must be a tensor, at my_conv2d");
4369   }
4370   {
4371     // Filter is tensor, should fail.
4372     Reset();
4373     NodeDef node_def = get_conv2d_nodedef();
4374     AddTestTensor("input", {3, 1, 2, 1});
4375     AddTestTensor("weights", {3, 3, 1, 1});
4376     RunValidationAndConversion(
4377         node_def, error::UNIMPLEMENTED,
4378         "The input \"filter\" for Conv2D must be a constant, at my_conv2d");
4379   }
4380   {
4381     // Filter is not 4D, should fail.
4382     Reset();
4383     NodeDef node_def = get_conv2d_nodedef();
4384     AddTestTensor("input", {1, 1, 2, 3});
4385     AddTestWeights<float>("weights", {3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4386     RunValidationAndConversion(
4387         node_def, error::INVALID_ARGUMENT,
4388         "Conv2D expects kernel of dimension 4, at my_conv2d");
4389   }
4390   {
4391     // Dilations is not 4D, should fail.
4392     Reset();
4393     NodeDef node_def =
4394         get_conv2d_nodedef({1, 1, 1, 1}, "SAME", "NCHW", {1, 1, 1});
4395     AddTestTensor("input", {1, 1, 2, 3});
4396     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4397     RunValidationAndConversion(
4398         node_def, error::INVALID_ARGUMENT,
4399         "Convolution dilations field must specify 4 dimensions, at my_conv2d");
4400   }
4401   {
4402     // Dilation value is not 1 for channel, should fail.
4403     Reset();
4404     NodeDef node_def =
4405         get_conv2d_nodedef({1, 1, 1, 1}, "SAME", "NCHW", {1, 2, 1, 1});
4406     AddTestTensor("input", {1, 1, 2, 3});
4407     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4408     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4409                                "Dilation rate must be 1 for batch and channel "
4410                                "dimensions, at my_conv2d");
4411   }
4412   {
4413     // Dilation value is not 1 for channel (NHWC), should fail.
4414     Reset();
4415     NodeDef node_def =
4416         get_conv2d_nodedef({1, 1, 1, 1}, "SAME", "NHWC", {1, 1, 1, 2});
4417     AddTestTensor("input", {1, 2, 3, 1});
4418     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4419     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4420                                "Dilation rate must be 1 for batch and channel "
4421                                "dimensions, at my_conv2d");
4422   }
4423   {
4424     // Strides is not 4D, should fail.
4425     Reset();
4426     NodeDef node_def =
4427         get_conv2d_nodedef({1, 1, 1}, "SAME", "NCHW", {1, 1, 1, 1});
4428     AddTestTensor("input", {1, 1, 2, 3});
4429     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4430     RunValidationAndConversion(
4431         node_def, error::INVALID_ARGUMENT,
4432         "Convolution strides field must specify 4 dimensions, at my_conv2d");
4433   }
4434   {
4435     // Stride value is not 1 for channel, should fail.
4436     Reset();
4437     NodeDef node_def =
4438         get_conv2d_nodedef({1, 2, 1, 1}, "SAME", "NCHW", {1, 1, 1, 1});
4439     AddTestTensor("input", {1, 1, 2, 3});
4440     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4441     RunValidationAndConversion(
4442         node_def, error::UNIMPLEMENTED,
4443         "Stride must be 1 for batch and channel dimensions, at my_conv2d");
4444   }
4445   if (trt_mode_ == TrtTestMode::kDynamicShape) {
4446     Reset();
4447     NodeDef node_def = get_conv2d_nodedef();
4448     // Channel dim unknown, should fail.
4449     nvinfer1::DataType trt_type;
4450     TF_ASSERT_OK(TfTypeToTrtType(tf_type_, &trt_type));
4451     AddTestTensorWithTFDims("input", {-1, -1, -1, -1}, trt_type);
4452     AddTestWeights<float>("weights", {1, 2, 1, 1}, {-1, 1});
4453     RunValidationAndConversion(
4454         node_def, error::INVALID_ARGUMENT,
4455         "Channel dimension must be static, at my_conv2d");
4456   }
4457 
4458   struct TestParams {
4459     std::vector<int> input_dims;
4460     std::vector<float> input;
4461     std::vector<int> filter_dims;
4462     std::vector<float> filter;
4463     std::vector<int> strides;
4464     string padding;
4465     string data_format;
4466     std::vector<int> dilations;
4467     std::vector<int> expected_output_dims;
4468     std::vector<float> expected_output;
4469   };
4470 
4471   // Ok.
4472   std::vector<TestParams> ok_params = {
4473       // Basic
4474       TestParams{/*input_dims=*/{1, 1, 2, 3},
4475                  /*input=*/{0, 1, 2, 3, 3, 4},
4476                  /*filter_dims=*/{1, 2, 1, 1},
4477                  /*filter=*/{-1, 1},
4478                  /*strides=*/{1, 1, 1, 1},
4479                  /*padding=*/"VALID",
4480                  /*data_format=*/"NCHW",
4481                  /*dilations=*/{1, 1, 1, 1},
4482                  /*expected_output_dims=*/{1, 1, 2, 2},
4483                  /*expected_output=*/{1, 1, 0, 1}},
4484       // SAME padding (Asymmetric)
4485       TestParams{/*input_dims=*/{1, 1, 2, 3},
4486                  /*input=*/{0, 1, 2, 3, 3, 4},
4487                  /*filter_dims=*/{1, 2, 1, 1},
4488                  /*filter=*/{-1, 1},
4489                  /*strides=*/{1, 1, 1, 1},
4490                  /*padding=*/"SAME",
4491                  /*data_format=*/"NCHW",
4492                  /*dilations=*/{1, 1, 1, 1},
4493                  /*expected_output_dims=*/{1, 1, 2, 3},
4494                  /*expected_output=*/{1, 1, -2, 0, 1, -4}},
4495       // SAME padding (Symmetric)
4496       TestParams{/*input_dims=*/{1, 1, 2, 3},
4497                  /*input=*/{0, 1, 2, 3, 3, 4},
4498                  /*filter_dims=*/{1, 3, 1, 1},
4499                  /*filter=*/{-1, 0, 1},
4500                  /*strides=*/{1, 1, 1, 1},
4501                  /*padding=*/"SAME",
4502                  /*data_format=*/"NCHW",
4503                  /*dilations=*/{1, 1, 1, 1},
4504                  /*expected_output_dims=*/{1, 1, 2, 3},
4505                  /*expected_output=*/{1, 2, -1, 3, 1, -3}},
4506       // NHWC
4507       TestParams{/*input_dims=*/{1, 2, 3, 1},
4508                  /*input=*/{0, 1, 2, 3, 3, 4},
4509                  /*filter_dims=*/{1, 2, 1, 1},
4510                  /*filter=*/{-1, 1},
4511                  /*strides=*/{1, 1, 1, 1},
4512                  /*padding=*/"VALID",
4513                  /*data_format=*/"NHWC",
4514                  /*dilations=*/{1, 1, 1, 1},
4515                  /*expected_output_dims=*/{1, 2, 2, 1},
4516                  /*expected_output=*/{1, 1, 0, 1}},
4517       // Dilated
4518       TestParams{/*input_dims=*/{1, 1, 2, 3},
4519                  /*input=*/{0, 1, 2, 3, 3, 4},
4520                  /*filter_dims=*/{1, 2, 1, 1},
4521                  /*filter=*/{-1, 1},
4522                  /*strides=*/{1, 1, 1, 1},
4523                  /*padding=*/"VALID",
4524                  /*data_format=*/"NCHW",
4525                  /*dilations=*/{1, 1, 1, 2},
4526                  /*expected_output_dims=*/{1, 1, 2, 1},
4527                  /*expected_output=*/{2, 1}},
4528       // Strided
4529       TestParams{/*input_dims=*/{1, 1, 2, 4},
4530                  /*input=*/{0, 1, 2, 2, 3, 4, 4, 7},
4531                  /*filter_dims=*/{1, 2, 1, 1},
4532                  /*filter=*/{-1, 1},
4533                  /*strides=*/{1, 1, 1, 2},
4534                  /*padding=*/"VALID",
4535                  /*data_format=*/"NCHW",
4536                  /*dilations=*/{1, 1, 1, 1},
4537                  /*expected_output_dims=*/{1, 1, 2, 2},
4538                  /*expected_output=*/{1, 0, 1, 3}},
4539   };
4540 
4541   for (int i = 0; i < ok_params.size(); i++) {
4542     Reset();
4543     NodeDef node_def =
4544         get_conv2d_nodedef(ok_params[i].strides, ok_params[i].padding,
4545                            ok_params[i].data_format, ok_params[i].dilations);
4546     std::vector<int> partial_input_shape;
4547     if (trt_mode_ == TrtTestMode::kDynamicShape) {
4548       // The channel dim cannot have unknown size, fix that.
4549       partial_input_shape.resize(ok_params[i].input_dims.size(), -1);
4550       int channel_id = (ok_params[i].data_format == "NCHW") ? 1 : 3;
4551       partial_input_shape[channel_id] = ok_params[i].input_dims[channel_id];
4552     }
4553 
4554     AddTestTensor("input", ok_params[i].input_dims, tf_type_,
4555                   ok_params[i].input, partial_input_shape);
4556     AddTestWeights<float>("weights", ok_params[i].filter_dims,
4557                           ok_params[i].filter);
4558 
4559     TestOpConverter("my_conv2d", node_def, ok_params[i].expected_output_dims,
4560                     Status::OK(), Status::OK(),
4561                     ElementsAreArray(ok_params[i].expected_output));
4562   }
4563 }
4564 
TEST_F(OpConverterTest,ConvertConv2DBackpropInput)4565 TEST_F(OpConverterTest, ConvertConv2DBackpropInput) {
4566   // Get nodedef for Conv2D layer.
4567   auto get_conv2d_backprop_input_nodedef =
4568       [](std::vector<int> strides = {1, 1, 1, 1}, string padding = "SAME",
4569          string data_format = "NCHW",
4570          std::vector<int> dilations = {1, 1, 1, 1}) -> NodeDef {
4571     Scope s = Scope::NewRootScope();
4572     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
4573     auto filter = ops::Placeholder(s.WithOpName("weights"), DT_FLOAT);
4574     auto input_sizes = ops::Placeholder(s.WithOpName("input_sizes"), DT_INT32);
4575     ops::Conv2DBackpropInput::Attrs attrs = ops::Conv2DBackpropInput::Attrs()
4576                                                 .DataFormat(data_format)
4577                                                 .Dilations(dilations);
4578     auto conv2d = ops::Conv2DBackpropInput(
4579         s.WithOpName("my_conv2d_backprop_input"), input_sizes, filter, input,
4580         strides, padding, attrs);
4581     return conv2d.operation.node()->def();
4582   };
4583 
4584   {
4585     // Dilation + Conv2DBackpropInput, should fail.
4586     Reset();
4587     NodeDef node_def = get_conv2d_backprop_input_nodedef({1, 1, 1, 1}, "SAME",
4588                                                          "NHWC", {1, 1, 2, 1});
4589     AddTestTensor("input", {2, 3, 1});
4590     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4591     AddTestWeights<int>("input_sizes", {4}, {1, 2, 3, 1});
4592     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4593                                "Dilation with Conv2DBackpropInput "
4594                                "(conv2d_transpose) is not supported, "
4595                                "at my_conv2d_backprop_input");
4596   }
4597 
4598   struct TestParams {
4599     std::vector<int> input_dims;
4600     std::vector<float> input;
4601     std::vector<int> filter_dims;
4602     std::vector<float> filter;
4603     std::vector<int> strides;
4604     string padding;
4605     string data_format;
4606     std::vector<int> dilations;
4607     std::vector<int> expected_output_dims;
4608     std::vector<float> expected_output;
4609   };
4610 
4611   // Ok.
4612   std::vector<TestParams> ok_params = {
4613       // Transpose Strided
4614       TestParams{/*input_dims=*/{1, 2, 2},
4615                  /*input=*/{0, 1, 2, 3},
4616                  /*filter_dims=*/{1, 2, 1, 1},
4617                  /*filter=*/{-1, 1},
4618                  /*strides=*/{1, 1, 1, 2},
4619                  /*padding=*/"SAME",
4620                  /*data_format=*/"NCHW",
4621                  /*dilations=*/{1, 1, 1, 1},
4622                  /*expected_output_dims=*/{1, 2, 4},
4623                  /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3}},
4624       // Transpose Strided NHWC
4625       TestParams{/*input_dims=*/{2, 2, 1},
4626                  /*input=*/{0, 1, 2, 3},
4627                  /*filter_dims=*/{1, 2, 1, 1},
4628                  /*filter=*/{-1, 1},
4629                  /*strides=*/{1, 1, 2, 1},
4630                  /*padding=*/"SAME",
4631                  /*data_format=*/"NHWC",
4632                  /*dilations=*/{1, 1, 1, 1},
4633                  /*expected_output_dims=*/{2, 4, 1},
4634                  /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3}},
4635       // Transpose Strided NHWC with VALID padding
4636       TestParams{/*input_dims=*/{3, 1, 1},
4637                  /*input=*/{0, 1, 2},
4638                  /*filter_dims=*/{2, 1, 1, 1},
4639                  /*filter=*/{-1, 1},
4640                  /*strides=*/{1, 2, 1, 1},
4641                  /*padding=*/"VALID",
4642                  /*data_format=*/"NHWC",
4643                  /*dilations=*/{1, 1, 1, 1},
4644                  /*expected_output_dims=*/{7, 1, 1},
4645                  /*expected_output=*/{0, 0, -1, 1, -2, 2, 0}},
4646   };
4647 
4648   for (int i = 0; i < ok_params.size(); i++) {
4649     for (int input_sizes_length : {2, 4}) {
4650       Reset();
4651       NodeDef node_def = get_conv2d_backprop_input_nodedef(
4652           ok_params[i].strides, ok_params[i].padding, ok_params[i].data_format,
4653           ok_params[i].dilations);
4654       AddTestTensor("input", ok_params[i].input_dims);
4655       AddTestWeights<float>("weights", ok_params[i].filter_dims,
4656                             ok_params[i].filter);
4657 
4658       std::vector<int> tf_input_sizes = ok_params[i].expected_output_dims;
4659       if (input_sizes_length == 4) {
4660         tf_input_sizes.insert(tf_input_sizes.begin(),
4661                               1);  // Add batch dimension.
4662         QCHECK_EQ(4, tf_input_sizes.size());
4663         AddTestWeights<int>("input_sizes", {4}, tf_input_sizes);
4664       } else {
4665         // Remove the channel dimension.
4666         if (ok_params[i].data_format == "NHWC") {
4667           tf_input_sizes.pop_back();
4668         } else {
4669           tf_input_sizes.erase(tf_input_sizes.begin());
4670         }
4671         QCHECK_EQ(2, tf_input_sizes.size());
4672         AddTestWeights<int>("input_sizes", {2}, tf_input_sizes);
4673       }
4674 
4675       RunValidationAndConversion(node_def);
4676       TRT_TensorOrWeights output;
4677       TF_EXPECT_OK(GetTensorOrWeights("my_conv2d_backprop_input", &output));
4678       ASSERT_TRUE(output.is_tensor());
4679       ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
4680                                output.tensor()->getDimensions());
4681 
4682       const DataVec input_data{{"input", AsTensor<float>(ok_params[i].input)}};
4683       DataVec output_data{
4684           {"my_conv2d_backprop_input",
4685            ConstructTensor<float>(ok_params[i].expected_output.size())}};
4686       TF_EXPECT_OK(BuildAndRun(input_data, &output_data));
4687       EXPECT_THAT(GetSpanForData<float>(output_data[0]),
4688                   ElementsAreArray(ok_params[i].expected_output));
4689     }
4690   }
4691 }
4692 
4693 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
TEST_F(OpConverterTest,ConvertConv3D)4694 TEST_F(OpConverterTest, ConvertConv3D) {
4695   // Get nodedef for Conv3D layer.
4696   auto get_conv3d_nodedef =
4697       [](std::vector<int> strides = {1, 1, 1, 1, 1}, string padding = "SAME",
4698          string data_format = "NCDHW",
4699          std::vector<int> dilations = {1, 1, 1, 1, 1},
4700          bool is_conv3d_backprop_input = false) -> NodeDef {
4701     Scope s = Scope::NewRootScope();
4702     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
4703     auto filter = ops::Placeholder(s.WithOpName("weights"), DT_FLOAT);
4704 
4705     if (is_conv3d_backprop_input) {
4706       auto input_sizes =
4707           ops::Placeholder(s.WithOpName("input_sizes"), DT_INT32);
4708       ops::Conv3DBackpropInputV2::Attrs attrs =
4709           ops::Conv3DBackpropInputV2::Attrs()
4710               .DataFormat(data_format)
4711               .Dilations(dilations);
4712       auto conv3d =
4713           ops::Conv3DBackpropInputV2(s.WithOpName("my_conv3d"), input_sizes,
4714                                      filter, input, strides, padding, attrs);
4715       return conv3d.operation.node()->def();
4716     } else {
4717       ops::Conv3D::Attrs attrs =
4718           ops::Conv3D::Attrs().DataFormat(data_format).Dilations(dilations);
4719       auto conv3d = ops::Conv3D(s.WithOpName("my_conv3d"), input, filter,
4720                                 strides, padding, attrs);
4721       return conv3d.operation.node()->def();
4722     }
4723   };
4724 
4725   {
4726     // Input is weights, should fail.
4727     Reset();
4728     NodeDef node_def = get_conv3d_nodedef();
4729 
4730     AddTestWeights<float>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
4731     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4732     RunValidationAndConversion(
4733         node_def, error::UNIMPLEMENTED,
4734         "The input \"input\" for Conv3D must be a tensor, at my_conv3d");
4735   }
4736   {
4737     // Filter is tensor, should fail.
4738     Reset();
4739     NodeDef node_def = get_conv3d_nodedef();
4740     AddTestTensor("input", {1, 2, 3});
4741     AddTestTensor("weights", {3, 3, 1, 1, 3, 3, 1, 1});
4742     RunValidationAndConversion(
4743         node_def, error::UNIMPLEMENTED,
4744         "The input \"filter\" for Conv3D must be a constant, at my_conv3d");
4745   }
4746   {
4747     // Filter is not 5D, should fail.
4748     Reset();
4749     NodeDef node_def = get_conv3d_nodedef();
4750     AddTestTensor("input", {1, 2, 3});
4751     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4752     RunValidationAndConversion(
4753         node_def, error::INVALID_ARGUMENT,
4754         "Conv3D expects kernel of dimension 5, at my_conv3d");
4755   }
4756   {
4757     // Dilations is not 5D, should fail.
4758     Reset();
4759     NodeDef node_def =
4760         get_conv3d_nodedef({1, 1, 1, 1, 1}, "SAME", "NCDHW", {1, 1, 1, 1});
4761     AddTestTensor("input", {1, 2, 3});
4762     AddTestWeights<float>(
4763         "weights", {3, 3, 1, 1, 1},
4764         {1, 2, 3, 4, 5, 6, 7, 8, 9});  // Dimensions, then values
4765     RunValidationAndConversion(
4766         node_def, error::INVALID_ARGUMENT,
4767         "Convolution dilations field must specify 5 dimensions, at my_conv3d");
4768   }
4769   {
4770     // Dilation value is not 1 for channel, should fail.
4771     Reset();
4772     NodeDef node_def =
4773         get_conv3d_nodedef({1, 1, 1, 1, 1}, "SAME", "NCDHW", {1, 2, 1, 1, 1});
4774     AddTestTensor("input", {1, 2, 3});
4775     AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
4776                           {1, 2, 3, 4, 5, 6, 7, 8, 9});
4777     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4778                                "Dilation rate must be 1 for batch and channel "
4779                                "dimensions, at my_conv3d");
4780   }
4781   {
4782     // Dilation value is not 1 for channel (NDHWC), should fail.
4783     Reset();
4784     NodeDef node_def =
4785         get_conv3d_nodedef({1, 1, 1, 1, 1}, "SAME", "NDHWC", {1, 1, 1, 1, 2});
4786     AddTestTensor("input", {2, 3, 1});
4787     AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
4788                           {1, 2, 3, 4, 5, 6, 7, 8, 9});
4789     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4790                                "Dilation rate must be 1 for batch and channel "
4791                                "dimensions, at my_conv3d");
4792   }
4793   {
4794     // Dilation + Conv3DBackpropInputV2, should fail.
4795     Reset();
4796     NodeDef node_def = get_conv3d_nodedef({1, 1, 1, 1, 1}, "SAME", "NDHWC",
4797                                           {1, 1, 2, 1, 1}, true);
4798     AddTestTensor("input", {2, 3, 1});
4799     AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
4800                           {1, 2, 3, 4, 5, 6, 7, 8, 9});
4801     AddTestWeights<int>("input_sizes", {4}, {1, 2, 3, 1});
4802     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4803                                "Dilation with Conv3DBackpropInputV2 "
4804                                "(conv3d_transpose) is not supported, "
4805                                "at my_conv3d");
4806   }
4807   {
4808     // Asymmetric+ Conv3DBackpropInputV2, should fail.
4809     Reset();
4810     NodeDef node_def = get_conv3d_nodedef({1, 1, 1, 1, 1}, "SAME", "NDHWC",
4811                                           {1, 1, 1, 1, 1}, true);
4812     AddTestTensor("input", {1, 2, 2, 2});
4813     AddTestWeights<float>("weights", {1, 1, 2, 1, 1}, {1, 1});
4814     AddTestWeights<int>("input_sizes", {8}, {1, 2, 3, 4, 5, 6, 7, 8});
4815     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4816                                "Asymmetric padding with Conv3DBackpropInputV2 "
4817                                "(conv3d_transpose) is not supported, at "
4818                                "my_conv3d");
4819   }
4820   {
4821     // Strides is not 5D, should fail.
4822     Reset();
4823     NodeDef node_def = get_conv3d_nodedef({1, 1, 1, 1, 1, 1}, "SAME", "NCDHW",
4824                                           {1, 1, 1, 1, 1});
4825     AddTestTensor("input", {1, 2, 2, 2});
4826     AddTestWeights<float>("weights", {1, 1, 2, 1, 1}, {1, 1});
4827     RunValidationAndConversion(
4828         node_def, error::INVALID_ARGUMENT,
4829         "Convolution strides field must specify 5 dimensions, at my_conv3d");
4830   }
4831   {
4832     // Stride value is not 1 for channel, should fail.
4833     Reset();
4834     NodeDef node_def =
4835         get_conv3d_nodedef({1, 2, 1, 1, 1}, "SAME", "NCDHW", {1, 1, 1, 1, 1});
4836     AddTestTensor("input", {1, 2, 3});
4837     AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
4838                           {1, 2, 3, 4, 5, 6, 7, 8, 9});
4839     RunValidationAndConversion(
4840         node_def, error::UNIMPLEMENTED,
4841         "Stride must be 1 for batch and channel dimensions, at my_conv3d");
4842   }
4843   struct TestParams {
4844     std::vector<int> input_dims;
4845     std::vector<float> input;
4846     std::vector<int> filter_dims;
4847     std::vector<float> filter;
4848     std::vector<int> strides;
4849     string padding;
4850     string data_format;
4851     std::vector<int> dilations;
4852     bool is_conv3d_backprop_input;
4853     std::vector<int> expected_output_dims;
4854     std::vector<float> expected_output;
4855   };
4856 
4857   // Start here
4858   std::vector<TestParams> ok_params = {
4859       // Basic - just 1x1 conv - input = output
4860       TestParams{
4861           /*input_dims=*/{1, 3, 3, 3},  // CDHW
4862           /*input=*/{1, 2,  15,  3, 6,  -3, 22, 1, 88, 56, 36, 1,  1, 105,
4863                      1, 16, -28, 1, 42, 9,  3,  1, 7,  1,  11, 61, 5},
4864           /*filter_dims=*/{1, 1, 1, 1, 1},  // DRSCK
4865           /*filter=*/{1},
4866           /*strides=*/{1, 1, 1, 1, 1},
4867           /*padding=*/"VALID",
4868           /*data_format=*/"NCDHW",
4869           /*dilations=*/{1, 1, 1, 1, 1},
4870           /*is_conv3d_backprop_input=*/false,
4871           /*expected_output_dims=*/{1, 3, 3, 3},
4872           /*expected_output=*/{1,  2,  15, 3, 6,   -3, 22, 1,   88,
4873                                56, 36, 1,  1, 105, 1,  16, -28, 1,
4874                                42, 9,  3,  1, 7,   1,  11, 61,  5}},
4875       // Basic - 2x1 filter
4876       TestParams{/*input_dims=*/{1, 3, 3, 3},  // CDHW
4877                  /*input=*/{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4878                             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6},
4879                  /*filter_dims=*/{2, 1, 1, 1, 1},  // DRSCK
4880                  /*filter=*/{1, 1},
4881                  /*strides=*/{1, 1, 1, 1, 1},
4882                  /*padding=*/"VALID",
4883                  /*data_format=*/"NCDHW",
4884                  /*dilations=*/{1, 1, 1, 1, 1},
4885                  /*is_conv3d_backprop_input=*/false,
4886                  /*expected_output_dims=*/{1, 2, 3, 3},
4887                  /*expected_output=*/
4888                  {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7}},
4889       // SAME padding (Asymmetric)
4890       TestParams{
4891           /*input_dims=*/{1, 2, 3, 2},  // CDHW
4892           /*input=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
4893           /*filter_dims=*/{2, 1, 1, 1, 1},  // DRSCK
4894           /*filter=*/{-1, 1},
4895           /*strides=*/{1, 1, 1, 1, 1},
4896           /*padding=*/"SAME",
4897           /*data_format=*/"NCDHW",
4898           /*dilations=*/{1, 1, 1, 1, 1},
4899           /*is_conv3d_backprop_input=*/false,
4900           /*expected_output_dims=*/{1, 2, 3, 2},
4901           /*expected_output=*/
4902           {6, 6, 6, 6, 6, 6, -6, -7, -8, -9, -10,
4903            -11}  // Diff in first 2 depths is const 6
4904       },
4905       // SAME padding (Symmetric)
4906       TestParams{
4907           /*input_dims=*/{1, 2, 3, 2},  // CDHW
4908           /*input=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
4909           /*filter_dims=*/{3, 1, 1, 1, 1},  // DRSCK
4910           /*filter=*/{-1, 0, 1},
4911           /*strides=*/{1, 1, 1, 1, 1},
4912           /*padding=*/"SAME",
4913           /*data_format=*/"NCDHW",
4914           /*dilations=*/{1, 1, 1, 1, 1},
4915           /*is_conv3d_backprop_input=*/false,
4916           /*expected_output_dims=*/{1, 2, 3, 2},
4917           /*expected_output=*/
4918           {6, 7, 8, 9, 10, 11, 0, -1, -2, -3, -4,
4919            -5}  // Swaps front two depths, negates
4920       },
4921 
4922       // NDHWC (multi-channel)
4923       TestParams{
4924           /*input_dims=*/{2, 3, 2, 2},  // DHWC
4925           /*input=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
4926                      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
4927           /*filter_dims=*/{2, 1, 1, 2, 1},  // DRSCK
4928           /*filter=*/{-1, 1, 1, -1},
4929           /*strides=*/{1, 1, 1, 1, 1},
4930           /*padding=*/"VALID",
4931           /*data_format=*/"NDHWC",
4932           /*dilations=*/{1, 1, 1, 1, 1},
4933           /*is_conv3d_backprop_input=*/false,
4934           /*expected_output_dims=*/{1, 3, 2, 1},
4935           /*expected_output=*/{0, 0, 0, 0, 0, 0}  // Each filter opposes the
4936                                                   // other
4937       },
4938 
4939       // Dilated
4940       TestParams{
4941           /*input_dims=*/{1, 3, 3, 3},  // CDHW
4942           /*input=*/{1,   1,   1,   1,   1, 1, 1, 1, 1, -10, -10, -10, -10, -10,
4943                      -10, -10, -10, -10, 7, 7, 7, 7, 7, 7,   7,   7,   7},
4944           /*filter_dims=*/{2, 1, 1, 1, 1},  // DRSCK
4945           /*filter=*/{1, 1},
4946           /*strides=*/{1, 1, 1, 1, 1},
4947           /*padding=*/"VALID",
4948           /*data_format=*/"NCDHW",
4949           /*dilations=*/{1, 1, 2, 1, 1},
4950           /*is_conv3d_backprop_input=*/false,
4951           /*expected_output_dims=*/{1, 1, 3, 3},
4952           /*expected_output=*/{8, 8, 8, 8, 8, 8, 8, 8, 8}  // Only front depth
4953                                                            // is valid, skips
4954                                                            // neg values
4955       },
4956       // Strided
4957       TestParams{
4958           /*input_dims=*/{1, 3, 3, 3},
4959           /*input=*/{1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0,
4960                      0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 7, 0, 8},
4961           /*filter_dims=*/{1, 1, 1, 1, 1},
4962           /*filter=*/{1},
4963           /*strides=*/{1, 1, 2, 2, 2},
4964           /*padding=*/"VALID",
4965           /*data_format=*/"NCDHW",
4966           /*dilations=*/{1, 1, 1, 1, 1},
4967           /*is_conv3d_backprop_input=*/false,
4968           /*expected_output_dims=*/{1, 2, 2, 2},
4969           /*expected_output=*/{1, 2, 3, 4, 5, 6, 7, 8}  // Should only pick up
4970                                                         // the corners
4971       },
4972       // Transpose Strided
4973       TestParams{/*input_dims=*/{1, 2, 2, 2},  // CDHW
4974                  /*input=*/{1, 2, 3, 4, 5, 6, 7, 8},
4975                  /*filter_dims=*/{1, 1, 1, 1, 1},
4976                  /*filter=*/{1},
4977                  /*strides=*/{1, 1, 2, 2, 2},
4978                  /*padding=*/"VALID",
4979                  /*data_format=*/"NCDHW",
4980                  /*dilations=*/{1, 1, 1, 1, 1},
4981                  /*is_conv3d_backprop_input=*/true,
4982                  /*expected_output_dims=*/{1, 3, 3, 3},
4983                  /*expected_output=*/
4984                  {1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0,
4985                   0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 7, 0, 8}},  // Cube
4986                                                             // expands and
4987                                                             // fills
4988                                                             // center with
4989                                                             // zeroes
4990 
4991   };
4992 
4993   for (int i = 0; i < ok_params.size(); i++) {
4994     Reset();
4995     NodeDef node_def = get_conv3d_nodedef(
4996         ok_params[i].strides, ok_params[i].padding, ok_params[i].data_format,
4997         ok_params[i].dilations, ok_params[i].is_conv3d_backprop_input);
4998     AddTestTensor("input", ok_params[i].input_dims);
4999     AddTestWeights<float>("weights", ok_params[i].filter_dims,
5000                           ok_params[i].filter);
5001     if (ok_params[i].is_conv3d_backprop_input) {
5002       AddTestWeights<float>(
5003           "input_sizes",
5004           {static_cast<int>(ok_params[i].expected_output.size())},
5005           ok_params[i].expected_output);
5006     }
5007     RunValidationAndConversion(node_def);
5008     TRT_TensorOrWeights output;
5009     TF_EXPECT_OK(GetTensorOrWeights("my_conv3d", &output));
5010     ASSERT_TRUE(output.is_tensor());
5011     ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
5012                              output.tensor()->getDimensions());
5013 
5014     const DataVec input_data{{"input", AsTensor<float>(ok_params[i].input)}};
5015     DataVec output_data{
5016         {"my_conv3d",
5017          ConstructTensor<float>(ok_params[i].expected_output.size())}};
5018     TF_EXPECT_OK(BuildAndRun(input_data, &output_data));
5019     EXPECT_THAT(GetSpanForData<float>(output_data[0]),
5020                 ElementsAreArray(ok_params[i].expected_output));
5021   }
5022 }
5023 #endif
5024 
5025 template <typename T>
CreatePoolOp(DataType tf_type,std::vector<int> ksize,std::vector<int> strides,string padding,string data_format)5026 NodeDef CreatePoolOp(DataType tf_type, std::vector<int> ksize,
5027                      std::vector<int> strides, string padding,
5028                      string data_format) {
5029   Scope s = Scope::NewRootScope();
5030   auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
5031   typename T::Attrs attrs;
5032   attrs.data_format_ = data_format;
5033   return T(s.WithOpName("my_pool"), input, ksize, strides, padding, attrs)
5034       .operation.node()
5035       ->def();
5036 }
TEST_P(OpConverter_FP32_Test,ConvertPool)5037 TEST_P(OpConverter_FP32_Test, ConvertPool) {
5038   // Get nodedef for MaxPool and AvgPool layers (2D or 3D).
5039   auto get_pool_nodedef =
5040       [](DataType tf_type, int nDim, std::vector<int> ksize = {},
5041          std::vector<int> strides = {}, string padding = "SAME",
5042          string data_format = "", const bool is_max_pooling = true) -> NodeDef {
5043     if (ksize.empty()) {
5044       ksize = nDim == 2 ? std::vector<int>{1, 1, 1, 1}
5045                         : std::vector<int>{1, 1, 1, 1, 1};
5046     }
5047     if (strides.empty()) {
5048       strides = nDim == 2 ? std::vector<int>{1, 1, 1, 1}
5049                           : std::vector<int>{1, 1, 1, 1, 1};
5050     }
5051     if (data_format == "") {
5052       data_format = nDim == 2 ? "NCHW" : "NCDHW";
5053     }
5054     if (is_max_pooling) {
5055       if (nDim == 3) {
5056         return CreatePoolOp<ops::MaxPool3D>(tf_type, ksize, strides, padding,
5057                                             data_format);
5058       } else {
5059         return CreatePoolOp<ops::MaxPool>(tf_type, ksize, strides, padding,
5060                                           data_format);
5061       }
5062     } else {
5063       if (nDim == 3) {
5064         return CreatePoolOp<ops::AvgPool3D>(tf_type, ksize, strides, padding,
5065                                             data_format);
5066       } else {
5067         return CreatePoolOp<ops::AvgPool>(tf_type, ksize, strides, padding,
5068                                           data_format);
5069       }
5070     }
5071   };
5072 
5073 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
5074   std::vector<int> test_nDims{2, 3};
5075 #else
5076   std::vector<int> test_nDims{2};
5077 #endif
5078 
5079   for (int nDim : test_nDims) {
5080     // Input is weights, should fail.
5081     Reset();
5082     NodeDef node_def = get_pool_nodedef(tf_type_, nDim);
5083 
5084     AddTestWeights<float>("input", {1, 1, 1, 2, 3}, {1, 2, 3, 4, 5, 6});
5085     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
5086                                StrCat("The input \"input\" for ", node_def.op(),
5087                                       " must be a tensor, at my_pool")
5088                                    .c_str());
5089   }
5090 
5091   struct TestParams {
5092     std::vector<int> input_dims;
5093     std::vector<float> input;
5094     std::vector<int> ksize;
5095     std::vector<int> strides;
5096     string padding;
5097     string data_format;
5098     std::vector<int> expected_output_dims;
5099     // The expected outputs for the following operations: MaxPool2D, AvgPool2D,
5100     // MaxPool3D, AvgPool3D
5101     std::vector<std::vector<float>> expected_outputs;
5102   };
5103 
5104   // We use common_input as the input to test both 2D and 3D pooling operations,
5105   // to simplify TestParams. For 2D operations, only the first 1/3 of the values
5106   // are used.
5107   const std::vector<float> common_input{-4, 2,  15, 3, 6,   -3, 22, 1,   88,
5108                                         56, 36, 1,  1, 105, 1,  16, -28, 1,
5109                                         42, 9,  3,  1, 7,   1,  11, 61,  5};
5110   // The output of 2D ops for the case where the op is equivalent to the
5111   // identity op.
5112   const std::vector<float> common_2d_output{-4, 2, 15, 3, 6, -3, 22, 1, 88};
5113   std::vector<TestParams> ok_params = {
5114       // Basic - just 1x1 max pooling - input = output
5115       TestParams{
5116           /*input_dims=*/{1, 1, 3, 3, 3},
5117           /*input=*/common_input,
5118           /*ksize=*/{1, 1, 1, 1, 1},
5119           /*strides=*/{1, 1, 1, 1, 1},
5120           /*padding=*/"VALID",
5121           /*data_format=*/"NCDHW",
5122           /*expected_output_dims=*/{1, 1, 3, 3, 3},
5123           /*expected_outputs=*/
5124           {common_2d_output, common_2d_output, common_input, common_input}},
5125       // Basic - just 1x1 max pooling - input = output, SAME padding
5126       TestParams{
5127           /*input_dims=*/{1, 1, 3, 3, 3},
5128           /*input=*/common_input,
5129           /*ksize=*/{1, 1, 1, 1, 1},
5130           /*strides=*/{1, 1, 1, 1, 1},
5131           /*padding=*/"SAME",
5132           /*data_format=*/"NCDHW",
5133           /*expected_output_dims=*/{1, 1, 3, 3, 3},
5134           /*expected_outputs=*/
5135           {common_2d_output, common_2d_output, common_input, common_input}},
5136       // 3x3 pooling NCDHW
5137       TestParams{/*input_dims=*/{1, 1, 3, 3, 3},
5138                  /*input=*/common_input,
5139                  /*ksize=*/{1, 1, 3, 3, 3},
5140                  /*strides=*/{1, 1, 1, 1, 1},
5141                  /*padding=*/"VALID",
5142                  /*data_format=*/"NCDHW",
5143                  /*expected_output_dims=*/{1, 1, 1, 1, 1},
5144                  /*expected_outputs=*/{{88}, {14.444445}, {105}, {17}}},
5145       // 3x3 pooling, NDHWC
5146       TestParams{/*input_dims=*/{1, 3, 3, 3, 1},
5147                  /*input=*/common_input,
5148                  /*ksize=*/{1, 3, 3, 3, 1},
5149                  /*strides=*/{1, 1, 1, 1, 1},
5150                  /*padding=*/"VALID",
5151                  /*data_format=*/"NDHWC",
5152                  /*expected_output_dims=*/{1, 1, 1, 1, 1},
5153                  /*expected_outputs=*/{{88}, {14.444445}, {105}, {17}}},
5154       // Strided
5155       TestParams{/*input_dims=*/{1, 1, 3, 3, 3},
5156                  /*input=*/{1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0,
5157                             0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 7, 0, 8},
5158                  /*ksize=*/{1, 1, 1, 1, 1},
5159                  /*strides=*/{1, 1, 2, 2, 2},
5160                  /*padding=*/"VALID",
5161                  /*data_format=*/"NCDHW",
5162                  /*expected_output_dims=*/{1, 1, 2, 2, 2},
5163                  /*expected_outputs=*/
5164                  {{1, 2, 3, 4},  // Should only pick up the corners
5165                   {1, 2, 3, 4},
5166                   {1, 2, 3, 4, 5, 6, 7, 8},
5167                   {1, 2, 3, 4, 5, 6, 7, 8}}},
5168   };
5169 
5170   for (auto p : ok_params) {
5171     int test_counter = 0;
5172     for (int nDim : test_nDims) {
5173       auto input = p.input;
5174       auto input_dims = p.input_dims;
5175       auto ksize = p.ksize;
5176       auto strides = p.strides;
5177       auto expected_output_dims = p.expected_output_dims;
5178       std::string data_format = p.data_format;
5179       if (nDim == 2) {
5180         input.resize(9);
5181         data_format = p.data_format == "NDHWC" ? "NHWC" : "NCHW";
5182         // Remove one of the spatial dimensions
5183         input_dims.erase(input_dims.begin() + 2);
5184         ksize.erase(ksize.begin() + 2);
5185         strides.erase(strides.begin() + 2);
5186         expected_output_dims.erase(expected_output_dims.begin() + 2);
5187       }
5188       for (bool is_max_pooling : {true, false}) {
5189         Reset();
5190         NodeDef node_def =
5191             get_pool_nodedef(tf_type_, nDim, ksize, strides, p.padding,
5192                              data_format, is_max_pooling);
5193         AddTestTensor("input", input_dims, input);
5194         TestOpConverter("my_pool", node_def, expected_output_dims, Status::OK(),
5195                         Status::OK(),
5196                         ElementsAreArray(p.expected_outputs.at(test_counter)));
5197         test_counter++;
5198       }
5199     }
5200   }
5201 }
5202 
// Checks the TopKV2 converter: a k supplied as a tensor is rejected, and a
// constant k = 2 produces the expected values and indices outputs.
TEST_P(OpConverter_FP32_FP16_Test, ConvertTopK) {
  // Assemble the TopKV2 NodeDef. The k operand is fed through the placeholder
  // named "weights".
  Scope root = Scope::NewRootScope();
  auto values_ph = ops::Placeholder(root.WithOpName("input"), tf_type_);
  auto k_ph = ops::Placeholder(root.WithOpName("weights"), DT_INT32);
  auto topk_op = ops::TopK(root.WithOpName("my_topk"), values_ph, k_ph);
  const NodeDef& node_def = topk_op.operation.node()->def();

  // Failure case: K is a tensor instead of a constant.
  {
    Reset();
    AddTestTensor("input", {1, 1, 2, 3});
    AddTestTensor("weights", {1}, DT_INT32, {});
    RunValidationAndConversion(
        node_def, error::UNIMPLEMENTED,
        "The input \"k\" for TopKV2 must be a constant, at my_topk");
  }

  // Success case: constant k = 2 on two rows of five values each.
  {
    Reset();
    AddTestTensor("input", {1, 1, 2, 5}, {-9, 3, 5, 1, 6, -5, 7, 1, 0, -1});
    AddTestWeights<int32>("weights", {1}, {2});
    // Both outputs (values and indices) share the same shape.
    const std::vector<int> per_output_dims{1, 1, 2, 2};
    std::vector<std::vector<int>> expected_output_dims{per_output_dims,
                                                       per_output_dims};
    TestOpConverterMultiOut("my_topk", node_def, expected_output_dims,
                            Status::OK(), Status::OK(),
                            {ElementsAre(6, 5, 7, 1), ElementsAre(4, 2, 1, 2)},
                            {tf_type_, DT_INT32});
  }
}
5232 
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertGather)5233 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertGather) {
5234   // Get the NodeDef for GatherV2.
5235   Scope s = Scope::NewRootScope();
5236   auto params = ops::Placeholder(s.WithOpName("params"), tf_type_);
5237   auto indices = ops::Placeholder(s.WithOpName("indices"), DT_INT32);
5238   auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
5239   auto gather = ops::GatherV2(s.WithOpName("my_gather"), params, indices, axis);
5240   const NodeDef& node_def = gather.operation.node()->def();
5241   {
5242     // Axis is a tensor, should fail.
5243     Reset();
5244     AddTestTensor("params", {1, 1, 2, 3}, tf_type_, {});
5245     AddTestTensor("indices", {1, 2}, DT_INT32, {});
5246     AddTestTensor("axis", {1}, DT_INT32, {});
5247     RunValidationAndConversion(
5248         node_def, error::UNIMPLEMENTED,
5249         "The input \"axis\" for GatherV2 must be a constant, at my_gather");
5250   }
5251   {
5252     // Axis is out of bounds, should fail.
5253     Reset();
5254     AddTestTensor("params", {1, 1, 2, 3});
5255     AddTestTensor("indices", {1, 2}, DT_INT32, {});
5256     AddTestWeights<int32>("axis", {1}, {4});
5257     RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
5258                                "Axis value of 4 is out of bounds, must be in "
5259                                "range [-4, 4), at my_gather");
5260   }
5261 
5262   struct TestParams {
5263     // TF shape of the input 'params' (including batch dimension).
5264     std::vector<int> params_shape;
5265     // TF shape of the input 'indices' (including batch dimension).
5266     std::vector<int> indices_shape;
5267     std::vector<int> indices;
5268     int axis;
5269     // Expected TF shape of the output (including batch dimension).
5270     std::vector<int> expected_output_shape;
5271     std::vector<int> expected_output;
5272     bool params_is_tensor;
5273     Status status;
5274     Status runtime_status;
5275     Status add_index_status;
5276   };
5277 
5278   // Input is the same {1, 2, 3, 4, 5, 6} for all cases.
5279   const std::vector<int> params_input = {1, 2, 3, 4, 5, 6};
5280   std::vector<TestParams> test_params = {
5281       // Axis is batch dimension, should fail in implicit batch mode.
5282       TestParams{/*params_shape=*/{2, 1, 1, 3},
5283                  /*indices_shape=*/{2},
5284                  /*indices=*/{1, 0},
5285                  /*axis=*/0,
5286                  /*expected_output_shape=*/{2, 1, 1, 3},
5287                  /*expected_output=*/{4, 5, 6, 1, 2, 3},
5288                  /*params_is_tensor=*/true,
5289                  trt_mode_ == TrtTestMode::kImplicitBatch
5290                      ? Status{error::UNIMPLEMENTED,
5291                               "TensorRT does not allow manipulation of the"
5292                               " batch dimension, at my_gather"}
5293                      : Status::OK()},
5294       // Batch size of indices is not 1 when params is a tensor.
5295       TestParams{/*params_shape=*/{2, 1, 3},
5296                  /*indices_shape=*/{2, 1},
5297                  /*indices=*/{2, 0},
5298                  /*axis=*/2,
5299                  /*expected_output_shape=*/{2, 1, 2, 1},
5300                  /*expected_output=*/{3, 1, 6, 4},
5301                  /*params_is_tensor=*/true,
5302                  trt_mode_ == TrtTestMode::kImplicitBatch
5303                      ? Status{error::UNIMPLEMENTED,
5304                               "Indices must have a batch size of 1 when params"
5305                               " is a tensor."}
5306                      : Status::OK()},
5307       // Axis is not zero when params is a weight, should fail in implicit batch
5308       // mode.
5309       TestParams{/*params_shape=*/{2, 1, 3},
5310                  /*indices_shape=*/{2},
5311                  /*indices=*/{1, 2},
5312                  /*axis=*/2,
5313                  /*expected_output_shape=*/{2, 1, 2},
5314                  /*expected_output=*/{2, 3, 5, 6},
5315                  /*params_is_tensor=*/false,
5316                  trt_mode_ == TrtTestMode::kImplicitBatch
5317                      ? Status{error::UNIMPLEMENTED,
5318                               "The input axis must be zero when params is a"
5319                               " weight."}
5320                      : Status::OK()},
5321       // Params with only batch dimension.
5322       TestParams{/*params_shape=*/{6},
5323                  /*indices_shape=*/{2},
5324                  /*indices=*/{1, 3},
5325                  /*axis=*/0,
5326                  /*expected_output_shape=*/{2},
5327                  /*expected_output=*/{2, 4},
5328                  /*params_is_tensor=*/true,
5329                  trt_mode_ == TrtTestMode::kImplicitBatch  // conversion_status
5330                      ? Status{error::UNIMPLEMENTED,
5331                               "TensorRT does not allow manipulation of the "
5332                               "batch dimension, at my_gather"}
5333                      : Status::OK(),
5334                  Status::OK(),                             // runtime_status
5335                  trt_mode_ == TrtTestMode::kImplicitBatch  // add_index_status
5336                      ? Status{error::INVALID_ARGUMENT,
5337                               "Batch size doesn't match for tensor indices: "
5338                               "Provided batch size does not match converter "
5339                               "batch size: 2 vs 6"}
5340                      : Status::OK()},
5341       // Vector indices, and output rank is rank(params).
5342       TestParams{
5343           /*params_shape=*/{1, 1, 2, 3},
5344           /*indices_shape=*/{1},
5345           /*indices=*/{0},
5346           /*axis=*/3,
5347           /*expected_output_shape=*/{1, 1, 2, 1},
5348           /*expected_output=*/{1, 4},
5349           /*params_is_tensor=*/true,
5350       },
5351       TestParams{
5352           /*params_shape=*/{1, 1, 2, 3},
5353           /*indices_shape=*/{1},
5354           /*indices=*/{1},
5355           /*axis=*/2,
5356           /*expected_output_shape=*/{1, 1, 1, 3},
5357           /*expected_output=*/{4, 5, 6},
5358           /*params_is_tensor=*/true,
5359       },
5360       // Indices with rank>1, and output rank is rank(params) + rank(indices) -
5361       // 1
5362       TestParams{
5363           /*params_shape=*/{1, 1, 2, 3},
5364           /*indices_shape=*/{1, 1},
5365           /*indices=*/{0},
5366           /*axis=*/3,
5367           /*expected_output_shape=*/{1, 1, 2, 1, 1},
5368           /*expected_output=*/{1, 4},
5369           /*params_is_tensor=*/true,
5370       },
5371       TestParams{
5372           /*params_shape=*/{1, 1, 2, 3},
5373           /*indices_shape=*/{1, 1},
5374           /*indices=*/{1},
5375           /*axis=*/3,
5376           /*expected_output_shape=*/{1, 1, 2, 1, 1},
5377           /*expected_output=*/{2, 5},
5378           /*params_is_tensor=*/true,
5379       },
5380       TestParams{
5381           /*params_shape=*/{1, 1, 2, 3},
5382           /*indices_shape=*/{1, 1},
5383           /*indices=*/{2},
5384           /*axis=*/-1,
5385           /*expected_output_shape=*/{1, 1, 2, 1, 1},
5386           /*expected_output=*/{3, 6},
5387           /*params_is_tensor=*/true,
5388       },
5389       TestParams{
5390           /*params_shape=*/{1, 1, 2, 3},
5391           /*indices_shape=*/{1, 3},
5392           /*indices=*/{2, 0, 1},
5393           /*axis=*/3,
5394           /*expected_output_shape=*/{1, 1, 2, 1, 3},
5395           /*expected_output=*/{3, 1, 2, 6, 4, 5},
5396           /*params_is_tensor=*/true,
5397       },
5398       TestParams{
5399           /*params_shape=*/{1, 3, 2},
5400           /*indices_shape=*/{1, 2, 2},
5401           /*indices=*/{0, 0, 1, 0},
5402           /*axis=*/2,
5403           /*expected_output_shape=*/{1, 3, 1, 2, 2},
5404           /*expected_output=*/{1, 1, 2, 1, 3, 3, 4, 3, 5, 5, 6, 5},
5405           /*params_is_tensor=*/true,
5406       },
5407       TestParams{
5408           /*params_shape=*/{1, 2, 3},
5409           /*indices_shape=*/{1},
5410           /*indices=*/{0},
5411           /*axis=*/0,
5412           /*expected_output_shape=*/{1, 2, 3},
5413           /*expected_output=*/{1, 2, 3, 4, 5, 6},
5414           /*params_is_tensor=*/false,
5415       },
5416       TestParams{
5417           /*params_shape=*/{3, 2},
5418           /*indices_shape=*/{1, 2},
5419           /*indices=*/{0, 1},
5420           /*axis=*/0,
5421           /*expected_output_shape=*/{1, 2, 2},
5422           /*expected_output=*/{1, 2, 3, 4},
5423           /*params_is_tensor=*/false,
5424       },
5425       TestParams{
5426           /*params_shape=*/{2, 3},
5427           /*indices_shape=*/{1, 1, 2},
5428           /*indices=*/{0, 1},
5429           /*axis=*/0,
5430           /*expected_output_shape=*/{1, 1, 2, 3},
5431           /*expected_output=*/{1, 2, 3, 4, 5, 6},
5432           /*params_is_tensor=*/false,
5433       },
5434       TestParams{
5435           /*params_shape=*/{3, 2},
5436           /*indices_shape=*/{2, 2},
5437           /*indices=*/{0, 2, 1, 0},
5438           /*axis=*/0,
5439           /*expected_output_shape=*/{2, 2, 2},
5440           /*expected_output=*/{1, 2, 5, 6, 3, 4, 1, 2},
5441           /*params_is_tensor=*/false,
5442       },
5443   };
5444 
5445   for (auto p : test_params) {
5446     Reset();
5447     if (p.params_is_tensor) {
5448       AddTestTensor("params", p.params_shape, params_input);
5449     } else {
5450       AddTestWeights("params", p.params_shape, params_input, tf_type_);
5451     }
5452     AddTestTensor("indices", p.indices_shape, DT_INT32, p.indices, {},
5453                   p.add_index_status);
5454     AddTestWeights<int32>("axis", {1}, {p.axis});
5455     TestOpConverter("my_gather", node_def, p.expected_output_shape, p.status,
5456                     p.runtime_status, ElementsAreArray(p.expected_output));
5457   }
5458 }
5459 
5460 template <typename OpType>
CreateReduceOp(DataType tf_type,bool keep_dims)5461 NodeDef CreateReduceOp(DataType tf_type, bool keep_dims) {
5462   Scope s = Scope::NewRootScope();
5463   auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
5464   auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
5465   typename OpType::Attrs op_attrs;
5466   op_attrs.keep_dims_ = keep_dims;
5467   auto op = OpType(s.WithOpName("my_reduce"), input, axis, op_attrs);
5468   return op.operation.node()->def();
5469 }
5470 
// Applies a reduction op to consecutive, non-overlapping sub-sequences of
// `input`, each `m` elements long:
//   output[i] = reduce(input[m * i : m * (i + 1)])
//
// Args:
//   op_name: name of the reduction; when it equals "Mean" every accumulated
//     value is additionally divided by `m`.
//   input: values to reduce. Trailing elements that do not fill a complete
//     group of `m` are ignored.
//   m: group length. A non-positive value yields an empty result.
//   op: binary function folded over each group via std::accumulate.
//   init: initial accumulator value for every group.
//
// Returns a vector with input.size() / m reduced values.
std::vector<float> CalcReduce(const string& op_name,
                              const std::vector<float>& input, int m,
                              float (*op)(float, float), float init) {
  // Guard against division by zero (m == 0); negative group lengths have no
  // complete groups either.
  if (m <= 0) return {};

  // Loop-invariant: decide once whether results must be averaged.
  const bool is_mean = (op_name == "Mean");

  std::vector<float> output(input.size() / static_cast<size_t>(m));
  auto group_begin = input.begin();
  for (float& reduced : output) {
    const auto group_end = group_begin + m;
    reduced = std::accumulate(group_begin, group_end, init, op);
    if (is_mean) {
      reduced /= m;
    }
    group_begin = group_end;
  }
  return output;
}
TEST_P(OpConverter_FP32_Test,ConvertReduce)5486 TEST_P(OpConverter_FP32_Test, ConvertReduce) {
5487   {
5488     // Input is weights, should fail.
5489     Reset();
5490     const NodeDef node_def = CreateReduceOp<ops::Sum>(tf_type_, false);
5491     AddTestWeights<float>("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
5492     AddTestWeights<int32>("axis", {1}, {1});
5493     RunValidationAndConversion(
5494         node_def, error::UNIMPLEMENTED,
5495         "The input \"input\" for Sum must be a tensor, at my_reduce");
5496   }
5497   {
5498     // Axis is weights, should fail.
5499     Reset();
5500     const NodeDef node_def = CreateReduceOp<ops::Sum>(tf_type_, false);
5501     AddTestTensor("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
5502     AddTestTensor("axis", {1}, DT_INT32, {1});
5503     RunValidationAndConversion(
5504         node_def, error::UNIMPLEMENTED,
5505         "The input \"axis\" for Sum must be a constant, at my_reduce");
5506   }
5507   using OpFunc = std::function<NodeDef(DataType, bool)>;
5508   using ValFunc = float (*)(float, float);
5509   struct ReduceTestDescriptor {
5510     string name;
5511     OpFunc get_node;
5512     ValFunc val_func;
5513     float init_val;
5514   };
5515   std::vector<ReduceTestDescriptor> op_test_info{
5516       {"Sum", CreateReduceOp<ops::Sum>, [](float x, float y) { return x + y; },
5517        0},
5518       {"Prod", CreateReduceOp<ops::Prod>,
5519        [](float x, float y) { return x * y; }, 1},
5520       {"Mean", CreateReduceOp<ops::Mean>,
5521        [](float x, float y) { return x + y; }, 0},
5522       {"Min", CreateReduceOp<ops::Min>,
5523        [](float x, float y) { return y < x ? y : x; }, 1000},
5524       {"Max", CreateReduceOp<ops::Max>,
5525        [](float x, float y) { return x < y ? y : x; }, -1000}};
5526 
5527   std::vector<float> input_values{1, 2, 3, 4, 5, 6};
5528   struct TestParams {
5529     std::vector<int> input_dims;
5530     std::vector<float> input_values;
5531     // Helper array contains the same elements as input but permuted in a way
5532     // that the reduction can be calculated over contiguous elements using
5533     // CalcReduce
5534     std::vector<float> helper_array;
5535     std::vector<int> axis;
5536     int stride;  // product of input_dims along axis
5537     Status conversion_status;
5538   };
5539   std::vector<TestParams> params{
5540       // Out of range tests
5541       TestParams{{2, 3, 1}, input_values, input_values, {3}, 3},
5542       TestParams{{2, 3, 1}, input_values, input_values, {-4}, 3},
5543       // Ok tests
5544       TestParams{{2, 3, 1}, input_values, {1, 4, 2, 5, 3, 6}, {0}, 2},
5545       TestParams{{2, 3, 1}, input_values, input_values, {1}, 3},
5546       TestParams{{2, 3, 1}, input_values, input_values, {2}, 1},
5547       TestParams{{2, 3, 1}, input_values, input_values, {0, 1}, 6},
5548       // Ok tests with negative axis values
5549       TestParams{{2, 3, 1}, input_values, {1, 4, 2, 5, 3, 6}, {-3}, 2},
5550       TestParams{{2, 3, 1}, input_values, input_values, {-2}, 3},
5551       TestParams{{2, 3, 1}, input_values, input_values, {-1}, 1},
5552       TestParams{{2, 3, 1}, input_values, input_values, {-3, 1}, 6},
5553   };
5554 
5555   for (bool keep_dims : {false, true}) {
5556     for (auto& op : op_test_info) {
5557       for (auto p : params) {
5558         SCOPED_TRACE(StrCat(op.name, keep_dims ? "keep_dims" : ""));
5559         Reset();
5560         NodeDef node_def = op.get_node(tf_type_, keep_dims);
5561 
5562         AddTestTensor("input", p.input_dims, p.input_values);
5563         AddTestWeights<int32>("axis", {static_cast<int>(p.axis.size())},
5564                               p.axis);
5565         std::vector<int> expected_output_dims(p.input_dims);
5566 
5567         // Set expected output dim and conversion error messages
5568         for (int ax : p.axis) {
5569           int rank = p.input_dims.size();
5570           if (ax >= rank || ax < -rank) {
5571             p.conversion_status =
5572                 errors::InvalidArgument("Axis value of ", ax,
5573                                         " is out of bounds, must be in "
5574                                         "range [",
5575                                         -rank, ", ", rank, "), at my_reduce");
5576           } else {
5577             int ax_positive = ax >= 0 ? ax : ax + rank;
5578             // Zero marks elements that we will remove later.
5579             expected_output_dims[ax_positive] = keep_dims ? 1 : 0;
5580             if (trt_mode_ == TrtTestMode::kImplicitBatch &&
5581                 (ax == 0 || ax == -rank)) {
5582               p.conversion_status = errors::Unimplemented(
5583                   "TensorRT does not allow manipulation of the batch "
5584                   "dimension, at my_reduce");
5585             }
5586           }
5587         }
5588         expected_output_dims.erase(std::remove(expected_output_dims.begin(),
5589                                                expected_output_dims.end(), 0),
5590                                    expected_output_dims.end());
5591         VLOG(2) << "out dims "
5592                 << absl::StrCat("[", absl::StrJoin(expected_output_dims, ","),
5593                                 "]");
5594         std::vector<float> expected_values = CalcReduce(
5595             op.name, p.helper_array, p.stride, op.val_func, op.init_val);
5596         TestOpConverter("my_reduce", node_def, expected_output_dims,
5597                         p.conversion_status, Status::OK(),
5598                         ArrayFloatNear(expected_values));
5599       }
5600     }
5601   }
5602 }
5603 
CreateCastOp(DataType tf_type)5604 NodeDef CreateCastOp(DataType tf_type) {
5605   Scope s = Scope::NewRootScope();
5606   auto input = ops::Placeholder(s.WithOpName("input"), DT_HALF);
5607   return ops::Cast(s.WithOpName("my_unary"), input, DT_FLOAT)
5608       .operation.node()
5609       ->def();
5610 }
5611 
TEST_P(OpConverter_FP32_Test,ConvertUnary)5612 TEST_P(OpConverter_FP32_Test, ConvertUnary) {
5613   {
5614     // Input is weights, should fail.
5615     Reset();
5616     const NodeDef node_def = CreateUnaryOp<ops::Neg>(tf_type_);
5617     AddTestWeights<float>("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
5618     RunValidationAndConversion(
5619         node_def, error::UNIMPLEMENTED,
5620         "The input \"x\" for Neg must be a tensor, at my_unary");
5621   }
5622   using OpFunc = std::function<NodeDef(DataType)>;
5623   using ValFunc = float (*)(float);
5624   std::map<std::string, std::pair<OpFunc, ValFunc>> op_map;
5625 #define ADD_OP(name, op, compute) \
5626   op_map[name] =                  \
5627       std::make_pair(CreateUnaryOp<op>, static_cast<ValFunc>(compute))
5628   ADD_OP("Abs", ops::Abs, std::abs);
5629   ADD_OP("Acos", ops::Acos, std::acos);
5630   ADD_OP("Acosh", ops::Acosh, std::acosh);
5631   ADD_OP("Asin", ops::Asin, std::asin);
5632   ADD_OP("Asinh", ops::Asinh, std::asinh);
5633   ADD_OP("Atan", ops::Atan, std::atan);
5634   ADD_OP("Atanh", ops::Atanh, std::atanh);
5635   op_map["Cast"] = std::make_pair(CreateCastOp, [](float x) { return x; });
5636   ADD_OP("Ceil", ops::Ceil, std::ceil);
5637   ADD_OP("Cos", ops::Cos, std::cos);
5638   ADD_OP("Cosh", ops::Cosh, std::cosh);
5639   ADD_OP("Exp", ops::Exp, std::exp);
5640   ADD_OP("Floor", ops::Floor, std::floor);
5641   ADD_OP("Log", ops::Log, std::log);
5642   ADD_OP("Neg", ops::Neg, [](float x) { return -x; });
5643   ADD_OP("Reciprocal", ops::Reciprocal, [](float x) { return 1.0f / x; });
5644   ADD_OP("Rsqrt", ops::Rsqrt, [](float x) { return 1.0f / std::sqrt(x); });
5645   ADD_OP("Sin", ops::Sin, std::sin);
5646   ADD_OP("Sinh", ops::Sinh, std::sinh);
5647   ADD_OP("Sqrt", ops::Sqrt, std::sqrt);
5648   ADD_OP("Tan", ops::Tan, std::tan);
5649 #undef ADD_OP
5650   // Get list of ops to test.
5651   std::vector<string> ops_to_test;
5652   // Add all ops supported by ConvertUnary.
5653   auto* map = UnaryOperationMap();
5654   ops_to_test.reserve(map->size());
5655   for (auto& pair : *map) {
5656     ops_to_test.push_back(pair.first);
5657   }
5658   // Add other unary ops to test.
5659   ops_to_test.push_back("Rsqrt");
5660   // Prepare test parameters
5661   auto p = TestParamBase{
5662       {1, 1, 2, 3},  // input dims
5663       {},            // input partial dims
5664       {1, 1, 2, 3},  // expected output dims
5665   };
5666   for (const string& op_name : ops_to_test) {
5667     SCOPED_TRACE(op_name);
5668     Reset();
5669     if (!op_map.count(op_name)) {
5670       FAIL() << "Unary op test map does not contain op " << op_name;
5671     }
5672     NodeDef node_def = op_map[op_name].first(tf_type_);
5673 
5674     // TODO(bixia): we assume this test is only instantiated for DT_FLOAT for
5675     // now. Need to find a better way to express input and output types.
5676     //
5677     // TODO(tfeher): improve tests by defining an expected output data type and
5678     // check that. Currently only the shape and values of the output are
5679     // checked.
5680     DataType input_tf_type = op_name == "Cast" ? DT_HALF : tf_type_;
5681 
5682     std::vector<float> input_values{-0.9f, 0.6f, 0.0f, -3.5f, 100.0f, 2.9f};
5683     AddTestTensor("input", p.input_dims, input_tf_type, input_values);
5684     std::vector<float> output;
5685     std::transform(input_values.begin(), input_values.end(),
5686                    std::back_inserter(output), op_map[op_name].second);
5687     TestOpConverter("my_unary", node_def, p.expected_output_dims, Status::OK(),
5688                     p.runtime_status, ArrayFloatNear(output, 0.0001, true));
5689   }
5690 }
5691 
5692 // Get the NodeDef for ConcatV2.
5693 // TODO(hinsu): Consider switching this to static function.
__anon73f76ade2702(DataType dtype, int num_inputs) 5694 auto get_concat_nodedef = [](DataType dtype, int num_inputs) -> NodeDef {
5695   Scope s = Scope::NewRootScope();
5696   std::vector<Input> values;
5697   for (int i = 0; i < num_inputs; ++i) {
5698     const string input_name = StrCat("values_", i);
5699     values.push_back(ops::Placeholder(s.WithOpName(input_name), dtype));
5700   }
5701   auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
5702   auto concat = ops::Concat(s.WithOpName("my_concat"),
5703                             absl::Span<const Input>(values), axis);
5704   return concat.operation.node()->def();
5705 };
5706 
5707 #if IS_TRT_VERSION_GE(7, 0, 0, 0)
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertConcat)5708 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertConcat) {
5709 #else
5710 TEST_P(OpConverter_FP32_FP16_Test, ConvertConcat) {
5711 #endif
5712   {
5713     // Axis is a tensor, should fail.
5714     Reset();
5715     NodeDef node_def = get_concat_nodedef(tf_type_, 2);
5716     AddTestTensor("values_0", {1, 1, 2, 3});
5717     AddTestTensor("values_1", {1, 1, 2, 3});
5718     AddTestTensor("axis", {1});
5719     RunValidationAndConversion(
5720         node_def, error::UNIMPLEMENTED,
5721         "The input \"axis\" for ConcatV2 must be a constant, at my_concat");
5722   }
5723   {
5724     // Axis is out of bounds, should fail.
5725     Reset();
5726     NodeDef node_def = get_concat_nodedef(tf_type_, 2);
5727     AddTestTensor("values_0", {1, 1, 2, 3});
5728     AddTestTensor("values_1", {1, 1, 2, 3});
5729     AddTestWeights<int32>("axis", {1}, {4});
5730     RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
5731                                "Axis value of 4 is out of bounds, must be in "
5732                                "range [-4, 4), at my_concat");
5733   }
5734   {
5735     // Inputs have inconsistent ranks, should fail.
5736     Reset();
5737     NodeDef node_def = get_concat_nodedef(tf_type_, 2);
5738     AddTestTensor("values_0", {1, 1, 2, 3});
5739     AddTestTensor("values_1", {1, 1, 6});
5740     AddTestWeights<int32>("axis", {1}, {1});
5741     RunValidationAndConversion(
5742         node_def, error::INVALID_ARGUMENT,
5743         "Received inputs with inconsistent rank, at my_concat");
5744   }
5745 
5746   struct TestParams {
5747     std::vector<std::vector<int>> input_shapes;
5748     std::vector<std::vector<int>> input_values;
5749     int axis;
5750     std::vector<int> expected_output_dims;
5751     std::vector<int> expected_output;
5752     Status conversion_status;
5753     Status run_status;
5754     bool input_as_weight;
5755   };
5756 
5757   const std::vector<std::vector<int>> common_input{InitTestVector<int>(6),
5758                                                    InitTestVector<int>(6, 6)};
5759 
5760   std::vector<TestParams> params = {
5761       {
5762           /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
5763           /*input_values=*/common_input,
5764           /*axis=*/1,
5765           /*expected_output_dims=*/{1, 2, 2, 3},
5766           /*expected_output=*/InitTestVector<int>(12),
5767       },
5768       {
5769           /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
5770           /*input_values=*/common_input,
5771           /*axis=*/2,
5772           /*expected_output_dims=*/{1, 1, 4, 3},
5773           /*expected_output=*/InitTestVector<int>(12),
5774       },
5775       {
5776           /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
5777           /*input_values=*/common_input,
5778           /*axis=*/3,
5779           /*expected_output_dims=*/{1, 1, 2, 6},
5780           /*expected_output=*/
5781           {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11},
5782       },
5783       {
5784           /*input_shapes=*/{{1, 1}, {1, 2}, {1, 3}, {1, 1}, {1, 1}, {1, 2}},
5785           /*input_values=*/
5786           {{1}, {2, 3}, {4, 5, 6}, {7}, {8}, {9, 10}},
5787           /*axis=*/1,
5788           /*expected_output_dims=*/{1, 10},
5789           /*expected_output=*/
5790           InitTestVector<int>(10, /*start_value=*/1),
5791       },
5792       {
5793           // An input is a weight
5794           /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
5795           /*input_values=*/common_input,
5796           /*axis=*/1,
5797           /*expected_output_dims=*/{1, 2, 2, 3},
5798           /*expected_output=*/InitTestVector<int>(12),
5799           /*conversion_status=*/
5800           errors::Unimplemented("The input \"values_1\" for ConcatV2 "
5801                                 "must be a tensor, at my_concat"),
5802           /*run_status=*/Status::OK(),
5803           /*input_as_weight=*/true,
5804       },
5805       {
5806           // Axis is batch dimension, should fail in implicit batch mode.
5807           /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
5808           /*input_values=*/common_input,
5809           /*axis=*/0,
5810           /*expected_output_dims=*/{2, 1, 2, 3},
5811           /*expected_output=*/InitTestVector<int>(12),
5812           /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
5813               ? errors::Unimplemented(
5814                     "TensorRT does not allow manipulation of the "
5815                     "batch dimension, at my_concat")
5816               : Status::OK(),
5817       },
5818       {
5819           // Inconsistent input shape, runtime error in dynamic shape mode.
5820           /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 3, 2}},
5821           /*input_values=*/common_input,
5822           /*axis=*/1,
5823           /*expected_output_dims=*/{2, 1, 2, 3},
5824           /*expected_output=*/InitTestVector<int>(12),
5825           trt_mode_ != TrtTestMode::kDynamicShape
5826               ? errors::InvalidArgument(
5827                     "Received inputs with inconsistent shape, at my_concat")
5828               : Status::OK(),
5829           errors::InvalidArgument(""),
5830       }};
5831 
5832   for (auto p : params) {
5833     Reset();
5834     const int num_inputs = p.input_shapes.size();
5835     EXPECT_EQ(num_inputs, p.input_values.size());
5836     NodeDef node_def = get_concat_nodedef(tf_type_, num_inputs);
5837     // Create inputs.
5838     for (int j = 0; j < num_inputs; ++j) {
5839       string name = StrCat("values_", j);
5840       if (j == 1 && p.input_as_weight) {
5841         AddTestWeights(name, p.input_shapes[j], p.input_values[j], tf_type_);
5842       } else {
5843         AddTestTensor(name, p.input_shapes[j], p.input_values[j]);
5844       }
5845     }
5846     AddTestWeights<int32>("axis", {1}, {p.axis});
5847 
5848     TestOpConverter("my_concat", node_def, p.expected_output_dims,
5849                     p.conversion_status, p.run_status,
5850                     ElementsAreArray(p.expected_output));
5851   }
5852 }
5853 
5854 // Get the NodeDef for Split.
5855 auto get_split_nodedef = [](DataType dtype, int num_split) -> NodeDef {
5856   Scope s = Scope::NewRootScope();
5857   auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
5858   auto value = ops::Placeholder(s.WithOpName("value"), dtype);
5859   auto split = ops::Split(s.WithOpName("my_split"), axis, value, num_split);
5860   return split.operation.node()->def();
5861 };
5862 
5863 template <DataType dtype>
5864 void TestConvertSplit(OpConverterTest* test) {
5865   typedef typename EnumToDataType<dtype>::Type CType;
5866 
5867   struct TestParams {
5868     std::vector<int> input_shape;
5869     std::vector<CType> value;
5870     int axis;
5871     int num_split;
5872     std::vector<int> expected_output_dims;
5873     std::vector<std::vector<CType>> expected_outputs;
5874   };
5875 
5876   const std::vector<CType> common_input = InitTestVector<CType>(6);
5877   std::vector<TestParams> ok_params = {
5878       // Identity (num_split = 1)
5879       {/*input_shape=*/{1, 2, 3}, /*value=*/common_input, /*axis=*/1,
5880        /*num_split=*/1, /*expected_output_dims=*/{1, 2, 3},
5881        /*expected_outputs=*/{InitTestVector<CType>(6)}},
5882       {/*input_shape=*/{1, 2, 3},
5883        /*value=*/common_input,
5884        /*axis=*/3,
5885        /*num_split=*/3,
5886        /*expected_output_dims=*/{1, 2, 1},
5887        /*expected_outputs=*/
5888        {{CType(0), CType(3)}, {CType(1), CType(4)}, {CType(2), CType(5)}}},
5889       {/*input_shape=*/{1, 6},
5890        /*value=*/common_input,
5891        /*axis=*/2,
5892        /*num_split=*/6,
5893        /*expected_output_dims=*/{1, 1},
5894        /*expected_outputs=*/
5895        {{CType(0)},
5896         {CType(1)},
5897         {CType(2)},
5898         {CType(3)},
5899         {CType(4)},
5900         {CType(5)}}},
5901       {/*input_shape=*/{1, 6},
5902        /*value=*/common_input,
5903        /*axis=*/-1,
5904        /*num_split=*/2,
5905        /*expected_output_dims=*/{1, 3},
5906        /*expected_outputs=*/
5907        {InitTestVector<CType>(3), InitTestVector<CType>(3, CType(3))}},
5908   };
5909 
5910   for (int i = 0; i < ok_params.size(); ++i) {
5911     test->Reset();
5912     NodeDef node_def = get_split_nodedef(dtype, ok_params[i].num_split);
5913     // Create inputs.
5914     test->AddTestWeights<int32>("axis", {1}, {ok_params[i].axis});
5915     nvinfer1::DataType trt_type;
5916     TF_ASSERT_OK(TfTypeToTrtType(dtype, &trt_type));
5917     test->AddTestTensor("value", ok_params[i].input_shape, 1, trt_type);
5918     // Convert.
5919     test->RunValidationAndConversion(node_def);
5920 
5921     // Get output tensors and verify output dims.
5922     EXPECT_EQ(ok_params[i].expected_outputs.size(), ok_params[i].num_split);
5923     std::vector<TRT_TensorOrWeights> outputs(ok_params[i].num_split);
5924     DataVec output_data;
5925     for (int j = 0; j < outputs.size(); ++j) {
5926       const string name = j == 0 ? StrCat("my_split") : StrCat("my_split:", j);
5927       TF_EXPECT_OK(test->GetTensorOrWeights(name, &outputs[j]));
5928       EXPECT_TRUE(outputs[j].is_tensor());
5929       ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
5930                                outputs[j].tensor()->getDimensions());
5931       // Create buffer to store output.
5932       output_data.push_back(
5933           {name, test->ConstructTensor<CType>(
5934                      ok_params[i].expected_outputs[j].size())});
5935     }
5936 
5937     // Verify output values are correct.
5938     const DataVec input_data{
5939         {"value", test->AsTensor<CType>(ok_params[i].value)}};
5940     TF_EXPECT_OK(test->BuildAndRun(input_data, &output_data));
5941     for (int j = 0; j < outputs.size(); ++j) {
5942       EXPECT_THAT(GetSpanForData<CType>(output_data[j]),
5943                   ElementsAreArray(ok_params[i].expected_outputs[j]));
5944     }
5945   }
5946 }
5947 
5948 TEST_F(OpConverterTest, ConvertSplit) {
5949   {
5950     // Axis is a tensor, should fail.
5951     Reset();
5952     NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
5953     AddTestTensor("axis", {1});
5954     AddTestTensor("value", {1, 2, 3});
5955     RunValidationAndConversion(
5956         node_def, error::UNIMPLEMENTED,
5957         "The input \"axis\" for Split must be a constant, at my_split");
5958   }
5959   {
5960     // Axis is out of bounds, should fail.
5961     Reset();
5962     NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
5963     AddTestWeights<int32>("axis", {1}, {4});
5964     AddTestTensor("value", {1, 2, 3});
5965     RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
5966                                "Axis value of 4 is out of bounds, must be in "
5967                                "range [-4, 4), at my_split");
5968   }
5969   {
5970     // Axis is out of bounds (negative), should fail.
5971     Reset();
5972     NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
5973     AddTestWeights<int32>("axis", {1}, {-5});
5974     AddTestTensor("value", {1, 2, 3});
5975     RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
5976                                "Axis value of -5 is out of bounds, must be in "
5977                                "range [-4, 4), at my_split");
5978   }
5979   {
5980     // Axis is batch dimension, should fail.
5981     Reset();
5982     NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
5983     AddTestWeights<int32>("axis", {1}, {0});
5984     AddTestTensor("value", {1, 2, 3});
5985     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
5986                                "TensorRT does not allow manipulation of the "
5987                                "batch dimension, at my_split");
5988   }
5989   {
5990     // Value is a weight, should fail.
5991     Reset();
5992     NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
5993     AddTestWeights<int32>("axis", {1}, {1});
5994     AddTestWeights<float>("value", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
5995     RunValidationAndConversion(
5996         node_def, error::UNIMPLEMENTED,
5997         "The input \"value\" for Split must be a tensor, at my_split");
5998   }
5999   {
6000     // Dim is not evenly divisibly by num_split, should fail.
6001     Reset();
6002     NodeDef node_def = get_split_nodedef(DT_FLOAT, 2);
6003     AddTestWeights<int32>("axis", {1}, {3});
6004     AddTestTensor("value", {1, 2, 3});
6005     RunValidationAndConversion(
6006         node_def, error::INVALID_ARGUMENT,
6007         "Dimension 3 of size 3 is not evenly divisble by 2, at my_split");
6008   }
6009   {
6010     // num_split > dim size, should fail.
6011     Reset();
6012     NodeDef node_def = get_split_nodedef(DT_FLOAT, 4);
6013     AddTestWeights<int32>("axis", {1}, {3});
6014     AddTestTensor("value", {1, 2, 3});
6015     RunValidationAndConversion(
6016         node_def, error::INVALID_ARGUMENT,
6017         "Dimension 3 of size 3 is not evenly divisble by 4, at my_split");
6018   }
6019 
6020   TestConvertSplit<DT_FLOAT>(this);
6021   TestConvertSplit<DT_HALF>(this);
6022 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
6023   TestConvertSplit<DT_INT32>(this);
6024 #endif
6025 }
6026 
6027 // Get the NodeDef for Unpack (Unstack in TF API).
6028 auto get_unpack_nodedef = [](DataType dtype, int num, int axis) -> NodeDef {
6029   Scope s = Scope::NewRootScope();
6030   auto value = ops::Placeholder(s.WithOpName("value"), dtype);
6031   auto unstack_attrs = ops::Unstack::Axis(axis);
6032   auto unstack =
6033       ops::Unstack(s.WithOpName("my_unpack"), value, num, unstack_attrs);
6034   return unstack.operation.node()->def();
6035 };
6036 
6037 template <DataType dtype>
6038 void TestConvertUnpack(OpConverterTest* test) {
6039   typedef typename EnumToDataType<dtype>::Type CType;
6040 
6041   struct TestParams {
6042     std::vector<int> input_shape;
6043     std::vector<CType> value;
6044     int axis;
6045     int num;
6046     std::vector<int> expected_output_dims;
6047     std::vector<std::vector<CType>> expected_outputs;
6048   };
6049 
6050   const std::vector<CType> common_input = InitTestVector<CType>(6);
6051   std::vector<TestParams> ok_params = {
6052       {/*input_shape=*/{1, 2, 3}, /*value=*/common_input, /*axis=*/1,
6053        /*num=*/1, /*expected_output_dims=*/{2, 3},
6054        /*expected_outputs=*/{InitTestVector<CType>(6)}},
6055       {/*input_shape=*/{1, 2, 3},
6056        /*value=*/common_input,
6057        /*axis=*/3,
6058        /*num=*/3,
6059        /*expected_output_dims=*/{1, 2},
6060        /*expected_outputs=*/
6061        {{CType(0), CType(3)}, {CType(1), CType(4)}, {CType(2), CType(5)}}},
6062       {/*input_shape=*/{6, 1},
6063        /*value=*/common_input,
6064        /*axis=*/-2,
6065        /*num=*/6,
6066        /*expected_output_dims=*/{1},
6067        /*expected_outputs=*/
6068        {{CType(0)},
6069         {CType(1)},
6070         {CType(2)},
6071         {CType(3)},
6072         {CType(4)},
6073         {CType(5)}}},
6074       {/*input_shape=*/{6},
6075        /*value=*/common_input,
6076        /*axis=*/1,
6077        /*num=*/6,
6078        /*expected_output_dims=*/{},
6079        /*expected_outputs=*/
6080        {{CType(0)},
6081         {CType(1)},
6082         {CType(2)},
6083         {CType(3)},
6084         {CType(4)},
6085         {CType(5)}}},
6086   };
6087 
6088   for (int i = 0; i < ok_params.size(); ++i) {
6089     test->Reset();
6090     NodeDef node_def =
6091         get_unpack_nodedef(dtype, ok_params[i].num, ok_params[i].axis);
6092     // Create inputs.
6093     nvinfer1::DataType trt_type;
6094     TF_ASSERT_OK(TfTypeToTrtType(dtype, &trt_type));
6095     test->AddTestTensor("value", ok_params[i].input_shape, 1, trt_type);
6096     // Convert.
6097     test->RunValidationAndConversion(node_def);
6098 
6099     // Get output tensors and verify output dims.
6100     EXPECT_EQ(ok_params[i].expected_outputs.size(), ok_params[i].num);
6101     std::vector<TRT_TensorOrWeights> outputs(ok_params[i].num);
6102     DataVec output_data;
6103     for (int j = 0; j < outputs.size(); ++j) {
6104       const string name = j == 0 ? "my_unpack" : StrCat("my_unpack:", j);
6105       TF_EXPECT_OK(test->GetTensorOrWeights(name, &outputs[j]));
6106       EXPECT_TRUE(outputs[j].is_tensor());
6107       ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
6108                                outputs[j].tensor()->getDimensions());
6109       // Create buffer to store output.
6110       output_data.push_back(
6111           {name, test->ConstructTensor<CType>(
6112                      ok_params[i].expected_outputs[j].size())});
6113     }
6114 
6115     // Verify output values are correct.
6116     const DataVec input_data{
6117         {"value", test->AsTensor<CType>(ok_params[i].value)}};
6118     TF_EXPECT_OK(test->BuildAndRun(input_data, &output_data));
6119     for (int j = 0; j < outputs.size(); ++j) {
6120       EXPECT_THAT(GetSpanForData<CType>(output_data[j]),
6121                   ElementsAreArray(ok_params[i].expected_outputs[j]));
6122     }
6123   }
6124 }
6125 
6126 TEST_F(OpConverterTest, ConvertUnpack) {
6127   {
6128     // Value is weights, should fail.
6129     Reset();
6130     NodeDef node_def = get_unpack_nodedef(DT_FLOAT, /*num=*/3, /*axis=*/3);
6131     AddTestWeights<float>("value", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
6132     RunValidationAndConversion(
6133         node_def, error::UNIMPLEMENTED,
6134         "The input \"value\" for Unpack must be a tensor, at my_unpack");
6135   }
6136   {
6137     // Axis is out of bounds, should fail.
6138     Reset();
6139     NodeDef node_def = get_unpack_nodedef(DT_FLOAT, /*num=*/1, /*axis=*/4);
6140     AddTestTensor("value", {1, 2, 3});
6141     RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
6142                                "Axis value of 4 is out of bounds, must be in "
6143                                "range [-4, 4), at my_unpack");
6144   }
6145   {
6146     // Axis is out of bounds (negative), should fail.
6147     Reset();
6148     NodeDef node_def = get_unpack_nodedef(DT_FLOAT, /*num=*/1, /*axis=*/-5);
6149     AddTestTensor("value", {1, 2, 3});
6150     RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
6151                                "Axis value of -5 is out of bounds, must be in "
6152                                "range [-4, 4), at my_unpack");
6153   }
6154   {
6155     // Axis is batch dimension, should fail.
6156     Reset();
6157     NodeDef node_def = get_unpack_nodedef(DT_FLOAT, /*num=*/1, /*axis=*/0);
6158     AddTestTensor("value", {1, 2, 3});
6159     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6160                                "TensorRT does not allow manipulation of the "
6161                                "batch dimension, at my_unpack");
6162   }
6163   {
6164     // Dim size does not match num, should fail.
6165     Reset();
6166     NodeDef node_def = get_unpack_nodedef(DT_FLOAT, /*num=*/5, /*axis=*/2);
6167     AddTestTensor("value", {1, 6});
6168     RunValidationAndConversion(
6169         node_def, error::INVALID_ARGUMENT,
6170         "Dimension 2 has size 6 which is not equal to num of 5, at my_unpack");
6171   }
6172   {
6173     // Output would be TF scalar, should fail.
6174     Reset();
6175     NodeDef node_def = get_unpack_nodedef(DT_FLOAT, /*num=*/1, /*axis=*/0);
6176     AddTestTensor("value", {});
6177     RunValidationAndConversion(
6178         node_def, error::UNIMPLEMENTED,
6179         "Input \"value\" for Unpack must be rank 2 or greater, at my_unpack");
6180   }
6181 
6182   TestConvertUnpack<DT_FLOAT>(this);
6183   TestConvertUnpack<DT_HALF>(this);
6184 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
6185   TestConvertUnpack<DT_INT32>(this);
6186 #endif
6187 }
6188 
6189 // Get the NodeDef for Pack.
6190 NodeDef GetPackNodeDef(DataType dtype, int num_inputs, int axis) {
6191   Scope s = Scope::NewRootScope();
6192   std::vector<Input> values;
6193   for (int i = 0; i < num_inputs; ++i) {
6194     const string input_name = StrCat("values_", i);
6195     values.push_back(ops::Placeholder(s.WithOpName(input_name), dtype));
6196   }
6197   // Pack op is renamed to Stack in APIs.
6198   auto pack =
6199       ops::Stack(s.WithOpName("my_pack"), absl::Span<const Input>(values),
6200                  ops::Stack::Axis(axis));
6201   return pack.operation.node()->def();
6202 }
6203 
6204 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
6205 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertPack) {
6206 #else
6207 TEST_P(OpConverter_FP32_FP16_Test, ConvertPack) {
6208 #endif
6209   struct TestParams {
6210     std::vector<std::vector<int>> input_shapes;
6211     std::vector<std::vector<int>> partial_input_shapes;
6212     std::vector<std::vector<float>> input_values;
6213     int axis;
6214     std::vector<int> expected_output_dims;
6215     std::vector<float> expected_output;
6216     Status conversion_status;
6217     Status runtime_status;
6218     bool input_1_is_weight;
6219   };
6220 
6221   const std::vector<std::vector<float>> common_input{
6222       InitTestVector<float>(6), InitTestVector<float>(6, /*start_value=*/6)};
6223   std::vector<TestParams> params = {
6224       // Second input is weight, should fail in implicit batch mode
6225       {/*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6226        /*partial_input_shapes=*/{{}, {}},
6227        /*input_values=*/common_input,
6228        /*axis=*/1,
6229        /*expected_output_dims=*/{1, 2, 2, 3},
6230        /*expected_output=*/InitTestVector<float>(12),
6231        trt_mode_ == TrtTestMode::kImplicitBatch
6232            ? Status{error::UNIMPLEMENTED,
6233                     "The input \"values_1\" for Pack must be a tensor, at "
6234                     "my_pack"}
6235            : Status::OK(),
6236        /*runtime_status*/ Status::OK(),
6237        /*weight_input*/ true},
6238       // Axis is out of bounds, should fail.
6239       {
6240           /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6241           /*partial_input_shapes=*/{{}, {}},
6242           /*input_values=*/common_input,
6243           /*axis=*/-5,
6244           /*expected_output_dims=*/{},
6245           /*expected_output=*/{},
6246           Status{error::INVALID_ARGUMENT,
6247                  "Axis value of -5 is out of bounds, must be in"
6248                  " range [-4, 4), at my_pack"},
6249       },
6250       // Axis is batch dimension, should fail in implicit batch mode.
6251       {/*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6252        /*partial_input_shapes=*/{{}, {}},
6253        /*input_values=*/common_input,
6254        /*axis=*/-4,
6255        /*expected_output_dims=*/{2, 1, 2, 3},
6256        /*expected_output=*/InitTestVector<float>(12),
6257        trt_mode_ == TrtTestMode::kImplicitBatch
6258            ? Status{error::UNIMPLEMENTED,
6259                     "TensorRT does not allow manipulation of the batch "
6260                     "dimension, at my_pack"}
6261            : Status::OK()},
6262       // Inconsistent rank, should fail.
6263       {
6264           /*input_shapes=*/{{1, 2, 3}, {1, 6}},
6265           /*partial_input_shapes=*/{{}, {}},
6266           /*input_values=*/common_input,
6267           /*axis=*/1,
6268           /*expected_output_dims=*/{},
6269           /*expected_output=*/{},
6270           Status{error::INVALID_ARGUMENT,
6271                  "Received inputs with inconsistent rank, at my_pack"},
6272       },
6273       {
6274           /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6275           /*partial_input_shapes=*/{{}, {}},
6276           /*input_values=*/common_input,
6277           /*axis=*/1,
6278           /*expected_output_dims=*/{1, 2, 2, 3},
6279           /*expected_output=*/InitTestVector<float>(12),
6280       },
6281       {
6282           /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6283           /*partial_input_shapes=*/{{}, {}},
6284           /*input_values=*/common_input,
6285           /*axis=*/2,
6286           /*expected_output_dims=*/{1, 2, 2, 3},
6287           /*expected_output=*/
6288           {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11},
6289       },
6290       {
6291           /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6292           /*partial_input_shapes=*/{{}, {}},
6293           /*input_values=*/common_input,
6294           /*axis=*/3,
6295           /*expected_output_dims=*/{1, 2, 3, 2},
6296           /*expected_output=*/
6297           {0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11},
6298       },
6299       {
6300           /*input_shapes=*/{{1, 2, 3}},
6301           /*partial_input_shapes=*/{{}},
6302           /*input_values=*/{InitTestVector<float>(6)},
6303           /*axis=*/1,
6304           /*expected_output_dims=*/{1, 1, 2, 3},
6305           /*expected_output=*/InitTestVector<float>(6),
6306       },
6307       {
6308           /*input_shapes=*/{{1, 2, 3}},
6309           /*partial_input_shapes=*/{{}},
6310           /*input_values=*/{InitTestVector<float>(6)},
6311           /*axis=*/2,
6312           /*expected_output_dims=*/{1, 2, 1, 3},
6313           /*expected_output=*/InitTestVector<float>(6),
6314       },
6315   };
6316   // Inputs have inconsistent shapes, should fail.
6317   if (trt_mode_ != TrtTestMode::kDynamicShape) {
6318     params.push_back(TestParams{
6319         /*input_shapes=*/{{1, 2, 3}, {1, 3, 2}},
6320         /*partial_input_shapes=*/{{}, {}},
6321         /*input_values=*/common_input,
6322         /*axis=*/1,
6323         /*expected_output_dims=*/{},
6324         /*expected_output=*/InitTestVector<float>(12),
6325         Status{error::INVALID_ARGUMENT,
6326                "Received inputs with inconsistent shape, at my_pack"}});
6327   } else {
6328     // In dynamic shape mode we cannot catch inconsistent shapes at conversion
6329     // time, only during runtime. But TensorRT does not raise a proper runtime
6330     // error, instead it aborts the program with the following message:
6331     //  Assertion failed: t->start.d[i] + t->extent.d[i] <= r.dims.d[i]
6332     // ../builder/cudnnBuilderGraph.cpp:862
6333     // Aborting...
6334     // TODO(tfeher) Add dynamic shapes test once TRT handles shape error
6335     // decently
6336   }
6337   if (trt_mode_ == TrtTestMode::kDynamicShape) {
6338     // Test with mixed dynamic / static shape input tensors
6339     params.push_back(
6340         TestParams{/*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6341                    /*partial_input_shapes=*/{{-1, -1, -1}, {1, 2, 3}},
6342                    /*input_values=*/common_input,
6343                    /*axis=*/2,
6344                    /*expected_output_dims=*/{1, 2, 2, 3},
6345                    /*expected_output=*/
6346                    {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11}});
6347   }
6348   for (auto p : params) {
6349     Reset();
6350     const int num_inputs = p.input_shapes.size();
6351     EXPECT_EQ(num_inputs, p.input_values.size());
6352 
6353     NodeDef node_def = GetPackNodeDef(tf_type_, num_inputs, p.axis);
6354     // Create inputs.
6355     for (int j = 0; j < num_inputs; ++j) {
6356       if (j == 1 && p.input_1_is_weight) {
6357         AddTestWeights(StrCat("values_", j), p.input_shapes[j],
6358                        p.input_values[j], tf_type_);
6359       } else {
6360         AddTestTensor(StrCat("values_", j), p.input_shapes[j], tf_type_,
6361                       p.input_values[j], p.partial_input_shapes[j]);
6362       }
6363     }
6364     TestOpConverter("my_pack", node_def, p.expected_output_dims,
6365                     p.conversion_status, p.runtime_status,
6366                     ElementsAreArray(p.expected_output));
6367   }
6368 }
6369 
6370 // Get the NodeDef for ArgMin or ArgMax.
6371 template <typename OpType>
6372 NodeDef GetArgMinMaxNodeDef(DataType input_dtype, DataType output_dtype) {
6373   Scope s = Scope::NewRootScope();
6374   auto input = ops::Placeholder(s.WithOpName("input"), input_dtype);
6375   auto dimension = ops::Placeholder(s.WithOpName("dimension"), DT_INT32);
6376   auto attrs = OpType::OutputType(output_dtype);
6377   auto arg = OpType(s.WithOpName("my_arg"), input, dimension, attrs);
6378   return arg.operation.node()->def();
6379 }
6380 
6381 struct ArgMinMaxTestParams {
6382   std::vector<int> input_shape;
6383   std::vector<float> input_value;
6384   int axis;
6385   std::vector<int> expected_output_dims;
6386   std::vector<int> expected_argmax_output;
6387   std::vector<int> expected_argmin_output;
6388   Status status;
6389 };
6390 
6391 template <typename OpType>
6392 void TestConvertArgMinMax(ParameterizedOpConverterTestBase* test,
6393                           DataType _tf_type, ArgMinMaxTestParams& p) {
6394   test->Reset();
6395 
6396   NodeDef node_def = GetArgMinMaxNodeDef<OpType>(_tf_type,
6397                                                  /*output_dtype=*/DT_INT32);
6398 
6399   std::vector<int> expected_out;
6400   if (node_def.op() == "ArgMax") {
6401     expected_out = p.expected_argmax_output;
6402   } else if (node_def.op() == "ArgMin") {
6403     expected_out = p.expected_argmin_output;
6404   } else {
6405     ASSERT_TRUE(false);
6406   }
6407 
6408   test->AddTestTensor("input", p.input_shape, _tf_type, p.input_value);
6409   test->AddTestWeights("dimension", {1}, {p.axis}, DT_INT32);
6410 
6411   test->TestOpConverter("my_arg", node_def, p.expected_output_dims,
6412                         /*expected_conversion_status=*/p.status,
6413                         /*expected_runtime_status=*/Status::OK(),
6414                         /*matcher=*/ElementsAreArray(expected_out), {DT_INT32});
6415 }
6416 
6417 TEST_P(OpConverter_FP32_FP16_Test, ConvertArgMinMax) {
6418   {
6419     // Dimension is a tensor, should fail.
6420     Reset();
6421     NodeDef node_def =
6422         GetArgMinMaxNodeDef<ops::ArgMax>(tf_type_,
6423                                          /*output_dtype=*/DT_INT32);
6424     AddTestTensor("input", {1, 2, 3});
6425     AddTestTensor("dimension", {1});
6426     RunValidationAndConversion(
6427         node_def, error::UNIMPLEMENTED,
6428         "The input \"dimension\" for ArgMax must be a constant, at my_arg");
6429   }
6430   {
6431     // Output type is INT64, should fail.
6432     Reset();
6433     NodeDef node_def =
6434         GetArgMinMaxNodeDef<ops::ArgMax>(tf_type_,
6435                                          /*output_dtype=*/DT_INT64);
6436     AddTestTensor("input", {1, 2, 3});
6437     AddTestWeights("dimension", {1}, {3}, DT_INT32);
6438     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6439                                "Output type int64 is not supported, at my_arg");
6440   }
6441 
6442   const std::vector<float> common_input = InitTestVector<float>(6);
6443   std::vector<ArgMinMaxTestParams> params = {
6444       {/*input_shape=*/{2, 3},
6445        /*input_value=*/common_input,
6446        /*axis=*/0,
6447        /*expected_output_dims=*/{3},
6448        /*expected_argmax_output=*/{1, 1, 1},
6449        /*expected_argmin_output=*/{0, 0, 0},
6450        trt_mode_ == TrtTestMode::kImplicitBatch
6451            ? errors::Unimplemented("TensorRT does not allow manipulation of "
6452                                    "the batch dimension, at my_arg")
6453            : Status::OK()},
6454       {
6455           /*input_shape=*/{1, 6},
6456           /*input_value=*/common_input,
6457           /*axis=*/1,
6458           /*expected_output_dims=*/{1},
6459           /*expected_argmax_output=*/{5},
6460           /*expected_argmin_output=*/{0},
6461       },
6462       {
6463           /*input_shape=*/{1, 10},
6464           /*input_value=*/
6465           {-5.0f, 3.0f, 5.0f, 1.0f, 6.0f, -9.0f, 7.0f, 1.0f, 0.0f, -1.0f},
6466           /*axis=*/-1,
6467           /*expected_output_dims=*/{1},
6468           /*expected_argmax_output=*/{6},
6469           /*expected_argmin_output=*/{5},
6470       },
6471       {
6472           /*input_shape=*/{1, 2, 3},
6473           /*input_value=*/common_input,
6474           /*axis=*/2,
6475           /*expected_output_dims=*/{1, 2},
6476           /*expected_argmax_output=*/{2, 2},
6477           /*expected_argmin_output=*/{0, 0},
6478       },
6479       {
6480           /*input_shape=*/{1, 2, 3},
6481           /*input_value=*/common_input,
6482           /*axis=*/-2,
6483           /*expected_output_dims=*/{1, 3},
6484           /*expected_argmax_output=*/{1, 1, 1},
6485           /*expected_argmin_output=*/{0, 0, 0},
6486       },
6487       {
6488           /*input_shape=*/{1, 2, 1, 3},
6489           /*input_value=*/common_input,
6490           /*axis=*/3,
6491           /*expected_output_dims=*/{1, 2, 1},
6492           /*expected_argmax_output=*/{2, 2},
6493           /*expected_argmin_output=*/{0, 0},
6494       },
6495       {
6496           /*input_shape=*/{1, 2, 1, 3},
6497           /*input_value=*/common_input,
6498           /*axis=*/-3,
6499           /*expected_output_dims=*/{1, 1, 3},
6500           /*expected_argmax_output=*/{1, 1, 1},
6501           /*expected_argmin_output=*/{0, 0, 0},
6502       },
6503       {/*input_shape=*/{1, 2, 1, 1, 3},
6504        /*input_value=*/common_input,
6505        /*axis=*/4,
6506        /*expected_output_dims=*/{1, 2, 1, 1},
6507        /*expected_argmax_output=*/{2, 2},
6508        /*expected_argmin_output=*/{0, 0},
6509 #if !IS_TRT_VERSION_GE(7, 0, 0, 11)
6510        errors::Unimplemented("op is not able to support tensors with 4+"
6511                              " dimensions (excluding batch size)")
6512 #else
6513        Status::OK()
6514 #endif
6515       },
6516       {/*input_shape=*/{1, 2, 1, 1, 3},
6517        /*input_value=*/common_input,
6518        /*axis=*/-4,
6519        /*expected_output_dims=*/{1, 1, 1, 3},
6520        /*expected_argmax_output=*/{1, 1, 1},
6521        /*expected_argmin_output=*/{0, 0, 0},
6522 #if !IS_TRT_VERSION_GE(7, 0, 0, 11)
6523        errors::Unimplemented("op is not able to support tensors with 4+"
6524                              " dimensions (excluding batch size)")
6525 #else
6526        Status::OK()
6527 #endif
6528       },
6529   };
6530 
6531   for (auto p : params) {
6532     TestConvertArgMinMax<ops::ArgMin>(this, tf_type_, p);
6533     TestConvertArgMinMax<ops::ArgMax>(this, tf_type_, p);
6534   }
6535 }
6536 
6537 // Get the NodeDef for DepthToSpace or SpaceToSpace.
6538 template <typename OpType>
6539 NodeDef GetDepthSpaceShuffleNodeDef(DataType dtype, int block_size,
6540                                     string data_format) {
6541   Scope s = Scope::NewRootScope();
6542   auto input = ops::Placeholder(s.WithOpName("input"), dtype);
6543   auto attrs = OpType::DataFormat(data_format);
6544   auto shuffle = OpType(s.WithOpName("my_shuffle"), input, block_size, attrs);
6545   return shuffle.operation.node()->def();
6546 }
6547 
6548 template <typename CType>
6549 struct DepthSpaceShuffleTestParams {
6550   std::vector<int> input_dims;
6551   std::vector<CType> input_value;
6552   int block_size;
6553   string data_format;
6554   std::vector<int> expected_output_dims;
6555   std::vector<CType> expected_output;
6556 };
6557 
6558 template <typename OpType, DataType dtype, typename CType>
6559 void TestConvertDepthSpaceShuffle(
6560     OpConverterTest* test,
6561     const std::vector<DepthSpaceShuffleTestParams<CType>>& params) {
6562   for (int i = 0; i < params.size(); ++i) {
6563     test->Reset();
6564 
6565     NodeDef node_def = GetDepthSpaceShuffleNodeDef<OpType>(
6566         dtype, params[i].block_size, params[i].data_format);
6567     nvinfer1::DataType trt_type;
6568     TF_ASSERT_OK(TfTypeToTrtType(dtype, &trt_type));
6569     test->AddTestTensor("input", params[i].input_dims, 1, trt_type);
6570     test->RunValidationAndConversion(node_def);
6571 
6572     TRT_TensorOrWeights output;
6573     TF_EXPECT_OK(test->GetTensorOrWeights("my_shuffle", &output));
6574     EXPECT_TRUE(output.is_tensor());
6575     ExpectTrtDimsEqualsArray(params[i].expected_output_dims,
6576                              output.tensor()->getDimensions());
6577 
6578     DataVec input_data{{"input", test->AsTensor<CType>(params[i].input_value)}};
6579     DataVec output_data{{"my_shuffle", test->ConstructTensor<CType>(
6580                                            params[i].expected_output.size())}};
6581     TF_EXPECT_OK(test->BuildAndRun(input_data, &output_data));
6582     EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
6583                 ElementsAreArray(params[i].expected_output));
6584   }
6585 }
6586 
6587 template <DataType dtype>
6588 void TestConvertDepthToSpace(OpConverterTest* test) {
6589   typedef typename EnumToDataType<dtype>::Type CType;
6590   const std::vector<CType> common_input = InitTestVector<CType>(16);
6591   std::vector<DepthSpaceShuffleTestParams<CType>> params = {
6592       {
6593           /*input_shape=*/{4, 2, 2},
6594           /*input_value=*/common_input,
6595           /*block_size=*/2,
6596           /*data_format=*/"NCHW",
6597           /*expected_output_dims=*/{1, 4, 4},
6598           /*expected_output=*/
6599           CastTestVector<int, CType>(
6600               {0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15}),
6601       },
6602       {
6603           /*input_shape=*/{2, 2, 4},
6604           /*input_value=*/common_input,
6605           /*block_size=*/2,
6606           /*data_format=*/"NHWC",
6607           /*expected_output_dims=*/{4, 4, 1},
6608           /*expected_output=*/
6609           CastTestVector<int, CType>(
6610               {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}),
6611       },
6612       {
6613           /*input_shape=*/{16, 1, 1},
6614           /*input_value=*/common_input,
6615           /*block_size=*/4,
6616           /*data_format=*/"NCHW",
6617           /*expected_output_dims=*/{1, 4, 4},
6618           /*expected_output=*/InitTestVector<CType>(16),
6619       },
6620       {
6621           /*input_shape=*/{2, 2, 8},
6622           /*input_value=*/InitTestVector<CType>(32),
6623           /*block_size=*/2,
6624           /*data_format=*/"NHWC",
6625           /*expected_output_dims=*/{4, 4, 2},
6626           /*expected_output=*/CastTestVector<int, CType>({0,  1,  2,  3,  8,
6627                                                           9,  10, 11, 4,  5,
6628                                                           6,  7,  12, 13, 14,
6629                                                           15, 16, 17, 18, 19,
6630                                                           24, 25, 26, 27, 20,
6631                                                           21, 22, 23, 28, 29,
6632                                                           30, 31}),
6633       },
6634   };
6635 
6636   TestConvertDepthSpaceShuffle<ops::DepthToSpace, dtype, CType>(test, params);
6637 }
6638 
6639 TEST_F(OpConverterTest, ConvertDepthToSpace) {
6640   {
6641     // Input is a weight, should fail.
6642     Reset();
6643     NodeDef node_def =
6644         GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(DT_FLOAT, 2, "NCHW");
6645     AddTestWeights<float>("input", {4, 1, 1}, {1, 2, 3, 4});
6646     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6647                                "The input \"input\" for DepthToSpace must be a "
6648                                "tensor, at my_shuffle");
6649   }
6650   {
6651     // Input rank != 4
6652     Reset();
6653     NodeDef node_def =
6654         GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(DT_FLOAT, 2, "NCHW");
6655     AddTestTensor("input", {16, 32});
6656     RunValidationAndConversion(
6657         node_def, error::INVALID_ARGUMENT,
6658         "The input to DepthToSpace must be rank 4, at my_shuffle");
6659   }
6660   {
6661     // Channels not divisible by block_size, should fail.
6662     Reset();
6663     NodeDef node_def =
6664         GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(DT_FLOAT, 3, "NCHW");
6665     AddTestTensor("input", {16, 32, 32});
6666     RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
6667                                "Number of channels must be divisible by "
6668                                "block_size*block_size, at my_shuffle");
6669   }
6670   {
6671     // Unsupported format, should fail.
6672     Reset();
6673     NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(
6674         DT_FLOAT, 2, "NCHW_VECT_C");
6675     AddTestTensor("input", {16, 32, 32});
6676     RunValidationAndConversion(
6677         node_def, error::UNIMPLEMENTED,
6678         "Data format NCHW_VECT_C is not supported, at my_shuffle");
6679   }
6680 
6681   TestConvertDepthToSpace<DT_FLOAT>(this);
6682   TestConvertDepthToSpace<DT_HALF>(this);
6683   TestConvertDepthToSpace<DT_INT32>(this);
6684 }
6685 
6686 template <DataType dtype>
6687 void TestConvertSpaceToDepth(OpConverterTest* test) {
6688   typedef typename EnumToDataType<dtype>::Type CType;
6689   const std::vector<CType> common_input = InitTestVector<CType>(16);
6690   std::vector<DepthSpaceShuffleTestParams<CType>> params = {
6691       {
6692           /*input_shape=*/{1, 4, 4},
6693           /*input_value=*/common_input,
6694           /*block_size=*/2,
6695           /*data_format=*/"NCHW",
6696           /*expected_output_dims=*/{4, 2, 2},
6697           /*expected_output=*/
6698           CastTestVector<int, CType>(
6699               {0, 2, 8, 10, 1, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15}),
6700       },
6701       {
6702           /*input_shape=*/{4, 4, 1},
6703           /*input_value=*/common_input,
6704           /*block_size=*/2,
6705           /*data_format=*/"NHWC",
6706           /*expected_output_dims=*/{2, 2, 4},
6707           /*expected_output=*/
6708           CastTestVector<int, CType>(
6709               {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}),
6710       },
6711       {
6712           /*input_shape=*/{1, 4, 4},
6713           /*input_value=*/common_input,
6714           /*block_size=*/4,
6715           /*data_format=*/"NCHW",
6716           /*expected_output_dims=*/{16, 1, 1},
6717           /*expected_output=*/InitTestVector<CType>(16),
6718       },
6719       {
6720           /*input_shape=*/{4, 4, 2},
6721           /*input_value=*/InitTestVector<CType>(32),
6722           /*block_size=*/2,
6723           /*data_format=*/"NHWC",
6724           /*expected_output_dims=*/{2, 2, 8},
6725           /*expected_output=*/CastTestVector<int, CType>({0,  1,  2,  3,  8,
6726                                                           9,  10, 11, 4,  5,
6727                                                           6,  7,  12, 13, 14,
6728                                                           15, 16, 17, 18, 19,
6729                                                           24, 25, 26, 27, 20,
6730                                                           21, 22, 23, 28, 29,
6731                                                           30, 31}),
6732       },
6733   };
6734 
6735   TestConvertDepthSpaceShuffle<ops::SpaceToDepth, dtype, CType>(test, params);
6736 }
6737 
6738 TEST_F(OpConverterTest, ConvertSpaceToDepth) {
6739   {
6740     // Input is a weight, should fail.
6741     Reset();
6742     NodeDef node_def =
6743         GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(DT_FLOAT, 2, "NCHW");
6744     AddTestWeights<float>("input", {4, 1, 1}, {1, 2, 3, 4});
6745     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6746                                "The input \"input\" for SpaceToDepth must be a "
6747                                "tensor, at my_shuffle");
6748   }
6749   {
6750     // Input rank != 4
6751     Reset();
6752     NodeDef node_def =
6753         GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(DT_FLOAT, 2, "NCHW");
6754     AddTestTensor("input", {16, 32});
6755     RunValidationAndConversion(
6756         node_def, error::INVALID_ARGUMENT,
6757         "The input to SpaceToDepth must be rank 4, at my_shuffle");
6758   }
6759   {
6760     // Width not divisble by block_size, should fail.
6761     Reset();
6762     NodeDef node_def =
6763         GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(DT_FLOAT, 3, "NCHW");
6764     AddTestTensor("input", {16, 9, 32});
6765     RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
6766                                "Width and height must be divisible by "
6767                                "block_size, at my_shuffle");
6768   }
6769   {
6770     // Height not divisble by block_size, should fail.
6771     Reset();
6772     NodeDef node_def =
6773         GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(DT_FLOAT, 3, "NCHW");
6774     AddTestTensor("input", {16, 32, 9});
6775     RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
6776                                "Width and height must be divisible by "
6777                                "block_size, at my_shuffle");
6778   }
6779   {
6780     // Unsupported format, should fail.
6781     Reset();
6782     NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(
6783         DT_FLOAT, 2, "NCHW_VECT_C");
6784     AddTestTensor("input", {16, 32, 32});
6785     RunValidationAndConversion(
6786         node_def, error::UNIMPLEMENTED,
6787         "Data format NCHW_VECT_C is not supported, at my_shuffle");
6788   }
6789 
6790   TestConvertSpaceToDepth<DT_FLOAT>(this);
6791   TestConvertSpaceToDepth<DT_HALF>(this);
6792   TestConvertSpaceToDepth<DT_INT32>(this);
6793 }
6794 
6795 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
6796 TEST_P(OpConverter_FP32_FP16_Test, ConvertClipByValue) {
6797   Scope s = Scope::NewRootScope();
6798   auto t = ops::Placeholder(s.WithOpName("t"), tf_type_);
6799   auto clip_value_min =
6800       ops::Placeholder(s.WithOpName("clip_value_min"), tf_type_);
6801   auto clip_value_max =
6802       ops::Placeholder(s.WithOpName("clip_value_max"), tf_type_);
6803   auto clip = ops::ClipByValue(s.WithOpName("my_clip"), t, clip_value_min,
6804                                clip_value_max);
6805   const NodeDef& node_def = clip.operation.node()->def();
6806 
6807   nvinfer1::DataType trt_type_;
6808   TF_ASSERT_OK(TfTypeToTrtType(tf_type_, &trt_type_));
6809 
6810   {
6811     // Input is a weight, should fail.
6812     Reset();
6813     AddTestWeights("t", {1, 2, 3}, {1, 2, 3, 4, 5, 6}, tf_type_);
6814     AddTestWeights("clip_value_min", {1}, {1}, tf_type_);
6815     AddTestWeights("clip_value_max", {1}, {5}, tf_type_);
6816     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6817                                "The input \"t\" for ClipByValue must be a "
6818                                "tensor, at my_clip");
6819   }
6820   {
6821     // Clip min is a tensor, should fail.
6822     Reset();
6823     AddTestTensor("t", {1, 2, 3});
6824     AddTestTensor("clip_value_min", {1});
6825     AddTestWeights("clip_value_max", {1}, {1}, tf_type_);
6826     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6827                                "The input \"clip_value_min\" for ClipByValue "
6828                                "must be a constant, at my_clip");
6829   }
6830   {
6831     // Clip max is a tensor, should fail.
6832     Reset();
6833     AddTestTensor("t", {1, 2, 3});
6834     AddTestWeights("clip_value_min", {1}, {1}, tf_type_);
6835     AddTestTensor("clip_value_max", {1});
6836     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6837                                "The input \"clip_value_max\" for ClipByValue "
6838                                "must be a constant, at my_clip");
6839   }
6840 
6841   struct TestParams {
6842     std::vector<int> dims;
6843     int clip_value_min;
6844     int clip_value_max;
6845     std::vector<float> expected_output;
6846   };
6847 
6848   const std::vector<float> common_input = InitTestVector<float>(6);
6849 
6850   std::vector<TestParams> params = {{
6851                                         /*dims=*/{6},
6852                                         /*clip_value_min=*/2,
6853                                         /*clip_value_max=*/4,
6854                                         /*expected_output=*/{2, 2, 2, 3, 4, 4},
6855                                     },
6856                                     {
6857                                         /*dims=*/{1, 6},
6858                                         /*clip_value_min=*/2,
6859                                         /*clip_value_max=*/4,
6860                                         /*expected_output=*/{2, 2, 2, 3, 4, 4},
6861                                     },
6862                                     {
6863                                         /*dims=*/{1, 2, 3},
6864                                         /*clip_value_min=*/2,
6865                                         /*clip_value_max=*/4,
6866                                         /*expected_output=*/{2, 2, 2, 3, 4, 4},
6867                                     },
6868                                     {
6869                                         /*dims=*/{1, 2, 3, 1},
6870                                         /*clip_value_min=*/2,
6871                                         /*clip_value_max=*/4,
6872                                         /*expected_output=*/{2, 2, 2, 3, 4, 4},
6873                                     },
6874                                     {
6875                                         /*dims=*/{1, 1, 3, 1, 2},
6876                                         /*clip_value_min=*/2,
6877                                         /*clip_value_max=*/4,
6878                                         /*expected_output=*/{2, 2, 2, 3, 4, 4},
6879                                     },
6880                                     {
6881                                         /*dims=*/{1, 1, 3, 1, 2, 1},
6882                                         /*clip_value_min=*/2,
6883                                         /*clip_value_max=*/4,
6884                                         /*expected_output=*/{2, 2, 2, 3, 4, 4},
6885                                     },
6886                                     {
6887                                         /*dims=*/{2, 1, 3},
6888                                         /*clip_value_min=*/-1,
6889                                         /*clip_value_max=*/8,
6890                                         /*expected_output=*/common_input,
6891                                     }};
6892 
6893   for (auto p : params) {
6894     Reset();
6895 
6896     AddTestTensor("t", p.dims, tf_type_, common_input);
6897     AddTestWeights("clip_value_min", {1}, {p.clip_value_min}, tf_type_);
6898     AddTestWeights("clip_value_max", {1}, {p.clip_value_max}, tf_type_);
6899 
6900     TestOpConverter("my_clip", node_def, p.dims,
6901                     /*expected_conversion_status=*/Status::OK(),
6902                     /*expected_runtime_status=*/Status::OK(),
6903                     /*matcher=*/ElementsAreArray(p.expected_output));
6904   }
6905 }
6906 #endif  // IS_TRT_VERSION_GE(5, 1, 2, 0)
6907 
6908 // Get the NodeDef for SquaredDifference.
6909 NodeDef GetSquaredDifferenceNodeDef(DataType dtype) {
6910   Scope s = Scope::NewRootScope();
6911   auto x = ops::Placeholder(s.WithOpName("x"), dtype);
6912   auto y = ops::Placeholder(s.WithOpName("y"), dtype);
6913   auto squared_diff =
6914       ops::SquaredDifference(s.WithOpName("my_squared_diff"), x, y);
6915   return squared_diff.operation.node()->def();
6916 }
6917 
6918 TEST_P(OpConverter_FP32_FP16_Test, ConvertSquaredDifference) {
6919   {
6920     // Input is a weight, should fail.
6921     Reset();
6922     NodeDef node_def = GetSquaredDifferenceNodeDef(tf_type_);
6923     AddTestWeights<float>("x", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
6924     AddTestTensor("y", {1, 1, 2, 3});
6925     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6926                                "The input \"x\" for SquaredDifference must be "
6927                                "a tensor, at my_squared_diff");
6928   }
6929 
6930   struct TestParams {
6931     std::vector<int> dims_x;
6932     std::vector<int> dims_y;
6933     std::vector<float> value_x;
6934     std::vector<float> value_y;
6935     std::vector<int> expected_output_dims;
6936     std::vector<float> expected_output;
6937     Status status;
6938     Status runtime_status;
6939   };
6940 
6941   const std::vector<float> common_input = InitTestVector<float>(6);
6942   std::vector<TestParams> params = {
6943       {/*dims_x=*/{1, 2, 3},
6944        /*dims_y=*/{1, 7, 5},
6945        /*value_x=*/common_input,
6946        /*value_y=*/std::vector<float>(7 * 5, 0),
6947        /*expected_output_dims=*/{1, 1, 2, 3},
6948        /*expected_output=*/common_input,
6949        trt_mode_ == TrtTestMode::kDynamicShape
6950            ? Status::OK()
6951            : errors::InvalidArgument("Infeasible broadcast scheme"),
6952        errors::Internal(
6953            "Binding index out of range. This can happen if profile is not set, "
6954            "or the network is invalid for the current profile.")},
6955       {
6956           /*dims_x=*/{1, 1, 2, 3},
6957           /*dims_y=*/{1, 1, 2, 3},
6958           /*value_x=*/common_input,
6959           /*value_y=*/{0, -1, 3, 0, 10, -7},
6960           /*expected_output_dims=*/{1, 1, 2, 3},
6961           /*expected_output=*/{0, 4, 1, 9, 36, 144},
6962       },
6963       {
6964           /*dims_x=*/{1, 1, 2, 3},
6965           /*dims_y=*/{1, 1, 1, 3},
6966           /*value_x=*/common_input,
6967           /*value_y=*/{0, 1, 2},
6968           /*expected_output_dims=*/{1, 1, 2, 3},
6969           /*expected_output=*/{0, 0, 0, 9, 9, 9},
6970       },
6971   };
6972 
6973   for (auto p : params) {
6974     Reset();
6975     NodeDef node_def = GetSquaredDifferenceNodeDef(tf_type_);
6976     AddTestTensor("x", p.dims_x, p.value_x);
6977     AddTestTensor("y", p.dims_y, p.value_y);
6978     TestOpConverter("my_squared_diff", node_def, p.expected_output_dims,
6979                     p.status, p.runtime_status,
6980                     ElementsAreArray(p.expected_output));
6981   }
6982 }
6983 
6984 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
6985 template <typename OpType>
6986 NodeDef MakeResizeNodeDef(DataType dtype, bool align_corners) {
6987   Scope s = Scope::NewRootScope();
6988   auto input = ops::Placeholder(s.WithOpName("input"), dtype);
6989   auto size = ops::Placeholder(s.WithOpName("size"), DT_INT32);
6990   auto attrs = typename OpType::Attrs().AlignCorners(align_corners);
6991   auto resize = OpType(s.WithOpName("my_resize"), input, size, attrs);
6992   return resize.operation.node()->def();
6993 }
6994 
6995 struct ResizeTestParams {
6996   std::vector<int> input_dims;
6997   std::vector<int> output_resize_dims;
6998   std::vector<float> input_value;
6999   bool align_corners;
7000   std::vector<int> expected_output_dims;
7001   std::vector<float> expected_nearest_output_values;
7002   std::vector<float> expected_bilinear_output_values;
7003   Status status;
7004 };
7005 
7006 template <typename OpType>
7007 void TestConvertResize(ParameterizedOpConverterTestBase* test,
7008                        ResizeTestParams& p) {
7009   test->Reset();
7010   // Create resize node.
7011   NodeDef node_def =
7012       MakeResizeNodeDef<OpType>(test->get_tf_type(), p.align_corners);
7013 
7014   test->AddTestTensor("input", p.input_dims, test->get_tf_type(),
7015                       p.input_value);
7016   // Create output size.
7017   test->AddTestWeights("size", {2}, p.output_resize_dims, DT_INT32);
7018 
7019   std::vector<float> expected_out;
7020 
7021   if (node_def.op() == "ResizeBilinear") {
7022     expected_out = p.expected_bilinear_output_values;
7023   } else if (node_def.op() == "ResizeNearestNeighbor") {
7024     expected_out = p.expected_nearest_output_values;
7025   } else {
7026     ASSERT_TRUE(false);
7027   }
7028 
7029   test->TestOpConverter("my_resize", node_def, p.expected_output_dims,
7030                         /*expected_conversion_status=*/p.status,
7031                         /*expected_runtime_status=*/p.status,
7032                         /*matcher=*/ElementsAreArray(expected_out),
7033                         /*out_tf_types=*/{DT_FLOAT});
7034 }
7035 
7036 TEST_P(OpConverter_FP32_FP16_Test, ConvertResize) {
7037   {
7038     // First input is weight, should fail.
7039     Reset();
7040     NodeDef node_def = MakeResizeNodeDef<ops::ResizeBilinear>(tf_type_,
7041                                                               /*align_corners=*/
7042                                                               true);
7043     AddTestWeights<float>("input", {1, 2}, {1, 2});
7044     AddTestWeights<int>("size", {1, 2}, {1, 2});
7045     RunValidationAndConversion(
7046         node_def, error::UNIMPLEMENTED,
7047         "The input \"input\" for ResizeBilinear must be a "
7048         "tensor, at my_resize");
7049   }
7050   {
7051     // Output dimension is a tensor, should fail.
7052     Reset();
7053     NodeDef node_def = MakeResizeNodeDef<ops::ResizeBilinear>(tf_type_,
7054                                                               /*align_corners=*/
7055                                                               true);
7056     AddTestTensor("input", {1, 2});
7057     AddTestTensor("size", {1, 2});
7058     RunValidationAndConversion(
7059         node_def, error::UNIMPLEMENTED,
7060         "The input \"size\" for ResizeBilinear must be a "
7061         "constant, at my_resize");
7062   }
7063 
7064   const auto job_status =
7065       trt_mode_ == TrtTestMode::kDynamicShape
7066           ? errors::Unimplemented(
7067                 "TensorRT IResizeLayer requires input with static "
7068                 "shape")
7069           : Status::OK();
7070 
7071   std::vector<ResizeTestParams> params{
7072       {/*input_dims=*/{1, 1, 2, 1},    // N, H, W, C
7073        /*output_resize_dims=*/{2, 3},  // H_out, W_out
7074        /*input_values=*/{2.0f, -1.0f},
7075        /*align_corners=*/false,
7076        /*expected_output_dims=*/{1, 2, 3, 1},  // N, H, W, C
7077        /*expected_nearest_output_values=*/
7078        {2.0f, 2.0f, -1.0f, 2.0f, 2.0f, -1.0f},
7079        /*expected_bilinear_output_values=*/
7080        {2.0f, 0.f, -1.0f, 2.0f, 0.f, -1.0f},
7081        /*status=*/job_status},
7082       {/*input_dims=*/{1, 1, 2, 1},    // N, H, W, C
7083        /*output_resize_dims=*/{2, 3},  // H_out, W_out
7084        /*input_values=*/{2.0f, -1.0f},
7085        /*align_corners=*/true,
7086        /*expected_output_dims=*/{1, 2, 3, 1},  // N, H, W, C
7087        /*expected_nearest_output_values=*/
7088        {2.0f, 2.0f, -1.0f, 2.0f, 2.0f, -1.0f},
7089        /*expected_bilinear_output_values=*/
7090        {2.0f, 0.5f, -1.0f, 2.0f, 0.5f, -1.0f},
7091        /*status=*/job_status}};
7092 
7093   for (auto p : params) {
7094     TestConvertResize<ops::ResizeNearestNeighbor>(this, p);
7095 
7096 // This use case is not supported as of TRT version 7.1
7097 #if IS_TRT_VERSION_GE(7, 1, 0, 0)
7098     if (!p.align_corners) {
7099       p.status = errors::InvalidArgument(
7100           "Cannot Convert Bilinear Resize when align_corners=False");
7101     }
7102 #endif
7103 
7104     TestConvertResize<ops::ResizeBilinear>(this, p);
7105   }
7106 }
7107 #endif  // IS_TRT_VERSION_GE(6, 0, 0, 0)
7108 
7109 NodeDef MakePadNodeDef(std::string name, DataType dtype) {
7110   Scope s = Scope::NewRootScope();
7111   auto input = ops::Placeholder(s.WithOpName("input"), dtype);
7112   auto padding = ops::Placeholder(s.WithOpName("padding"), DT_INT32);
7113   auto pad = ops::Pad(s.WithOpName(name), input, padding);
7114   return pad.operation.node()->def();
7115 }
7116 
7117 struct PadTestParams {
7118   std::vector<int> input_dims;
7119   std::vector<int> pad_dims;
7120   std::vector<int> pad_values;
7121   std::vector<float> input_values;
7122   std::vector<int> expected_output_dims;
7123   std::vector<float> expected_output_values;
7124   Status status;
7125 };
7126 
7127 TEST_P(OpConverter_FP32_FP16_Test, ConvertPad) {
7128   {
7129     // First input is weight, should fail.
7130     Reset();
7131     NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
7132     AddTestWeights("input", {1, 2}, {1, 2}, tf_type_);
7133     AddTestWeights<int>("padding", {1, 2}, {1, 2});
7134     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
7135                                "The input \"tensor\" for Pad must be a "
7136                                "tensor");
7137   }
7138   {
7139     // padding is a tensor, should fail.
7140     Reset();
7141     NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
7142     AddTestTensor("input", {1, 2});
7143     AddTestTensor("padding", {1, 2});
7144     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
7145                                "The input \"paddings\" for Pad must be a "
7146                                "constant");
7147   }
7148   {
7149     // Make sure that ranges are inferred across a Pad.
7150     Reset();
7151     NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
7152     AddTestTensor("input", {1, 1, 2, 1});
7153     AddTestWeights<int>("padding", {4, 2}, {0, 0, 1, 0, 0, 1, 0, 0});
7154     TRT_TensorOrWeights input;
7155     TRT_TensorOrWeights output;
7156     RunValidationAndConversion(node_def);
7157     TF_EXPECT_OK(GetTensorOrWeights("input", &input));
7158     TF_EXPECT_OK(GetTensorOrWeights("my_pad", &output));
7159     converter_->ProvideQuantizationRange(input.tensor(), -5.0f, 5.0f);
7160     // Input range should be inferred across pad.
7161     PropagateQuantizationRanges();
7162     auto ranges = quantization_ranges();
7163     EXPECT_EQ(5.0f, ranges[input.tensor()]);
7164     EXPECT_EQ(5.0f, ranges[output.tensor()]);
7165   }
7166 
7167   std::vector<PadTestParams> params{
7168       // 1 padding dim
7169       {
7170           /*input_dims=*/{1, 1, 3, 2},  // N, H, W, C
7171           /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
7172           /*pad_values*/ {0, 0, 0, 0, 0, 1, 0, 0},
7173           /*input_values=*/{1, 2, 3, 4, 5, 6},
7174           /*expected_output_dims=*/{1, 1, 4, 2},  // N, H, W, C
7175           /*expected_output_values=*/
7176           {1, 2, 3, 4, 5, 6, 0, 0},
7177       },
7178       {
7179           /*input_dims=*/{1, 1, 3, 2},  // N, H, W, C
7180           /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
7181           /*pad_values*/ {0, 0, 0, 0, 0, 0, 0, 1},
7182           /*input_values=*/{1, 2, 3, 4, 5, 6},
7183           /*expected_output_dims=*/{1, 1, 3, 3},  // N, H, W, C
7184           /*expected_output_values=*/
7185           {1, 2, 0, 3, 4, 0, 5, 6, 0},
7186       },
7187       {
7188           /*input_dims=*/{1, 1, 3, 2},  // N, H, W, C
7189           /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
7190           /*pad_values*/ {0, 0, 1, 0, 0, 0, 0, 0},
7191           /*input_values=*/{1, 2, 3, 4, 5, 6},
7192           /*expected_output_dims=*/{1, 2, 3, 2},  // N, H, W, C
7193           /*expected_output_values=*/
7194           {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6},
7195       },
7196       // 2 padding dims
7197       {
7198           /*input_dims=*/{1, 1, 2, 1},  // N, H, W, C
7199           /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
7200           /*pad_values*/ {0, 0, 1, 0, 0, 1, 0, 0},
7201           /*input_values=*/{2.0f, -1.0f},
7202           /*expected_output_dims=*/{1, 2, 3, 1},  // N, H, W, C
7203           /*expected_output_values=*/
7204           {0.0, 0.0, 0.0, 2.0f, -1.0f, 0.0},
7205       },
7206       PadTestParams{
7207           /*input_dims=*/{1, 1, 2, 2},  // N, H, W, C
7208           /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
7209           /*pad_values*/ {0, 0, 1, 0, 0, 1, 0, 0},
7210           /*input_values=*/{2, -1, 3., 4},
7211           /*expected_output_dims=*/{1, 2, 3, 2},  // N, H, W, C
7212           /*expected_output_values=*/
7213           {0, 0, 0, 0, 0, 0, 2, -1, 3, 4, 0, 0},
7214       },
7215       PadTestParams{
7216           /*input_dims=*/{1, 1, 2, 1, 2},  // N, C, H, W, D
7217           /*pad_dims=*/{5, 2},             // #dims, {pad_before, pad_after}
7218           /*pad_values*/ {0, 0, 1, 0, 0, 1, 0, 0, 0, 0},
7219           /*input_values=*/{2, -1, 3., 4},
7220           /*expected_output_dims=*/{1, 2, 3, 1, 2},  // N, H, W, C
7221           /*expected_output_values=*/
7222           {0, 0, 0, 0, 0, 0, 2, -1, 3, 4, 0, 0},
7223       },
7224       PadTestParams{
7225           /*input_dims=*/{1, 1, 2, 1, 2},  // N, C, H, W, D
7226           /*pad_dims=*/{5, 2},             // #dims, {pad_before, pad_after}
7227           /*pad_values*/ {0, 0, 0, 1, 0, 0, 1, 1, 0, 0},
7228           /*input_values=*/{2, -1, 3., 4},
7229           /*expected_output_dims=*/{1, 2, 2, 3, 2},  // N, H, W, C
7230           /*expected_output_values=*/
7231           {0., 0., 2., -1., 0., 0., 0., 0., 3., 4., 0., 0.,
7232            0., 0., 0., 0.,  0., 0., 0., 0., 0., 0., 0., 0},
7233       },
7234       PadTestParams{
7235           /*input_dims=*/{1, 1, 2, 1},  // N, H, W, C
7236           /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
7237           /*pad_values*/ {1, 0, 0, 0, 0, 1, 0, 0},
7238           /*input_values=*/{2.0f, -1.0f},
7239           /*expected_output_dims=*/{2, 1, 3, 1},  // N, H, W, C
7240           /*expected_output_values=*/{0.0, 0.0, 0.0, 2.0f, -1.0f, 0.0},
7241           trt_mode_ == TrtTestMode::kImplicitBatch
7242               ? errors::InvalidArgument("Padding layer does not support "
7243                                         "padding on batch dimension")
7244               : Status::OK()},
7245       PadTestParams{
7246           /*input_dims=*/{1, 1, 2, 1},  // N, H, W, C
7247           /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
7248           /*pad_values*/ {0, 0, 1, 0, 0, 1, 1, 1},
7249           /*input_values=*/{2.0f, -1.0f},
7250           /*expected_output_dims=*/{},  // N, H, W, C
7251           /*expected_output_values=*/{},
7252           errors::InvalidArgument("Padding layer does not support padding on "
7253                                   "> 2")},
7254       PadTestParams{
7255           /*input_dims=*/{1, 2, 2},  // N, H, W
7256           /*pad_dims=*/{3, 2},       // #dims, {pad_before, pad_after}
7257           /*pad_values*/ {0, 0, 1, 0, 0, 1},
7258           /*input_values=*/{2, -1, 3., 4},
7259           /*expected_output_dims=*/{1, 3, 3},  // N, H, W, C
7260           /*expected_output_values=*/
7261           {0., 0., 0., 2., -1., 0., 3., 4., 0.},
7262           errors::InvalidArgument("Convertpad requires at least 4D input, at "
7263                                   "my_pad")}};
7264 
7265   for (auto p : params) {
7266     Reset();
7267     // Create pad node.
7268     NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
7269     // Create input tensor.
7270     AddTestTensor("input", p.input_dims, p.input_values);
7271     // Create output size.
7272     AddTestWeights<int32>("padding", p.pad_dims, p.pad_values);
7273     TestOpConverter("my_pad", node_def, p.expected_output_dims, p.status,
7274                     p.status, ElementsAreArray(p.expected_output_values));
7275   }
7276 }
7277 }  // namespace convert
7278 }  // namespace tensorrt
7279 }  // namespace tensorflow
7280 
7281 #endif  // GOOGLE_CUDA && GOOGLE_TENSORRT
7282