/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h"

#include <algorithm>
#include <functional>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include <vector>

#if GOOGLE_CUDA && GOOGLE_TENSORRT

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/algorithm/container.h"
#include "absl/strings/match.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "third_party/gpus/cuda/include/cuda.h"
#include "third_party/gpus/cuda/include/cuda_runtime_api.h"
#include "tensorflow/cc/framework/ops.h"
#include "tensorflow/cc/framework/scope.h"
#include "tensorflow/cc/ops/nn_ops_internal.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/compiler/tf2tensorrt/common/datavec.h"
#include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
#include "tensorflow/core/common_runtime/gpu/gpu_managed_allocator.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/node_def.pb.h"  // NOLINT
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor.pb.h"  // NOLINT
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/grappler/costs/graph_properties.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/protobuf/config.pb.h"  // NOLINT
#include "tensorflow/core/public/session.h"
#include "third_party/tensorrt/NvInfer.h"

namespace tensorflow {
namespace tensorrt {

// TensorRT modes for testing. We define the following three modes:
// 1. Implicit batch mode: The tensors have static (known) input shape and
//    the batch dimension (first dim) is removed from the TRT tensor shape. In
//    a loose notation: trt_shape = tf_shape[1:].
// 2. Explicit batch mode: static (known) input shape, but the batch dimension
//    is part of the TRT tensor shape. (trt_shape = tf_shape)
// 3. Dynamic shape mode allows unknown input shapes, and requires explicit
//    batch size definition (trt_shape = tf_shape).
//
// Note that the Converter only distinguishes between two modes:
// - use_implicit_batch == true, which corresponds to kImplicitBatch,
// - use_implicit_batch == false, which includes both kExplicitBatch and
//   kDynamicShape.
//
// For the converter, the distinction between explicit batch and dynamic shape
// mode follows from the input tensors of the network: dynamic shape input
// implies dynamic shape mode, while static shape input tensors imply explicit
// batch mode. We want to test all these modes, therefore we define the
// TrtTestMode enum with the following three options (see the shape example
// after the enum).
enum class TrtTestMode {
  kImplicitBatch = 0,
  kExplicitBatch = 1,
  kDynamicShape = 2
};
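// Example (illustrative): for a TF tensor of shape [8, 28, 28, 3],
//   kImplicitBatch: trt_shape = [28, 28, 3]   (batch dim stripped),
//   kExplicitBatch: trt_shape = [8, 28, 28, 3],
//   kDynamicShape:  trt_shape = [8, 28, 28, 3], where any dim may be -1
//                   (unknown) when the input shape is not fully known.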

string DebugString(const TrtTestMode mode) {
  switch (mode) {
    case TrtTestMode::kImplicitBatch:
      return "kImplicitBatch";
    case TrtTestMode::kExplicitBatch:
      return "kExplicitBatch";
    case TrtTestMode::kDynamicShape:
      return "kDynamicShape";
    default:
      return "Invalid TrtTestMode";
  }
}

namespace convert {

using absl::StrCat;
using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
using ::testing::Matcher;

constexpr std::array<TrtTestMode, 3> ValidTrtModes = {
    TrtTestMode::kImplicitBatch, TrtTestMode::kExplicitBatch,
    TrtTestMode::kDynamicShape};

// TODO(laigd): put this into some test utils file.
void ExpectStatus(Status status, error::Code code = error::OK,
                  const char* substr = nullptr) {
  EXPECT_EQ(code, status.code())
      << status << " vs expected error code \"" << error::Code_Name(code)
      << "\" and message \"" << substr << "\"";
  if (substr) {
    EXPECT_THAT(status.error_message(), ::testing::HasSubstr(substr)) << status;
  }
}

nvinfer1::Dims GetTestDims(const std::vector<int>& d) {
  nvinfer1::Dims dims;
  dims.nbDims = d.size();
  for (int i = 0; i < d.size(); ++i) {
    dims.d[i] = d[i];
  }
  return dims;
}
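// Example usage of GetTestDims (illustrative): GetTestDims({2, 3}) returns a
// Dims with nbDims == 2 and d == {2, 3}.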

// Prints the vector to the output stream.
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v) {
  if (!v.empty()) {
    os << '[';
    std::copy(v.begin(), v.end(), std::ostream_iterator<T>(os, ", "));
    // The two backspaces erase the trailing ", " before the closing bracket.
    os << "\b\b]";
  }
  return os;
}

NodeDef MakeNodeDef(const string& name, const string& op,
                    const std::vector<string>& inputs,
                    const std::map<string, AttrValue> attrs = {}) {
  NodeDef node_def;
  node_def.set_name(name);
  node_def.set_op(op);
  for (const string& input : inputs) {
    node_def.add_input(input);
  }
  for (const auto& attr : attrs) {
    (*node_def.mutable_attr())[attr.first] = attr.second;
  }
  return node_def;
}
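// Example usage of MakeNodeDef (illustrative; all names are made up):
//   NodeDef def = MakeNodeDef("my_add", "Add", {"input_a", "input_b"});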

template <typename T>
NodeDef MakeConstNodeDef(const string& name, const std::vector<T>& vals,
                         const TensorShape& shape) {
  Scope s = Scope::NewRootScope();
  Tensor t = test::AsTensor<T>(vals, shape);
  auto const_op = ops::Const(s.WithOpName(name), t);
  return const_op.node()->def();
}

template <typename T>
NodeDef MakeConstNodeDef(const string& name, const std::vector<T>& vals) {
  TensorShape shape;
  const std::vector<int32> shape_dims = {static_cast<int32>(vals.size())};
  TF_EXPECT_OK(TensorShapeUtils::MakeShape(shape_dims, &shape));
  return MakeConstNodeDef(name, vals, shape);
}

bool TrtDimsEquals(const nvinfer1::Dims& lhs, const nvinfer1::Dims& rhs) {
  if (lhs.nbDims != rhs.nbDims) return false;
  for (int i = 0; i < lhs.nbDims; ++i) {
    if (lhs.d[i] != rhs.d[i]) return false;
    // We don't check the types in the tests.
  }
  return true;
}

bool TrtDimsEqualsArray(const std::vector<int>& lhs,
                        const nvinfer1::Dims& rhs) {
  return TrtDimsEquals(GetTestDims(lhs), rhs);
}

// TODO(laigd): define a parameterized matcher that can compare against the
// vector.
void ExpectTrtDimsEqualsArray(const std::vector<int>& lhs,
                              const nvinfer1::Dims& rhs) {
  EXPECT_TRUE(TrtDimsEqualsArray(lhs, rhs))
      << "expected: " << DebugString(GetTestDims(lhs)) << "\n"
      << "  actual: " << DebugString(rhs);
}

void ExpectTrtLayerNames(absl::Span<const std::string> names,
                         nvinfer1::INetworkDefinition* network) {
  EXPECT_EQ(network->getNbLayers(), names.size());

  for (int i = 0; i < network->getNbLayers(); i++) {
    auto layer = network->getLayer(i);
    EXPECT_EQ(layer->getName(), names[i]);
  }
}

void VerifyTrtLayerNameNotEmpty(nvinfer1::INetworkDefinition* network) {
  for (int i = 0; i < network->getNbLayers(); i++) {
    auto layer = network->getLayer(i);
    EXPECT_NE(layer->getName(), nullptr);
  }
}

Matcher<std::vector<float>> ArrayFloatNear(const std::vector<float>& values,
                                           float max_abs_error = 1e-5,
                                           bool nan_sensitive = false) {
  std::vector<Matcher<float>> matchers;
  matchers.reserve(values.size());
  for (const float& v : values) {
    if (nan_sensitive) {
      matchers.emplace_back(::testing::NanSensitiveFloatNear(v, max_abs_error));
    } else if (max_abs_error == 0) {
      matchers.emplace_back(::testing::FloatEq(v));
    } else {
      EXPECT_GE(max_abs_error, 0);
      matchers.emplace_back(::testing::FloatNear(v, max_abs_error));
    }
  }
  return ElementsAreArray(matchers);
}
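// Example usage of ArrayFloatNear (illustrative; output_values is a made-up
// std::vector<float>):
//   EXPECT_THAT(output_values, ArrayFloatNear({1.0f, 2.0f}, 1e-3));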

template <typename T>
void ExpectArrayNear(const std::vector<T>& lhs, absl::Span<const T> rhs) {
  ASSERT_EQ(lhs.size(), rhs.size());
  for (int i = 0; i < lhs.size(); i++) {
    EXPECT_FLOAT_EQ(lhs[i], rhs[i]);
  }
}

// Eigen::half cannot implicitly convert to float which is required for
// EXPECT_FLOAT_EQ.
template <>
void ExpectArrayNear(const std::vector<Eigen::half>& lhs,
                     absl::Span<const Eigen::half> rhs) {
  ASSERT_EQ(lhs.size(), rhs.size());
  for (int i = 0; i < lhs.size(); i++) {
    EXPECT_FLOAT_EQ(static_cast<float>(lhs[i]), static_cast<float>(rhs[i]));
  }
}

template <typename T>
void ExpectArrayAlmostEqual(const std::vector<T>& lhs, absl::Span<const T> rhs,
                            T tolerance) {
  ASSERT_EQ(lhs.size(), rhs.size());
  for (int i = 0; i < lhs.size(); i++) {
    EXPECT_NEAR(lhs[i], rhs[i], tolerance);
  }
}

// Eigen::half cannot implicitly convert to float which is required for
// EXPECT_NEAR.
template <>
void ExpectArrayAlmostEqual(const std::vector<Eigen::half>& lhs,
                            absl::Span<const Eigen::half> rhs,
                            Eigen::half tolerance) {
  ASSERT_EQ(lhs.size(), rhs.size());
  for (int i = 0; i < lhs.size(); i++) {
    EXPECT_NEAR(static_cast<float>(lhs[i]), static_cast<float>(rhs[i]),
                static_cast<float>(tolerance));
  }
}

bool TrtShapedWeightsEquals(const TRT_ShapedWeights& lhs,
                            const TRT_ShapedWeights& rhs) {
  return TrtDimsEquals(lhs.shape_, rhs.shape_) &&
         lhs.TrtDType() == rhs.TrtDType() && lhs.GetValues() == rhs.GetValues();
}

template <typename T>
void ValidateWeights(const TRT_ShapedWeights& weights,
                     const std::vector<int>& expected_dims,
                     const std::vector<T>& expected_value) {
  ExpectTrtDimsEqualsArray(expected_dims, weights.shape_);
  ASSERT_EQ(expected_value.size(), weights.count()) << weights.DebugString();
  const T* actual_values = static_cast<const T*>(weights.GetValues());
  for (int i = 0; i < expected_value.size(); ++i) {
    EXPECT_EQ(expected_value[i], actual_values[i]);
  }
}

template <typename CType>
std::vector<CType> InitTestVector(int size, CType start_value = CType(0)) {
  std::vector<CType> res;
  res.reserve(size);
  for (int i = 0; i < size; ++i) {
    res.push_back(start_value + CType(i));
  }
  return res;
}
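// Example (illustrative): InitTestVector<float>(3, /*start_value=*/2.0f)
// returns {2.0f, 3.0f, 4.0f}.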

template <typename InCType, typename OutCType>
struct StaticCaster {
  OutCType operator()(InCType in) const { return static_cast<OutCType>(in); }
};

template <typename InCType, typename OutCType>
std::vector<OutCType> CastTestVector(
    const gtl::ArraySlice<InCType>& vals) {  // non-absl ok
  std::vector<OutCType> res(vals.size());
  std::transform(vals.begin(), vals.end(), res.begin(),
                 StaticCaster<InCType, OutCType>());
  return res;
}
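// Example (illustrative): CastTestVector<int, float>({1, 2, 3}) returns
// {1.0f, 2.0f, 3.0f}.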

TEST(TRT_ShapedWeights_Test, Basic) {
  // Test constructor with no arguments.
  {
    TRT_ShapedWeights weights;
    TRT_ShapedWeights copy(weights);
    for (auto ptr : {&weights, &copy}) {
      nvinfer1::Weights trt_weights = ptr->GetTrtWeights();
      EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
      EXPECT_EQ(nullptr, trt_weights.values);
      EXPECT_EQ(0, trt_weights.count);

      EXPECT_EQ(nullptr, ptr->GetValues());
      EXPECT_EQ(0, ptr->count());
      EXPECT_EQ(0, ptr->size_bytes());
    }
  }
  // Test constructor with DataType argument.
  {
    TRT_ShapedWeights weights(nvinfer1::DataType::kFLOAT);
    TRT_ShapedWeights copy(weights);
    for (auto ptr : {&weights, &copy}) {
      nvinfer1::Weights trt_weights = ptr->GetTrtWeights();
      EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
      EXPECT_EQ(nullptr, trt_weights.values);
      EXPECT_EQ(0, trt_weights.count);

      EXPECT_EQ(nullptr, ptr->GetValues());
      EXPECT_EQ(0, ptr->count());
      EXPECT_EQ(0, ptr->size_bytes());
    }
  }
  // Test constructor with DataType and nvinfer1::Dims arguments.
  {
    TrtWeightStore store;
    TRT_ShapedWeights weights =
        store.GetTempWeights(nvinfer1::DataType::kFLOAT, GetTestDims({2, 5}));
    TRT_ShapedWeights copy(weights);
    for (auto ptr : {&weights, &copy}) {
      nvinfer1::Weights trt_weights = ptr->GetTrtWeights();
      EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
      EXPECT_NE(nullptr, trt_weights.values);
      EXPECT_EQ(10, trt_weights.count);

      EXPECT_EQ(trt_weights.values, ptr->GetValues());
      EXPECT_EQ(10, ptr->count());
      EXPECT_EQ(40, ptr->size_bytes());
    }
    // Test that it doesn't copy the underlying buffer.
    EXPECT_EQ(weights.GetValues(), copy.GetValues());
  }
}

TEST(TRT_TensorOrWeights_Test, Basic) {
  // Test constructor with no arguments.
  {
    TRT_TensorOrWeights tw;
    TRT_TensorOrWeights copy(tw);
    TRT_TensorOrWeights assigned;
    assigned = tw;
    for (auto ptr : {&tw, &copy, &assigned}) {
      EXPECT_EQ(false, ptr->is_tensor());
      EXPECT_EQ(false, ptr->is_weights());
      EXPECT_EQ(-1, ptr->batch_size());
    }
  }

  // Test constructor with ITensor and batch size argument.
  {
    nvinfer1::Dims dims;
    dims.nbDims = 1;
    dims.d[0] = 1;
    ITensorProxyPtr itensor(dims);
    TRT_TensorOrWeights tw(itensor);
    TRT_TensorOrWeights tw1(itensor, /*batch_size=*/1);

    for (auto original_ptr : {&tw, &tw1}) {
      TRT_TensorOrWeights copy(*original_ptr);
      TRT_TensorOrWeights assigned;
      assigned = *original_ptr;

      for (auto ptr : {original_ptr, &copy, &assigned}) {
        ASSERT_TRUE(ptr->is_tensor());
        EXPECT_EQ(false, ptr->is_weights());
        if (original_ptr == &tw) {
          EXPECT_EQ(-1, ptr->batch_size());
        } else {
          EXPECT_EQ(1, ptr->batch_size());
        }
        EXPECT_EQ(itensor->simple_tensor(), ptr->tensor()->simple_tensor());
        ExpectTrtDimsEqualsArray({1}, ptr->GetTrtDims());
      }
    }
  }
  // Test constructor which creates and owns an ITensor.
  {
    nvinfer1::Dims dims;
    dims.nbDims = 1;
    dims.d[0] = 1;
    TRT_TensorOrWeights tw(nvinfer1::DataType::kFLOAT, dims, /*batch_size=*/1);
    TRT_TensorOrWeights copy(tw);
    TRT_TensorOrWeights assigned;
    assigned = tw;

    for (auto ptr : {&tw, &copy, &assigned}) {
      ASSERT_TRUE(ptr->is_tensor());
      EXPECT_EQ(false, ptr->is_weights());
      EXPECT_EQ(1, ptr->batch_size());
      EXPECT_NE(nullptr, ptr->tensor()->simple_tensor());
      ExpectTrtDimsEqualsArray({1}, ptr->GetTrtDims());
    }
  }
  // Test constructor with TRT_ShapedWeights argument.
  {
    TRT_ShapedWeights weights;
    TRT_TensorOrWeights tw(weights);
    TRT_TensorOrWeights copy(tw);
    TRT_TensorOrWeights assigned;
    assigned = tw;
    for (auto ptr : {&tw, &copy, &assigned}) {
      EXPECT_EQ(false, ptr->is_tensor());
      EXPECT_EQ(true, ptr->is_weights());
      EXPECT_TRUE(TrtShapedWeightsEquals(weights, ptr->weights()));
      ExpectTrtDimsEqualsArray({}, ptr->GetTrtDims());
    }
  }
}

class ValidatorTest : public ::testing::Test {
 public:
  std::unordered_map<string, OpConverter>& op_validators(
      TrtNodeValidator* validator) {
    return validator->op_validators_;
  }

  Status ConvertToTensorOrWeights(const Scope& scope, const Node* node,
                                  int output_port,
                                  TRT_TensorOrWeights* tensor_or_weights) {
    grappler::GrapplerItem item;
    TF_EXPECT_OK(scope.ToGraphDef(&item.graph));
    grappler::GraphProperties graph_properties(item);
    TF_EXPECT_OK(graph_properties.InferStatically(true));

    TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32,
                               /*use_calibration=*/false,
                               /*use_implicit_batch=*/true);
    return validator.ConvertToTensorOrWeights(node->def(), output_port,
                                              tensor_or_weights);
  }

  const std::set<string>* GetQuantizeOps(TrtNodeValidator* validator) {
    return validator->quantize_ops;
  }
};

TEST_F(ValidatorTest, QuantizeOpsAreRegistered) {
  grappler::GrapplerItem item;
  grappler::GraphProperties graph_properties(item);
  TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32,
                             /*use_calibration=*/false,
                             /*use_implicit_batch=*/true);
  for (const string& quantize_op : *GetQuantizeOps(&validator)) {
    QCHECK(op_validators(&validator).count(quantize_op));
  }
}

TEST_F(ValidatorTest, ConvertToTensorOrWeights) {
  // Convert Const.
  {
    Scope s = Scope::NewRootScope();
    auto node =
        ops::Const(s.WithOpName("my_const"), {1.0f, 2.0f}, TensorShape({2}));
    TRT_TensorOrWeights output;
    ExpectStatus(ConvertToTensorOrWeights(s, node.op().node(),
                                          /*output_port=*/0, &output));
    ValidateWeights<float>(output.weights(), {2}, {1.0, 2.0});
  }

  // Helper method to run ConvertToTensorOrWeights() with predefined parameters.
  auto convert_to_tensor_or_weights = [this](const std::vector<int64>& dims,
                                             TRT_TensorOrWeights* output) {
    Scope s = Scope::NewRootScope();
    const auto attrs = ops::Placeholder::Shape(PartialTensorShape{dims});
    auto feed = ops::Placeholder(s.WithOpName("feed"), DT_FLOAT, attrs);
    auto add = ops::Add(s.WithOpName("add"), feed, feed);
    return this->ConvertToTensorOrWeights(s, add.operation.node(),
                                          /*output_port=*/0, output);
  };
  // Convert non-Const with #dims > nvinfer1::Dims::MAX_DIMS+1.
  {
    TRT_TensorOrWeights output;
    ExpectStatus(
        convert_to_tensor_or_weights(
            std::vector<int64>(nvinfer1::Dims::MAX_DIMS + 2, 1), &output),
        error::OUT_OF_RANGE, "Input tensor rank is greater than 9");
  }
  // Convert non-Const with #dims < 1.
  {
    TRT_TensorOrWeights output;
    ExpectStatus(
        convert_to_tensor_or_weights({}, &output), error::INVALID_ARGUMENT,
        "Scalar input tensor is not supported since the first dimension "
        "is treated as batch dimension by TRT");
  }
  // Convert non-Const. We test the case where the non-batch dimension is
  // unknown as well, to make sure the validator allows that.
  for (const int32 non_batch_dim : {-1, 2}) {
    const int32 batch_size = 12;
    TRT_TensorOrWeights output;
    ExpectStatus(
        convert_to_tensor_or_weights({batch_size, non_batch_dim}, &output));
    ASSERT_TRUE(output.is_tensor());
    EXPECT_EQ(batch_size, output.batch_size());
    EXPECT_NE(nullptr, output.tensor()->simple_tensor());
    ExpectTrtDimsEqualsArray({non_batch_dim}, output.GetTrtDims());
  }
}

TEST_F(ValidatorTest, IsTensorRTCandidate_Basics) {
  Scope s = Scope::NewRootScope();
  auto input =
      ops::Const(s.WithOpName("const"), {1.0f, 2.0f}, TensorShape({2}));
  auto add = ops::Add(s.WithOpName("add"), input, input);
  const Node* add_node = add.operation.node();

  grappler::GrapplerItem item;
  TF_EXPECT_OK(s.ToGraphDef(&item.graph));
  grappler::GraphProperties graph_properties(item);
  TF_EXPECT_OK(graph_properties.InferStatically(true));
  TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32,
                             /*use_calibration=*/false,
                             /*use_implicit_batch=*/true);

  bool start_conversion = false;
  bool should_fail = false;
  auto op_converter = [&start_conversion,
                       &should_fail](OpConverterParams* params) -> Status {
    if (should_fail) return errors::InvalidArgument("");
    if (!params->validation_only) start_conversion = true;
    return Status::OK();
  };

  // Validator not registered.
  ASSERT_EQ(1, op_validators(&validator).erase("Add"));
  ExpectStatus(validator.IsTensorRTCandidate(add_node), error::UNIMPLEMENTED,
               "Op type Add is not supported.");

  // Register validator.
  op_validators(&validator)["Add"] = op_converter;
  TF_EXPECT_OK(validator.IsTensorRTCandidate(add_node));
  EXPECT_EQ(false, start_conversion);

  // Let the converter return error.
  should_fail = true;
  ExpectStatus(validator.IsTensorRTCandidate(add_node),
               error::INVALID_ARGUMENT);
}

TEST(TrtNodeValidator, IsTensorRTCandidate) {
  // Create a graph containing both TRT-compatible and TRT-incompatible nodes
  // and use it to test TrtNodeValidator::IsTensorRTCandidate().
  const std::vector<int32> input_shape_array{2, 2};
  TensorShape input_shape;
  TF_EXPECT_OK(TensorShapeUtils::MakeShape(input_shape_array, &input_shape));

  Scope s = Scope::NewRootScope();
  ops::Placeholder::Attrs feed_attrs;
  TF_EXPECT_OK(
      TensorShapeUtils::MakeShape(input_shape_array, &feed_attrs.shape_));

  // Compatible input.
  auto feed = ops::Placeholder(s.WithOpName("feed"), DT_FLOAT, feed_attrs);
  auto const_1 = ops::Const(s.WithOpName("const_1"), 1.0f, input_shape);

  // Compatible MatMul.
  auto matmul = ops::MatMul(s.WithOpName("matmul"), feed, const_1);

  // Incompatible MatMul.
  ops::MatMul::Attrs matmul_attrs;
  matmul_attrs.transpose_a_ = true;
  auto incompatible_matmul = ops::MatMul(s.WithOpName("incompatible_matmul"),
                                         feed, const_1, matmul_attrs);

  // Unsupported op.
  auto unsupported_op = ops::Erfc(s.WithOpName("sin"), feed);

  // Incompatible input.
  auto incompatible_feed = ops::Placeholder(s.WithOpName("feed"), DT_DOUBLE);
  auto const_2 = ops::Const(s.WithOpName("const_2"), 1.0, input_shape);
  // Compatible op with incompatible input.
  auto matmul_with_incompatible_input =
      ops::MatMul(s.WithOpName("matmul_with_incompatible_input"),
                  incompatible_feed, const_2);

  // Quantize ops.
  auto quantize_attrs = ops::FakeQuantWithMinMaxArgs::Min(-6.0f).Max(6.0f);
  auto quantize = ops::FakeQuantWithMinMaxArgs(s.WithOpName("quantize"), feed,
                                               quantize_attrs);

  // Get GrapplerItem and GraphProperties.
  grappler::GrapplerItem item;
  TF_EXPECT_OK(s.ToGraphDef(&item.graph));
  Tensor feed_tensor(DT_FLOAT, input_shape);
  item.feed.push_back(std::make_pair("feed", feed_tensor));
  grappler::GraphProperties graph_properties(item);
  TF_EXPECT_OK(graph_properties.InferStatically(true));

  for (const TrtPrecisionMode precision_mode :
       {TrtPrecisionMode::FP32, TrtPrecisionMode::INT8}) {
    TrtNodeValidator validator(graph_properties, precision_mode,
                               /*use_calibration=*/false,
                               /*use_implicit_batch=*/true);
    TF_EXPECT_OK(validator.IsTensorRTCandidate(matmul.operation.node()));
    ExpectStatus(
        validator.IsTensorRTCandidate(incompatible_matmul.operation.node()),
        error::INVALID_ARGUMENT,
        "MatMul with 2D tensors requires explicit batch mode, or that tensor A "
        "is not transposed and B is a constant tensor.");
    ExpectStatus(validator.IsTensorRTCandidate(unsupported_op.operation.node()),
                 error::UNIMPLEMENTED, "Op type Erfc is not supported");
    ExpectStatus(validator.IsTensorRTCandidate(
                     matmul_with_incompatible_input.operation.node()),
                 error::INTERNAL,
                 "Failed to convert input feed_1 to a TRT_TensorOrWeights");
    if (precision_mode == TrtPrecisionMode::INT8) {
      TF_EXPECT_OK(validator.IsTensorRTCandidate(quantize.operation.node()));
    } else {
      ExpectStatus(validator.IsTensorRTCandidate(quantize.operation.node()),
                   error::UNIMPLEMENTED,
                   "Op type FakeQuantWithMinMaxArgs is not supported");
    }
  }
}

class ConverterTest : public ::testing::Test {
 public:
  ConverterTest() { Reset(); }

  void Reset() {
    converter_ =
        std::move(Converter::Create(TrtPrecisionMode::FP32,
                                    /*use_calibration=*/false, &logger_,
                                    /*use_implicit_batch=*/true,
                                    /*engine_name=*/"TRTEngineOp_0_0")
                      .ValueOrDie());
    weight_store_ = &converter_->weight_store_;
  }

  void AddOpConverter(const string& op_name, OpConverter op_converter) {
    converter_->op_registry_[op_name] = op_converter;
  }

  // Below we expose private methods of Converter for testing.

  Status MaybeUpdateBatchSize(int batch_size) {
    return converter_->MaybeUpdateBatchSize(batch_size);
  }

  Status AddTensorOrWeights(const string& name, TRT_TensorOrWeights input) {
    return converter_->AddTensorOrWeights(name, input);
  }

  Status GetTensorOrWeights(const string& name, TRT_TensorOrWeights* output) {
    return converter_->GetTensorOrWeights(name, output);
  }

  Status GetInputs(const NodeDef& node_def,
                   std::vector<TRT_TensorOrWeights>* inputs) const {
    return converter_->GetInputs(node_def, inputs);
  }

  Status GetWeightRange(const TRT_ShapedWeights& weights, float* out_min,
                        float* out_max) const {
    return converter_->GetWeightRange(weights, out_min, out_max);
  }

  int batch_size() const { return converter_->batch_size_; }

  std::unordered_map<ITensorProxyPtr*, float>& quantization_ranges_proxy() {
    return converter_->quantization_ranges_proxy_;
  }

  std::unordered_map<nvinfer1::ITensor*, float>& quantization_ranges() {
    return converter_->quantization_ranges_;
  }

 private:
  Logger& logger_ = *Logger::GetLogger();

 protected:
  std::unique_ptr<Converter> converter_;
  TrtWeightStore* weight_store_;
};

TEST_F(ConverterTest, ConvertNode) {
  ITensorProxyPtr output_tensors[2];
  auto op_converter = [&output_tensors](OpConverterParams* params) -> Status {
    nvinfer1::Dims dims = params->inputs[0].tensor()->getDimensions();
    for (int i = 0; i < 2; ++i) {
      dims.d[0] += 1;
      output_tensors[i]->setDimensions(dims);
      params->outputs->push_back(TRT_TensorOrWeights(output_tensors[i]));
    }
    return Status::OK();
  };
  NodeDef node_def = MakeNodeDef("my_op", "MyOp", {"my_input"});
  TF_EXPECT_OK(converter_->AddInputTensor(
      "my_input", nvinfer1::DataType::kFLOAT, GetTestDims({123}), 1));

  // Converter not registered.
  ExpectStatus(converter_->ConvertNode(node_def), error::UNIMPLEMENTED,
               "No converter registered for op: MyOp");

  // Register the converter and retry.
  AddOpConverter("MyOp", op_converter);
  TF_EXPECT_OK(converter_->ConvertNode(node_def));

  TRT_TensorOrWeights actual_output_1;
  TF_EXPECT_OK(GetTensorOrWeights("my_op", &actual_output_1));
  EXPECT_EQ(output_tensors[0]->simple_tensor(),
            actual_output_1.tensor()->simple_tensor());
  EXPECT_EQ(124, actual_output_1.tensor()->getDimensions().d[0]);

  TRT_TensorOrWeights actual_output_2;
  TF_EXPECT_OK(GetTensorOrWeights("my_op:1", &actual_output_2));
  EXPECT_EQ(output_tensors[1]->simple_tensor(),
            actual_output_2.tensor()->simple_tensor());
  EXPECT_EQ(125, actual_output_2.tensor()->getDimensions().d[0]);

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

TEST_F(ConverterTest, AddAndGetInputs) {
  NodeDef node_def;
  node_def.add_input("^control_input");
  node_def.add_input("input");
  node_def.add_input("input:0");
  node_def.add_input("input:1");
  node_def.add_input("weird_input:2:3:4:0");

  TF_EXPECT_OK(converter_->AddInputTensor("input", nvinfer1::DataType::kFLOAT,
                                          GetTestDims({1}), 1));
  TF_EXPECT_OK(converter_->AddInputTensor("input:1", nvinfer1::DataType::kINT32,
                                          GetTestDims({2, 3}), 1));
  TF_EXPECT_OK(converter_->AddInputTensor(
      "weird_input:2:3:4", nvinfer1::DataType::kHALF, GetTestDims({5, 3}), 1));

  std::vector<TRT_TensorOrWeights> inputs;
  TF_EXPECT_OK(GetInputs(node_def, &inputs));

  EXPECT_EQ(4, inputs.size());
  EXPECT_EQ(inputs[0].tensor()->simple_tensor(),
            inputs[1].tensor()->simple_tensor());

  EXPECT_EQ(nvinfer1::DataType::kFLOAT, inputs[0].tensor()->getType());
  EXPECT_EQ(nvinfer1::DataType::kINT32, inputs[2].tensor()->getType());
  EXPECT_EQ(nvinfer1::DataType::kHALF, inputs[3].tensor()->getType());
  ExpectTrtDimsEqualsArray({1}, inputs[0].tensor()->getDimensions());
  ExpectTrtDimsEqualsArray({2, 3}, inputs[2].tensor()->getDimensions());
  ExpectTrtDimsEqualsArray({5, 3}, inputs[3].tensor()->getDimensions());

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

TEST_F(ConverterTest, RenameAndMarkOutputTensors) {
  // Test that the tensors are actually named and marked as outputs after
  // Converter::RenameAndMarkOutputTensors() is called.

  // Register a custom converter which shuffles the input. We use it to build a
  // TRT network whose outputs will be marked later.
  std::vector<ITensorProxyPtr> output_tensors;
  auto op_converter = [&output_tensors](OpConverterParams* params) -> Status {
    nvinfer1::Permutation perm;
    perm.order[0] = 1;
    perm.order[1] = 0;
    for (int i = 0; i < 2; ++i) {
      ITensorProxyPtr input_tensor = params->inputs[0].tensor();
      nvinfer1::IShuffleLayer* layer =
          params->converter->network()->addShuffle(*input_tensor->trt_tensor());
      layer->setFirstTranspose(perm);
      ITensorProxyPtr output_tensor = layer->getOutput(0);
      params->outputs->emplace_back(output_tensor);
      output_tensors.push_back(output_tensor);
    }
    TRT_ShapedWeights output_weights(nvinfer1::DataType::kFLOAT);
    params->outputs->emplace_back(output_weights);
    return Status::OK();
  };
  AddOpConverter("MyOp", op_converter);

  // Run the conversion.
  NodeDef node_def = MakeNodeDef("my_op", "MyOp", {"my_input"});
  TF_EXPECT_OK(converter_->AddInputTensor(
      "my_input", nvinfer1::DataType::kFLOAT, GetTestDims({1, 2}), 1));
  TF_EXPECT_OK(converter_->ConvertNode(node_def));

  // Mark a weight as output, should fail.
  ExpectStatus(
      converter_->RenameAndMarkOutputTensors({{"my_op:2", "my_output"}}),
      error::INVALID_ARGUMENT, "Output my_op:2 is weights not tensor");

  // Mark tensors as output, should pass.
  TF_EXPECT_OK(converter_->RenameAndMarkOutputTensors(
      {{"my_op", "my_output"}, {"my_op:1", "my_output_1"}}));
  EXPECT_EQ(2, output_tensors.size());
  for (auto output_tensor : output_tensors) {
    ExpectTrtDimsEqualsArray({2, 1}, output_tensor->getDimensions());
  }
  EXPECT_EQ("my_output", string(output_tensors[0]->getName()));
  EXPECT_EQ("my_output_1", string(output_tensors[1]->getName()));

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

TEST_F(ConverterTest, TransposeTensor) {
  ITensorProxyPtr input_tensor = converter_->network()->addInput(
      "", nvinfer1::DataType::kFLOAT, GetTestDims({2, 3, 5}));
  ITensorProxyPtr output_tensor = nullptr;
  NodeDef dummy_node_def = MakeNodeDef("dummy_op", "DummyOp", {});
  // Rank doesn't match.
  ExpectStatus(
      converter_->TransposeTensor(input_tensor, {0, 1}, &output_tensor,
                                  dummy_node_def, "sub1"),
      error::INVALID_ARGUMENT,
      "Rank of perm for transpose does not match with that of the input");

  // Transpose at batch dimension.
  ExpectStatus(
      converter_->TransposeTensor(input_tensor, {1, 0, 2, 3}, &output_tensor,
                                  dummy_node_def, "sub2"),
      error::UNIMPLEMENTED, "Transpose at batch dimension is not supported.");

  // OK.
  TF_EXPECT_OK(converter_->TransposeTensor(
      input_tensor, {0, 3, 1, 2}, &output_tensor, dummy_node_def, "sub3"));
  ExpectTrtDimsEqualsArray({5, 2, 3}, output_tensor->getDimensions());
  ExpectTrtLayerNames({"TRTEngineOp_0_0/dummy_op-sub3:SHUFFLE"},
                      converter_->network());
}

void TestPrepareTensorForShape(
    const std::vector<int>& input_dims, const std::vector<int>& reshape_dims,
    const std::vector<int>& expected_tensor_dims, bool input_is_tensor,
    Converter* converter, TrtWeightStore* weight_store,
    error::Code expected_code = error::OK,
    const char* expected_error_msg_substr = nullptr) {
  TRT_TensorOrWeights input;
  if (input_is_tensor) {
    input = TRT_TensorOrWeights(converter->network()->addInput(
        "", nvinfer1::DataType::kFLOAT, GetTestDims(input_dims)));
  } else {
    input = TRT_TensorOrWeights(weight_store->GetTempWeights(
        nvinfer1::DataType::kFLOAT, GetTestDims(input_dims)));
  }
  ITensorProxyPtr output_tensor = nullptr;

  NodeDef dummy_node_def = MakeNodeDef("dummy_op", "DummyOp", {});
  for (bool validation_only : {false, true}) {
    const Status status =
        PrepareTensorForShape(converter, input, GetTestDims(reshape_dims),
                              validation_only, &output_tensor, dummy_node_def);
    if (expected_code == error::OK) {
      TF_EXPECT_OK(status);
      if (validation_only) {
        EXPECT_EQ(nullptr, *output_tensor);
      } else {
        ExpectTrtDimsEqualsArray(expected_tensor_dims,
                                 output_tensor->getDimensions());
      }
    } else {
      ExpectStatus(status, expected_code, expected_error_msg_substr);
    }
  }
}

TEST_F(ConverterTest, PrepareTensorForShape) {
  for (bool input_is_tensor : {true, false}) {
    // Shape size doesn't match.
    Reset();
    TestPrepareTensorForShape({2, 3, 5}, {2, 3, 6}, {}, input_is_tensor,
                              converter_.get(), weight_store_,
                              error::INVALID_ARGUMENT, "Incompatible shapes");

    // Regular shape.
    Reset();
    TestPrepareTensorForShape({2, 3, 5}, {10, 3}, {10, 3}, input_is_tensor,
                              converter_.get(), weight_store_);

    // Reshape to zero rank.
    Reset();
    TestPrepareTensorForShape({1, 1}, {}, {}, input_is_tensor, converter_.get(),
                              weight_store_);
  }

  // Tensor input with zero rank.
  Reset();
  TestPrepareTensorForShape({}, {1, 1}, {1, 1}, /*input_is_tensor=*/true,
                            converter_.get(), weight_store_);

  // TODO(aaroey): we should check the case where uninferred dimensions are
  // not an exact divisor of input dimensions, e.g. for dims {-1, 7}.

  // Infer tensor shape, ok.
  Reset();
  TestPrepareTensorForShape({2, 3, 5}, {-1, 2}, {15, 2},
                            /*input_is_tensor=*/true, converter_.get(),
                            weight_store_);

  // Infer weight shape, should fail.
  Reset();
  TestPrepareTensorForShape({2, 3, 5}, {-1, 2}, {15, 2},
                            /*input_is_tensor=*/false, converter_.get(),
                            weight_store_, error::INVALID_ARGUMENT,
                            "Shape is not fully defined");

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

TEST_F(ConverterTest, MaybeUpdateBatchSize) {
  EXPECT_EQ(-1, batch_size());

  TF_EXPECT_OK(MaybeUpdateBatchSize(-1));
  EXPECT_EQ(-1, batch_size());

  TF_EXPECT_OK(MaybeUpdateBatchSize(123));
  EXPECT_EQ(123, batch_size());

  TF_EXPECT_OK(MaybeUpdateBatchSize(123));
  EXPECT_EQ(123, batch_size());

  TF_EXPECT_OK(MaybeUpdateBatchSize(-1));
  EXPECT_EQ(123, batch_size());

  ExpectStatus(MaybeUpdateBatchSize(124), error::INVALID_ARGUMENT,
               "Provided batch size does not match converter batch size");
}

TEST_F(ConverterTest, AddAndGetTensorOrWeights) {
  // Add a tensor.
  ITensorProxyPtr simple_tensor;
  TRT_TensorOrWeights tensor(simple_tensor);
  EXPECT_EQ(-1, tensor.batch_size());
  TF_EXPECT_OK(MaybeUpdateBatchSize(123));
  TF_EXPECT_OK(AddTensorOrWeights("my_tensor", tensor));

  // Get the added tensor.
  TRT_TensorOrWeights added_tensor;
  TF_EXPECT_OK(GetTensorOrWeights("my_tensor", &added_tensor));
  EXPECT_EQ(123, added_tensor.batch_size());

  // Add the same tensor again.
  ExpectStatus(AddTensorOrWeights("my_tensor", tensor), error::ALREADY_EXISTS,
               "tensor/weights my_tensor already exist");
}

template <typename T>
void TestGetWeightRange(ConverterTest* test, TrtWeightStore* weight_store) {
  nvinfer1::DataType trt_type;
  TF_ASSERT_OK(TfTypeToTrtType(DataTypeToEnum<T>::v(), &trt_type));
  TRT_ShapedWeights weights =
      weight_store->GetTempWeights(trt_type, GetTestDims({2, 3}));
  const std::vector<T> values = {T(3), T(1), T(2), T(6), T(5), T(4)};
  memcpy(weights.GetValues(), values.data(), weights.size_bytes());

  float out_min = 0.0f;
  float out_max = 0.0f;
  TF_EXPECT_OK(test->GetWeightRange(weights, &out_min, &out_max));
  EXPECT_EQ(1.0f, out_min);
  EXPECT_EQ(6.0f, out_max);
}

TEST_F(ConverterTest, GetWeightRange) {
  TestGetWeightRange<float>(this, weight_store_);
  TestGetWeightRange<Eigen::half>(this, weight_store_);
  TestGetWeightRange<int32>(this, weight_store_);
}

TEST_F(ConverterTest, ProvideQuantizationRange) {
  ITensorProxyPtr simple_tensor;
  // Asymmetric range
  converter_->ProvideQuantizationRange(&simple_tensor, 0.0f, 6.0f);
  EXPECT_EQ(6.0f, quantization_ranges_proxy()[&simple_tensor]);
  converter_->ProvideQuantizationRange(&simple_tensor, 1.0f, 6.0f);
  EXPECT_EQ(6.0f, quantization_ranges_proxy()[&simple_tensor]);
  converter_->ProvideQuantizationRange(&simple_tensor, -8.0f, 6.0f);
  EXPECT_EQ(8.0f, quantization_ranges_proxy()[&simple_tensor]);
  converter_->ProvideQuantizationRange(&simple_tensor, -8.123f, -6.123f);
  EXPECT_EQ(8.123f, quantization_ranges_proxy()[&simple_tensor]);
  // Symmetric range
  converter_->ProvideQuantizationRange(&simple_tensor, -6.123f, 6.123f);
  EXPECT_EQ(6.123f, quantization_ranges_proxy()[&simple_tensor]);

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

TEST_F(ConverterTest, MaybeApplyQuantizationRanges) {
  ITensorProxyPtr input;
  ITensorProxyPtr not_infer;
  Logger& logger = *Logger::GetLogger();
  auto int8_converter = Converter::Create(TrtPrecisionMode::INT8,
                                          /*use_calibration=*/true, &logger,
                                          /*use_implicit_batch=*/true,
                                          /*engine_name=*/"")
                            .ValueOrDie();
  int8_converter->ProvideQuantizationRange(&input, -5.0f, 5.0f);
  int8_converter->ProvideQuantizationRange(&not_infer, -100.0f, 100.0f);

  int8_converter->MaybeApplyQuantizationRanges();
  EXPECT_EQ(input->getDynamicRangeMax(), 5.0f);
  EXPECT_EQ(not_infer->getDynamicRangeMax(), 100.0f);

  VerifyTrtLayerNameNotEmpty(int8_converter->network());
}

TEST_F(ConverterTest, GetTrtBroadcastShape) {
  const bool kIsTensor = true;
  const bool kIsNotTensor = false;
  auto symmetric_test = [this](const std::vector<int>& operand_1_shape,
                               const std::vector<int>& operand_2_shape,
                               const bool operand_1_is_tensor,
                               const bool operand_2_is_tensor,
                               const std::vector<int>& expected_operand_1_shape,
                               const std::vector<int>& expected_operand_2_shape,
                               error::Code expected_code = error::OK,
                               const char* expected_error_msg_substr = nullptr,
                               const int operand_1_batch_size = -1,
                               const int operand_2_batch_size = -1) {
    auto create_tensor_or_weights = [](const std::vector<int>& shape,
                                       bool is_tensor, int batch_size = -1) {
      if (is_tensor) {
        return TRT_TensorOrWeights{nvinfer1::DataType::kFLOAT,
                                   GetTestDims(shape), batch_size};
      }
      TRT_ShapedWeights weights;
      weights.shape_ = GetTestDims(shape);
      return TRT_TensorOrWeights(weights);
    };

    nvinfer1::Dims operand_1_new_dims, operand_2_new_dims;
    TRT_TensorOrWeights operand_1 = create_tensor_or_weights(
        operand_1_shape, operand_1_is_tensor, operand_1_batch_size);
    TRT_TensorOrWeights operand_2 = create_tensor_or_weights(
        operand_2_shape, operand_2_is_tensor, operand_2_batch_size);

    // operand_1 broadcast operand_2
    ExpectStatus(
        GetTrtBroadcastShape(operand_1, operand_2, /*check_feasibility=*/true,
                             /*use_implicit_batch=*/true, &operand_1_new_dims,
                             &operand_2_new_dims),
        expected_code, expected_error_msg_substr);
    if (expected_code == error::OK) {
      ExpectTrtDimsEqualsArray(expected_operand_1_shape, operand_1_new_dims);
      ExpectTrtDimsEqualsArray(expected_operand_2_shape, operand_2_new_dims);
    }
    // operand_2 broadcast operand_1
    ExpectStatus(
        GetTrtBroadcastShape(operand_2, operand_1, /*check_feasibility=*/true,
                             /*use_implicit_batch=*/true, &operand_2_new_dims,
                             &operand_1_new_dims),
        expected_code, expected_error_msg_substr);
    if (expected_code == error::OK) {
      ExpectTrtDimsEqualsArray(expected_operand_1_shape, operand_1_new_dims);
      ExpectTrtDimsEqualsArray(expected_operand_2_shape, operand_2_new_dims);
    }
  };

  // Both inputs are weights.
  symmetric_test(
      {1}, {1}, kIsNotTensor, kIsNotTensor, {}, {}, error::INVALID_ARGUMENT,
      "Broadcasting requires at least one of the operands be tensors");

  // One tensor and one weights.
  symmetric_test({1, 1, 1}, {2}, kIsTensor, kIsNotTensor, {1, 1, 1}, {1, 1, 2});
  symmetric_test({1, 1, 2}, {2}, kIsTensor, kIsNotTensor, {1, 1, 2}, {1, 1, 2});
  symmetric_test({1, 3, 2}, {1}, kIsTensor, kIsNotTensor, {1, 3, 2}, {1, 1, 1});
  symmetric_test({1, 1, 1}, {2, 3}, kIsTensor, kIsNotTensor, {1, 1, 1},
                 {1, 2, 3});
  symmetric_test({1, 1, 1}, {2, 3, 4}, kIsTensor, kIsNotTensor, {1, 1, 1},
                 {2, 3, 4});
  symmetric_test({1, 1, 1}, {1, 2, 3, 4}, kIsTensor, kIsNotTensor, {1, 1, 1},
                 {2, 3, 4});
  symmetric_test({1, 3, 4}, {1, 2, 1, 4}, kIsTensor, kIsNotTensor, {1, 3, 4},
                 {2, 1, 4});
  symmetric_test({1, 1, 1}, {2, 1, 1, 1}, kIsTensor, kIsNotTensor, {}, {},
                 error::INVALID_ARGUMENT, "Infeasible broadcast scheme");
  symmetric_test({1, 1, 1}, {2, 1, 1, 1}, kIsTensor, kIsNotTensor, {}, {},
                 error::INVALID_ARGUMENT, "Infeasible broadcast scheme",
                 /*operand_1_batch_size=*/2);
  symmetric_test({1, 1, 1}, {1, 1, 1, 1, 1}, kIsTensor, kIsNotTensor, {}, {},
                 error::INVALID_ARGUMENT,
                 "Broadcasting beyond batch dimension is not supported "
                 "(tensor #dims 4 vs broadcast #dims 5)");
  symmetric_test({3}, {1, 1, 3}, kIsTensor, kIsNotTensor, {}, {},
                 error::INVALID_ARGUMENT,
                 "Broadcasting beyond batch dimension is not supported "
                 "(tensor #dims 2 vs broadcast #dims 3)",
                 /*operand_1_batch_size=*/2);

  // Both inputs are tensors.
  symmetric_test({1, 1, 1}, {1, 1}, kIsTensor, kIsTensor, {}, {},
                 error::INVALID_ARGUMENT,
                 "Broadcasting beyond batch dimension is not supported "
                 "(tensor #dims 3 vs broadcast #dims 4)");
  symmetric_test({1, 3}, {3}, kIsTensor, kIsTensor, {}, {},
                 error::INVALID_ARGUMENT,
                 "Broadcasting beyond batch dimension is not supported "
                 "(tensor #dims 2 vs broadcast #dims 3)");
  symmetric_test({1, 3, 4}, {2, 1, 4}, kIsTensor, kIsTensor, {1, 3, 4},
                 {2, 1, 4});
  symmetric_test({1, 1, 1}, {1, 1, 1, 1}, kIsTensor, kIsTensor, {}, {},
                 error::INVALID_ARGUMENT,
                 "Broadcasting beyond batch dimension is not supported "
                 "(tensor #dims 4 vs broadcast #dims 5)");
  symmetric_test({2, 3}, {7, 5}, kIsTensor, kIsTensor, {}, {},
                 error::INVALID_ARGUMENT, "Infeasible broadcast scheme");

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

TEST_F(ConverterTest, CreateConstantLayer) {
  for (auto dtype : {nvinfer1::DataType::kFLOAT, nvinfer1::DataType::kINT32}) {
    TRT_ShapedWeights weights =
        weight_store_->GetTempWeights(dtype, GetTestDims({2, 3, 5}));
    ITensorProxyPtr tensor =
        converter_->CreateConstantLayer(weights, GetTestDims({3, 10}));
    ASSERT_NE(nullptr, tensor->trt_tensor());
    EXPECT_EQ(dtype, tensor->getType())
        << "Expected " << DebugString(dtype) << " vs. actual "
        << DebugString(tensor->getType());
    ExpectTrtDimsEqualsArray({3, 10}, tensor->getDimensions());
  }

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

class ConvertGraphDefToEngineTest : public ::testing::Test {
 public:
  Status RunConvertGraphDefToEngine(Scope* s) {
    GraphDef gdef;
    TF_EXPECT_OK(s->ToGraphDef(&gdef));
    std::vector<PartialTensorShape> input_shapes;
    int batch_size = -1;
    for (const NodeDef& node : gdef.node()) {
      absl::string_view node_name(node.name());
      if (absl::ConsumePrefix(&node_name, IONamePrefixes::kInputPHName)) {
        int port = -1;
        EXPECT_TRUE(absl::SimpleAtoi(node_name, &port)) << node.name();
        if (input_shapes.size() < port + 1) input_shapes.resize(port + 1);
        input_shapes[port] =
            PartialTensorShape(node.attr().at("shape").shape());
        if (batch_size == -1) {
          batch_size = input_shapes[port].dim_size(0);
        } else {
          EXPECT_EQ(batch_size, input_shapes[port].dim_size(0));
        }
      }
    }
    // TODO(laigd): execute the engine and get outputs.
    return ConvertGraphDefToEngine(
        gdef, TrtPrecisionMode::FP32, /*max_batch_size=*/1,
        /*max_workspace_size_bytes=*/64 << 20, input_shapes, &logger_,
        /*allocator=*/nullptr, /*calibrator=*/nullptr, &engine_,
        /*use_calibration=*/false, /*use_implicit_batch=*/true,
        /*convert_successfully=*/nullptr, /*profiles=*/nullptr,
        "TRTEngineOp_0_0");
  }

 protected:
  TrtUniquePtrType<nvinfer1::ICudaEngine> engine_;

 private:
  Logger& logger_ = *Logger::GetLogger();
};

TEST_F(ConvertGraphDefToEngineTest, IdentityGraph) {
  Scope s = Scope::NewRootScope();
  auto input =
      ops::Placeholder(s.WithOpName(StrCat(IONamePrefixes::kInputPHName, 0)),
                       DT_FLOAT, ops::Placeholder::Shape({1, 1}));
  auto output = ops::Identity(s.WithOpName("identity1"), input);
  output = ops::Identity(s.WithOpName("identity2"), output);
  output = ops::Identity(s.WithOpName(StrCat(IONamePrefixes::kOutputPHName, 0)),
                         output);
  // If the converter marks the input tensor as output tensor, the conversion
  // below will fail with:
  // > TensorRTOutputPH_0 cannot be both input and output
  // > Network must have at least one output
  TF_EXPECT_OK(RunConvertGraphDefToEngine(&s));
}

// Returns a vector of shapes from a vector of input tensors. This can be used
// to create optimization profiles.
Status GetShapeFromDataVec(DataVec input_data,
                           std::vector<TensorShape>* shape_vec) {
  shape_vec->reserve(input_data.size());
  std::transform(input_data.begin(), input_data.end(),
                 std::back_inserter(*shape_vec),
                 [](InputOutputData x) { return x.tensor.shape(); });
  return Status::OK();
}
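// Example usage (illustrative; input_data is a made-up DataVec):
//   std::vector<TensorShape> shape_vec;
//   TF_EXPECT_OK(GetShapeFromDataVec(input_data, &shape_vec));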

template <typename T>
inline absl::Span<const T> GetSpanForData(const InputOutputData& data) {
  const auto& tensor_map = data.tensor.flat<T>();
  return absl::Span<const T>(tensor_map.data(), tensor_map.size());
}

std::vector<float> GetDataAsFloat(InputOutputData& data) {
  if (data.tensor.dtype() == DT_FLOAT) {
    auto span = GetSpanForData<float>(data);
    return std::vector<float>(span.begin(), span.end());
  }
  if (data.tensor.dtype() == DT_HALF) {
    return CastTestVector<Eigen::half, float>(
        GetSpanForData<Eigen::half>(data));
  }
  if (data.tensor.dtype() == DT_INT32) {
    return CastTestVector<int32, float>(GetSpanForData<int32>(data));
  }
  LOG(FATAL) << "DataType not supported for testing "
             << DataTypeString(data.tensor.dtype());
}
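
// Illustrative sketch (not part of the original tests): GetDataAsFloat lets a
// test compare outputs of different dtypes against a single
// std::vector<float> matcher. E.g., for a DT_HALF tensor:
//
//   InputOutputData out{"y", test::AsTensor<Eigen::half>(
//                                {Eigen::half(1.5f), Eigen::half(2.0f)})};
//   std::vector<float> vals = GetDataAsFloat(out);  // {1.5f, 2.0f}
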
// Class to test various op converters, using both a TrtNodeValidator and
// Converter.
class OpConverterTest : public ::testing::Test {
 public:
  OpConverterTest()
      : tensor_buffer_allocator_(new GpuManagedAllocator()),
        scope_(Scope::NewRootScope()) {
    QCHECK_EQ(0, cudaStreamCreate(&stream_));
    Reset();
  }

  ~OpConverterTest() noexcept override {
    QCHECK_EQ(0, cudaStreamDestroy(stream_));
  }

  Status GetTensorOrWeights(const string& name, TRT_TensorOrWeights* output) {
    return converter_->GetTensorOrWeights(name, output);
  }

  void Reset(TrtPrecisionMode precision_mode_to_test = TrtPrecisionMode::FP32,
             TrtTestMode trt_mode = TrtTestMode::kImplicitBatch) {
    // Destroy existing TRT objects in a proper order.
    converter_.reset(nullptr);
    engine_.reset(nullptr);

    // Re-create them in proper order.
    converter_ =
        std::move(Converter::Create(precision_mode_to_test,
                                    /*use_calibration=*/false, &logger_,
                                    /*use_implicit_batch=*/trt_mode ==
                                        TrtTestMode::kImplicitBatch,
                                    /*engine_name=*/"")
                      .ValueOrDie());

    // Reset other related artifacts.
    scope_ = Scope::NewRootScope();
  }

  // Constructs a flat tensor with 'vals' in Unified Memory.
  template <typename T>
  Tensor AsTensor(gtl::ArraySlice<T> vals) {  // non-absl ok
    Tensor ret(tensor_buffer_allocator_.get(), DataTypeToEnum<T>::value,
               {static_cast<int64>(vals.size())});
    std::copy_n(vals.data(), vals.size(), ret.flat<T>().data());
    return ret;
  }

  // Constructs a tensor of "shape" with values "vals" in Unified Memory.
  template <typename T>
  Tensor AsTensor(gtl::ArraySlice<T> vals,  // non-absl ok
                  const TensorShape& shape) {
    Tensor ret(tensor_buffer_allocator_.get(), DataTypeToEnum<T>::value,
               {static_cast<int64>(vals.size())});
    CHECK(ret.CopyFrom(AsTensor(vals), shape));
    return ret;
  }

  // Constructs a tensor with the given values (vals). The tensor type is
  // defined by the tf_type argument; its shape is given by input_dims. The
  // tensor is constructed using the allocator of OpConverterTest in Unified
  // Memory.
  template <typename T>
  Tensor AsTensor(std::vector<T> vals, const std::vector<int> input_dims,
                  DataType tf_type) {
    Tensor ret(tensor_buffer_allocator_.get(), tf_type,
               {static_cast<int64>(vals.size())});
    if (tf_type == DT_FLOAT) {
      auto conv_vals = CastTestVector<T, float>(vals);
      std::copy_n(conv_vals.data(), conv_vals.size(), ret.flat<float>().data());
    } else if (tf_type == DT_HALF) {
      auto conv_vals = CastTestVector<T, Eigen::half>(vals);
      std::copy_n(conv_vals.data(), conv_vals.size(),
                  ret.flat<Eigen::half>().data());
    } else if (tf_type == DT_INT32) {
      auto conv_vals = CastTestVector<T, int32>(vals);
      std::copy_n(conv_vals.data(), conv_vals.size(), ret.flat<int32>().data());
    } else {
      LOG(FATAL) << "Cannot create tensor with type "
                 << DataTypeString(tf_type);
    }
    TensorShape shape;
    TF_EXPECT_OK(TensorShapeUtils::MakeShape(input_dims, &shape));
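    // Note (added comment): CopyFrom with the tensor itself reuses the
    // underlying buffer and effectively performs an in-place reshape to
    // `shape` (the element counts must match).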
    CHECK(ret.CopyFrom(ret, shape));
    return ret;
  }
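
  // Illustrative sketch (not part of the original tests): this overload
  // decouples the C++ type of the literal values from the dtype of the
  // resulting tensor, e.g. a 2x2 DT_HALF tensor built from int literals:
  //
  //   Tensor t = AsTensor<int>({1, 2, 3, 4}, {2, 2}, DT_HALF);
  //   // t.dtype() == DT_HALF, t.shape() == TensorShape({2, 2})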

  // Constructs a flat tensor in Unified Memory.
  template <typename T>
  Tensor ConstructTensor(int data_size, const T& value = T()) {
    std::vector<T> values(data_size, value);
    return AsTensor<T>(values);
  }

  // Constructs a flat tensor in Unified Memory.
  template <typename T>
  Tensor ConstructTensor(int data_size, const T& value, DataType tf_type) {
    std::vector<T> values(data_size, value);
    return AsTensor<T>(values, {data_size}, tf_type);
  }

  void CheckDataTypeMatches(const DataVec& datas) {
    if (VLOG_IS_ON(2)) {
      int nbBindings = engine_->getNbBindings();
      VLOG(2) << "Number of engine bindings: " << nbBindings;
      for (int i = 0; i < nbBindings; i++) {
        VLOG(2) << "Binding " << i << " name: " << engine_->getBindingName(i);
      }
    }
    for (const auto& data : datas) {
      VLOG(2) << "Checking if data type matches for tensor " << data.name;
      const int input_index = engine_->getBindingIndex(data.name.c_str());
      ASSERT_NE(-1, input_index);
      const nvinfer1::DataType trt_dtype =
          engine_->getBindingDataType(input_index);
      DataType tf_type;
      TF_ASSERT_OK(TrtTypeToTfType(trt_dtype, &tf_type));
      ASSERT_EQ(data.tensor.dtype(), tf_type)
          << DataTypeString(data.tensor.dtype()) << " vs. "
          << DataTypeString(tf_type);
    }
  }

  Status BuildAndRun(const DataVec& input_data, DataVec* output_data,
                     const int batch_size = 1) {
    // Mark the output tensor as TRT engine output.
    std::vector<Converter::EngineOutputInfo> output_info;
    for (const auto& data : *output_data) {
      nvinfer1::DataType trt_type;
      TF_RETURN_IF_ERROR(TfTypeToTrtType(data.tensor.dtype(), &trt_type));
      output_info.push_back({data.name, data.name, trt_type});
    }
    TF_RETURN_IF_ERROR(converter_->RenameAndMarkOutputTensors(output_info));

    // Build the TRT engine.
    if (engine_.get() != nullptr) {
      return errors::Internal("Engine already exists");
    }
    TrtShapeOptimizationProfile profiles;
    if (!converter_->use_implicit_batch()) {
      profiles.SetShapeTensorMask(converter_->network());
      TF_RETURN_IF_ERROR(profiles.CollectShapeValues(input_data));
      // Create a single optimization profile for explicit batch mode.
      std::vector<TensorShape> input_shapes;
      TF_RETURN_IF_ERROR(GetShapeFromDataVec(input_data, &input_shapes));
      profiles.AddShape(input_shapes);
      std::vector<PartialTensorShape> input_partial_shapes;
      TF_RETURN_IF_ERROR(
          GetNetworkInputShapes(converter_->network(), &input_partial_shapes));
      profiles.InitProfiles(input_partial_shapes,
                            ProfileStrategy::kImplicitBatchModeCompatible);
    }
    TF_RETURN_IF_ERROR(
        converter_->BuildCudaEngine(&engine_,
                                    /*max_batch_size=*/batch_size,
                                    /*max_workspace_size_bytes=*/1 << 26,
                                    /*allocator=*/nullptr,
                                    /*calibrator=*/nullptr,
                                    /*profiles=*/&profiles));
    CHECK_NOTNULL(engine_.get());
    CheckDataTypeMatches(input_data);
    CheckDataTypeMatches(*output_data);

    const int num_bindings = input_data.size() + output_data->size();
    std::vector<void*> buffers(num_bindings);

    if (engine_->getNbBindings() != num_bindings) {
      return errors::Internal("Number of bindings does not match");
    }
    // Since we have only 1 optimization profile (which is enabled by default)
    // it is fine to create the execution context directly, instead of calling
    // profiles.CreateExecutionContexts().
    TrtUniquePtrType<nvinfer1::IExecutionContext> execution_context(
        engine_->createExecutionContext());

    // Prepare input bindings.
    TF_RETURN_IF_ERROR(
        SetTrtEngineInputs(engine_.get(), execution_context.get(), 0, buffers,
                           converter_->use_implicit_batch(), batch_size,
                           profiles, nullptr, &input_data));
    // Prepare output bindings.
    TF_RETURN_IF_ERROR(SetTrtEngineOutputs(
        engine_.get(), execution_context.get(), 0, buffers,
        converter_->use_implicit_batch(), batch_size, nullptr, output_data));
    // Execute the TRT engine.
    TF_RETURN_IF_ERROR(TrtEnqueue(execution_context.get(), buffers, stream_,
                                  converter_->use_implicit_batch(),
                                  batch_size));
    cudaStreamSynchronize(stream_);
    return Status::OK();
  }

  // Adds an ITensor for both validation and conversion, assuming the explicit
  // batch dimension is included in dims (i.e. for an NCHW tensor
  // dims = {N, C, H, W}).
  void AddTestTensorWithTFDims(
      const string& name, const std::vector<int32>& dims,
      nvinfer1::DataType trt_type = nvinfer1::DataType::kFLOAT,
      Status add_input_status = Status::OK()) {
    DataType tf_type;
    TF_ASSERT_OK(TrtTypeToTfType(trt_type, &tf_type));
    ops::Placeholder::Attrs attrs;
    TF_EXPECT_OK(TensorShapeUtils::MakeShape(dims, &attrs.shape_));

    auto input = ops::Placeholder(scope_.WithOpName(name), tf_type, attrs);
    node_inputs_[name] = input.output;

    // Add a real ITensor for conversion conditionally.
    nvinfer1::Dims trt_dims;
    Status status = TensorShapeToTrtDims(
        attrs.shape_, converter_->use_implicit_batch(), &trt_dims);
    if (converter_->use_implicit_batch() && !status.ok()) {
      ASSERT_EQ(add_input_status, status);
      return;
    } else {
      TF_EXPECT_OK(status);
    }
    if (!converter_->use_implicit_batch() || HasStaticShape(trt_dims)) {
      int batch_size = dims.size() > 0 ? dims[0] : 0;
      Status status =
          converter_->AddInputTensor(name, trt_type, trt_dims, batch_size);
      ASSERT_EQ(add_input_status, status);
    }
  }

  // Adds an ITensor for both validation and conversion. The difference
  // compared to AddTestTensorWithTFDims is in the meaning of the dims
  // parameter: to define a tensor with NCHW shape, here we set dims = {C,H,W}
  // and batch_size = N.
  // TODO(tfeher): Remove this function once all tests are updated to use the
  // other version of AddTestTensor (defined by
  // ParameterizedOpConverterTestBase).
  void AddTestTensor(
      const string& name, const std::vector<int32>& dims, int batch_size = 1,
      nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT) {
    std::vector<int32> dims_with_batch(dims.size() + 1);
    dims_with_batch[0] = batch_size;
    std::copy(dims.begin(), dims.end(), dims_with_batch.begin() + 1);
    AddTestTensorWithTFDims(name, dims_with_batch, trt_dtype);
    if (HasStaticShape(dims)) {
      ASSERT_EQ(batch_size, converter_->batch_size_);
    }
  }
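
  // Illustrative sketch (not part of the original tests): for a TF tensor of
  // shape {2, 3, 4, 5} (NCHW), this variant takes the batch dim separately,
  // i.e.
  //
  //   AddTestTensor("input", /*dims=*/{3, 4, 5}, /*batch_size=*/2);
  //
  // is equivalent to AddTestTensorWithTFDims("input", {2, 3, 4, 5}).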

  // Add weights for both validation and conversion.
  template <typename T>
  void AddTestWeights(const string& name, const std::vector<int>& dims,
                      const std::vector<T>& values) {
    // Add weights for validation.
    TensorShape shape;
    TF_EXPECT_OK(TensorShapeUtils::MakeShape(dims, &shape));
    Tensor t = AsTensor<T>(values, shape);
    node_inputs_[name] = ops::Const(scope_.WithOpName(name), t);

    // Add weights for conversion.
    nvinfer1::DataType dtype;
    TF_ASSERT_OK(TfTypeToTrtType(DataTypeToEnum<T>::v(), &dtype));
    const nvinfer1::Dims trt_dims = GetTestDims(dims);
    const int64_t num_elements = TRT_ShapedWeights::count(trt_dims);
    QCHECK_EQ(num_elements, values.size())
        << num_elements << " vs " << values.size();
    TRT_ShapedWeights weights(dtype);
    if (num_elements) {
      weights = converter_->weight_store_.GetTempWeights(dtype, trt_dims);
      QCHECK_EQ(weights.size_bytes(), sizeof(T) * values.size())
          << weights.size_bytes() << " vs " << sizeof(T) * values.size();
      memcpy(weights.GetValues(), values.data(), weights.size_bytes());
    }
    TF_EXPECT_OK(
        converter_->AddTensorOrWeights(name, TRT_TensorOrWeights{weights}));
  }
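
  // Illustrative sketch (not part of the original tests): adding a 2x3 int32
  // weight whose values are laid out in row-major order. The element count
  // derived from dims must equal values.size():
  //
  //   AddTestWeights<int32>("weights", /*dims=*/{2, 3}, {0, 1, 2, 3, 4, 5});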

  template <typename T = int32>
  void AddTestWeights(const string& name, const std::vector<int>& dims,
                      const std::vector<T>& values, DataType tf_type) {
    if (tf_type == DT_FLOAT) {
      AddTestWeights(name, dims, CastTestVector<T, float>(values));
    } else if (tf_type == DT_HALF) {
      AddTestWeights(name, dims, CastTestVector<T, Eigen::half>(values));
    } else if (tf_type == DT_INT32) {
      AddTestWeights(name, dims, CastTestVector<T, int32>(values));
    } else {
      FAIL() << "Cannot create test weights with type "
             << DataTypeString(tf_type);
    }
  }

  // Test validation in validation-only mode.
  Status RunValidation(const Node* node) {
    grappler::GrapplerItem item;
    TF_EXPECT_OK(scope_.ToGraphDef(&item.graph));
    grappler::GraphProperties graph_properties(item);
    TF_EXPECT_OK(graph_properties.InferStatically(true));

    TrtNodeValidator validator(graph_properties, converter_->precision_mode(),
                               /*use_calibration=*/false,
                               converter_->use_implicit_batch());
    return validator.IsTensorRTCandidate(node);
  }

  void RunConversion(const Node* node, error::Code expected_code = error::OK,
                     const char* expected_msg_substr = nullptr) {
    ExpectStatus(converter_->ConvertNode(node->def()), expected_code,
                 expected_msg_substr);
    if (expected_code == error::OK) {
      VerifyTrtLayerNameNotEmpty(converter_->network());
    }
  }

  // Helper method to run both validation and conversion, when the expected
  // outputs are the same.
  void RunValidationAndConversion(const NodeDef& node_def,
                                  error::Code expected_code = error::OK,
                                  const char* expected_msg_substr = nullptr,
                                  bool should_run_conversion = true) {
    // Add the node to the graph.
    // TODO(laigd): we should accept a function that adds the node using
    // `scope_`, so individual test cases can reuse the scope object and we
    // don't need to add the edges here ourselves.
    Graph* graph = scope_.graph();
    Status status;
    Node* node = graph->AddNode(std::move(node_def), &status);
    TF_EXPECT_OK(status);
    for (int i = 0; i < node_def.input().size(); ++i) {
      const string& input_name = node_def.input(i);
      const auto& itr = node_inputs_.find(input_name);
      QCHECK(itr != node_inputs_.end());
      const Output& input = itr->second;
      graph->AddEdge(input.node(), input.index(), node, i);
    }

    status = RunValidation(node);
    if (should_run_conversion && status.ok()) {
      RunConversion(node, expected_code, expected_msg_substr);
    } else {
      ExpectStatus(status, expected_code, expected_msg_substr);
    }
  }

  // Helper method to run both validation and conversion, and check the output
  // shapes.
  void RunValidationAndConversion(
      const NodeDef& node_def, const Status& status, const char* output_name,
      const std::vector<std::vector<int>>& exp_out_dims) {
    RunValidationAndConversion(node_def, status.code(),
                               status.error_message().c_str(), true);

    if (status.ok()) {
      // TODO(tfeher): Enable this check in explicit_batch_mode.
      // In dynamic shape mode the output dims cannot be tested here. In that
      // case we need to wait for the concrete input shapes to be defined (by
      // setBindingDimensions before enqueue) before we can check the output
      // dims.
      if (converter_->use_implicit_batch()) {
        for (int i = 0; i < exp_out_dims.size(); i++) {
          TRT_TensorOrWeights output;
          string name = i == 0 ? output_name : StrCat(output_name, ":", i);
          TF_EXPECT_OK(GetTensorOrWeights(name.c_str(), &output));
          ASSERT_TRUE(output.is_tensor());
          if (!exp_out_dims[i].empty()) {
            // Remove the batch dim.
            auto out_dims = std::vector<int>(exp_out_dims[i].begin() + 1,
                                             exp_out_dims[i].end());
            VLOG(2) << "Testing output shape for tensor " << name;
            ExpectTrtDimsEqualsArray(out_dims,
                                     output.tensor()->getDimensions());
          }
        }
      }
    }
  }

  // Expose quantization_ranges_proxy_ for tests.
  std::unordered_map<ITensorProxyPtr*, float>& quantization_ranges_proxy() {
    return converter_->quantization_ranges_proxy_;
  }

  // Expose quantization_ranges_ for tests.
  std::unordered_map<nvinfer1::ITensor*, float>& quantization_ranges() {
    return converter_->quantization_ranges_;
  }

  std::unique_ptr<Converter> converter_;

 private:
  Logger& logger_ = *Logger::GetLogger();
  TrtUniquePtrType<nvinfer1::ICudaEngine> engine_;
  cudaStream_t stream_;
  std::unique_ptr<Allocator> tensor_buffer_allocator_;
  // The scope that contains the graph being converted. Because
  // tensor_buffer_allocator_ provides the storage for tensor contents that are
  // represented as attributes for graph nodes within scope_,
  // tensor_buffer_allocator_ needs to be available when destructing scope_.
  // Therefore, scope_ comes after tensor_buffer_allocator_ in the class member
  // field list.
  Scope scope_;
  std::unordered_map<string, Output> node_inputs_;
};

// General test parameters to be used with ops that take a single input tensor.
struct TestParamBase {
  // Concrete input dimensions for the test (including the batch dim).
  std::vector<int> input_dims;

  // Dimensions to define an input with PartialTensorShape. This can be used to
  // define networks with dynamic input shape. It can be left empty, in which
  // case AddTestTensor sets partial shapes that are appropriate to
  // TrtTestMode.
  std::vector<int> partial_input_dims;

  // Concrete (static) output dimensions, including batch size as the first
  // dim.
  std::vector<int> expected_output_dims;

  // Parameter vector, has converter-specific meaning.
  std::vector<int> param;

  // Expected status of conversion (with concrete error message).
  Status status;

  // Expected status of BuildAndRun.
  Status runtime_status;
};
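
// Illustrative sketch (not part of the original tests): a typical parameter
// set for a transpose-like op, with the batch dim included in both the input
// and the expected output dims (this mirrors the ConvertTranspose params
// further below):
//
//   TestParamBase p{/*input_dims=*/{1, 1, 2, 3},
//                   /*partial_input_dims=*/{},
//                   /*expected_output_dims=*/{1, 3, 1, 2},
//                   /*param=*/{0, 3, 1, 2}};  // perm values for Transpose.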

std::ostream& operator<<(std::ostream& os, const TestParamBase& p) {
  os << "input_dims" << p.input_dims;
  if (!p.partial_input_dims.empty()) {
    os << ", partial_input_dims" << p.partial_input_dims;
  }
  if (!p.expected_output_dims.empty()) {
    os << ", exp_out_dims" << p.expected_output_dims;
  }
  if (!p.param.empty()) {
    os << ", param" << p.param;
  }
  os << ", " << p.status;
  return os;
}

// Parameterized version of OpConverterTest. We have the following parameters:
// 1. TrtTestMode: implicit batch, explicit batch, dynamic shape modes
// 2. DataType of the input TF tensors: DT_FLOAT, DT_HALF, DT_INT32
// 3. TrtPrecisionMode argument for the Converter: FP32, FP16, INT8
// We will introduce subclasses that will be instantiated using different
// combinations of the DataType and TrtPrecisionMode parameters.
class ParameterizedOpConverterTestBase
    : public OpConverterTest,
      public ::testing::WithParamInterface<
          std::tuple<TrtTestMode, DataType, TrtPrecisionMode>> {
 public:
  ParameterizedOpConverterTestBase()
      : trt_mode_(std::get<0>(GetParam())),
        tf_type_(std::get<1>(GetParam())),
        converter_precision_(std::get<2>(GetParam())) {
    LOG(INFO) << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%";
    LOG(INFO) << "tf_type_: " << DebugString(tf_type_);
    LOG(INFO) << "trt_mode_: " << DebugString(trt_mode_);
    LOG(INFO) << "converter_precision_: " << DebugString(converter_precision_);
    LOG(INFO) << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%";
  }

  void Reset() {
    OpConverterTest::Reset(converter_precision_, trt_mode_);
    input_data_.clear();
  }

  void Reset(TrtPrecisionMode precision) {
    OpConverterTest::Reset(precision, trt_mode_);
    input_data_.clear();
  }

  // Getters of protected attributes.
  DataType get_tf_type() { return tf_type_; }
  TrtTestMode get_trt_mode() { return trt_mode_; }
  TrtPrecisionMode get_converter_precision() { return converter_precision_; }

  // Adds an input ITensor for the TRT network. Also creates the corresponding
  // TF tensor, and stores it in the list of inputs (input_data_).
  //
  // The TF tensor is always created with the concrete static input shape given
  // by dims. The ITensor can have a static or dynamic shape, set automatically
  // according to the trt_mode parameter, unless the user overrides it with an
  // explicit partial_input_shape_dims argument.
  //
  // Parameters:
  // - name: name of the input node.
  // - dims: actual dimensions of the tensor that we will use during the test
  //   (including the explicit batch dim).
  // - values: initial values for the TF tensor.
  // - tf_type: data type of the tensor.
  // - partial_input_shape_dims: dimensions which can include unknown shapes.
  //   This can be empty, in which case the partial input shape will be set
  //   automatically depending on the trt_mode argument. (This argument also
  //   includes the explicit batch dim.)
  // - add_input_status: adding the ITensor to the network can fail in implicit
  //   batch mode if the batch size is inconsistent. Using the add_input_status
  //   arg we can test such errors.
  //
  template <typename T = int>
  void AddTestTensor(const string& name, const std::vector<int32>& dims,
                     DataType tf_type, const std::vector<T>& values,
                     const std::vector<int32>& partial_input_shape_dims = {},
                     Status add_input_status = Status::OK()) {
    if (!dims.empty()) {
      const auto num_elements = std::accumulate(
          std::begin(dims), std::end(dims), 1, std::multiplies<double>());
      if (!values.empty() && num_elements != values.size()) {
        // Note: for conversion-only tests it is valid to have empty values,
        // otherwise the number of elements should match.
        LOG(WARNING) << "Expected Test Tensor Shape: " << DebugString(dims)
                     << ", Received Input Tensor: " << DebugString(values);
      }
    }

    std::vector<int32> partial_shape;
    if (!partial_input_shape_dims.empty()) {
      partial_shape = partial_input_shape_dims;
    } else {
      if (trt_mode_ == TrtTestMode::kDynamicShape) {
        // In dynamic shape mode we make all dims unknown.
        partial_shape = std::vector<int32>(dims.size(), -1);
      } else {
        // Use static (known) input shapes.
        partial_shape = dims;
      }
    }
    nvinfer1::DataType trt_type;
    TF_ASSERT_OK(TfTypeToTrtType(tf_type, &trt_type));
    AddTestTensorWithTFDims(name, partial_shape, trt_type, add_input_status);
    if (!values.empty()) {
      VLOG(2) << "Adding test tensor: " << name << " "
              << DataTypeString(tf_type);
      InputOutputData data{name, AsTensor(values, dims, tf_type)};
      VLOG(2) << "Added tensor: " << data.name << " with dtype "
              << DataTypeString(data.tensor.dtype());
      input_data_.push_back(data);
    }
  }
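
  // Illustrative sketch (not part of the original tests): adding a 1x2x3
  // input where, in dynamic shape mode, only the channel dim is kept static.
  // The TF tensor itself is always created with the concrete dims:
  //
  //   AddTestTensor("input", /*dims=*/{1, 2, 3}, tf_type_,
  //                 /*values=*/{1, 2, 3, 4, 5, 6},
  //                 /*partial_input_shape_dims=*/{-1, 2, -1});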

  // Adds a test tensor (same as above) but with the default tf_type defined by
  // the test params.
  template <typename T = int>
  void AddTestTensor(const string& name, const std::vector<int32>& dims,
                     const std::vector<T>& values = {},
                     const std::vector<int32>& partial_input_shape_dims = {}) {
    AddTestTensor<T>(name, dims, tf_type_, values, partial_input_shape_dims);
  }

  // Builds and runs the converted network. Checks the output tensor shapes.
  // Tests output values using a matcher. The network can have multiple input
  // and output tensors. The inputs are defined by the input_data_ member
  // variable.
  void BuildAndRun(const string& name,
                   const std::vector<std::vector<int>>& expected_output_dims,
                   const Status& expected_runtime_status,
                   const std::vector<Matcher<std::vector<float>>>& matcher,
                   const std::vector<DataType>& out_tf_types = {}) {
    TensorShape shape;
    const int n_output = expected_output_dims.size();
    ASSERT_EQ(n_output, matcher.size());
    DataVec output_data;
    for (int i = 0; i < n_output; i++) {
      TF_EXPECT_OK(
          TensorShapeUtils::MakeShape(expected_output_dims[i], &shape));
      string out_name = (i == 0) ? name : StrCat(name, ":", i);
      DataType out_tf_type =
          out_tf_types.size() > i ? out_tf_types[i] : tf_type_;
      InputOutputData data{
          out_name, ConstructTensor(shape.num_elements(), 0, out_tf_type)};
      output_data.push_back(data);
    }
    const int batch_size =
        input_data_.empty() ||
                TensorShapeUtils::IsScalar(input_data_[0].tensor.shape())
            ? 1
            : input_data_[0].tensor.shape().dim_size(0);
    Status stat =
        OpConverterTest::BuildAndRun(input_data_, &output_data, batch_size);
    ASSERT_EQ(expected_runtime_status.ok(), stat.ok())
        << "expected status: " << expected_runtime_status
        << ", actual status: " << stat;
    if (expected_runtime_status.ok() && stat.ok()) {
      for (int i = 0; i < n_output; i++) {
        // Check the shape of the actual output tensors.
        TF_EXPECT_OK(
            TensorShapeUtils::MakeShape(expected_output_dims[i], &shape));
        EXPECT_TRUE(output_data[i].tensor.shape() == shape)
            << "Expected shape: " << shape.DebugString() << ", actual shape: "
            << output_data[i].tensor.shape().DebugString();
        EXPECT_THAT(GetDataAsFloat(output_data[i]), matcher[i]);
      }
    }
  }

  // Runs validation and conversion. If conversion is successful then builds
  // the TRT network, executes it and checks the output. Handles multiple
  // output tensors.
  void TestOpConverterMultiOut(
      const string& name, const NodeDef node_def,
      const std::vector<std::vector<int>>& expected_output_dims,
      const Status& expected_conversion_status,
      const Status& expected_runtime_status,
      const std::vector<Matcher<std::vector<float>>>& matcher,
      const std::vector<DataType>& out_tf_type = {}) {
    RunValidationAndConversion(node_def, expected_conversion_status,
                               name.c_str(), expected_output_dims);
    if (expected_conversion_status.ok()) {
      BuildAndRun(name, expected_output_dims, expected_runtime_status, matcher,
                  out_tf_type);
    }
  }

  // Runs validation and conversion. If conversion is successful then builds
  // the TRT network, executes it and checks the output.
  void TestOpConverter(const string& name, const NodeDef node_def,
                       const std::vector<int>& expected_output_dims,
                       const Status& expected_conversion_status,
                       const Status& expected_runtime_status,
                       const Matcher<std::vector<float>>& matcher,
                       const std::vector<DataType>& out_tf_types = {}) {
    RunValidationAndConversion(
        node_def, expected_conversion_status, name.c_str(),
        std::vector<std::vector<int>>({expected_output_dims}));
    if (expected_conversion_status.ok()) {
      BuildAndRun(name, std::vector<std::vector<int>>({expected_output_dims}),
                  expected_runtime_status,
                  std::vector<Matcher<std::vector<float>>>({matcher}),
                  out_tf_types);
    }
  }

 protected:
  const TrtTestMode trt_mode_;
  const DataType tf_type_;
  const TrtPrecisionMode converter_precision_;
  DataVec input_data_;
};

// Op converter test in FP32 mode. While for debugging purposes it might make
// sense to run over all possible combinations, normally a subset of them
// would be sufficient:
// - All valid options to TrtTestMode (implicit, explicit, dynamic shape).
// - DataType is the TF data type of the input tensors. This usually only
//   influences the data type added by Converter::AddInputTensor. We test the
//   valid combinations of input data types in AddAndGetInputs, therefore
//   for most of the OpConverter tests it is sufficient to test for DT_FLOAT.
// - TrtPrecisionMode: valid options are FP32, FP16 and INT8. This influences
//   how TRT handles the precision inside the TRT network, but should not
//   matter for the TF -> TRT conversion. Therefore it should be sufficient
//   to test for FP32.
class OpConverter_FP32_Test : public ParameterizedOpConverterTestBase {};
// Base class for tests that need to be tested for both FP32 and FP16.
class OpConverter_FP32_FP16_Test : public ParameterizedOpConverterTestBase {};
// Base class for tests that need to be tested for FP32, FP16, and INT32.
class OpConverter_FP32_FP16_INT32_Test
    : public ParameterizedOpConverterTestBase {};

// Instantiate parameter combinations to OpConverter_<DT_X...>_Test
INSTANTIATE_TEST_CASE_P(
    OpConvTestInstantiation, OpConverter_FP32_Test,
    ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
                       ::testing::Values(DT_FLOAT),
                       ::testing::Values(TrtPrecisionMode::FP32)));

INSTANTIATE_TEST_CASE_P(
    OpConvTestInstantiation, OpConverter_FP32_FP16_Test,
    ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
                       ::testing::Values(DT_FLOAT, DT_HALF),
                       ::testing::Values(TrtPrecisionMode::FP32)));

INSTANTIATE_TEST_CASE_P(
    OpConvTestInstantiation, OpConverter_FP32_FP16_INT32_Test,
    ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
                       ::testing::Values(DT_FLOAT, DT_HALF, DT_INT32),
                       ::testing::Values(TrtPrecisionMode::FP32)));

template <typename T>
void CopyTensorElements(const Tensor& tensor, protobuf::RepeatedField<T>* out) {
  out->Clear();
  if (tensor.NumElements() == 0) return;

  // TensorProto does not need to have all the elements present and can
  // truncate trailing elements with the same value for compressed
  // representation. Such elements are derived based on the tensor shape.
  const auto flat = tensor.flat<T>();
  int64 last_index = 0;
  for (int64 i = 0; i < tensor.NumElements(); ++i) {
    if (flat(i) != flat(last_index)) {
      last_index = i;
    }
  }

  int num_out_elements = last_index + 1;
  out->Reserve(num_out_elements);
  out->AddNAlreadyReserved(num_out_elements);
  const T* src = flat.data();
  T* dst = out->mutable_data();
  std::copy(src, src + num_out_elements, dst);
}
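
// Illustrative sketch (not part of the original tests): for a tensor with
// repeated trailing elements, CopyTensorElements emits only the prefix up to
// the last "new" value; the truncated trailing elements are implied by the
// tensor shape when the proto is decoded:
//
//   Tensor t = test::AsTensor<int32>({2, 2, 1, 1, 1, 1}, TensorShape({2, 3}));
//   protobuf::RepeatedField<int32> field;
//   CopyTensorElements<int32>(t, &field);
//   // field now holds {2, 2, 1}.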

template <DataType dtype, typename InputCType, typename OutputCType>
void TestConvertConst(OpConverterTest* test) {
  NodeDef node_def;
  node_def.set_name("my_const");
  node_def.set_op("Const");

  auto reset_and_test = [&node_def, test](
                            const Tensor& tensor, const bool as_tensor_content,
                            const std::vector<int>& expected_dims,
                            const std::vector<OutputCType>& expected_value) {
    test->Reset();

    TensorProto* tensor_attr =
        (*node_def.mutable_attr())["value"].mutable_tensor();
    tensor_attr->Clear();

    if (as_tensor_content) {
      tensor.AsProtoTensorContent(tensor_attr);
    } else {
      tensor.shape().AsProto(tensor_attr->mutable_tensor_shape());
      tensor_attr->set_dtype(tensor.dtype());

      if (tensor.dtype() == DT_FLOAT) {
        CopyTensorElements<float>(tensor, tensor_attr->mutable_float_val());
      } else if (tensor.dtype() == DT_INT32) {
        CopyTensorElements<int32>(tensor, tensor_attr->mutable_int_val());
      } else {
        tensor.AsProtoField(tensor_attr);
      }
    }
    test->RunValidationAndConversion(node_def);
    TRT_TensorOrWeights output;
    TF_EXPECT_OK(test->GetTensorOrWeights("my_const", &output));
    ValidateWeights(output.weights(), expected_dims, expected_value);
  };

  auto& attr = *node_def.mutable_attr();
  attr["dtype"].set_type(dtype);
  {
    // By default an empty tensor would pick DT_FLOAT as its data type; we fix
    // the data type here.
    Tensor t(dtype);  // Empty tensor.
    reset_and_test(t, false, {}, {});
  }
  {
    Tensor t = test::AsScalar<InputCType>(12);
    std::vector<int> expected_dims{1};
    // Scalars are represented as rank 0 tensors.
    expected_dims.clear();
    reset_and_test(t, false, expected_dims, {12});
    reset_and_test(t, true, expected_dims, {12});
  }
  {
    Tensor t = test->AsTensor<InputCType>({1, 2});
    reset_and_test(t, false, {2}, {1, 2});
    reset_and_test(t, true, {2}, {1, 2});
  }
  {
    Tensor t =
        test->AsTensor<InputCType>({1, 2, 3, 4, 5, 6}, TensorShape({2, 3}));
    reset_and_test(t, false, {2, 3}, {1, 2, 3, 4, 5, 6});
    reset_and_test(t, true, {2, 3}, {1, 2, 3, 4, 5, 6});
  }
  {
    // Set all tensor elements to the same value. Such tensors are encoded
    // using a single element list in tensor proto.
    Tensor t =
        test->AsTensor<InputCType>({1, 1, 1, 1, 1, 1}, TensorShape({2, 3}));
    reset_and_test(t, false, {2, 3}, {1, 1, 1, 1, 1, 1});
    reset_and_test(t, true, {2, 3}, {1, 1, 1, 1, 1, 1});
  }
  {
    // Set trailing tensor elements to the same value. Such tensors are
    // encoded by truncating all equal elements except the first one.
    Tensor t =
        test->AsTensor<InputCType>({2, 2, 1, 1, 1, 1}, TensorShape({2, 3}));
    reset_and_test(t, false, {2, 3}, {2, 2, 1, 1, 1, 1});
    reset_and_test(t, true, {2, 3}, {2, 2, 1, 1, 1, 1});
  }
}

TEST_F(OpConverterTest, ConvertConst) {
  {
    Reset();
    NodeDef node_def = MakeConstNodeDef<double>("my_const", {});
    RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
                               "Unsupported tensorflow data type double");
  }
  {
    Reset();
    Tensor tensor = AsTensor<int64>({1, std::numeric_limits<int64>::max(), 1, 1,
                                     1, std::numeric_limits<int64>::lowest()},
                                    TensorShape({2, 3}));
    NodeDef node_def;
    node_def.set_name("my_const");
    node_def.set_op("Const");
    (*node_def.mutable_attr())["dtype"].set_type(DT_INT64);
    TensorProto* tensor_attr =
        (*node_def.mutable_attr())["value"].mutable_tensor();
    tensor_attr->Clear();
    tensor.AsProtoTensorContent(tensor_attr);
    RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
                               "outside the range of int32");
  }

  TestConvertConst<DT_FLOAT, float, float>(this);
  TestConvertConst<DT_INT8, int8, int32>(this);
  TestConvertConst<DT_UINT8, uint8, int32>(this);
  TestConvertConst<DT_INT16, int16, int32>(this);
  TestConvertConst<DT_UINT16, uint16, int32>(this);
  TestConvertConst<DT_INT32, int32, int32>(this);
  TestConvertConst<DT_UINT32, uint32, int32>(this);
  TestConvertConst<DT_INT64, int64, int32>(this);
  TestConvertConst<DT_UINT64, uint64, int32>(this);
}

template <typename T>
NodeDef CreateFusedBatchNormOp(DataType tf_type, std::string data_format,
                               bool is_training, float epsilon) {
  Scope s = Scope::NewRootScope();
  auto x = ops::Placeholder(s.WithOpName("x"), tf_type);
  auto scale = ops::Placeholder(s.WithOpName("scale"), tf_type);
  auto offset = ops::Placeholder(s.WithOpName("offset"), tf_type);
  auto mean = ops::Placeholder(s.WithOpName("mean"), tf_type);
  auto variance = ops::Placeholder(s.WithOpName("variance"), tf_type);
  typename T::Attrs attrs;
  attrs.data_format_ = data_format;
  attrs.is_training_ = is_training;
  if (epsilon > 0) {
    attrs.epsilon_ = epsilon;
  } else {
    EXPECT_GE(epsilon, 0);
  }
  return T(s.WithOpName("my_batchnorm"), x, scale, offset, mean, variance,
           attrs)
      .operation.node()
      ->def();
}

TEST_P(OpConverter_FP32_Test, ConvertFusedBatchNorm) {
  using OpFunc = std::function<NodeDef(DataType, std::string, bool, float)>;
  std::vector<OpFunc> get_node_def_vec{
      CreateFusedBatchNormOp<ops::FusedBatchNorm>,
      CreateFusedBatchNormOp<ops::FusedBatchNormV2>,
      CreateFusedBatchNormOp<ops::FusedBatchNormV3>};

  struct TestParam {
    std::string data_format;
    int tensor_input_idx;  // Index of an input that will be provided as tensor.
    bool is_training;
    float epsilon;
    Status conversion_status;
    bool keep_channel_unknown;
  };

  struct NodeInput {
    std::string name;
    std::vector<int> dims;
    std::vector<float> val;
  };
  std::vector<NodeInput> node_input{
      {"x", {2, 3, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}},
      {"scale", {3}, {7, 8, 9}},
      {"offset", {3}, {10, 20, 30}},
      {"mean", {3}, {1, 2, 3}},
      {"variance", {3}, {4, 5, 6}}};

  std::vector<float> expected_output{10.0,      13.495633, 23.574135, 27.148273,
                                     37.342354, 41.013527, 30.9738,   34.469433,
                                     45.018955, 48.59309,  59.369415, 63.04059};
  for (auto get_node_def : get_node_def_vec) {
    NodeDef tmp_node_def = get_node_def(tf_type_, "NCHW", true, 0);
    std::string op_name = tmp_node_def.op();
    std::vector<TestParam> test_param{
        {"NHWC", 0, false, 0,
         errors::Unimplemented(StrCat(
             op_name, " only supports data_format=NCHW, at my_batchnorm"))},
        {"NCHW", 0, true, 0,
         errors::Unimplemented(StrCat(
             op_name, " only supports is_training=false, at my_batchnorm"))},
        {"NCHW", 1, false, 0,
         errors::Unimplemented(StrCat("The input \"scale\" for ", op_name,
                                      " must be a constant, at my_batchnorm"))},
        {"NCHW", 2, false, 0,
         errors::Unimplemented(StrCat("The input \"offset\" for ", op_name,
                                      " must be a constant, at my_batchnorm"))},
        {"NCHW", 3, false, 0,
         errors::Unimplemented(StrCat("The input \"mean\" for ", op_name,
                                      " must be a constant, at my_batchnorm"))},
        {"NCHW", 4, false, 0,
         errors::Unimplemented(StrCat("The input \"variance\" for ", op_name,
                                      " must be a constant, at my_batchnorm"))},
        {"NCHW", 0, false, 0.01}};  // The last one is the only test that runs.
    if (trt_mode_ == TrtTestMode::kDynamicShape) {
      test_param.push_back(
          {"NCHW", 0, false, 0.01,
           errors::InvalidArgument(
               "Channel dimension must be static, at my_batchnorm"),
           true});
    }
    for (auto p : test_param) {
      Reset();
      NodeDef node_def =
          get_node_def(tf_type_, p.data_format, p.is_training, p.epsilon);
      for (int i = 0; i < node_input.size(); i++) {
        if (i == 0 || i == p.tensor_input_idx) {
          // The first input (x) is always added as a tensor, and it has shape
          // NCHW. The other inputs are per-channel values (1D, size C).
          //
          // In implicit batch mode, it is not possible to add any of the 1D
          // inputs as a tensor: the first dim is always treated as batch dim
          // in implicit batch mode, and that has to agree for all tensors. We
          // have two input tensors with shapes NCHW and C, and in general
          // N != C. The converter already picked up N from the first input,
          // and reports an error when we try to add any other tensor whose
          // first dim does not match.
          //
          // This restriction does not apply in explicit batch mode: the
          // tensors can have different first dims. The converter still expects
          // that only the first arg is a tensor. TODO(tfeher): Check if one
          // can relax this restriction.
          Status expected_status =
              (i != 0 && trt_mode_ == TrtTestMode::kImplicitBatch)
                  ? errors::InvalidArgument(
                        StrCat("Batch size doesn't match for tensor ",
                               node_input[i].name,
                               ": Provided batch size does not match "
                               "converter batch size: 3 vs 2"))
                  : Status::OK();
          std::vector<int> partial_input_shape;
          if (i == 0 && trt_mode_ == TrtTestMode::kDynamicShape &&
              !p.keep_channel_unknown) {
            // Keep the channel dim static (known).
            partial_input_shape.resize(4, -1);
            partial_input_shape[1] = node_input[i].dims[1];
          }
          AddTestTensor(node_input[i].name, node_input[i].dims, tf_type_,
                        node_input[i].val, partial_input_shape,
                        expected_status);

        } else {
          AddTestWeights(node_input[i].name, node_input[i].dims,
                         node_input[i].val, tf_type_);
        }
      }
      TestOpConverter("my_batchnorm", node_def, node_input[0].dims,
                      p.conversion_status, Status::OK(),
                      ArrayFloatNear(expected_output));
    }
  }
}

TEST_P(OpConverter_FP32_Test, ConvertTranspose) {
  // Get the NodeDef for Transpose.
  Scope s = Scope::NewRootScope();
  auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
  auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
  auto transpose = ops::Transpose(s.WithOpName("my_transpose"), input, weights);
  const NodeDef& node_def = transpose.operation.node()->def();

  std::vector<TestParamBase> test_params = {
      // For the first test we leave param empty. This signals that the perm is
      // passed as a (non-constant) input tensor rather than a weight, which is
      // invalid.
      TestParamBase{{3, 1, 2, 1},
                    {},
                    {},
                    {},
                    Status(error::UNIMPLEMENTED,
                           "The input \"perm\" for Transpose must be a "
                           "constant, at my_transpose")},
      TestParamBase{{1, 1, 2, 3},
                    {},
                    {},
                    {0, 1, 2},
                    Status(error::INVALID_ARGUMENT,
                           "Rank of perm for transpose does not match with "
                           "that of the input.")},
      // Transpose the batch dim.
      TestParamBase{
          {1, 1, 2, 3},
          {},
          {3, 2, 1, 1},
          {3, 2, 1, 0},
          (trt_mode_ == TrtTestMode::kImplicitBatch)
              ? Status(error::UNIMPLEMENTED,
                       "Transpose at batch dimension is not supported")
              : Status::OK()},
      TestParamBase{{1, 1, 2, 3}, {}, {1, 3, 1, 2}, {0, 3, 1, 2}},
  };
  if (trt_mode_ == TrtTestMode::kDynamicShape) {
    // Dynamic shape tests where some shapes are known.
    test_params.push_back(TestParamBase{
        {1, 1, 2, 3}, {-1, 1, 2, -1}, {1, 3, 1, 2}, {0, 3, 1, 2}});
  }
  std::vector<float> expected_values{1, 4, 2, 5, 3, 6};
  for (auto p : test_params) {
    SCOPED_TRACE(p);
    Reset();
    AddTestTensor("input", p.input_dims, {1, 2, 3, 4, 5, 6},
                  p.partial_input_dims);
    if (p.param.empty()) {
      AddTestTensor("weights", {3});
    } else {
      AddTestWeights<int32>("weights", {static_cast<int>(p.param.size())},
                            p.param);
    }
    TestOpConverter("my_transpose", node_def, p.expected_output_dims, p.status,
                    p.runtime_status, ElementsAreArray(expected_values));
  }
}

TEST_P(OpConverter_FP32_Test, ConvertReshape) {
  // Get the NodeDef for Reshape.
  Scope s = Scope::NewRootScope();
  auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
  auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
  auto reshape = ops::Reshape(s.WithOpName("my_reshape"), input, weights);
  const NodeDef& node_def = reshape.operation.node()->def();

  if (trt_mode_ == TrtTestMode::kImplicitBatch) {
    // Shape is a tensor, should fail in implicit batch mode.
    Reset();
    AddTestTensor("input", {3, 2, 1});
    AddTestTensor("weights", {3});
    RunValidationAndConversion(
        node_def, error::INVALID_ARGUMENT,
        "The input \"shape\" for Reshape must be a constant in implicit batch "
        "mode, at my_reshape");
  } else if (!IS_TRT_VERSION_GE(7, 1, 3, 0)) {
    // Shape is a tensor, should fail before TRT 7.1.3 even in explicit batch /
    // dynamic shape mode.
    Reset();
    AddTestTensor("input", {3, 2, 1});
    AddTestTensor("weights", {3});
    RunValidationAndConversion(
        node_def, error::INVALID_ARGUMENT,
        "Non constant shape input tensor for Reshape requires minimum TRT "
        "7.1.3");
  }

  Status reshape_from_scalar_status =
      trt_mode_ == TrtTestMode::kImplicitBatch
          ? errors::Internal(
                "Failed to convert input input to a TRT_TensorOrWeights: "
                "Scalar input tensor is not supported since the first "
                "dimension is treated as batch dimension by TRT")
          : Status::OK();
  Status add_scalar_tensor_status =
      trt_mode_ == TrtTestMode::kImplicitBatch
          ? errors::Internal(
                "Scalars cannot be represented in implicit batch mode")
          : Status::OK();
  Status reshape_to_scalar_status =
      trt_mode_ == TrtTestMode::kImplicitBatch
          ? errors::Unimplemented(
                "Reshape to shape=[] is not supported, at my_reshape")
          : Status::OK();
  Status reshape_batch_status =
      trt_mode_ == TrtTestMode::kImplicitBatch
          ? errors::Unimplemented(
                "Reshape on batch dimension is not supported, at my_reshape")
          : Status::OK();

  struct TestParams {
    std::vector<int> tensor_dims;
    std::vector<int> shape;
    std::vector<int> expected_shape;
    Status conversion_status;
    Status runtime_status;
    std::vector<int> shape_prof;  // Concrete values, needed if shape has a -1.
    Status add_test_tensor_status;
  };

  std::vector<TestParams> params = {
      // Reshape scalar to tensor, should fail in implicit batch mode.
      TestParams{{},
                 {1, 1},
                 {},
                 reshape_from_scalar_status,
                 {},
                 {},
                 add_scalar_tensor_status},
      // Reshape tensor to scalar, should fail in implicit batch mode.
      // - In explicit batch mode if shape is set as weight it works.
      // - In explicit batch mode && using shape as tensor input it should
      //   fail. In that case we set the expected conversion status in the
      //   test loop.
      TestParams{{1, 1}, {}, {}, reshape_to_scalar_status},
      // Reshape at batch dimension, should fail in implicit batch mode.
      TestParams{{1, 1, 2, 3}, {3, 1, 1, 2}, {}, reshape_batch_status},
      TestParams{{2, 1, 2, 3}, {-1, 1, 4}, {3, 1, 4}, reshape_batch_status},
      // Tests that should succeed in every trt_mode.
      TestParams{{1, 1, 2, 3}, {-1, 1, 3, 2}, {1, 1, 3, 2}},
      TestParams{{1, 1, 2, 3}, {1, 1, -1}, {1, 1, 6}},
      TestParams{{1, 1, 2, 3}, {1, 1, 3, 2}},
      TestParams{{2, 1, 2, 3}, {2, 1, 3, 2}},
      TestParams{{1, 1, 1}, {1}},
      TestParams{{1}, {1, 1}},
      TestParams{{2, 1, 1}, {2}},
      TestParams{{2}, {2, 1}},
  };
  if (trt_mode_ == TrtTestMode::kImplicitBatch) {
    // Reshape a tensor with zero rank using an empty shape tensor; this should
    // fail in implicit batch mode. In explicit batch mode this is an identity
    // operation and does not add a reshape layer, therefore we do not test it.
    params.push_back(TestParams{{},
                                {},
                                {},
                                reshape_from_scalar_status,
                                {},
                                {},
                                add_scalar_tensor_status});
  }
  // Testing the methods for representing the reshape shape for IShuffleLayer:
  // as a weight (true) or as a tensor (false).
  std::vector<bool> shape_input_options(1, true);

  if (trt_mode_ != TrtTestMode::kImplicitBatch &&
      IS_TRT_VERSION_GE(7, 1, 3, 0)) {
    shape_input_options.push_back(false);
  }

  for (auto p : params) {
    for (auto shape_as_weight : shape_input_options) {
      std::ostringstream oss;
      oss << "shape " << p.shape;
      SCOPED_TRACE(StrCat(oss.str(), shape_as_weight ? " weight" : " tensor"));
      if (!shape_as_weight && p.shape.empty()) {
        p.conversion_status = errors::Unimplemented(
            "Reshape with dynamic input requires 1D input tensor, at "
            "my_reshape");
      }
      Reset();
      const int n_elements =
          std::accumulate(p.tensor_dims.begin(), p.tensor_dims.end(), 1,
                          std::multiplies<int>());
      std::vector<float> input_vec(n_elements);
      std::iota(input_vec.begin(), input_vec.end(), 1);
      AddTestTensor("input", p.tensor_dims, tf_type_, input_vec, {},
                    p.add_test_tensor_status);
      if (shape_as_weight) {
        AddTestWeights<int32>("weights", {static_cast<int>(p.shape.size())},
                              p.shape);
      } else {
        std::vector<int32> dims;
        std::vector<int32> values{p.shape};
        if (!p.shape.empty()) {
          dims.push_back(p.shape.size());
        } else {
          // If the shape is empty we use a dummy value to ensure that
          // AddTestTensor creates the corresponding entry in InputOutputData.
          values.push_back(1);
        }
        AddTestTensor("weights", dims, DT_INT32, values, dims);
      }
      std::vector<int> expected_shape =
          p.expected_shape.empty() ? p.shape : p.expected_shape;
      VLOG(2) << "Calling TestOpConverter";
      TestOpConverter("my_reshape", node_def, expected_shape,
                      p.conversion_status, p.runtime_status,
                      ElementsAreArray(input_vec));
    }
  }
}
2399 
TEST_P(OpConverter_FP32_Test, ConvertShape) {
  // Get the NodeDef for Shape op.
  Scope s = Scope::NewRootScope();
  auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
  auto shape = ops::Shape(s.WithOpName("my_shape"), input);
  const NodeDef& node_def = shape.operation.node()->def();

  Status conversion_status =
      (trt_mode_ == TrtTestMode::kImplicitBatch)
          ? errors::Unimplemented(
                "Shape is only supported for explicit batch mode.")
          : Status::OK();
  std::vector<TestParamBase> test_params = {
// TODO(b/166274212): Enable the test parameter for TensorRT 7.1.3.
#if !IS_TRT_VERSION_GE(7, 1, 3, 0)
    TestParamBase{{1, 2, 3}, {}, {3}, {}, conversion_status},
#endif
    // Add the input as a weight (we use a non-empty param ({1}) to trigger
    // this).
    TestParamBase{{1, 2, 3}, {}, {3}, {1}, conversion_status},
  };

  auto input_is_weight = [](const TestParamBase p) { return !p.param.empty(); };
  for (auto p : test_params) {
    SCOPED_TRACE(p);
    Reset();
    // The number of elements of the input tensor. We leave it 0 in case we do
    // not need to add an input tensor. This happens in explicit batch mode:
    // the shape is known at conversion time, and therefore it is added to the
    // network as a constant layer. In that case the single-node network that
    // we use for the unit test has no actual input tensor when it is
    // converted to a TensorRT network.
    int n_elements = 0;
    if (input_is_weight(p) || trt_mode_ != TrtTestMode::kExplicitBatch) {
      // Calculate the number of elements for adding input data.
      n_elements = std::accumulate(p.input_dims.begin(), p.input_dims.end(), 1,
                                   std::multiplies<int>());
    }
    std::vector<float> input_val(n_elements, 1);
    if (!input_is_weight(p)) {
      AddTestTensor("input", p.input_dims, input_val);
    } else {
      AddTestWeights("input", p.input_dims, input_val, tf_type_);
    }
    TestOpConverter("my_shape", node_def, p.expected_output_dims, p.status,
                    p.runtime_status, ElementsAreArray(p.input_dims),
                    {DT_INT32});
  }
}

struct MatMulTestParams {
  std::vector<int> shape_a;
  std::vector<int> values_a;
  bool transpose_a;
  std::vector<int> shape_b;
  std::vector<int> values_b;
  bool transpose_b;
  std::vector<int> expected_shape;
  std::vector<int> expected_output;
};

// Helper function for testing MatMul and BatchMatMul. get_matmul is a function
// used to generate the node. It accepts (DataType, transpose_a, transpose_b)
// as parameters.
void TestMatMulHelper(
    ParameterizedOpConverterTestBase* test,
    const std::function<NodeDef(DataType, bool, bool)>& get_matmul,
    const std::vector<MatMulTestParams>& params) {
  {
    // Unsupported data type.
    test->Reset();
    NodeDef node_def = get_matmul(DT_INT32, false, false);
    test->AddTestTensor("input", {1, 2}, DT_INT32, {});
    test->AddTestWeights<int32>("weights", {2, 1}, {3, 5});
    test->RunValidationAndConversion(
        node_def, error::UNIMPLEMENTED,
        StrCat("Data type int32 is not supported for ", node_def.op(),
               ", must be one of [float, half], at my_matmul")
            .c_str());
  }

  // FC conversion depends on whether the last dim of A is known or not. In
  // dynamic shape mode, we check whether A is handled correctly if it has a
  // partially known input shape (last dim known).
  std::vector<bool> a_test_partial_shape_values{false};
  if (test->get_trt_mode() == TrtTestMode::kDynamicShape) {
    a_test_partial_shape_values.push_back(true);
  }

  for (auto p : params) {
    for (bool a_is_tensor : {true, false}) {
      for (bool b_is_tensor : {true, false}) {
        for (bool a_partial_shape : a_test_partial_shape_values) {
          if (a_partial_shape && !a_is_tensor) {
            // Only tensors can have a partial shape.
            continue;
          }
          if (!a_is_tensor && !b_is_tensor) {
            // Skip the test when both args are weights. We do not convert
            // this case since const folding eliminates it.
            continue;
          }
          SCOPED_TRACE(StrCat("A", p.transpose_a ? ".T" : "", " is ",
                              a_is_tensor ? "tensor" : "weight", ", B",
                              p.transpose_b ? ".T" : "", " is ",
                              b_is_tensor ? "tensor" : "weight", ", rank A ",
                              p.shape_a.size(), ", rank B ", p.shape_b.size()));
          test->Reset();

          NodeDef node_def =
              get_matmul(test->get_tf_type(), p.transpose_a, p.transpose_b);
          const bool is_batch_matmul = node_def.op() == "BatchMatMul";

          if (a_is_tensor) {
            if (a_partial_shape) {
              // Prepare a partial shape for A where only the last dim is known.
              std::vector<int> partial_shape(p.shape_a.size(), -1);
              int k = p.shape_a.size() - 1;
              partial_shape.at(k) = p.shape_a.at(k);
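              // At this point, e.g. p.shape_a == {2, 3} yields
              // partial_shape == {-1, 3}.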
              test->AddTestTensor("input", p.shape_a, test->get_tf_type(),
                                  p.values_a, partial_shape);
            } else {
              test->AddTestTensor("input", p.shape_a, p.values_a);
            }
          } else {
            test->AddTestWeights("input", p.shape_a, p.values_a,
                                 test->get_tf_type());
          }
          if (b_is_tensor) {
            if (a_is_tensor && p.shape_a[0] != p.shape_b[0] &&
                test->get_trt_mode() == TrtTestMode::kImplicitBatch) {
              VLOG(2) << "Skipping test with incompatible batch dimensions";
              continue;
            }
            test->AddTestTensor("weights", p.shape_b, p.values_b);
          } else {
            test->AddTestWeights("weights", p.shape_b, p.values_b,
                                 test->get_tf_type());
          }

          Status conversion_status = Status::OK();
          if (test->get_trt_mode() == TrtTestMode::kImplicitBatch) {
            // Implicit batch mode has several restrictions. We change the
            // expected conversion status accordingly.
            if (is_batch_matmul) {
              if (a_is_tensor && p.shape_a.size() < p.shape_b.size()) {
                conversion_status = errors::InvalidArgument(
                    "Broadcasting beyond batch dimension is not supported "
                    "(tensor #dims ",
                    p.shape_a.size(), " vs broadcast #dims ", p.shape_b.size(),
                    ")");
              }
              if (b_is_tensor && p.shape_b.size() < p.shape_a.size()) {
                conversion_status = errors::InvalidArgument(
                    "Broadcasting beyond batch dimension is not supported "
                    "(tensor #dims ",
                    p.shape_b.size(), " vs broadcast #dims ", p.shape_a.size(),
                    ")");
              }
              if ((!a_is_tensor || !b_is_tensor) && p.shape_a[0] != 1) {
                conversion_status = errors::Unimplemented(
                    "TensorRT does not support batched constants in implicit "
                    "batch mode.");
              }
            } else if ((a_is_tensor && p.shape_a.size() <= 2 &&
                        (p.transpose_a || b_is_tensor)) ||
                       (b_is_tensor && p.shape_b.size() <= 2)) {
              conversion_status = errors::InvalidArgument(
                  "MatMul with 2D tensors requires explicit batch mode, or that"
                  " tensor A is not transposed and B is a constant tensor.");
            }
          }

          test->TestOpConverter("my_matmul", node_def, p.expected_shape,
                                conversion_status, Status::OK(),
                                ElementsAreArray(p.expected_output));
          if (!conversion_status.ok()) {
            VLOG(2) << "Converted with status " << conversion_status;
          }
          VLOG(2) << "== Finished test iteration ==";
        }
      }
    }
  }
}

template <typename LayerType>
void CheckAddedLayers(OpConverterTest* test, bool expect_found) {
  bool layer_found = false;
  for (int i = 0; i < test->converter_->network()->getNbLayers(); i++) {
    nvinfer1::ILayer* layer = test->converter_->network()->getLayer(i);
    if (dynamic_cast<LayerType*>(layer)) {
      layer_found = true;
    }
  }
  EXPECT_EQ(expect_found, layer_found);
}
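
// Illustrative usage (hypothetical call, not made in this file):
// CheckAddedLayers<nvinfer1::IShuffleLayer>(test, /*expect_found=*/true)
// would assert that the converted network contains at least one shuffle layer.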

std::vector<MatMulTestParams> GetMatMulTestParams() {
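  // Sanity check for the expected values below: reading {0, 1, 2, 3} as the
  // row-major matrix [[0, 1], [2, 3]], the first entry computes
  // [[0, 1], [2, 3]] * [[0, 1], [2, 3]] = [[2, 3], [6, 11]].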
  std::vector<MatMulTestParams> params{
      // clang-format off
      MatMulTestParams{{2, 2}, {0, 1, 2, 3}, false,  // A (shape, val, T?)
                       {2, 2}, {0, 1, 2, 3}, false,  // B (shape, val, T?)
                       {2, 2}, {2, 3, 6, 11}},       // result (shape, val)
      MatMulTestParams{{2, 2}, {0, 1, 2, 3}, false,
                       {2, 2}, {0, 1, 2, 3},  true,
                       {2, 2}, {1, 3, 3, 13}},
      MatMulTestParams{{2, 2}, {0, 1, 2, 3},  true,
                       {2, 2}, {0, 1, 2, 3}, false,
                       {2, 2}, {4, 6, 6, 10}},
      MatMulTestParams{{2, 2}, {0, 1, 2, 3}, true,
                       {2, 2}, {0, 1, 2, 3}, true,
                       {2, 2}, {2, 6, 3, 11}},
      MatMulTestParams{{2, 3}, {0, 1, 2, 3, 4, 5}, false,
                       {2, 3}, {1, 2, 3, 4, 5, 6}, true,
                       {2, 2}, {8, 17, 26, 62}},
      MatMulTestParams{{2, 3}, {0, 1, 2, 3, 4, 5}, true,
                       {2, 3}, {1, 2, 3, 4, 5, 6}, false,
                       {3, 3}, {12, 15, 18, 17, 22, 27, 22, 29, 36}},
      MatMulTestParams{{3, 2}, {0, 1, 2, 3, 4, 5}, false,
                       {2, 3}, {1, 2, 3, 4, 5, 6}, false,
                       {3, 3}, {4, 5, 6, 14, 19, 24, 24, 33, 42}},
      MatMulTestParams{{3, 2}, {0, 1, 2, 3, 4, 5}, true,
                       {2, 3}, {1, 2, 3, 4, 5, 6}, true,
                       {2, 2}, {16, 34, 22, 49}},
      // clang-format on
  };
  return params;
}

TEST_P(OpConverter_FP32_Test, ConvertMatMul) {
  // Get the NodeDef for MatMul.
  auto get_matmul_nodedef = [](DataType dtype, bool transpose_a,
                               bool transpose_b) -> NodeDef {
    Scope s = Scope::NewRootScope();
    auto input = ops::Placeholder(s.WithOpName("input"), dtype);
    auto weights = ops::Placeholder(s.WithOpName("weights"), dtype);
    const auto matmul_attrs =
        ops::MatMul::TransposeA(transpose_a).TransposeB(transpose_b);
    auto matmul =
        ops::MatMul(s.WithOpName("my_matmul"), input, weights, matmul_attrs);
    return matmul.operation.node()->def();
  };

  TestMatMulHelper(this, get_matmul_nodedef, GetMatMulTestParams());
}

TEST_P(OpConverter_FP32_Test, ConvertBatchMatMul) {
  // Get the NodeDef for BatchMatMul.
  auto get_batch_matmul_nodedef = [](DataType dtype, bool transpose_a,
                                     bool transpose_b) -> NodeDef {
    Scope s = Scope::NewRootScope();
    auto input = ops::Placeholder(s.WithOpName("input"), dtype);
    auto weights = ops::Placeholder(s.WithOpName("weights"), dtype);
    const auto matmul_attrs =
        ops::BatchMatMul::AdjX(transpose_a).AdjY(transpose_b);
    auto matmul = ops::BatchMatMul(s.WithOpName("my_matmul"), input, weights,
                                   matmul_attrs);
    return matmul.operation.node()->def();
  };

  // We derive test data from the MatMul test params by adding extra leading
  // dimensions.
  std::vector<MatMulTestParams> params_2d = GetMatMulTestParams();
  std::vector<MatMulTestParams> params;
  params.reserve(params_2d.size() * 3 + 1);

  auto insert_ones = [](std::vector<int> v, int n) {
    std::vector<int> ones(n, 1);
    ones.insert(ones.end(), v.begin(), v.end());
    return ones;
  };
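  // e.g. insert_ones({2, 3}, 2) returns {1, 1, 2, 3}.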

  // Add a leading 1 dimension to A, B and result.
  std::transform(params_2d.begin(), params_2d.end(), std::back_inserter(params),
                 [](MatMulTestParams p) {
                   p.shape_a.insert(p.shape_a.begin(), 1);
                   p.shape_b.insert(p.shape_b.begin(), 1);
                   p.expected_shape.insert(p.expected_shape.begin(), 1);
                   return p;
                 });

  // Test with N > 1: weights cannot be batched in implicit batch mode.
  // clang-format off
  params.push_back(
      MatMulTestParams{{2, 2, 2}, {0, 1, 2, 3, 0, 1, 2, 3}, false,  // A
                       {2, 2, 2}, {0, 1, 2, 3, 0, 1, 2, 3}, false,  // B
                       {2, 2, 2}, {2, 3, 6, 11, 2, 3, 6, 11}}       // result
  );

  params.push_back(
      MatMulTestParams{{2, 2, 3}, {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5}, false,
                       {2, 2, 3}, {1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6}, true,
                       {2, 2, 2}, {8, 17, 26, 62, 8, 17, 26, 62}});
  // clang-format on

  // Add two leading 1 dimensions to A, B and result.
  std::transform(params_2d.begin(), params_2d.end(), std::back_inserter(params),
                 [insert_ones](MatMulTestParams p) {
                   p.shape_a = insert_ones(p.shape_a, 2);
                   p.shape_b = insert_ones(p.shape_b, 2);
                   p.expected_shape = insert_ones(p.expected_shape, 2);
                   return p;
                 });

  // Test broadcast: add two leading 1 dimensions to A, but not to B.
  std::transform(params_2d.begin(), params_2d.end(), std::back_inserter(params),
                 [insert_ones](MatMulTestParams p) {
                   p.shape_a = insert_ones(p.shape_a, 2);
                   p.expected_shape = insert_ones(p.expected_shape, 2);
                   return p;
                 });

  // Test broadcast: add a leading 1 dimension to A and two leading 1s to B.
  // Broadcasting A needs a dynamic broadcast, which is incompatible with the
  // FC layer.
  std::transform(params_2d.begin(), params_2d.end(), std::back_inserter(params),
                 [insert_ones](MatMulTestParams p) {
                   p.shape_a = insert_ones(p.shape_a, 1);
                   p.shape_b = insert_ones(p.shape_b, 2);
                   p.expected_shape = insert_ones(p.expected_shape, 2);
                   return p;
                 });

  // Test with N > 1: weights cannot be batched in implicit batch mode, so we
  // test with batch size 2.
  std::transform(params_2d.begin(), params_2d.end(), std::back_inserter(params),
                 [insert_ones](MatMulTestParams p) {
                   p.shape_a.insert(p.shape_a.begin(), 2);
                   p.values_a.reserve(p.values_a.size() * 2);
                   p.values_a.insert(p.values_a.end(), p.values_a.begin(),
                                     p.values_a.end());

                   p.shape_b.insert(p.shape_b.begin(), 2);
                   p.values_b.reserve(p.values_b.size() * 2);
                   p.values_b.insert(p.values_b.end(), p.values_b.begin(),
                                     p.values_b.end());

                   p.expected_shape.insert(p.expected_shape.begin(), 2);
                   p.expected_output.reserve(p.expected_output.size() * 2);
                   p.expected_output.insert(p.expected_output.end(),
                                            p.expected_output.begin(),
                                            p.expected_output.end());
                   return p;
                 });

  // 4D tensor where the second "batch dim" is not 1
  params.push_back(MatMulTestParams{
      {1, 2, 4, 5},
      {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13,
       14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
       28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39},
      false,  // A
      {1, 2, 3, 5},
      {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
       16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30},
      true,  // B
      {1, 2, 4, 3},
      {40,   90,   140,  115,  290,  465,  190,  490,
       790,  265,  690,  1115, 1990, 2540, 3090, 2440,
       3115, 3790, 2890, 3690, 4490, 3340, 4265, 5190}});  // result

  TestMatMulHelper(this, get_batch_matmul_nodedef, params);
}

#if IS_TRT_VERSION_GE(7, 1, 3, 0)
TEST_P(OpConverter_FP32_Test, ConvertEinsum) {
  // Get the NodeDef for Einsum.
  auto get_einsum_nodedef = [](DataType dtype, std::string eq,
                               int n_inputs = 2) -> NodeDef {
    Scope s = Scope::NewRootScope();
    auto a = ops::Placeholder(s.WithOpName("input_a"), dtype);
    std::vector<Input> input_vec{a};
    if (n_inputs > 1) {
      auto b = ops::Placeholder(s.WithOpName("input_b"), dtype);
      input_vec.push_back(b);
    }
    InputList inputs(input_vec);
    auto einsum = ops::Einsum(s.WithOpName("my_einsum"), inputs, eq);
    return einsum.operation.node()->def();
  };

  // TODO(b/191407966): re-enable the test for kExplicitBatch.
  if (trt_mode_ == TrtTestMode::kExplicitBatch) {
    return;
  }

  if (trt_mode_ == TrtTestMode::kImplicitBatch) {
    Reset();
    NodeDef node_def = get_einsum_nodedef(tf_type_, "ab,cb->ac");
    AddTestTensor("input_a", {2, 3});
    AddTestTensor("input_b", {2, 3});
    TestOpConverter(
        "my_einsum", node_def, {2, 2},
        errors::Unimplemented("Einsum converter requires dynamic shape mode"),
        Status::OK(), ElementsAreArray({13, 16, 40, 52}));
    // No further tests.
    return;
  }

  struct TestParams {
    std::string equation;
    std::vector<int> shape_a;
    std::vector<int> values_a;
    std::vector<int> shape_b;
    std::vector<int> values_b;
    std::vector<int> expected_shape;
    std::vector<int> expected_output;
    Status conv_status;
  };

  Status unimplemented_eq =
      errors::Unimplemented("No conversion for einsum equation.");

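  // Worked example for the supported "ab,bc->ac" case below: with
  // A = [[0, 1, 2], [3, 4, 5]] and B = [[1, 2], [3, 4], [5, 6]], the first
  // row of the product is [0*1 + 1*3 + 2*5, 0*2 + 1*4 + 2*6] = [13, 16],
  // matching the expected output {13, 16, 40, 52}.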
  std::vector<TestParams> params{
      // Dot product.
      TestParams{"i,i->", {2}, {2, 3}, {2}, {1, 2}, {1}, {8}, unimplemented_eq},
          // Outer product.
          TestParams{"i,k->ik",
                     {2},
                     {1, 2},
                     {3},
                     {1, 2, 3},
                     {2, 3},
                     {1, 2, 3, 2, 4, 6},
                     unimplemented_eq},
          // Transpose.
          TestParams{"ik->ki", {2, 3}, {0, 1, 2, 3, 4, 5}, {},
                     {},       {3, 2}, {0, 3, 1, 4, 2, 5}, unimplemented_eq},
          // Diag.
          TestParams{"ii->i",
                     {3, 3},
                     {0, 1, 2, 3, 4, 5, 6, 7, 8},
                     {},
                     {},
                     {3},
                     {0, 4, 8},
                     unimplemented_eq},
          // Trace.
          TestParams{
              "ii", {3, 3},          {0, 1, 2, 3, 4, 5, 6, 7, 8}, {}, {}, {},
              {12}, unimplemented_eq},
          // MatMul with reduction.
          TestParams{"abbc,dc->ad",
                     {1, 2, 2, 3},
                     {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
                     {2, 3},
                     {1, 2, 3, 4, 5, 6},
                     {2, 3},
                     {1, 2, 3, 2, 4, 6},
                     unimplemented_eq},
          // Ellipsis with broadcast.
          TestParams{"...ik,...jk->...ij",
                     {1, 3, 1, 4},
                     {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
                     {2, 1, 1, 4},
                     {1, 2, 3, 4, 5, 6, 7, 8},
                     {2, 3, 1, 1},
                     {20, 60, 100, 44, 148, 252},
                     unimplemented_eq},
          // MatMul and Batched MatMul.
          TestParams{"ab,bc->ac",        {2, 3}, {0, 1, 2, 3, 4, 5}, {3, 2},
                     {1, 2, 3, 4, 5, 6}, {2, 2}, {13, 16, 40, 52}},
          TestParams{"abc,cde->abde",
                     {1, 2, 3},
                     {0, 1, 2, 3, 4, 5},
                     {3, 2, 2},
                     {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
                     {1, 2, 2, 2},
                     {23, 26, 29, 32, 68, 80, 92, 104}},
          TestParams{"abcd,cde->abe",
                     {1, 2, 2, 3},
                     {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
                     {2, 3, 2},
                     {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
                     {1, 2, 2},
                     {125, 140, 341, 392}},
          TestParams{"abc,cd->abd",      {1, 2, 3}, {0, 1, 2, 3, 4, 5}, {3, 2},
                     {1, 2, 3, 4, 5, 6}, {1, 2, 2}, {13, 16, 40, 52}},
          TestParams{"acbe,aecd->abcd",
                     {1, 2, 3, 4},
                     {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
                      12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
                     {1, 4, 2, 3},
                     {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
                      13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
                     {1, 3, 2, 3},
                     {90, 96, 102, 732, 786, 840, 250, 272, 294, 940, 1010,
                      1080, 410, 448, 486, 1148, 1234, 1320}},
          TestParams{
              "aecd,abcd->acbe",
              {1, 2, 3, 4},
              {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
               12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
              {1, 2, 3, 4},
              {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
               13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
              {1, 3, 2, 2},
              {20, 140, 92, 788, 148, 460, 412, 1300, 404, 908, 860, 1940}},
          TestParams{"acd,dce->ae",
                     {1, 2, 3},
                     {0, 1, 2, 3, 4, 5},
                     {3, 2, 2},
                     {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
                     {1, 2},
                     {115, 130}},
          TestParams{"abcd,bace->bade",
                     {2, 3, 2, 1},
                     {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
                     {3, 2, 2, 1},
                     {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
                     {3, 2, 1, 1},
                     {2, 46, 28, 128, 86, 242}},
#if !IS_TRT_VERSION_GE(8, 0, 0, 0)
          // Deactivating buggy test case for TRT8 per nvbug 3322485.
          TestParams{"cebfad,fageb->abcdg",
                     {1, 1, 3, 3, 2, 2},
                     {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
                      12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                      24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35},
                     {3, 2, 2, 1, 3},
                     {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
                      13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
                      25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36},
                     {2, 3, 1, 2, 2},
                     {252,  288,  291,  336,  768,  912,  810,  963,
                      1356, 1608, 1401, 1662, 438,  492,  495,  558,
                      1176, 1338, 1236, 1407, 1986, 2256, 2049, 2328}},
#endif
  };

  for (auto p : params) {
    for (bool a_is_tensor : {true, false}) {
      for (bool b_is_tensor : {true, false}) {
        if (!a_is_tensor && !b_is_tensor) {
          // Skip test when both args are weights. We do not convert this
          // since const folding eliminates this case.
          continue;
        }
        Reset();
        int n_inputs = p.shape_b.empty() ? 1 : 2;
        NodeDef node_def = get_einsum_nodedef(tf_type_, p.equation, n_inputs);
        if (a_is_tensor) {
          AddTestTensor("input_a", p.shape_a, p.values_a);
        } else {
          AddTestWeights("input_a", p.shape_a, p.values_a, tf_type_);
        }
        if (!p.shape_b.empty()) {
          if (b_is_tensor) {
            AddTestTensor("input_b", p.shape_b, p.values_b);
          } else {
            AddTestWeights("input_b", p.shape_b, p.values_b, tf_type_);
          }
        }
        TestOpConverter("my_einsum", node_def, p.expected_shape, p.conv_status,
                        Status::OK(), ElementsAreArray(p.expected_output));
      }
    }
  }
}
#endif  // IS_TRT_VERSION_GE(7, 1, 3, 0)

TEST_P(OpConverter_FP32_FP16_Test, ConvertBiasAdd) {
  // Note that kINT32 is not supported by IScaleLayer, so we don't test
  // DT_INT32 type here. DT_FLOAT and DT_HALF are tested.
  // Get the NodeDef for BiasAdd.
  auto get_biasadd_nodedef = [](const string& data_format,
                                DataType tf_type) -> NodeDef {
    Scope s = Scope::NewRootScope();
    auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
    auto weights = ops::Placeholder(s.WithOpName("weights"), tf_type);
    const auto biasadd_attrs = ops::BiasAdd::DataFormat(data_format);
    auto biasadd =
        ops::BiasAdd(s.WithOpName("my_biasadd"), input, weights, biasadd_attrs);
    return biasadd.operation.node()->def();
  };

  for (const string& data_format : {"NHWC", "NCHW"}) {
    for (const int trt_input_rank : {1, 2, 3, 4}) {
      Reset();
      NodeDef node_def = get_biasadd_nodedef(data_format, tf_type_);

      // Add input, dims_array will be like {2, 1, ..., 1, 3}
      std::vector<int32> dims_array(trt_input_rank + 1, 1);
      if (trt_input_rank == 1) {
        dims_array[1] = (data_format == "NHWC" ? 3 : 2);
      } else {
        dims_array[1] = 2;
        dims_array[trt_input_rank] = 3;
      }
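      // e.g. trt_input_rank == 4 yields dims_array == {1, 2, 1, 1, 3}, where
      // the leading 1 is the batch dimension.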
      const int num_input = TrtTensorDimsNumElements(GetTestDims(dims_array));
      ASSERT_EQ(trt_input_rank > 1 ? 6 : (data_format == "NHWC" ? 3 : 2),
                num_input);
      std::vector<float> input_data(num_input, 0);

      AddTestTensor("input", dims_array, input_data);

      const int channel_size = (data_format == "NHWC" ? 3 : 2);
      std::vector<float> bias(channel_size);
      for (int i = 0; i < channel_size; ++i) {
        bias[i] = i + 1;  // bias will be {1, 2, 3, ...}
      }
      AddTestWeights("weights", {channel_size}, bias, tf_type_);

      // Build and run the engine.
      std::vector<float> output_data;

      if (trt_input_rank == 1) {
        if (data_format == "NHWC") {
          output_data = {1, 2, 3};
        } else {
          output_data = {1, 2};
        }
      } else {
        if (data_format == "NHWC") {
          output_data = {1, 2, 3, 1, 2, 3};
        } else {
          output_data = {1, 1, 1, 2, 2, 2};
        }
      }
      TestOpConverter("my_biasadd", node_def, dims_array, Status::OK(),
                      Status::OK(), ElementsAreArray(output_data));
    }
  }
}

template <typename OpType>
NodeDef GetBinaryOpNodeDef(DataType dtype) {
  Scope s = Scope::NewRootScope();
  auto input_l = ops::Placeholder(s.WithOpName("input1"), dtype);
  auto input_r = ops::Placeholder(s.WithOpName("input2"), dtype);
  auto op = OpType(s.WithOpName("my_binary"), input_l, input_r);
  return op.operation.node()->def();
}

TEST_P(OpConverter_FP32_FP16_Test, ConvertBinary) {
  {
    AttrValue dtype;
    dtype.set_type(tf_type_);
    // Both inputs are weights.
    Reset();
    NodeDef node_def =
        MakeNodeDef("my_add", "Add", {"weights1", "weights2"}, {{"T", dtype}});
    AddTestWeights<float>("weights1", {1}, {1});
    AddTestWeights<float>("weights2", {1}, {1});
    RunValidationAndConversion(
        node_def, error::UNIMPLEMENTED,
        "Constant folding is falled back to TensorFlow, binary op received "
        "both input as constant at: my_add");
  }

  using OpFunc = std::function<NodeDef(DataType)>;
  std::map<std::string, std::pair<OpFunc, std::vector<float>>> op_test_info;
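  // Note on the macro below: the braced value list in each ADD_OP call is
  // split by the preprocessor at its commas into the eight parameters v1..v8,
  // and the macro body reassembles them, so e.g.
  // ADD_OP("Add", ops::Add, {5, 8, 6, 9, 5, 8, 6, 9}) expands to
  // std::vector<float>({5, 8, 6, 9, 5, 8, 6, 9}).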
#define ADD_OP(name, op, v1, v2, v3, v4, v5, v6, v7, v8) \
  op_test_info[name] =                                   \
      std::make_pair(GetBinaryOpNodeDef<op>,             \
                     std::vector<float>(v1, v2, v3, v4, v5, v6, v7, v8))
  ADD_OP("Add", ops::Add, {5, 8, 6, 9, 5, 8, 6, 9});
  ADD_OP("AddV2", ops::AddV2, {5, 8, 6, 9, 5, 8, 6, 9});
  ADD_OP("Sub", ops::Sub, {1, 4, 0, 3, 1, 4, 0, 3});
  ADD_OP("Mul", ops::Mul, {6, 12, 9, 18, 6, 12, 9, 18});
  ADD_OP("Div", ops::Div, {1.5, 3, 1, 2, 1.5, 3, 1, 2});
  ADD_OP("RealDiv", ops::RealDiv, {1.5, 3, 1, 2, 1.5, 3, 1, 2});
  ADD_OP("FloorDiv", ops::FloorDiv, {1, 3, 1, 2, 1, 3, 1, 2});
  ADD_OP("Minimum", ops::Minimum, {2, 2, 3, 3, 2, 2, 3, 3});
  ADD_OP("Maximum", ops::Maximum, {3, 6, 3, 6, 3, 6, 3, 6});
  ADD_OP("Pow", ops::Pow, {9, 36, 27, 216, 9, 36, 27, 216});
#undef ADD_OP
  // Add all ops supported by ConvertBinary.
  auto* supported_ops = BinaryOperationMap();
  // Test combinations of tensor vs weight inputs (except when both inputs are
  // weights).
  for (const bool operand_1_is_tensor : {true, false}) {
    for (const bool operand_2_is_tensor : {true, false}) {
      if (!operand_1_is_tensor && !operand_2_is_tensor) continue;
      for (auto& iter : *supported_ops) {
        string op_name = iter.first;
        SCOPED_TRACE(StrCat(op_name, "_", operand_1_is_tensor ? "T" : "W",
                            operand_2_is_tensor ? "T" : "W"));
        Reset();
        if (!op_test_info.count(op_name)) {
          FAIL() << "Binary op test map does not contain op " << op_name;
        }
        NodeDef node_def = op_test_info[op_name].first(tf_type_);
        std::vector<std::string> input_names;
        std::vector<std::vector<int>> input_dims;
        std::vector<std::vector<float>> input_values;
        if (operand_1_is_tensor) {
          AddTestTensor("input1", {2, 1, 2}, {3, 6, 3, 6});
        } else {
          AddTestWeights("input1", {1, 2}, std::vector<float>{3, 6}, tf_type_);
        }
        if (operand_2_is_tensor) {
          AddTestTensor("input2", {2, 2, 1}, {2, 3, 2, 3});
        } else {
          AddTestWeights("input2", {2, 1}, std::vector<float>{2, 3}, tf_type_);
        }
        TestOpConverter("my_binary", node_def, {2, 2, 2}, Status::OK(),
                        Status::OK(),
                        ElementsAreArray(op_test_info[op_name].second));
      }
    }
  }
}

NodeDef GetAddNNodeDef(const std::vector<string>& input_names, DataType dtype) {
  Scope s = Scope::NewRootScope();
  OutputList inputs;
  for (const string& name : input_names) {
    inputs.push_back(ops::Placeholder(s.WithOpName(name), dtype));
  }
  auto op = ops::AddN(s.WithOpName("my_addn"), inputs);
  return op.operation.node()->def();
}

struct AddNTestParams {
  std::vector<float> input_values;
  std::vector<string> input_names;
  std::vector<int> dimensions;
  std::vector<float> expected_output;
  Status status;
};

void TestAddN(ParameterizedOpConverterTestBase* test, AddNTestParams& p) {
  // All inputs are tensors.
  test->Reset();
  const NodeDef node_def = GetAddNNodeDef(p.input_names, test->get_tf_type());

  if (p.input_values.size() % p.input_names.size() != 0) {
    LOG(ERROR) << "The number of input values: `" << p.input_values.size()
               << "` is not a multiple of the number of inputs: `"
               << p.input_names.size() << "`";
    ASSERT_TRUE(false);
  }

  DataVec input_data;
  int input_offset = 0;
  const int window_size = p.input_values.size() / p.input_names.size();
  for (const string& name : p.input_names) {
    std::vector<float>::const_iterator start_pos =
        p.input_values.begin() + input_offset;
    std::vector<float>::const_iterator end_pos = start_pos + window_size;
    std::vector<float> sub_input_val(start_pos, end_pos);
    input_offset += window_size;

    test->AddTestTensor(name, p.dimensions, test->get_tf_type(), sub_input_val);
  }

  test->TestOpConverter("my_addn", node_def, p.dimensions,
                        /*expected_conversion_status=*/p.status,
                        /*expected_runtime_status=*/p.status,
                        /*matcher=*/ElementsAreArray(p.expected_output),
                        /*out_tf_types=*/{test->get_tf_type()});
}

TEST_P(OpConverter_FP32_FP16_Test, ConvertAddN) {
  {
    // Weights with batch dim that is not 1.
    Reset();
    const NodeDef node_def = GetAddNNodeDef({"tensor", "weights"}, tf_type_);
    AddTestTensor("tensor", /*dims=*/{1, 2});
    AddTestWeights<float>("weights", {2, 1, 2}, {0, 1, 2, 3});
    RunValidationAndConversion(
        node_def, error::INVALID_ARGUMENT,
        "Weights input to AddN is required to have batch dimension 1.");
  }

  const std::vector<float> common_input = InitTestVector<float>(6);
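  // common_input is {0, 1, 2, 3, 4, 5} (as implied by the expected outputs
  // below): TestAddN splits it evenly across the named inputs, so with three
  // inputs of two elements each the elementwise sum is
  // {0, 1} + {2, 3} + {4, 5} = {6, 9}.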

  std::vector<AddNTestParams> params = {
      {/*input_values=*/common_input,
       /*input_names=*/{"inp1", "inp2", "inp3"},
       /*dimensions=*/{1, 1, 2, 1, 1},
       /*expected_output=*/{6, 9},
       /*status=*/Status::OK()},
      {/*input_values=*/common_input,
       /*input_names=*/{"inp1", "inp2"},
       /*dimensions=*/{1, 1, 3, 1, 1},
       /*expected_output=*/{3, 5, 7},
       /*status=*/Status::OK()},
      {/*input_values=*/common_input,
       /*input_names=*/{"inp1", "inp2", "inp3"},
       /*dimensions=*/{1, 2, 1, 1},
       /*expected_output=*/{6, 9},
       /*status=*/Status::OK()},
      {/*input_values=*/common_input,
       /*input_names=*/{"inp1", "inp2"},
       /*dimensions=*/{1, 1, 3, 1},
       /*expected_output=*/{3, 5, 7},
       /*status=*/Status::OK()},
      {/*input_values=*/common_input,
       /*input_names=*/{"inp1", "inp2", "inp3"},
       /*dimensions=*/{1, 2, 1},
       /*expected_output=*/{6, 9},
       /*status=*/Status::OK()},
      {/*input_values=*/common_input,
       /*input_names=*/{"inp1", "inp2"},
       /*dimensions=*/{1, 1, 3},
       /*expected_output=*/{3, 5, 7},
       /*status=*/Status::OK()},
      {/*input_values=*/common_input,
       /*input_names=*/{"inp1", "inp2", "inp3"},
       /*dimensions=*/{2, 1},
       /*expected_output=*/{6, 9},
       /*status=*/Status::OK()},
      {/*input_values=*/common_input,
       /*input_names=*/{"inp1", "inp2"},
       /*dimensions=*/{1, 3},
       /*expected_output=*/{3, 5, 7},
       /*status=*/Status::OK()},
      {/*input_values=*/common_input,
       /*input_names=*/{"inp1", "inp2", "inp3"},
       /*dimensions=*/{2},
       /*expected_output=*/{6, 9},
       /*status=*/Status::OK()},
      {/*input_values=*/common_input,
       /*input_names=*/{"inp1", "inp2"},
       /*dimensions=*/{3},
       /*expected_output=*/{3, 5, 7},
       /*status=*/Status::OK()},
      {/*input_values=*/common_input,
       /*input_names=*/{"inp1", "inp2", "inp3", "inp4", "inp5", "inp6"},
       /*dimensions=*/{1},
       /*expected_output=*/{15},
       /*status=*/Status::OK()},
  };

  for (auto p : params) {
    TestAddN(this, p);
  }
}

TEST_P(OpConverter_FP32_Test, ConvertQuantize) {
  {
    // FakeQuantWithMinMaxArgs attributes are empty, should fail.
    Reset(TrtPrecisionMode::INT8);
    NodeDef node_def =
        MakeNodeDef("my_quantize", "FakeQuantWithMinMaxArgs", {"input"});
    AddTestTensor("input", {1, 2, 3});
    RunValidationAndConversion(
        node_def, error::INVALID_ARGUMENT,
        "Min or max attribute not found for FakeQuantWithMinMaxArgs "
        "at my_quantize");
  }
  {
    // FakeQuantWithMinMaxArgs ranges set via attributes, ok.
    Reset(TrtPrecisionMode::INT8);
    Scope s = Scope::NewRootScope();
    auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
    auto quantize_attrs = ops::FakeQuantWithMinMaxArgs::Min(-6.0f).Max(6.0f);
    auto quantize = ops::FakeQuantWithMinMaxArgs(s.WithOpName("my_quantize"),
                                                 input, quantize_attrs);
    const NodeDef& node_def = quantize.operation.node()->def();
    AddTestTensor("input", {1, 2, 3});
    RunValidationAndConversion(node_def);
    TRT_TensorOrWeights output;
    TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
    ASSERT_TRUE(output.is_tensor());
    auto ranges = quantization_ranges();
    EXPECT_EQ(1, ranges.count(output.tensor()->trt_tensor()));
    EXPECT_EQ(6.0f, ranges[output.tensor()->trt_tensor()]);
  }
  {
    // FakeQuantWithMinMaxVars ranges set via inputs, ok.
    Reset(TrtPrecisionMode::INT8);
    Scope s = Scope::NewRootScope();
    auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
    auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
    auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
    auto quantize = ops::FakeQuantWithMinMaxVars(
        s.WithOpName("my_quantize"), input, weights_min, weights_max);
    const NodeDef& node_def = quantize.operation.node()->def();
    AddTestTensor("input", {1, 2, 3});
    AddTestWeights<float>("weights_min", {1}, {-6.0f});
    AddTestWeights<float>("weights_max", {1}, {6.0f});
    RunValidationAndConversion(node_def);
    TRT_TensorOrWeights output;
    TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
    ASSERT_TRUE(output.is_tensor());
    auto ranges = quantization_ranges();
    EXPECT_EQ(1, ranges.count(output.tensor()->trt_tensor()));
    EXPECT_EQ(6.0f, ranges[output.tensor()->trt_tensor()]);
  }
  {
    // QuantizeAndDequantizeV2 ranges set via inputs, ok.
    Reset(TrtPrecisionMode::INT8);
    Scope s = Scope::NewRootScope();
    auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
    auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
    auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
    auto quantize = ops::QuantizeAndDequantizeV2(
        s.WithOpName("my_quantize"), input, weights_min, weights_max);
    const NodeDef& node_def = quantize.operation.node()->def();
    AddTestTensor("input", {1, 2, 3});
    AddTestWeights<float>("weights_min", {1}, {-6.0f});
    AddTestWeights<float>("weights_max", {1}, {6.0f});
    RunValidationAndConversion(node_def);
    TRT_TensorOrWeights output;
    TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
    ASSERT_TRUE(output.is_tensor());
    auto ranges = quantization_ranges();
    EXPECT_EQ(1, ranges.count(output.tensor()->trt_tensor()));
    EXPECT_EQ(6.0f, ranges[output.tensor()->trt_tensor()]);
  }
  {
    // QuantizeAndDequantizeV2 range inputs are tensors, should fail.
    Reset(TrtPrecisionMode::INT8);
    Scope s = Scope::NewRootScope();
    auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
    auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
    auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
    auto quantize = ops::QuantizeAndDequantizeV2(
        s.WithOpName("my_quantize"), input, weights_min, weights_max);
    const NodeDef& node_def = quantize.operation.node()->def();
    AddTestTensor("input", {1, 2, 3});
    AddTestTensor("weights_min", {1});
    AddTestTensor("weights_max", {1});
    RunValidationAndConversion(
        node_def, error::UNIMPLEMENTED,
        "The input \"input_min\" for QuantizeAndDequantizeV2 must be a constant"
        ", at my_quantize");
  }
  {
    // QuantizeAndDequantizeV3 ranges set via inputs, ok.
    Reset(TrtPrecisionMode::INT8);
    Scope s = Scope::NewRootScope();
    auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
    auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
    auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
    auto num_bits = ops::Placeholder(s.WithOpName("num_bits"), DT_INT32);
    auto quantize = ops::QuantizeAndDequantizeV3(
        s.WithOpName("my_quantize"), input, weights_min, weights_max, num_bits);
    const NodeDef& node_def = quantize.operation.node()->def();
    AddTestTensor("input", {1, 2, 3});
    AddTestWeights<float>("weights_min", {1}, {-6.0f});
    AddTestWeights<float>("weights_max", {1}, {6.0f});
    AddTestWeights<int>("num_bits", {1}, {8});
    RunValidationAndConversion(node_def);
    TRT_TensorOrWeights output;
    TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
    ASSERT_TRUE(output.is_tensor());
    auto ranges = quantization_ranges();
    EXPECT_EQ(1, ranges.count(output.tensor()->trt_tensor()));
    EXPECT_EQ(6.0f, ranges[output.tensor()->trt_tensor()]);
  }
}

TEST_P(OpConverter_FP32_FP16_Test, ConvertSquare) {
  {
    // Input is weights, should fail.
    Reset();
    Scope s = Scope::NewRootScope();
    auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
    auto square = ops::Square(s.WithOpName("my_square"), input);
    NodeDef node_def = square.operation.node()->def();
    AddTestWeights("input", {1, 2, 3}, {1, 2, 3, 4, -5, 6}, tf_type_);
    RunValidationAndConversion(
        node_def, error::UNIMPLEMENTED,
        "The input \"x\" for Square must be a tensor, at my_square");
  }

  Reset();

  Scope s = Scope::NewRootScope();
  auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
  auto square = ops::Square(s.WithOpName("my_square"), input);
  NodeDef node_def = square.operation.node()->def();

  const int num_inputs = 20;
  std::vector<float> inputs(num_inputs);
  std::vector<float> expected_outputs(num_inputs);

  for (int i = 0; i < num_inputs; ++i) {
    const float value = (i - 9);
    inputs[i] = value;
    expected_outputs[i] = value * value;
  }
  AddTestTensor("input", {1, 1, 20}, tf_type_, inputs);

  TestOpConverter("my_square", node_def, {1, 1, 20}, Status::OK(), Status::OK(),
                  ArrayFloatNear(expected_outputs, 0));
}

#if IS_TRT_VERSION_GE(7, 1, 3, 0)
TEST_P(OpConverter_FP32_Test, ConvertCombinedNMS) {
  // Get the NodeDef for CombinedNMS.
  auto get_nms_nodedef = [](DataType tf_type, bool clip_boxes = true,
                            bool pad_per_class = false) -> NodeDef {
    Scope s = Scope::NewRootScope();
    auto boxes_tensor = ops::Placeholder(s.WithOpName("boxes"), tf_type);
    auto scores_tensor = ops::Placeholder(s.WithOpName("scores"), tf_type);
    auto max_output_size_per_class =
        ops::Placeholder(s.WithOpName("max_output_size_per_class"), DT_INT32);
    auto max_total_size =
        ops::Placeholder(s.WithOpName("max_total_size"), DT_INT32);
    auto iou_threshold =
        ops::Placeholder(s.WithOpName("iou_threshold"), tf_type);
    auto score_threshold =
        ops::Placeholder(s.WithOpName("score_threshold"), tf_type);
    auto nms_attrs = ops::CombinedNonMaxSuppression::Attrs()
                         .PadPerClass(pad_per_class)
                         .ClipBoxes(clip_boxes);

    auto nms_op = ops::CombinedNonMaxSuppression(
        s.WithOpName("my_nms"), boxes_tensor, scores_tensor,
        max_output_size_per_class, max_total_size, iou_threshold,
        score_threshold, nms_attrs);
    return nms_op.operation.node()->def();
  };

  struct TestParams {
    const std::string description;
    const std::vector<int32> boxes_tensor_dims;
    const std::vector<int32> scores_tensor_dims;
    const std::vector<float> boxes_values;
    const std::vector<float> scores_values;
    const int32 max_output_size_per_class;
    const int32 max_total_size;
    const float iou_threshold;
    const float score_threshold;
    bool pad_per_class;
    bool clip_boxes;
    const std::vector<std::vector<int32>> expected_output_dims;
    const std::vector<float> exp_boxes;
    const std::vector<float> exp_scores;
    const std::vector<float> exp_classes;
    const std::vector<float> exp_num_detections;
    Status conversion_status;
    Status runtime_status;
  };

  Status conv_status =
      trt_mode_ == TrtTestMode::kDynamicShape
          ? errors::Unimplemented(
                "TensorRT BatchedNMS Plugin requires input with static shape")
          : Status::OK();

  std::vector<TestParams> params = {
      // TODO(aaroey): there is a bug in TRT's CombinedNonMaxSuppression
      // implementation: the extra output classes that are outside of the range
      // specified by valid_detections[i] are not zeros but -1s.
      TestParams{
          "Test 1: Original test",
          {1, 1, 3, 4},                                      // boxes dims
          {1, 1, 3},                                         // scores dims
          {0, 0, 0.3, 0.4, 0, 0, 0.3, 0.4, 0, 0, 0.3, 0.4},  // boxes values
          {0.4, 0.7, 0.3},                                   // scores values
          3,                                 // max_output_size_per_class
          2,                                 // max_total_size
          .5f,                               // IOU threshold
          0,                                 // score_threshold
          false,                             // pad_per_class
          true,                              // clip_boxes
          {{1, 2, 4},                        // expected_nmsed_boxes_dims
           {1, 2},                           // expected_nmsed_scores_dims
           {1, 2},                           // expected_nmsed_classes_dims
           {1}},                             // expected_valid_detections_dims
          {0, 0, 0.3, 0.4, 0, 0, 0.3, 0.4},  // exp_boxes_values
          {0.7, 0.4},                        // exp_scores
          {1, 0},                            // exp_classes
          {2},                               // exp_num_detections
          conv_status},
      // Test with clip_boxes = False
      TestParams{
          "Test 2: clip_boxes",
          {1, 5, 1, 4},  // boxes dims
          {1, 5, 1},     // scores dims
          // boxes values:
          {0, 0, 5, 10, 0, 4, 5, 14, 8, 0, 12, 4, 6, 2, 10, 6, 8, 9, 11, 12},
          {5, 4, 3, 2, 1},  // scores values
          4,                // max_output_size_per_class
          4,                // max_total_size
          0.1,              // IOU threshold
          0,                // score threshold
          false,            // pad_per_class
          false,            // clip_boxes
          {{1, 4, 4},       // expected nmsed_boxes_dims
           {1, 4},          // expected nmsed_scores_dims
           {1, 4},          // expected_nmsed_classes_dims
           {1}},            // expected_valid_detections_dims
                            // exp_boxes_values:
          {0, 0, 5, 10, 8, 0, 12, 4, 8, 9, 11, 12, 0, 0, 0, 0},
          {5, 3, 1, 0},   // exp_scores
          {0, 0, 0, -1},  // exp_classes
          {3},            // exp_num_detections
          conv_status},
      // Test with clip_boxes = False, and nonzero score threshold
      TestParams{
          "Test 3: score threshold",
          {1, 5, 1, 4},  // boxes dims
          {1, 5, 1},     // scores dims
          // boxes values:
          {0, 0, 5, 10, 0, 4, 5, 14, 8, 0, 12, 4, 6, 2, 10, 6, 8, 9, 11, 12},
          {5, 4, 3, 2, 1},  // scores values
          4,                // max_output_size_per_class
          4,                // max_total_size
          0.1,              // IOU threshold
          2,                // score threshold
          false,            // pad_per_class
          false,            // clip_boxes
          {{1, 4, 4},       // expected nmsed_boxes_dims
           {1, 4},          // expected nmsed_scores_dims
           {1, 4},          // expected_nmsed_classes_dims
           {1}},            // expected_valid_detections_dims
                            // exp_boxes_values:
          {0, 0, 5, 10, 8, 0, 12, 4, 0, 0, 0, 0, 0, 0, 0, 0},
          {5, 3, 0, 0},    // exp_scores
          {0, 0, -1, -1},  // exp_classes
          {2},             // exp_num_detections
          conv_status},
      // Test where the boxes are defined with the max value first for the box
      // coordinates. This test fails before TRT 7.1.3.
3512       TestParams{
3513           "Test 4: max coord first",
3514           {1, 5, 1, 4},  // boxes dims
3515           {1, 5, 1},     // scores dims
3516                          // boxes values:
3517           {5, 10, 0, 0, 5, 14, 0, 4, 12, 4, 8, 0, 10, 6, 6, 2, 11, 12, 8, 9},
3518           {5, 4, 3, 2, 1},  // scores values
3519           4,                // max_output_size_per_class
3520           4,                // max_total_size
3521           0.1,              // IOU threshold
3522           0,                // score threshold
3523           false,            // pad_per_class
3524           false,            // clip_boxes
3525           {{1, 4, 4},       // expected nmsed_boxes_dims
3526            {1, 4},          // expected nmsed_scores_dims
3527            {1, 4},          // expected_nmsed_classes_dims
3528            {1}},            // expected_valid_detections_dims
3529                             // exp_boxes_values:
3530           {5, 10, 0, 0, 12, 4, 8, 0, 11, 12, 8, 9, 0, 0, 0, 0},
3531           {5, 3, 1, 0},   // exp_scores
3532           {0, 0, 0, -1},  // exp_classes
3533           {3},            // exp_num_detections
3534           conv_status},
3535       TestParams{"Test 5: TopK error",
3536                  {1, 5000, 1, 4},  // boxes dims
3537                  {1, 5000, 1},     // scores dims
3538                  {},               // boxes values:
3539                  {},               // scores values
3540                  4,                // max_output_size_per_class
3541                  4,                // max_total_size
3542                  0.1,              // IOU threshold
3543                  0,                // score threshold
3544                  false,            // pad_per_class
3545                  false,            // clip_boxes
3546                  {},               // expected output dims (all empty)
3547                  {},               // exp_boxes_values
3548                  {},               // exp_scores
3549                  {},               // exp_classes
3550                  {},               // exp_num_detections
3551                  conv_status.ok()
3552                      ? errors::InvalidArgument(
3553                            "TRT NMS plugin allow top_k<=4096, where top_k = "
3554                            "max(num_boxes, max_total_size). You can override "
3555                            "this by setting TF_TRT_ALLOW_NMS_TOPK_OVERRIDE=1 "
3556                            "environment variable, but this can result in a "
3557                            "loss of accuracy.")
3558                      : conv_status},
3559   };
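  // For illustration (derived from the expected values above): in "Test 3"
  // the score_threshold of 2 drops the boxes scoring 2 and 1, and IOU
  // suppression (iou_threshold = 0.1) removes the score-4 box overlapping the
  // score-5 box, leaving 2 valid detections; unused output slots are
  // zero-padded and their classes set to -1.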
3560 
3561   for (auto p : params) {
3562     Reset();
3563     SCOPED_TRACE(p.description);
3564     AddTestTensor("boxes", p.boxes_tensor_dims, p.boxes_values);
3565     AddTestTensor("scores", p.scores_tensor_dims, p.scores_values);
3566     AddTestWeights<int32>("max_output_size_per_class", {1},
3567                           {p.max_output_size_per_class});
3568     AddTestWeights<int32>("max_total_size", {1}, {p.max_total_size});
3569     AddTestWeights<float>("iou_threshold", {1}, {p.iou_threshold}, tf_type_);
3570     AddTestWeights<float>("score_threshold", {1}, {p.score_threshold},
3571                           tf_type_);
3572 
3573     auto node_def = get_nms_nodedef(tf_type_, p.clip_boxes, p.pad_per_class);
3574 
3575     TestOpConverterMultiOut("my_nms", node_def, p.expected_output_dims,
3576                             p.conversion_status, p.runtime_status,
3577                             {
3578                                 ElementsAreArray(p.exp_boxes),
3579                                 ElementsAreArray(p.exp_scores),
3580                                 ElementsAreArray(p.exp_classes),
3581                                 ElementsAreArray(p.exp_num_detections),
3582                             },
3583                             {tf_type_, tf_type_, tf_type_, DT_INT32});
3584   }
3585 }
3586 #endif  // IS_TRT_VERSION_GE(7, 1, 3, 0)
3587 
3588 template <typename T>
3589 NodeDef CreateUnaryOp(DataType tf_type) {
3590   Scope s = Scope::NewRootScope();
3591   auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
3592   return T(s.WithOpName("my_unary"), input).operation.node()->def();
3593 }
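// Usage sketch (for illustration): e.g. CreateUnaryOp<ops::Relu>(DT_FLOAT)
// builds a one-node graph Placeholder("input") -> Relu("my_unary") and
// returns the NodeDef of the Relu node, as used in the tests below.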
3594 
3595 constexpr float kLeakyReluAlpha = 0.2f;
3596 template <>
3597 NodeDef CreateUnaryOp<ops::internal::LeakyRelu>(DataType tf_type) {
3598   Scope s = Scope::NewRootScope();
3599   auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
3600   return ops::internal::LeakyRelu(
3601              s.WithOpName("my_unary"), input,
3602              ops::internal::LeakyRelu::Alpha(kLeakyReluAlpha))
3603       .operation.node()
3604       ->def();
3605 }
3606 
3607 TEST_P(OpConverter_FP32_Test, ConvertActivation) {
3608   {
3609     // Input is weights, should fail.
3610     Reset();
3611     const NodeDef& node_def = CreateUnaryOp<ops::Relu>(tf_type_);
3612     AddTestWeights<int32>("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
3613     RunValidationAndConversion(
3614         node_def, error::UNIMPLEMENTED,
3615         "The input \"input\" for Relu must be a tensor, at my_unary");
3616   }
3617 
3618   constexpr float kSeluAlpha = 1.7580993408473768599402175208123f;
3619   constexpr float kSeluScale = 1.0507009873554804934193349852946f;
3620   using OpFunc = std::function<NodeDef(DataType)>;
3621   using ValFunc = float (*)(float);
3622   std::map<std::string, std::pair<OpFunc, ValFunc>> op_map;
3623 
3624 #define ADD_OP(name, op, compute) \
3625   op_map[name] = std::make_pair(CreateUnaryOp<op>, compute)
3626   ADD_OP("LeakyRelu", ops::internal::LeakyRelu,
3627          [](float x) { return (x > 0.0f) ? x : x * kLeakyReluAlpha; });
3628   ADD_OP("Relu", ops::Relu, [](float x) { return (x > 0.0f) ? x : 0.0f; });
3629   ADD_OP("Relu6", ops::Relu6,
3630          [](float x) { return std::min(std::max(x, 0.0f), 6.0f); });
3631   ADD_OP("Sigmoid", ops::Sigmoid,
3632          [](float x) { return 1.0f / (1.0f + std::exp(-x)); });
3633   ADD_OP("Tanh", ops::Tanh, static_cast<ValFunc>(std::tanh));
3634   ADD_OP("Elu", ops::Elu,
3635          [](float x) { return (x > 0.0f) ? x : std::exp(x) - 1; });
3636   ADD_OP("Selu", ops::Selu, [](float x) {
3637     return (x > 0.0f) ? kSeluScale * x
3638                       : kSeluScale * kSeluAlpha * (std::exp(x) - 1);
3639   });
3640   ADD_OP("Softsign", ops::Softsign,
3641          [](float x) { return x / (std::abs(x) + 1); });
3642   ADD_OP("Softplus", ops::Softplus,
3643          [](float x) { return std::log(std::exp(x) + 1); });
3644 #undef ADD_OP
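  // Spot checks of the reference implementations above (illustration only):
  //   LeakyRelu(-2) = -2 * kLeakyReluAlpha = -0.4
  //   Relu6(88)     = min(max(88, 0), 6)   = 6
  //   Softsign(-1)  = -1 / (|-1| + 1)      = -0.5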
3645 
3646   // Get list of ops to test.
3647   std::vector<string> ops_to_test;
3648   // Add all ops supported by ConvertActivation.
3649   auto* map = ActivationTypeMap();
3650   ops_to_test.reserve(map->size());
3651   for (auto& pair : *map) {
3652     ops_to_test.push_back(pair.first);
3653   }
3654   // Add other activation ops to test.
3655   ops_to_test.push_back("Relu6");
3656   ops_to_test.push_back("LeakyRelu");
3657   auto p = TestParamBase{
3658       {1, 1, 2, 3},  // input dims
3659       {},            // input partial dims
3660       {1, 1, 2, 3},  // expected output dims
3661   };
3662   // Ok.
3663   for (const string& op_name : ops_to_test) {
3664     if (!op_map.count(op_name)) {
3665       FAIL() << "Activation op test map does not contain op " << op_name;
3666     }
3667     Reset();
3668     NodeDef node_def = op_map[op_name].first(tf_type_);
3669     const std::vector<float> input = {-100, -2, -1, 0, 1, 88};
3670     AddTestTensor("input", p.input_dims, input);
3671 
3672     // std::exp in Softplus will overflow for input > 88
3673     std::vector<float> output_values;
3674     std::transform(input.begin(), input.end(),
3675                    std::back_inserter(output_values), op_map[op_name].second);
3676 
3677     TestOpConverter("my_unary", node_def, p.expected_output_dims, Status::OK(),
3678                     Status::OK(),
3679 #if IS_TRT_VERSION_GE(8, 0, 0, 0)
3680                     // NVBug # 3322482 - Known bug with TRT 8.0 on specific GPU
3681                     // architectures
3682                     ArrayFloatNear(output_values, 1e-4, false)
3683 #else
3684                     ArrayFloatNear(output_values, 0, false)
3685 #endif
3686     );
3687 
3688     TRT_TensorOrWeights output;
3689     TF_EXPECT_OK(GetTensorOrWeights("my_unary", &output));
3690   }
3691 }
3692 
3693 TEST_P(OpConverter_FP32_Test, ConvertExpandDims) {
3694   // Get the NodeDef for ExpandDims.
3695   Scope s = Scope::NewRootScope();
3696   auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
3697   auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
3698   auto expanddims =
3699       ops::ExpandDims(s.WithOpName("my_expanddims"), input, weights);
3700   const NodeDef& node_def = expanddims.operation.node()->def();
3701   {
3702     // Input is weights, should fail.
3703     Reset();
3704     AddTestWeights<int32>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
3705     AddTestWeights<int32>("weights", {1}, {1});
3706     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3707                                "The input \"input\" for ExpandDims must be a "
3708                                "tensor, at my_expanddims");
3709   }
3710   {
3711     // Axis is a tensor, should fail.
3712     Reset();
3713     AddTestTensor("input", {3, 2, 1});
3714     AddTestTensor("weights", {3});
3715     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3716                                "The input \"axis\" for ExpandDims must be a "
3717                                "constant, at my_expanddims");
3718   }
3719   std::vector<TestParamBase> test_params = {
3720       TestParamBase{{1, 1, 2, 3},
3721                     {},
3722                     {1, 1, 1, 2, 3},
3723                     {0},
3724                     trt_mode_ == TrtTestMode::kImplicitBatch
3725                         ? Status(error::UNIMPLEMENTED,
3726                                  "TensorRT does not allow manipulation of the "
3727                                  "batch dimension, at my_expanddims")
3728                         : Status::OK()},
3729       TestParamBase{{1, 1, 2, 3},
3730                     {},
3731                     {1, 1, 1, 2, 3},
3732                     {-5},
3733                     trt_mode_ == TrtTestMode::kImplicitBatch
3734                         ? Status(error::UNIMPLEMENTED,
3735                                  "TensorRT does not allow manipulation of the "
3736                                  "batch dimension, at my_expanddims")
3737                         : Status::OK()},
3738       TestParamBase{{1, 1, 2, 3},
3739                     {},
3740                     {},
3741                     {5},
3742                     Status(error::INVALID_ARGUMENT,
3743                            "Axis value of 5 is out of bounds, must be in range"
3744                            " [-5, 5), at my_expanddims")},
3745       TestParamBase{{1, 1, 2, 3},
3746                     {},
3747                     {},
3748                     {-6},
3749                     Status(error::INVALID_ARGUMENT,
3750                            "Axis value of -6 is out of bounds, must be in range"
3751                            " [-5, 5), at my_expanddims")},
3752       TestParamBase{{1, 2, 3}, {}, {1, 1, 2, 3}, {1}},
3753       TestParamBase{{1, 2, 3}, {}, {1, 1, 2, 3}, {-3}},
3754       TestParamBase{{1, 2, 3}, {}, {1, 2, 3, 1}, {3}},
3755       TestParamBase{{1, 2, 3}, {}, {1, 2, 3, 1}, {-1}},
3756       TestParamBase{{1, 2, 3}, {}, {1, 2, 1, 3}, {2}},
3757       TestParamBase{{1, 2, 3}, {}, {1, 2, 1, 3}, {-2}},
3758       TestParamBase{{1, 6}, {}, {1, 1, 6}, {1}},
3759       TestParamBase{{1, 6}, {}, {1, 6, 1}, {-1}},
3760   };
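  // To read the params above (illustration): for the rank-4 input the valid
  // axis range is [-5, 5) because the output has rank 5; e.g. axis 1 and
  // axis -3 both turn a {1, 2, 3} input into {1, 1, 2, 3}.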
3761   for (auto p : test_params) {
3762     Reset();
3763     AddTestTensor("input", p.input_dims, {1, 2, 3, 4, 5, 6});
3764     AddTestWeights<int32>("weights", {1}, {p.param[0]});
3765     TestOpConverter("my_expanddims", node_def, p.expected_output_dims, p.status,
3766                     p.runtime_status, ElementsAreArray({1, 2, 3, 4, 5, 6}));
3767   }
3768 }
3769 
3770 TEST_P(OpConverter_FP32_FP16_Test, ConvertSoftmax) {
3771   // Get the NodeDef for SoftMax.
3772   Scope s = Scope::NewRootScope();
3773   auto input = ops::Placeholder(s.WithOpName("logits"), tf_type_);
3774   auto softmax = ops::Softmax(s.WithOpName("my_softmax"), input);
3775   const NodeDef& node_def = softmax.operation.node()->def();
3776 
3777   struct TestParams {
3778     std::vector<int> input_dims;
3779     std::vector<float> expected_values;
3780   };
3781   std::vector<TestParams> test_params = {
3782       TestParams{{2, 3},
3783                  {0.09003057, 0.24472848, 0.66524094, 0.09003057, 0.24472848,
3784                   0.66524094}},
3785       TestParams{{6, 1}, {1, 1, 1, 1, 1, 1}},  // size-1 softmax axis: all ones
3786       TestParams{{1, 6},  // softmax over all six values of arange(1,7)
3787                  {0.00426978, 0.01160646, 0.03154963, 0.08576079, 0.23312202,
3788                   0.6336913}},
3789   };
3790   std::vector<float> input_values{1, 2, 3, 4, 5, 6};
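  // Sanity check of the first expected row (illustration):
  //   softmax([1, 2, 3]) = exp(x_i) / sum_j exp(x_j)
  //                      = (2.718, 7.389, 20.086) / 30.193
  //                      ~ (0.0900, 0.2447, 0.6652)
  // The second row [4, 5, 6] yields the same values because softmax is
  // shift-invariant.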
3791   for (auto p : test_params) {
3792     Reset();
3793     AddTestTensor("logits", p.input_dims, input_values);
3794     TestOpConverter("my_softmax", node_def, p.input_dims, Status::OK(),
3795                     Status::OK(), ArrayFloatNear(p.expected_values, 1e-3));
3796   }
3797 }
3798 
3799 TEST_P(OpConverter_FP32_Test, ConvertSqueeze) {
3800   const bool use_implicit_batch = (trt_mode_ == TrtTestMode::kImplicitBatch);
3801   // Get the NodeDef for Squeeze.
3802   auto get_squeeze_nodedef = [](std::vector<int> axes,
3803                                 DataType tf_type) -> NodeDef {
3804     Scope s = Scope::NewRootScope();
3805     auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
3806     if (!axes.empty()) {
3807       ops::Squeeze::Attrs squeeze_attrs;
3808       squeeze_attrs.axis_ = gtl::ArraySlice<int>(axes);  // non-absl ok
3809       auto squeeze =
3810           ops::Squeeze(s.WithOpName("my_squeeze"), input, squeeze_attrs);
3811       return squeeze.operation.node()->def();
3812     } else {
3813       auto squeeze = ops::Squeeze(s.WithOpName("my_squeeze"), input);
3814       return squeeze.operation.node()->def();
3815     }
3816   };
3817   std::vector<TestParamBase> test_params = {
3818       TestParamBase{
3819           {1, 2, 1, 3},  // input dims
3820           {},            // input partial dims
3821           {2, 3},        // expected output dims
3822           {},            // axis
3823           trt_mode_ == TrtTestMode::kExplicitBatch
3824               ? Status::OK()
3825               : Status{error::UNIMPLEMENTED,
3826                        "Squeeze is not implemented for empty squeeze_dims, at "
3827                        "my_squeeze"}},
3828       TestParamBase{{1, 2, 1, 3},
3829                     {},
3830                     {2, 1, 3},
3831                     {0},
3832                     use_implicit_batch
3833                         ? Status{error::UNIMPLEMENTED,
3834                                  "TensorRT does not allow manipulation of the "
3835                                  "batch dimension, at my_squeeze"}
3836                         : Status::OK()},
3837       TestParamBase{{1, 2, 1, 3},
3838                     {},
3839                     {2, 1, 3},
3840                     {-4},
3841                     use_implicit_batch
3842                         ? Status{error::UNIMPLEMENTED,
3843                                  "TensorRT does not allow manipulation of the "
3844                                  "batch dimension, at my_squeeze"}
3845                         : Status::OK()},
3846       TestParamBase{
3847           {1, 1, 2, 3},
3848           {},
3849           {},
3850           {4},
3851           Status{error::INVALID_ARGUMENT,
3852                  "Axis value of 4 is out of bounds, must be in range [-4, 4), "
3853                  "at my_squeeze"}},
3854       TestParamBase{
3855           {1, 1, 2, 3},
3856           {},
3857           {},
3858           {-5},
3859           Status{error::INVALID_ARGUMENT,
3860                  "Axis value of -5 is out of bounds, must be in range [-4, 4), "
3861                  "at my_squeeze"}},
3862       TestParamBase{{1, 1, 2, 3}, {}, {1, 2, 3}, {1}},
3863       TestParamBase{{1, 1, 2, 3}, {}, {1, 2, 3}, {-3}},
3864       TestParamBase{{1, 2, 3, 1}, {}, {1, 2, 3}, {3}},
3865       TestParamBase{{1, 2, 3, 1}, {}, {1, 2, 3}, {-1}},
3866       TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {1, 3, 5}},
3867       TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {3, 1, 5}},
3868       TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {-1, -3, -5}},
3869       TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {1, -3, 5}},
3870       TestParamBase{{1, 1, 6}, {}, {1, 6}, {1}},
3871       TestParamBase{{1, 6, 1}, {}, {1, 6}, {2}},
3872   };
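  // For illustration: in the multi-axis cases above, squeezing
  // {1, 1, 2, 1, 3, 1} on axes {1, 3, 5} drops the three size-1 dims and
  // yields {1, 2, 3}; the axis order does not matter, and the negative axes
  // {-1, -3, -5} address the same dims from the back.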
3873   auto squeeze_non_singleton = TestParamBase{
3874       {1, 1, 2, 3},
3875       {},
3876       {},
3877       {2},
3878       Status{error::INVALID_ARGUMENT,
3879              "Dimension 2 with size 2 cannot be squeezed because it must be "
3880              "size 1, at my_squeeze"}};
3881 
3882   if (trt_mode_ == TrtTestMode::kDynamicShape) {
3883     // In this test we try to squeeze axis=2 which has size > 1. In dynamic
3884     // shape mode the converter sees only -1, so it cannot catch this error.
3885     squeeze_non_singleton.status = Status::OK();  // conversion status
3886     squeeze_non_singleton.runtime_status =
3887         errors::InvalidArgument("Negative number of dimensions -1");
3888     // Dynamic shape tests with partially known input shape
3889     test_params.push_back(TestParamBase{{2, 1, 3}, {2, -1, 3}, {2, 3}, {1}});
3890     test_params.push_back(TestParamBase{{2, 1, 3}, {2, 1, -1}, {2, 3}, {1}});
3891   }
3892   test_params.push_back(squeeze_non_singleton);
3893 
3894   for (TestParamBase p : test_params) {
3895     SCOPED_TRACE(p);
3896     Reset();
3897     NodeDef node_def = get_squeeze_nodedef(p.param, tf_type_);
3898     AddTestTensor("input", p.input_dims, {1, 2, 3, 4, 5, 6},
3899                   p.partial_input_dims);
3900     TestOpConverter("my_squeeze", node_def, p.expected_output_dims, p.status,
3901                     p.runtime_status, ElementsAreArray({1, 2, 3, 4, 5, 6}));
3902   }
3903 }
3904 
3905 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertStridedSlice) {
3906   // Get nodedef for StridedSlice layer.
3907   auto get_strided_slice_nodedef =
3908       [](DataType tf_type, int64 begin_mask = 0, int64 end_mask = 0,
3909          int64 ellipsis_mask = 0, int64 new_axis_mask = 0,
3910          int64 shrink_axis_mask = 0) -> NodeDef {
3911     Scope s = Scope::NewRootScope();
3912     auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
3913     auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32);
3914     auto end = ops::Placeholder(s.WithOpName("end"), DT_INT32);
3915     auto strides = ops::Placeholder(s.WithOpName("strides"), DT_INT32);
3916     ops::StridedSlice::Attrs attrs = ops::StridedSlice::Attrs()
3917                                          .BeginMask(begin_mask)
3918                                          .EndMask(end_mask)
3919                                          .EllipsisMask(ellipsis_mask)
3920                                          .NewAxisMask(new_axis_mask)
3921                                          .ShrinkAxisMask(shrink_axis_mask);
3922     auto strided_slice = ops::StridedSlice(s.WithOpName("my_strided_slice"),
3923                                            input, begin, end, strides, attrs);
3924     return strided_slice.operation.node()->def();
3925   };
3926 
3927   {
3928     // Input is weights, should fail.
3929     Reset();
3930     NodeDef node_def = get_strided_slice_nodedef(tf_type_);
3931     AddTestWeights<int32>("input", {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6});
3932     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
3933     AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
3934     AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
3935     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3936                                "The input \"input\" for StridedSlice must "
3937                                "be a tensor, at my_strided_slice");
3938   }
3939   {
3940     // Begin, end, strides are tensors, should fail.
3941     Reset();
3942     NodeDef node_def = get_strided_slice_nodedef(tf_type_);
3943     AddTestTensor("input", {4, 1, 1, 1});
3944     AddTestTensor("begin", {4});
3945     AddTestTensor("end", {4});
3946     AddTestTensor("strides", {4});
3947     RunValidationAndConversion(
3948         node_def, error::UNIMPLEMENTED,
3949         "The input \"begin\" for StridedSlice must be a constant, at "
3950         "my_strided_slice");
3951   }
3952 
3953   struct TestParams {
3954     std::vector<int> input_dims;
3955     std::vector<int> begin;
3956     std::vector<int> end;
3957     std::vector<int> strides;
3958     int begin_mask;
3959     int end_mask;
3960     int ellipsis_mask;
3961     int new_axis_mask;
3962     int shrink_axis_mask;
3963     std::vector<int> expected_output_dims;
3964     std::vector<float> expected_output;
3965     Status conversion_status;
3966     Status runtime_status;
3967     std::vector<int> partial_input_dims;
3968   };
3969 
3970   auto get_mask = [](const std::vector<int>& mask) {
3971     int result = 0;
3972     for (int i = 0; i < mask.size(); i++) {
3973       if (mask[i]) result += (1 << i);
3974     }
3975     return result;
3976   };
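  // e.g. get_mask({1, 0, 0, 0}) == 0b0001 == 1 and
  //      get_mask({0, 1, 0, 1}) == 0b1010 == 10 (bit i is set iff mask[i]).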
3977 
3978   // Same input is used for all tests.
3979   const std::vector<float> ok_input = {1, 2, 3, 4, 5, 6};
3980 
3981   Status batch_conv_status =
3982       (trt_mode_ == TrtTestMode::kImplicitBatch)
3983           ? errors::Unimplemented(
3984                 "TensorRT does not allow modifications to "
3985                 "the batch dimension, at my_strided_slice")
3986           : Status::OK();
3987   std::vector<TestParams> params = {
3988       // Modify batch dim, should fail in implicit batch mode.
3989       TestParams{
3990           /*input_dims=*/{2, 1, 1, 3},
3991           /*begin=*/{0, 0, 0, 0},
3992           /*end=*/{1, 1, 1, 2},
3993           /*strides=*/{1, 1, 1, 1},
3994           /*begin_mask=*/get_mask({0, 0, 0, 0}),
3995           /*end_mask=*/get_mask({0, 0, 0, 0}),
3996           /*ellipsis_mask=*/0,
3997           /*new_axis_mask=*/0,
3998           /*shrink_axis_mask=*/0,
3999           /*expected_output_dims=*/{1, 1, 1, 2},
4000           /*expected_output=*/{1, 2},
4001           batch_conv_status,
4002       },
4003       // Unknown batch size without end_mask.
4004       TestParams{
4005           /*input_dims=*/{2, 1, 1, 3},
4006           /*begin=*/{0, 0, 0, 0},
4007           /*end=*/{1, 1, 1, 2},
4008           /*strides=*/{1, 1, 1, 1},
4009           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4010           /*end_mask=*/get_mask({0, 0, 0, 0}),
4011           /*ellipsis_mask=*/0,
4012           /*new_axis_mask=*/0,
4013           /*shrink_axis_mask=*/0,
4014           /*expected_output_dims=*/{1, 1, 1, 2},
4015           /*expected_output=*/{1, 2},
4016           batch_conv_status,
4017           Status::OK(),
4018           {-1, 1, 1, 3},
4019       },
4020       // Unknown batch size but using end_mask, ok.
4021       TestParams{
4022           /*input_dims=*/{2, 1, 1, 3},
4023           /*begin=*/{0, 0, 0, 0},
4024           /*end=*/{0, 1, 1, 2},
4025           /*strides=*/{1, 1, 1, 1},
4026           /*begin_mask=*/get_mask({1, 0, 0, 0}),
4027           /*end_mask=*/get_mask({1, 0, 0, 0}),
4028           /*ellipsis_mask=*/0,
4029           /*new_axis_mask=*/0,
4030           /*shrink_axis_mask=*/0,
4031           /*expected_output_dims=*/{2, 1, 1, 2},
4032           /*expected_output=*/{1, 2, 4, 5},
4033           Status::OK(),
4034           Status::OK(),
4035           {-1, 1, 1, 3},
4036       },
4037       TestParams{
4038           /*input_dims=*/{1, 1, 2, 3},
4039           /*begin=*/{0, 0, 2, 0},
4040           /*end=*/{1, 1, 0, 3},
4041           /*strides=*/{1, 1, 1, 1},
4042           /*begin_mask=*/0,
4043           /*end_mask=*/0,
4044           /*ellipsis_mask=*/0,
4045           /*new_axis_mask=*/0,
4046           /*shrink_axis_mask=*/0,
4047           /*expected_output_dims=*/{},
4048           /*expected_output=*/{},
4049           errors::InvalidArgument("\"size\" cannot be negative for "
4050                                   "StridedSlice"),
4051       },
4052       // 2D Crop.
4053       TestParams{
4054           /*input_dims=*/{1, 1, 2, 3},
4055           /*begin=*/{0, 0, 0, 0},
4056           /*end=*/{0, 0, 1, 2},
4057           /*strides=*/{1, 1, 1, 1},
4058           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4059           /*end_mask=*/get_mask({1, 1, 0, 0}),
4060           /*ellipsis_mask=*/0,
4061           /*new_axis_mask=*/0,
4062           /*shrink_axis_mask=*/0,
4063           /*expected_output_dims=*/{1, 1, 1, 2},
4064           /*expected_output=*/{1, 2},
4065       },
4066       TestParams{
4067           /*input_dims=*/{1, 1, 2, 3},
4068           /*begin=*/{0, 0, 1, 1},
4069           /*end=*/{0, 0, 0, 0},
4070           /*strides=*/{1, 1, 1, 1},
4071           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4072           /*end_mask=*/get_mask({1, 1, 1, 1}),
4073           /*ellipsis_mask=*/0,
4074           /*new_axis_mask=*/0,
4075           /*shrink_axis_mask=*/0,
4076           /*expected_output_dims=*/{1, 1, 1, 2},
4077           /*expected_output=*/{5, 6},
4078       },
4079       TestParams{
4080           /*input_dims=*/{1, 1, 2, 3},
4081           /*begin=*/{0, 0, 1, 1},
4082           /*end=*/{0, 1, 2, 3},
4083           /*strides=*/{1, 1, 1, 1},
4084           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4085           /*end_mask=*/get_mask({1, 1, 0, 0}),
4086           /*ellipsis_mask=*/0,
4087           /*new_axis_mask=*/0,
4088           /*shrink_axis_mask=*/0,
4089           /*expected_output_dims=*/{1, 1, 1, 2},
4090           /*expected_output=*/{5, 6},
4091       },
4092       // 2D crop with negative stride
4093       TestParams{
4094           /*input_dims=*/{1, 1, 2, 3},
4095           /*begin=*/{0, 0, 1, 2},
4096           /*end=*/{0, 0, 0, 0},
4097           /*strides=*/{1, 1, -1, -1},
4098           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4099           /*end_mask=*/get_mask({1, 1, 0, 0}),
4100           /*ellipsis_mask=*/0,
4101           /*new_axis_mask=*/0,
4102           /*shrink_axis_mask=*/0,
4103           /*expected_output_dims=*/{1, 1, 1, 2},
4104           /*expected_output=*/{6, 5},
4105       },
4106       TestParams{
4107           /*input_dims=*/{1, 1, 2, 3},
4108           /*begin=*/{0, 0, 1, 1},
4109           /*end=*/{0, 0, 0, 0},
4110           /*strides=*/{1, 1, -1, -1},
4111           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4112           /*end_mask=*/get_mask({1, 1, 1, 1}),
4113           /*ellipsis_mask=*/0,
4114           /*new_axis_mask=*/0,
4115           /*shrink_axis_mask=*/0,
4116           /*expected_output_dims=*/{1, 1, 2, 2},
4117           /*expected_output=*/{5, 4, 2, 1},
4118       },
4119       TestParams{
4120           /*input_dims=*/{1, 1, 2, 3},
4121           /*begin=*/{0, 0, 0, 0},
4122           /*end=*/{0, 0, 0, 0},
4123           /*strides=*/{1, 1, -1, -1},
4124           /*begin_mask=*/get_mask({0, 0, 1, 1}),
4125           /*end_mask=*/get_mask({1, 1, 0, 0}),
4126           /*ellipsis_mask=*/0,
4127           /*new_axis_mask=*/0,
4128           /*shrink_axis_mask=*/0,
4129           /*expected_output_dims=*/{1, 1, 1, 2},
4130           /*expected_output=*/{6, 5},
4131       },
4132       TestParams{
4133           /*input_dims=*/{1, 1, 2, 3},
4134           /*begin=*/{0, 0, 0, 0},
4135           /*end=*/{0, 0, 0, 0},
4136           /*strides=*/{1, -1, -1, -1},
4137           /*begin_mask=*/get_mask({1, 1, 1, 1}),
4138           /*end_mask=*/get_mask({1, 1, 1, 1}),
4139           /*ellipsis_mask=*/0,
4140           /*new_axis_mask=*/0,
4141           /*shrink_axis_mask=*/0,
4142           /*expected_output_dims=*/{1, 1, 2, 3},
4143           /*expected_output=*/{6, 5, 4, 3, 2, 1},
4144       },
4145       // 2D Crop, with transpose.
4146       TestParams{
4147           /*input_dims=*/{1, 2, 3, 1},
4148           /*begin=*/{0, 0, 0, 0},
4149           /*end=*/{0, 1, 2, 1},
4150           /*strides=*/{1, 1, 1, 1},
4151           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4152           /*end_mask=*/get_mask({1, 0, 0, 0}),
4153           /*ellipsis_mask=*/0,
4154           /*new_axis_mask=*/0,
4155           /*shrink_axis_mask=*/0,
4156           /*expected_output_dims=*/{1, 1, 2, 1},
4157           /*expected_output=*/{1, 2},
4158       },
4159       TestParams{
4160           /*input_dims=*/{1, 2, 3, 1},
4161           /*begin=*/{0, 1, 1, 0},
4162           /*end=*/{0, 2, 3, 1},
4163           /*strides=*/{1, 1, 1, 1},
4164           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4165           /*end_mask=*/get_mask({1, 0, 0, 0}),
4166           /*ellipsis_mask=*/0,
4167           /*new_axis_mask=*/0,
4168           /*shrink_axis_mask=*/0,
4169           /*expected_output_dims=*/{1, 1, 2, 1},
4170           /*expected_output=*/{5, 6},
4171       },
4172       TestParams{
4173           /*input_dims=*/{1, 2, 1, 3},
4174           /*begin=*/{0, 0, 0, 0},
4175           /*end=*/{0, 1, 1, 2},
4176           /*strides=*/{1, 1, 1, 1},
4177           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4178           /*end_mask=*/get_mask({1, 0, 0, 0}),
4179           /*ellipsis_mask=*/0,
4180           /*new_axis_mask=*/0,
4181           /*shrink_axis_mask=*/0,
4182           /*expected_output_dims=*/{1, 1, 1, 2},
4183           /*expected_output=*/{1, 2},
4184       },
4185       TestParams{
4186           /*input_dims=*/{1, 2, 1, 3},
4187           /*begin=*/{0, 1, 0, 1},
4188           /*end=*/{0, 2, 1, 3},
4189           /*strides=*/{1, 1, 1, 1},
4190           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4191           /*end_mask=*/get_mask({1, 0, 0, 0}),
4192           /*ellipsis_mask=*/0,
4193           /*new_axis_mask=*/0,
4194           /*shrink_axis_mask=*/0,
4195           /*expected_output_dims=*/{1, 1, 1, 2},
4196           /*expected_output=*/{5, 6},
4197       },
4198       // 2D Crop, with reshape.
4199       TestParams{
4200           /*input_dims=*/{1, 2, 3},
4201           /*begin=*/{0, 0, 0},
4202           /*end=*/{0, 1, 2},
4203           /*strides=*/{1, 1, 1},
4204           /*begin_mask=*/get_mask({0, 0, 0}),
4205           /*end_mask=*/get_mask({1, 0, 0}),
4206           /*ellipsis_mask=*/0,
4207           /*new_axis_mask=*/0,
4208           /*shrink_axis_mask=*/0,
4209           /*expected_output_dims=*/{1, 1, 2},
4210           /*expected_output=*/{1, 2},
4211       },
4212       TestParams{
4213           /*input_dims=*/{1, 2, 3},
4214           /*begin=*/{0, 1, 1},
4215           /*end=*/{0, 0, 0},
4216           /*strides=*/{1, 1, 1},
4217           /*begin_mask=*/get_mask({0, 0, 0}),
4218           /*end_mask=*/get_mask({1, 1, 1}),
4219           /*ellipsis_mask=*/0,
4220           /*new_axis_mask=*/0,
4221           /*shrink_axis_mask=*/0,
4222           /*expected_output_dims=*/{1, 1, 2},
4223           /*expected_output=*/{5, 6},
4224       },
4225       // 1D Crop.
4226       TestParams{
4227           /*input_dims=*/{1, 1, 2, 3},
4228           /*begin=*/{0, 0, 0, 0},
4229           /*end=*/{0, 0, 0, 2},
4230           /*strides=*/{1, 1, 1, 1},
4231           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4232           /*end_mask=*/get_mask({1, 1, 1, 0}),
4233           /*ellipsis_mask=*/0,
4234           /*new_axis_mask=*/0,
4235           /*shrink_axis_mask=*/0,
4236           /*expected_output_dims=*/{1, 1, 2, 2},
4237           /*expected_output=*/{1, 2, 4, 5},
4238       },
4239       TestParams{
4240           /*input_dims=*/{1, 1, 2, 3},
4241           /*begin=*/{0, 0, 1, 0},
4242           /*end=*/{0, 0, 0, 0},
4243           /*strides=*/{1, 1, 1, 1},
4244           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4245           /*end_mask=*/get_mask({1, 1, 1, 1}),
4246           /*ellipsis_mask=*/0,
4247           /*new_axis_mask=*/0,
4248           /*shrink_axis_mask=*/0,
4249           /*expected_output_dims=*/{1, 1, 1, 3},
4250           /*expected_output=*/{4, 5, 6},
4251       },
4252       // 1D Crop, with transpose.
4253       TestParams{
4254           /*input_dims=*/{1, 2, 3, 1},
4255           /*begin=*/{0, 0, 0, 0},
4256           /*end=*/{0, 1, 0, 0},
4257           /*strides=*/{1, 1, 1, 1},
4258           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4259           /*end_mask=*/get_mask({1, 0, 1, 1}),
4260           /*ellipsis_mask=*/0,
4261           /*new_axis_mask=*/0,
4262           /*shrink_axis_mask=*/0,
4263           /*expected_output_dims=*/{1, 1, 3, 1},
4264           /*expected_output=*/{1, 2, 3},
4265       },
4266       TestParams{
4267           /*input_dims=*/{1, 2, 3, 1},
4268           /*begin=*/{0, 1, 0, 0},
4269           /*end=*/{0, 0, 0, 0},
4270           /*strides=*/{1, 1, 1, 1},
4271           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4272           /*end_mask=*/get_mask({1, 1, 1, 1}),
4273           /*ellipsis_mask=*/0,
4274           /*new_axis_mask=*/0,
4275           /*shrink_axis_mask=*/0,
4276           /*expected_output_dims=*/{1, 1, 3, 1},
4277           /*expected_output=*/{4, 5, 6},
4278       },
4279       // 1D Crop, with reshape.
4280       TestParams{
4281           /*input_dims=*/{1, 6},
4282           /*begin=*/{0, 0},
4283           /*end=*/{0, 3},
4284           /*strides=*/{1, 1},
4285           /*begin_mask=*/get_mask({0, 0}),
4286           /*end_mask=*/get_mask({1, 0}),
4287           /*ellipsis_mask=*/0,
4288           /*new_axis_mask=*/0,
4289           /*shrink_axis_mask=*/0,
4290           /*expected_output_dims=*/{1, 3},
4291           /*expected_output=*/{1, 2, 3},
4292       },
4293       TestParams{
4294           /*input_dims=*/{1, 1, 6},
4295           /*begin=*/{0, 0, 2},
4296           /*end=*/{0, 0, 5},
4297           /*strides=*/{1, 1, 1},
4298           /*begin_mask=*/get_mask({0, 0, 0}),
4299           /*end_mask=*/get_mask({1, 1, 0}),
4300           /*ellipsis_mask=*/0,
4301           /*new_axis_mask=*/0,
4302           /*shrink_axis_mask=*/0,
4303           /*expected_output_dims=*/{1, 1, 3},
4304           /*expected_output=*/{3, 4, 5},
4305       },
4306       TestParams{
4307           /*input_dims=*/{1, 6, 1},
4308           /*begin=*/{0, 2, 0},
4309           /*end=*/{0, 5, 0},
4310           /*strides=*/{1, 1, 1},
4311           /*begin_mask=*/get_mask({0, 0, 0}),
4312           /*end_mask=*/get_mask({1, 0, 1}),
4313           /*ellipsis_mask=*/0,
4314           /*new_axis_mask=*/0,
4315           /*shrink_axis_mask=*/0,
4316           /*expected_output_dims=*/{1, 3, 1},
4317           /*expected_output=*/{3, 4, 5},
4318       },
4319       // Negative axis.
4320       TestParams{
4321           /*input_dims=*/{1, 6, 1},
4322           /*begin=*/{0, -6, 0},
4323           /*end=*/{0, -3, 0},
4324           /*strides=*/{1, 1, 1},
4325           /*begin_mask=*/get_mask({0, 0, 0}),
4326           /*end_mask=*/get_mask({1, 0, 1}),
4327           /*ellipsis_mask=*/0,
4328           /*new_axis_mask=*/0,
4329           /*shrink_axis_mask=*/0,
4330           /*expected_output_dims=*/{1, 3, 1},
4331           /*expected_output=*/{1, 2, 3},
4332       },
4333       TestParams{
4334           /*input_dims=*/{1, 6, 1},
4335           /*begin=*/{0, 0, 0},
4336           /*end=*/{0, -1, 0},
4337           /*strides=*/{1, 1, 1},
4338           /*begin_mask=*/get_mask({0, 0, 0}),
4339           /*end_mask=*/get_mask({1, 0, 1}),
4340           /*ellipsis_mask=*/0,
4341           /*new_axis_mask=*/0,
4342           /*shrink_axis_mask=*/0,
4343           /*expected_output_dims=*/{1, 5, 1},
4344           /*expected_output=*/{1, 2, 3, 4, 5},
4345       },
4346       // Clamp out-of-bounds begin and end.
4347       TestParams{
4348           /*input_dims=*/{1, 1, 2, 3},
4349           /*begin=*/{0, 0, -9999, -9},
4350           /*end=*/{0, 1, 1000, 4},
4351           /*strides=*/{1, 1, 1, 1},
4352           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4353           /*end_mask=*/get_mask({1, 0, 0, 0}),
4354           /*ellipsis_mask=*/0,
4355           /*new_axis_mask=*/0,
4356           /*shrink_axis_mask=*/0,
4357           /*expected_output_dims=*/{1, 1, 2, 3},
4358           /*expected_output=*/{1, 2, 3, 4, 5, 6},
4359       },
4360       // Strides
4361       TestParams{
4362           /*input_dims=*/{1, 6},
4363           /*begin=*/{0, 0},
4364           /*end=*/{0, 5},
4365           /*strides=*/{1, 2},
4366           /*begin_mask=*/get_mask({0, 0}),
4367           /*end_mask=*/get_mask({1, 0}),
4368           /*ellipsis_mask=*/0,
4369           /*new_axis_mask=*/0,
4370           /*shrink_axis_mask=*/0,
4371           /*expected_output_dims=*/{1, 3},
4372           /*expected_output=*/{1, 3, 5},
4373       },
4374       TestParams{
4375           /*input_dims=*/{1, 6},
4376           /*begin=*/{0, 0},
4377           /*end=*/{0, 6},
4378           /*strides=*/{1, 2},
4379           /*begin_mask=*/get_mask({0, 0}),
4380           /*end_mask=*/get_mask({1, 0}),
4381           /*ellipsis_mask=*/0,
4382           /*new_axis_mask=*/0,
4383           /*shrink_axis_mask=*/0,
4384           /*expected_output_dims=*/{1, 3},
4385           /*expected_output=*/{1, 3, 5},
4386       },
4387       TestParams{
4388           /*input_dims=*/{1, 6},
4389           /*begin=*/{0, 1},
4390           /*end=*/{0, 6},
4391           /*strides=*/{1, 2},
4392           /*begin_mask=*/get_mask({0, 0}),
4393           /*end_mask=*/get_mask({1, 0}),
4394           /*ellipsis_mask=*/0,
4395           /*new_axis_mask=*/0,
4396           /*shrink_axis_mask=*/0,
4397           /*expected_output_dims=*/{1, 3},
4398           /*expected_output=*/{2, 4, 6},
4399       },
4400       TestParams{
4401           /*input_dims=*/{1, 6},
4402           /*begin=*/{0, 2},
4403           /*end=*/{0, 6},
4404           /*strides=*/{1, 3},
4405           /*begin_mask=*/get_mask({0, 0}),
4406           /*end_mask=*/get_mask({1, 0}),
4407           /*ellipsis_mask=*/0,
4408           /*new_axis_mask=*/0,
4409           /*shrink_axis_mask=*/0,
4410           /*expected_output_dims=*/{1, 2},
4411           /*expected_output=*/{3, 6},
4412       },
4413       // Negative strides other than -1
4414       TestParams{
4415           /*input_dims=*/{1, 6},
4416           /*begin=*/{0, 5},
4417           /*end=*/{0, 0},
4418           /*strides=*/{1, -2},
4419           /*begin_mask=*/get_mask({0, 0}),
4420           /*end_mask=*/get_mask({1, 1}),
4421           /*ellipsis_mask=*/0,
4422           /*new_axis_mask=*/0,
4423           /*shrink_axis_mask=*/0,
4424           /*expected_output_dims=*/{1, 3},
4425           /*expected_output=*/{6, 4, 2},
4426       },
4427       TestParams{
4428           /*input_dims=*/{1, 6},
4429           /*begin=*/{0, 5},
4430           /*end=*/{0, 0},
4431           /*strides=*/{1, -2},
4432           /*begin_mask=*/get_mask({0, 0}),
4433           /*end_mask=*/get_mask({1, 0}),
4434           /*ellipsis_mask=*/0,
4435           /*new_axis_mask=*/0,
4436           /*shrink_axis_mask=*/0,
4437           /*expected_output_dims=*/{1, 3},
4438           /*expected_output=*/{6, 4, 2},
4439       },
4440       TestParams{
4441           /*input_dims=*/{1, 6},
4442           /*begin=*/{0, 5},
4443           /*end=*/{0, 1},
4444           /*strides=*/{1, -3},
4445           /*begin_mask=*/get_mask({0, 0}),
4446           /*end_mask=*/get_mask({1, 0}),
4447           /*ellipsis_mask=*/0,
4448           /*new_axis_mask=*/0,
4449           /*shrink_axis_mask=*/0,
4450           /*expected_output_dims=*/{1, 2},
4451           /*expected_output=*/{6, 3},
4452       },
4453       // ellipsis_mask
4454       TestParams{
4455           /*input_dims=*/{1, 1, 2, 3},
4456           /*begin=*/{0, 1},
4457           /*end=*/{0, 2},
4458           /*strides=*/{1, 1},
4459           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4460           /*end_mask=*/get_mask({0, 0, 0, 0}),
4461           /*ellipsis_mask=*/get_mask({1, 0, 0, 0}),
4462           /*new_axis_mask=*/0,
4463           /*shrink_axis_mask=*/0,
4464           /*expected_output_dims=*/{1, 1, 2, 1},
4465           /*expected_output=*/{2, 5},
4466       },
4467       TestParams{
4468           /*input_dims=*/{1, 1, 2, 3},
4469           /*begin=*/{0, 0, 1},
4470           /*end=*/{0, 0, 2},
4471           /*strides=*/{1, 1, 1},
4472           /*begin_mask=*/get_mask({1, 0, 0, 0}),
4473           /*end_mask=*/get_mask({1, 0, 0, 0}),
4474           /*ellipsis_mask=*/get_mask({0, 1, 0, 0}),
4475           /*new_axis_mask=*/0,
4476           /*shrink_axis_mask=*/0,
4477           /*expected_output_dims=*/{1, 1, 2, 1},
4478           /*expected_output=*/{2, 5},
4479       },
4480       TestParams{
4481           /*input_dims=*/{1, 1, 2, 3},
4482           /*begin=*/{0, 0, 0, 1},
4483           /*end=*/{0, 1, 2, 2},
4484           /*strides=*/{1, 1, 1, 1},
4485           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4486           /*end_mask=*/get_mask({0, 0, 0, 0}),
4487           /*ellipsis_mask=*/get_mask({1, 0, 0, 0}),
4488           /*new_axis_mask=*/0,
4489           /*shrink_axis_mask=*/0,
4490           /*expected_output_dims=*/{1, 1, 2, 1},
4491           /*expected_output=*/{2, 5},
4492       },
4493       TestParams{
4494           /*input_dims=*/{1, 1, 2, 3},
4495           /*begin=*/{0, 0, 0, 1},
4496           /*end=*/{1, 1, 2, 2},
4497           /*strides=*/{1, 1, 1, 1},
4498           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4499           /*end_mask=*/get_mask({0, 0, 0, 0}),
4500           /*ellipsis_mask=*/get_mask({0, 1, 0, 0}),
4501           /*new_axis_mask=*/0,
4502           /*shrink_axis_mask=*/0,
4503           /*expected_output_dims=*/{1, 1, 2, 1},
4504           /*expected_output=*/{2, 5},
4505       },
4506       TestParams{
4507           /*input_dims=*/{1, 1, 2, 3},
4508           /*begin=*/{0, 0, 0, 0, 1},
4509           /*end=*/{0, 1, 1, 2, 2},
4510           /*strides=*/{1, 1, 1, 1, 1},
4511           /*begin_mask=*/get_mask({0, 0, 0, 0}),
4512           /*end_mask=*/get_mask({0, 0, 0, 0}),
4513           /*ellipsis_mask=*/get_mask({1, 0, 0, 0}),
4514           /*new_axis_mask=*/0,
4515           /*shrink_axis_mask=*/0,
4516           /*expected_output_dims=*/{1, 1, 2, 1},
4517           /*expected_output=*/{2, 5},
4518       },
4519       // shrink_axis_mask
4520       TestParams{
4521           /*input_dims=*/{1, 1, 2, 3},
4522           /*begin=*/{0, 0, 0, 1},
4523           /*end=*/{0, 0, 0, 2},
4524           /*strides=*/{1, 1, 1, 1},
4525           /*begin_mask=*/get_mask({1, 1, 1, 0}),
4526           /*end_mask=*/get_mask({1, 1, 1, 0}),
4527           /*ellipsis_mask=*/0,
4528           /*new_axis_mask=*/0,
4529           /*shrink_axis_mask=*/get_mask({0, 0, 0, 1}),
4530           /*expected_output_dims=*/{1, 1, 2},
4531           /*expected_output=*/{2, 5},
4532       },
4533       TestParams{
4534           /*input_dims=*/{1, 1, 2, 3},
4535           /*begin=*/{0, 0, 0, 1},
4536           /*end=*/{0, 1, 2, 2},
4537           /*strides=*/{1, 1, 1, 1},
4538           /*begin_mask=*/get_mask({1, 0, 0, 0}),
4539           /*end_mask=*/get_mask({1, 0, 0, 0}),
4540           /*ellipsis_mask=*/0,
4541           /*new_axis_mask=*/0,
4542           /*shrink_axis_mask=*/get_mask({0, 1, 0, 1}),
4543           /*expected_output_dims=*/{1, 2},
4544           /*expected_output=*/{2, 5},
4545       },
4546       TestParams{
4547           /*input_dims=*/{1, 6},
4548           /*begin=*/{0, 0},
4549           /*end=*/{0, 1},
4550           /*strides=*/{1, 1},
4551           /*begin_mask=*/get_mask({1, 0}),
4552           /*end_mask=*/get_mask({1, 0}),
4553           /*ellipsis_mask=*/0,
4554           /*new_axis_mask=*/0,
4555           /*shrink_axis_mask=*/get_mask({0, 1}),
4556           /*expected_output_dims=*/{1},
4557           /*expected_output=*/{1},
4558       },
4559   };
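  // Worked example from the "Negative strides other than -1" cases above
  // (illustration): begin = {0, 5}, end = {0, 0}, strides = {1, -2} with
  // end_mask set for dim 1 walks dim 1 downward from index 5, picking
  // indices 5, 3, 1, i.e. values {6, 4, 2} of the input {1, 2, 3, 4, 5, 6}.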
4560 
4561   for (auto p : params) {
4562     if (trt_mode_ == TrtTestMode::kDynamicShape ||
4563         (trt_mode_ == TrtTestMode::kExplicitBatch &&
4564          !HasStaticShape(p.partial_input_dims))) {
4565       p.conversion_status = errors::Unimplemented(
4566           "Strided slice op not implemented for dynamic shape input");
4567     }
4568     Reset();
4569     NodeDef node_def = get_strided_slice_nodedef(
4570         tf_type_, p.begin_mask, p.end_mask, p.ellipsis_mask, p.new_axis_mask,
4571         p.shrink_axis_mask);
4572 
4573     VLOG(2) << "Preparing test case with dims " << DebugString(p.input_dims);
4574     if (p.partial_input_dims.empty()) {
4575       AddTestTensor("input", p.input_dims, ok_input);
4576     } else {
4577       AddTestTensor("input", p.input_dims, tf_type_, ok_input,
4578                     p.partial_input_dims);
4579     }
4580     VLOG(2) << "Adding weights begin: " << DebugString(p.begin)
4581             << ", end: " << DebugString(p.end)
4582             << ", strides: " << DebugString(p.strides);
4583     AddTestWeights<int32>("begin", {static_cast<int>(p.begin.size())}, p.begin);
4584     AddTestWeights<int32>("end", {static_cast<int>(p.end.size())}, p.end);
4585     AddTestWeights<int32>("strides", {static_cast<int>(p.strides.size())},
4586                           p.strides);
4587 
4588     TestOpConverter("my_strided_slice", node_def, p.expected_output_dims,
4589                     p.conversion_status, p.runtime_status,
4590                     ElementsAreArray(p.expected_output));
4591   }
4592 }
4593 
4594 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertSlice) {
4595   // Get nodedef for Slice layer.
4596   auto get_slice_nodedef = [](DataType tf_type) -> NodeDef {
4597     Scope s = Scope::NewRootScope();
4598     auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
4599     auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32);
4600     auto size = ops::Placeholder(s.WithOpName("size"), DT_INT32);
4601     auto slice = ops::Slice(s.WithOpName("my_slice"), input, begin, size);
4602     return slice.operation.node()->def();
4603   };
4604 
4605   struct TestParams {
4606     std::vector<int> input_dims;
4607     std::vector<int> begin;
4608     std::vector<int> size;
4609     std::vector<int> expected_output_dims;
4610     std::vector<int> expected_output;
4611     Status conversion_status;
4612     Status runtime_status;
4613   };
4614 
4615   Status conv_dynamic =
4616       trt_mode_ == TrtTestMode::kDynamicShape
4617           ? errors::Unimplemented(
4618                 "Strided slice op not implemented for dynamic shape input")
4619           : Status::OK();
4620   Status conv_dynamic2 =
4621       trt_mode_ == TrtTestMode::kDynamicShape
4622           ? errors::Unimplemented(
4623                 "Input dims must be defined for size = -1, at my_slice")
4624           : Status::OK();
4625   std::vector<TestParams> params = {
4626       // Begin is below bounds, should fail.
4627       TestParams{
4628           {1, 1, 2, 3},
4629           {0, 0, -1, 0},
4630           {1, 1, 2, 3},
4631           {},
4632           {},
4633           trt_mode_ == TrtTestMode::kDynamicShape
4634               ? conv_dynamic
4635               : errors::InvalidArgument("\"begin\" for dimension 2 in Slice "
4636                                         "is out of range, at my_slice")},
4637       // Batch dimension is modified, should fail in implicit batch mode.
4638       TestParams{
4639           {2, 1, 1, 3},
4640           {0, 0, 0, 0},
4641           {1, 1, 1, 3},
4642           {1, 1, 1, 3},
4643           {1, 2, 3},
4644           trt_mode_ == TrtTestMode::kImplicitBatch
4645               ? errors::Unimplemented("TensorRT does not allow modifications"
4646                                       " to the batch dimension, at my_slice")
4647               : Status::OK()},
4648       // Dynamic batch size but using size[0] of -1, ok.
4649       TestParams{{1, 1, 2, 3},
4650                  {0, 0, 0, 0},
4651                  {-1, 1, 2, 2},
4652                  {1, 1, 2, 2},
4653                  {1, 2, 4, 5},
4654                  conv_dynamic2},
4655       // Otherwise OK, but the converter fails in dynamic shape mode.
4656       TestParams{{1, 1, 2, 3},
4657                  {0, 0, 0, 0},
4658                  {-1, -1, -1, -1},
4659                  {1, 1, 2, 3},
4660                  {1, 2, 3, 4, 5, 6},
4661                  conv_dynamic2},
4662       TestParams{{1, 1, 2, 3},
4663                  {0, 0, 0, 0},
4664                  {1, 1, 2, 3},
4665                  {1, 1, 2, 3},
4666                  {1, 2, 3, 4, 5, 6}},
4667       TestParams{{1, 1, 2, 3},
4668                  {0, 0, 0, 0},
4669                  {1, -1, 2, 2},
4670                  {1, 1, 2, 2},
4671                  {1, 2, 4, 5},
4672                  conv_dynamic2},
4673       TestParams{{1, 6}, {0, 1}, {1, 5}, {1, 5}, {2, 3, 4, 5, 6}},
4674       TestParams{{1, 6}, {0, 1}, {-1, 3}, {1, 3}, {2, 3, 4}, conv_dynamic2},
4675       //
4676       // In dynamic shape mode we do not know the input shape during
4677       // conversion, therefore we cannot check for out-of-bounds access.
4678       TestParams{
4679           {1, 1, 2, 3},
4680           {0, 0, 3, 0},
4681           {1, 1, 2, 3},
4682           {},
4683           {},
4684           trt_mode_ == TrtTestMode::kDynamicShape
4685               ? Status::OK()
4686               : errors::InvalidArgument("\"begin\" for dimension 2 in Slice "
4687                                         "is out of range, at my_slice"),
4688           errors::Internal("Internal: Failed to build TensorRT engine")},
4689       TestParams{{1, 1, 2, 3},
4690                  {0, 0, 0, 0},
4691                  {1, 1, 2, -2},
4692                  {},
4693                  {},
4694                  errors::InvalidArgument("Invalid size value at my_slice")},
4695       TestParams{
4696           {1, 1, 2, 3},
4697           {0, 0, 0, 0},
4698           {1, 1, 3, 2},
4699           {},
4700           {},
4701           trt_mode_ == TrtTestMode::kDynamicShape
4702               ? Status::OK()
4703               : errors::InvalidArgument("\"begin\" + \"size\" for dimension "
4704                                         "2 in Slice is out of range, at "
4705                                         "my_slice"),
4706           errors::Internal("Internal: Failed to build TensorRT engine")},
4707   };
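  // Note on size = -1 (illustration): -1 means "slice to the end of the
  // dimension", so begin = {0, 1} with size = {-1, 3} on a {1, 6} input
  // selects {2, 3, 4}; in dynamic shape mode that dimension size is unknown
  // at conversion time, hence the conv_dynamic2 status above.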
4708 
4709   for (auto p : params) {
4710     Reset();
4711     NodeDef node_def = get_slice_nodedef(tf_type_);
4712     AddTestTensor("input", p.input_dims, {1, 2, 3, 4, 5, 6});
4713     AddTestWeights<int32>("begin", {static_cast<int>(p.begin.size())}, p.begin);
4714     AddTestWeights<int32>("size", {static_cast<int>(p.size.size())}, p.size);
4715 
4716     TestOpConverter("my_slice", node_def, p.expected_output_dims,
4717                     p.conversion_status, p.runtime_status,
4718                     ElementsAreArray(p.expected_output));
4719   }
4720 }
4721 
4722 TEST_P(OpConverter_FP32_Test, ConvertConv2D) {
4723   // Get nodedef for Conv2D layer.
4724   DataType tf_type = tf_type_;
4725   auto get_conv2d_nodedef =
4726       [tf_type](std::vector<int> strides = {1, 1, 1, 1},
4727                 string padding = "SAME", string data_format = "NCHW",
4728                 std::vector<int> dilations = {1, 1, 1, 1}) -> NodeDef {
4729     Scope s = Scope::NewRootScope();
4730     auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
4731     auto filter = ops::Placeholder(s.WithOpName("weights"), tf_type);
4732     ops::Conv2D::Attrs attrs =
4733         ops::Conv2D::Attrs().DataFormat(data_format).Dilations(dilations);
4734     auto conv2d = ops::Conv2D(s.WithOpName("my_conv2d"), input, filter, strides,
4735                               padding, attrs);
4736     return conv2d.operation.node()->def();
4737   };
4738 
4739   {
4740     // Input is weights, should fail.
4741     Reset();
4742     NodeDef node_def = get_conv2d_nodedef();
4743     AddTestWeights<float>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
4744     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4745     RunValidationAndConversion(
4746         node_def, error::UNIMPLEMENTED,
4747         "The input \"input\" for Conv2D must be a tensor, at my_conv2d");
4748   }
4749   {
4750     // Filter is tensor, should fail.
4751     Reset();
4752     NodeDef node_def = get_conv2d_nodedef();
4753     AddTestTensor("input", {3, 1, 2, 1});
4754     AddTestTensor("weights", {3, 3, 1, 1});
4755     RunValidationAndConversion(
4756         node_def, error::UNIMPLEMENTED,
4757         "The input \"filter\" for Conv2D must be a constant, at my_conv2d");
4758   }
4759   {
4760     // Filter is not 4D, should fail.
4761     Reset();
4762     NodeDef node_def = get_conv2d_nodedef();
4763     AddTestTensor("input", {1, 1, 2, 3});
4764     AddTestWeights<float>("weights", {3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4765     RunValidationAndConversion(
4766         node_def, error::INVALID_ARGUMENT,
4767         "Conv2D expects kernel of dimension 4, at my_conv2d");
4768   }
4769   {
4770     // Dilations is not 4D, should fail.
4771     Reset();
4772     NodeDef node_def =
4773         get_conv2d_nodedef({1, 1, 1, 1}, "SAME", "NCHW", {1, 1, 1});
4774     AddTestTensor("input", {1, 1, 2, 3});
4775     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4776     RunValidationAndConversion(
4777         node_def, error::INVALID_ARGUMENT,
4778         "Convolution dilations field must specify 4 dimensions, at my_conv2d");
4779   }
4780   {
4781     // Dilation value is not 1 for channel, should fail.
4782     Reset();
4783     NodeDef node_def =
4784         get_conv2d_nodedef({1, 1, 1, 1}, "SAME", "NCHW", {1, 2, 1, 1});
4785     AddTestTensor("input", {1, 1, 2, 3});
4786     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4787     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4788                                "Dilation rate must be 1 for batch and channel "
4789                                "dimensions, at my_conv2d");
4790   }
4791   {
4792     // Dilation value is not 1 for channel (NHWC), should fail.
4793     Reset();
4794     NodeDef node_def =
4795         get_conv2d_nodedef({1, 1, 1, 1}, "SAME", "NHWC", {1, 1, 1, 2});
4796     AddTestTensor("input", {1, 2, 3, 1});
4797     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4798     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4799                                "Dilation rate must be 1 for batch and channel "
4800                                "dimensions, at my_conv2d");
4801   }
4802   {
4803     // Strides is not 4D, should fail.
4804     Reset();
4805     NodeDef node_def =
4806         get_conv2d_nodedef({1, 1, 1}, "SAME", "NCHW", {1, 1, 1, 1});
4807     AddTestTensor("input", {1, 1, 2, 3});
4808     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4809     RunValidationAndConversion(
4810         node_def, error::INVALID_ARGUMENT,
4811         "Convolution strides field must specify 4 dimensions, at my_conv2d");
4812   }
4813   {
4814     // Stride value is not 1 for channel, should fail.
4815     Reset();
4816     NodeDef node_def =
4817         get_conv2d_nodedef({1, 2, 1, 1}, "SAME", "NCHW", {1, 1, 1, 1});
4818     AddTestTensor("input", {1, 1, 2, 3});
4819     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4820     RunValidationAndConversion(
4821         node_def, error::UNIMPLEMENTED,
4822         "Stride must be 1 for batch and channel dimensions, at my_conv2d");
4823   }
4824   if (trt_mode_ == TrtTestMode::kDynamicShape) {
4825     Reset();
4826     NodeDef node_def = get_conv2d_nodedef();
4827     // Channel dim unknown, should fail.
4828     nvinfer1::DataType trt_type;
4829     TF_ASSERT_OK(TfTypeToTrtType(tf_type_, &trt_type));
4830     AddTestTensorWithTFDims("input", {-1, -1, -1, -1}, trt_type);
4831     AddTestWeights<float>("weights", {1, 2, 1, 1}, {-1, 1});
4832     RunValidationAndConversion(
4833         node_def, error::INVALID_ARGUMENT,
4834         "Channel dimension must be static, at my_conv2d");
4835   }
4836 
4837   struct TestParams {
4838     std::vector<int> input_dims;
4839     std::vector<float> input;
4840     std::vector<int> filter_dims;
4841     std::vector<float> filter;
4842     std::vector<int> strides;
4843     string padding;
4844     string data_format;
4845     std::vector<int> dilations;
4846     std::vector<int> expected_output_dims;
4847     std::vector<float> expected_output;
4848   };
4849 
4850   // Ok.
4851   std::vector<TestParams> ok_params = {
4852       // Basic
4853       TestParams{/*input_dims=*/{1, 1, 2, 3},
4854                  /*input=*/{0, 1, 2, 3, 3, 4},
4855                  /*filter_dims=*/{1, 2, 1, 1},
4856                  /*filter=*/{-1, 1},
4857                  /*strides=*/{1, 1, 1, 1},
4858                  /*padding=*/"VALID",
4859                  /*data_format=*/"NCHW",
4860                  /*dilations=*/{1, 1, 1, 1},
4861                  /*expected_output_dims=*/{1, 1, 2, 2},
4862                  /*expected_output=*/{1, 1, 0, 1}},
4863       // SAME padding (Asymmetric)
4864       TestParams{/*input_dims=*/{1, 1, 2, 3},
4865                  /*input=*/{0, 1, 2, 3, 3, 4},
4866                  /*filter_dims=*/{1, 2, 1, 1},
4867                  /*filter=*/{-1, 1},
4868                  /*strides=*/{1, 1, 1, 1},
4869                  /*padding=*/"SAME",
4870                  /*data_format=*/"NCHW",
4871                  /*dilations=*/{1, 1, 1, 1},
4872                  /*expected_output_dims=*/{1, 1, 2, 3},
4873                  /*expected_output=*/{1, 1, -2, 0, 1, -4}},
4874       // SAME padding (Symmetric)
4875       TestParams{/*input_dims=*/{1, 1, 2, 3},
4876                  /*input=*/{0, 1, 2, 3, 3, 4},
4877                  /*filter_dims=*/{1, 3, 1, 1},
4878                  /*filter=*/{-1, 0, 1},
4879                  /*strides=*/{1, 1, 1, 1},
4880                  /*padding=*/"SAME",
4881                  /*data_format=*/"NCHW",
4882                  /*dilations=*/{1, 1, 1, 1},
4883                  /*expected_output_dims=*/{1, 1, 2, 3},
4884                  /*expected_output=*/{1, 2, -1, 3, 1, -3}},
4885       // NHWC
4886       TestParams{/*input_dims=*/{1, 2, 3, 1},
4887                  /*input=*/{0, 1, 2, 3, 3, 4},
4888                  /*filter_dims=*/{1, 2, 1, 1},
4889                  /*filter=*/{-1, 1},
4890                  /*strides=*/{1, 1, 1, 1},
4891                  /*padding=*/"VALID",
4892                  /*data_format=*/"NHWC",
4893                  /*dilations=*/{1, 1, 1, 1},
4894                  /*expected_output_dims=*/{1, 2, 2, 1},
4895                  /*expected_output=*/{1, 1, 0, 1}},
4896       // Dilated
4897       TestParams{/*input_dims=*/{1, 1, 2, 3},
4898                  /*input=*/{0, 1, 2, 3, 3, 4},
4899                  /*filter_dims=*/{1, 2, 1, 1},
4900                  /*filter=*/{-1, 1},
4901                  /*strides=*/{1, 1, 1, 1},
4902                  /*padding=*/"VALID",
4903                  /*data_format=*/"NCHW",
4904                  /*dilations=*/{1, 1, 1, 2},
4905                  /*expected_output_dims=*/{1, 1, 2, 1},
4906                  /*expected_output=*/{2, 1}},
4907       // Strided
4908       TestParams{/*input_dims=*/{1, 1, 2, 4},
4909                  /*input=*/{0, 1, 2, 2, 3, 4, 4, 7},
4910                  /*filter_dims=*/{1, 2, 1, 1},
4911                  /*filter=*/{-1, 1},
4912                  /*strides=*/{1, 1, 1, 2},
4913                  /*padding=*/"VALID",
4914                  /*data_format=*/"NCHW",
4915                  /*dilations=*/{1, 1, 1, 1},
4916                  /*expected_output_dims=*/{1, 1, 2, 2},
4917                  /*expected_output=*/{1, 0, 1, 3}},
4918   };
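  // For reference, the "Basic" case above can be checked by hand: with a 1x2
  // filter {-1, 1} and VALID padding, output[h][w] = input[h][w+1] -
  // input[h][w]. Row {0, 1, 2} gives {1, 1} and row {3, 3, 4} gives {0, 1},
  // matching expected_output = {1, 1, 0, 1}.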
4919 
4920   for (int i = 0; i < ok_params.size(); i++) {
4921     Reset();
4922     NodeDef node_def =
4923         get_conv2d_nodedef(ok_params[i].strides, ok_params[i].padding,
4924                            ok_params[i].data_format, ok_params[i].dilations);
4925     std::vector<int> partial_input_shape;
4926     if (trt_mode_ == TrtTestMode::kDynamicShape) {
4927       // The channel dim cannot have unknown size; fix it to the known value.
4928       partial_input_shape.resize(ok_params[i].input_dims.size(), -1);
4929       int channel_id = (ok_params[i].data_format == "NCHW") ? 1 : 3;
4930       partial_input_shape[channel_id] = ok_params[i].input_dims[channel_id];
4931     }
4932 
4933     AddTestTensor("input", ok_params[i].input_dims, tf_type_,
4934                   ok_params[i].input, partial_input_shape);
4935     AddTestWeights<float>("weights", ok_params[i].filter_dims,
4936                           ok_params[i].filter);
4937 
4938     TestOpConverter("my_conv2d", node_def, ok_params[i].expected_output_dims,
4939                     Status::OK(), Status::OK(),
4940                     ElementsAreArray(ok_params[i].expected_output));
4941   }
4942 }
4943 
4944 TEST_P(OpConverter_FP32_Test, ConvertConv2DBackpropInput) {
4945   // Get nodedef for Conv2DBackpropInput layer.
4946   auto get_conv2d_backprop_input_nodedef =
4947       [](DataType tf_type, std::vector<int> strides = {1, 1, 1, 1},
4948          string padding = "SAME", string data_format = "NCHW",
4949          std::vector<int> dilations = {1, 1, 1, 1}) -> NodeDef {
4950     Scope s = Scope::NewRootScope();
4951     auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
4952     auto filter = ops::Placeholder(s.WithOpName("weights"), tf_type);
4953     auto input_sizes = ops::Placeholder(s.WithOpName("input_sizes"), DT_INT32);
4954     ops::Conv2DBackpropInput::Attrs attrs = ops::Conv2DBackpropInput::Attrs()
4955                                                 .DataFormat(data_format)
4956                                                 .Dilations(dilations);
4957     auto conv2d = ops::Conv2DBackpropInput(
4958         s.WithOpName("my_conv2d_backprop_input"), input_sizes, filter, input,
4959         strides, padding, attrs);
4960     return conv2d.operation.node()->def();
4961   };
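  // Conv2DBackpropInput is the op behind conv2d_transpose; its extra
  // "input_sizes" input specifies the shape of the transposed-convolution
  // output. A minimal usage sketch of the helper above (illustrative only):
  //   NodeDef def = get_conv2d_backprop_input_nodedef(
  //       DT_FLOAT, /*strides=*/{1, 1, 1, 2}, "SAME", "NCHW");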
4962 
4963   struct TestParams {
4964     std::vector<int> input_dims;
4965     std::vector<float> input;
4966     std::vector<int> filter_dims;
4967     std::vector<float> filter;
4968     std::vector<int> strides;
4969     string padding;
4970     string data_format;
4971     std::vector<int> dilations;
4972     std::vector<int> expected_output_dims;
4973     std::vector<float> expected_output;
4974     Status conversion_status;
4975     bool unknown_channel;
4976   };
4977 
4978   // Test cases; some are expected to fail conversion.
4979   std::vector<TestParams> params = {
4980       // Transpose Strided
4981       TestParams{/*input_dims=*/{1, 1, 2, 2},
4982                  /*input=*/{0, 1, 2, 3},
4983                  /*filter_dims=*/{1, 2, 1, 1},
4984                  /*filter=*/{-1, 1},
4985                  /*strides=*/{1, 1, 1, 2},
4986                  /*padding=*/"SAME",
4987                  /*data_format=*/"NCHW",
4988                  /*dilations=*/{1, 1, 1, 1},
4989                  /*expected_output_dims=*/{1, 1, 2, 4},
4990                  /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3}},
4991       // Transpose Strided NHWC
4992       TestParams{/*input_dims=*/{1, 2, 2, 1},
4993                  /*input=*/{0, 1, 2, 3},
4994                  /*filter_dims=*/{1, 2, 1, 1},
4995                  /*filter=*/{-1, 1},
4996                  /*strides=*/{1, 1, 2, 1},
4997                  /*padding=*/"SAME",
4998                  /*data_format=*/"NHWC",
4999                  /*dilations=*/{1, 1, 1, 1},
5000                  /*expected_output_dims=*/{1, 2, 4, 1},
5001                  /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3}},
5002       // Transpose Strided NHWC with VALID padding
5003       TestParams{/*input_dims=*/{1, 3, 1, 1},
5004                  /*input=*/{0, 1, 2},
5005                  /*filter_dims=*/{2, 1, 1, 1},
5006                  /*filter=*/{-1, 1},
5007                  /*strides=*/{1, 2, 1, 1},
5008                  /*padding=*/"VALID",
5009                  /*data_format=*/"NHWC",
5010                  /*dilations=*/{1, 1, 1, 1},
5011                  /*expected_output_dims=*/{1, 7, 1, 1},
5012                  /*expected_output=*/{0, 0, -1, 1, -2, 2, 0}},
5013       TestParams{/*input_dims=*/{1, 1, 2, 2},
5014                  /*input=*/{0, 1, 2, 3},
5015                  /*filter_dims=*/{1, 2, 1, 1},
5016                  /*filter=*/{-1, 1},
5017                  /*strides=*/{1, 1, 1, 2},
5018                  /*padding=*/"EXPLICIT",
5019                  /*data_format=*/"NCHW",
5020                  /*dilations=*/{1, 1, 1, 1},
5021                  /*expected_output_dims=*/{1, 1, 2, 4},
5022                  /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3},
5023                  /*conversion_status=*/errors::Unimplemented("EXPLICIT padding type not "
5024                                        "implemented, only VALID and SAME are"
5025                                        " supported")},
5026       // Dilation + Conv2DBackpropInput, should fail.
5027       TestParams{/*input_dims=*/{1, 1, 2, 2},
5028                  /*input=*/{0, 1, 2, 3},
5029                  /*filter_dims=*/{1, 2, 1, 1},
5030                  /*filter=*/{-1, 1},
5031                  /*strides=*/{1, 1, 1, 1},
5032                  /*padding=*/"SAME",
5033                  /*data_format=*/"NCHW",
5034                  /*dilations=*/{1, 1, 1, 2},
5035                  /*expected_output_dims=*/{1, 1, 2, 2},
5036                  /*expected_output=*/{},
5037                  /*conversion_status=*/errors::Unimplemented("Dilation with Conv2DBackpropInput "
5038                                        "(conv2d_transpose) is not supported, "
5039                                        "at my_conv2d_backprop_input")},
5040   };
5041   if (trt_mode_ == TrtTestMode::kDynamicShape) {
5042     params.push_back(TestParams{
5043         /*input_dims=*/{1, 1, 2, 2},
5044         /*input=*/{0, 1, 2, 3},
5045         /*filter_dims=*/{1, 2, 1, 1},
5046         /*filter=*/{-1, 1},
5047         /*strides=*/{1, 1, 1, 2},
5048         /*padding=*/"SAME",
5049         /*data_format=*/"NCHW",
5050         /*dilations=*/{1, 1, 1, 1},
5051         /*expected_output_dims=*/{1, 1, 2, 4},
5052         /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3},
5053         /*conversion_status=*/errors::InvalidArgument(
5054             "Channel dimension must be static, at my_conv2d_backprop_input"),
5055         /*unknown_channel=*/true});
5056   }
5057   for (auto p : params) {
5058     for (int input_sizes_length : {2, 4}) {
5059       Reset();
5060       NodeDef node_def = get_conv2d_backprop_input_nodedef(
5061           tf_type_, p.strides, p.padding, p.data_format, p.dilations);
5062 
5063       std::vector<int> partial_input_shape;
5064       if (trt_mode_ == TrtTestMode::kDynamicShape && !p.unknown_channel) {
5065         // In dynamic shape mode, AddTestTensor will replace the input tensor
5066         // dims with -1, unless we give a non-empty partial_input_shape.
5067         // Having -1 channel dimension is invalid for TRT. We have a single
5068         // test to check the converter in that case (p.unknown_channel==true).
5069         // For all the other tests, we define here an input with known channel
5070         // dimension.
5071         partial_input_shape.resize(p.input_dims.size(), -1);
5072         int channel_id = (p.data_format == "NCHW") ? 1 : 3;
5073         partial_input_shape[channel_id] = p.input_dims[channel_id];
5074       }
5075 
5076       AddTestTensor("input", p.input_dims, tf_type_, p.input,
5077                     partial_input_shape);
5078       AddTestWeights<float>("weights", p.filter_dims, p.filter, tf_type_);
5079 
5080       if (input_sizes_length == 4) {
5081         AddTestWeights<int>("input_sizes", {4}, p.expected_output_dims);
5082       } else {
5083         std::vector<int> tf_input_sizes(2);
5084         // Remove the channel and batch dimensions.
5085         if (p.data_format == "NHWC") {
5086           std::copy(p.expected_output_dims.begin() + 1,
5087                     p.expected_output_dims.end() - 1, tf_input_sizes.begin());
5088         } else {
5089           std::copy(p.expected_output_dims.begin() + 2,
5090                     p.expected_output_dims.end(), tf_input_sizes.begin());
5091         }
5092         QCHECK_EQ(2, tf_input_sizes.size());
5093         AddTestWeights<int>("input_sizes", {2}, tf_input_sizes);
5094       }
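      // For example, with expected_output_dims = {1, 1, 2, 4} in NCHW, the
      // 2-element form is input_sizes = {2, 4}, i.e. only the spatial H and W
      // dimensions.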
5095       Status conv_status =
5096           trt_mode_ == TrtTestMode::kDynamicShape
5097               ? errors::Unimplemented(
5098                     "Conv2dBackpropInput does not support input with unknown "
5099                     "shape, at my_conv2d_backprop_input")
5100               : p.conversion_status;
5101 
5102       TestOpConverter("my_conv2d_backprop_input", node_def,
5103                       p.expected_output_dims, conv_status, Status::OK(),
5104                       ElementsAreArray(p.expected_output));
5105     }
5106   }
5107 }
5108 
5109 // Get the NodeDef for Conv3D or Conv3DBackpropInputV2.
5110 NodeDef GetConv3DNodeDef(std::vector<int> strides = {1, 1, 1, 1, 1},
5111                          string padding = "SAME", string data_format = "NCDHW",
5112                          std::vector<int> dilations = {1, 1, 1, 1, 1},
5113                          bool is_conv3d_backprop_input = false) {
5114   Scope s = Scope::NewRootScope();
5115   auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
5116   auto filter = ops::Placeholder(s.WithOpName("weights"), DT_FLOAT);
5117 
5118   if (is_conv3d_backprop_input) {
5119     auto input_sizes = ops::Placeholder(s.WithOpName("input_sizes"), DT_INT32);
5120     ops::Conv3DBackpropInputV2::Attrs attrs =
5121         ops::Conv3DBackpropInputV2::Attrs()
5122             .DataFormat(data_format)
5123             .Dilations(dilations);
5124     auto conv3d =
5125         ops::Conv3DBackpropInputV2(s.WithOpName("my_conv3d"), input_sizes,
5126                                    filter, input, strides, padding, attrs);
5127     return conv3d.operation.node()->def();
5128   } else {
5129     ops::Conv3D::Attrs attrs =
5130         ops::Conv3D::Attrs().DataFormat(data_format).Dilations(dilations);
5131     auto conv3d = ops::Conv3D(s.WithOpName("my_conv3d"), input, filter, strides,
5132                               padding, attrs);
5133     return conv3d.operation.node()->def();
5134   }
5135 }
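// A minimal usage sketch of GetConv3DNodeDef (illustrative only):
//   NodeDef conv = GetConv3DNodeDef();  // default NCDHW Conv3D, SAME padding
//   NodeDef deconv = GetConv3DNodeDef({1, 1, 2, 2, 2}, "VALID", "NCDHW",
//                                     {1, 1, 1, 1, 1},
//                                     /*is_conv3d_backprop_input=*/true);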
5136 
5137 struct Conv3DTestParams {
5138   std::vector<int> input_dims;
5139   std::vector<float> input;
5140   std::vector<int> filter_dims;
5141   std::vector<float> filter;
5142   std::vector<int> strides;
5143   string padding;
5144   string data_format;
5145   std::vector<int> dilations;
5146   bool is_conv3d_backprop;
5147   std::vector<int> expected_output_dims;
5148   std::vector<float> expected_output;
5149   bool allow_dynamic_channel_dim;
5150   Status validation_status;
5151 };
5152 
5153 void TestConv3D(ParameterizedOpConverterTestBase* test, Conv3DTestParams& p) {
5154   test->Reset();
5155   NodeDef node_def = GetConv3DNodeDef(p.strides, p.padding, p.data_format,
5156                                       p.dilations, p.is_conv3d_backprop);
5157 
5158   std::vector<int> partial_input_shape;
5159   if (!p.allow_dynamic_channel_dim &&
5160       test->get_trt_mode() == TrtTestMode::kDynamicShape) {
5161     // The channel dim cannot have unknown size; fix it to the known value.
5162     partial_input_shape.resize(p.input_dims.size(), -1);
5163     int channel_id = (p.data_format == "NCDHW") ? 1 : 4;
5164     partial_input_shape[channel_id] = p.input_dims[channel_id];
5165   }
5166 
5167   test->AddTestTensor("input", p.input_dims, test->get_tf_type(), p.input,
5168                       partial_input_shape);
5169   test->AddTestWeights<float>("weights", p.filter_dims, p.filter);
5170 
5171   if (p.is_conv3d_backprop) {
5172     test->AddTestWeights<float>("input_sizes",
5173                                 {static_cast<int>(p.expected_output.size())},
5174                                 p.expected_output);
5175   }
5176 
5177   test->TestOpConverter("my_conv3d", node_def, p.expected_output_dims,
5178                         /*expected_conversion_status=*/p.validation_status,
5179                         /*expected_runtime_status=*/Status::OK(),
5180                         /*matcher=*/ElementsAreArray(p.expected_output),
5181                         /*out_tf_types=*/{test->get_tf_type()});
5182 }
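// TestConv3D is invoked by the test body below roughly as (illustrative):
//   Conv3DTestParams p{/*...fill in dims, filter, strides, etc...*/};
//   TestConv3D(this, p);  // resets the converter, builds and runs the node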
5183 
5184 TEST_P(OpConverter_FP32_FP16_Test, ConvertConv3D) {
5185   {
5186     // Input is weights, should fail.
5187     Reset();
5188     NodeDef node_def = GetConv3DNodeDef();
5189 
5190     AddTestWeights<float>("input", {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6});
5191     AddTestWeights<float>("weights", {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
5192     RunValidationAndConversion(
5193         node_def, error::UNIMPLEMENTED,
5194         "The input \"input\" for Conv3D must be a tensor, at my_conv3d");
5195   }
5196   {
5197     // Filter is tensor, should fail.
5198     Reset();
5199     NodeDef node_def = GetConv3DNodeDef();
5200     AddTestTensor("input", {1, 1, 2, 3}, tf_type_, InitTestVector<float>(6));
5201     AddTestTensor("weights", {1, 3, 3, 1}, tf_type_, InitTestVector<float>(9));
5202     RunValidationAndConversion(
5203         node_def, error::UNIMPLEMENTED,
5204         "The input \"filter\" for Conv3D must be a constant, at my_conv3d");
5205   }
5206   {
5207     // Filter is not 5D, should fail.
5208     Reset();
5209     NodeDef node_def = GetConv3DNodeDef();
5210     AddTestTensor("input", {1, 1, 2, 3}, tf_type_, InitTestVector<float>(6));
5211     AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
5212     RunValidationAndConversion(
5213         node_def, error::INVALID_ARGUMENT,
5214         "Conv3D expects kernel of dimension 5, at my_conv3d");
5215   }
5216   {
5217     // Dilations is not 5D, should fail.
5218     Reset();
5219     NodeDef node_def =
5220         GetConv3DNodeDef({1, 1, 1, 1, 1}, "SAME", "NCDHW", {1, 1, 1, 1});
5221     AddTestTensor("input", {1, 1, 2, 3}, tf_type_, InitTestVector<float>(6));
5222     AddTestWeights<float>(
5223         "weights", {3, 3, 1, 1, 1},
5224         {1, 2, 3, 4, 5, 6, 7, 8, 9});  // Dimensions, then values
5225     RunValidationAndConversion(
5226         node_def, error::INVALID_ARGUMENT,
5227         "Convolution dilations field must specify 5 dimensions, at my_conv3d");
5228   }
5229   {
5230     // Dilation value is not 1 for channel, should fail.
5231     Reset();
5232     NodeDef node_def =
5233         GetConv3DNodeDef({1, 1, 1, 1, 1}, "SAME", "NCDHW", {1, 2, 1, 1, 1});
5234     AddTestTensor("input", {1, 1, 2, 3}, tf_type_, InitTestVector<float>(6));
5235     AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
5236                           {1, 2, 3, 4, 5, 6, 7, 8, 9});
5237     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
5238                                "Dilation rate must be 1 for batch and channel "
5239                                "dimensions, at my_conv3d");
5240   }
5241   {
5242     // Dilation value is not 1 for channel (NDHWC), should fail.
5243     Reset();
5244     NodeDef node_def =
5245         GetConv3DNodeDef({1, 1, 1, 1, 1}, "SAME", "NDHWC", {1, 1, 1, 1, 2});
5246     AddTestTensor("input", {1, 2, 3, 1}, tf_type_, InitTestVector<float>(6));
5247     AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
5248                           {1, 2, 3, 4, 5, 6, 7, 8, 9});
5249     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
5250                                "Dilation rate must be 1 for batch and channel "
5251                                "dimensions, at my_conv3d");
5252   }
5253   {
5254     // Dilation + Conv3DBackpropInputV2, should fail.
5255     Reset();
5256     NodeDef node_def = GetConv3DNodeDef({1, 1, 1, 1, 1}, "SAME", "NDHWC",
5257                                         {1, 1, 2, 1, 1}, true);
5258     AddTestTensor("input", {1, 2, 3, 1}, tf_type_, InitTestVector<float>(6));
5259     AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
5260                           {1, 2, 3, 4, 5, 6, 7, 8, 9});
5261     AddTestWeights<int>("input_sizes", {4}, {1, 2, 3, 1});
5262     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
5263                                "Dilation with Conv3DBackpropInputV2 "
5264                                "(conv3d_transpose) is not supported, "
5265                                "at my_conv3d");
5266   }
5267   {
5268     // Asymmetric padding + Conv3DBackpropInputV2, should fail.
5269     Reset();
5270     NodeDef node_def = GetConv3DNodeDef({1, 1, 1, 1, 1}, "SAME", "NDHWC",
5271                                         {1, 1, 1, 1, 1}, true);
5272     AddTestTensor("input", {1, 2, 2, 2}, tf_type_, InitTestVector<float>(8));
5273     AddTestWeights<float>("weights", {1, 1, 2, 1, 1}, {1, 1});
5274     AddTestWeights<int>("input_sizes", {8}, {1, 2, 3, 4, 5, 6, 7, 8});
5275     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
5276                                "Asymmetric padding with Conv3DBackpropInputV2 "
5277                                "(conv3d_transpose) is not supported, at "
5278                                "my_conv3d");
5279   }
5280   {
5281     // Strides is not 5D, should fail.
5282     Reset();
5283     NodeDef node_def =
5284         GetConv3DNodeDef({1, 1, 1, 1, 1, 1}, "SAME", "NCDHW", {1, 1, 1, 1, 1});
5285     AddTestTensor("input", {1, 2, 2, 2}, tf_type_, InitTestVector<float>(8));
5286     AddTestWeights<float>("weights", {1, 1, 2, 1, 1}, {1, 1});
5287     RunValidationAndConversion(
5288         node_def, error::INVALID_ARGUMENT,
5289         "Convolution strides field must specify 5 dimensions, at my_conv3d");
5290   }
5291   {
5292     // Stride value is not 1 for channel, should fail.
5293     Reset();
5294     NodeDef node_def =
5295         GetConv3DNodeDef({1, 2, 1, 1, 1}, "SAME", "NCDHW", {1, 1, 1, 1, 1});
5296     AddTestTensor("input", {1, 1, 2, 3}, tf_type_, InitTestVector<float>(6));
5297     AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
5298                           {1, 2, 3, 4, 5, 6, 7, 8, 9});
5299     RunValidationAndConversion(
5300         node_def, error::UNIMPLEMENTED,
5301         "Stride must be 1 for batch and channel dimensions, at my_conv3d");
5302   }
5303 
5304   // Ok.
5305   std::vector<Conv3DTestParams> ok_params = {
5306       // Basic - just 1x1 conv - input = output
5307       {/*input_dims=*/{1, 1, 3, 3, 3},  // CDHW
5308        /*input=*/{1, 2,  15,  3, 6,  -3, 22, 1, 88, 56, 36, 1,  1, 105,
5309                   1, 16, -28, 1, 42, 9,  3,  1, 7,  1,  11, 61, 5},
5310        /*filter_dims=*/{1, 1, 1, 1, 1},  // DRSCK
5311        /*filter=*/{1},
5312        /*strides=*/{1, 1, 1, 1, 1},
5313        /*padding=*/"VALID",
5314        /*data_format=*/"NCDHW",
5315        /*dilations=*/{1, 1, 1, 1, 1},
5316        /*is_conv3d_backprop=*/false,
5317        /*expected_output_dims=*/{1, 1, 3, 3, 3},
5318        /*expected_output=*/{1,  2,  15, 3, 6,   -3, 22, 1,   88,
5319                             56, 36, 1,  1, 105, 1,  16, -28, 1,
5320                             42, 9,  3,  1, 7,   1,  11, 61,  5},
5321        /*allow_dynamic_channel_dim=*/false,
5322        /*validation_status=*/Status::OK()},
5323       // Basic - 2x1 filter
5324       {/*input_dims=*/{1, 1, 3, 3, 3},  // CDHW
5325        /*input=*/{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5326                   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6},
5327        /*filter_dims=*/{2, 1, 1, 1, 1},  // DRSCK
5328        /*filter=*/{1, 1},
5329        /*strides=*/{1, 1, 1, 1, 1},
5330        /*padding=*/"VALID",
5331        /*data_format=*/"NCDHW",
5332        /*dilations=*/{1, 1, 1, 1, 1},
5333        /*is_conv3d_backprop=*/false,
5334        /*expected_output_dims=*/{1, 1, 2, 3, 3},
5335        /*expected_output=*/
5336        {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7},
5337        /*allow_dynamic_channel_dim=*/false,
5338        /*validation_status=*/Status::OK()},
5339       // SAME padding (Asymmetric)
5340       {/*input_dims=*/{1, 1, 2, 3, 2},  // CDHW
5341        /*input=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
5342        /*filter_dims=*/{2, 1, 1, 1, 1},  // DRSCK
5343        /*filter=*/{-1, 1},
5344        /*strides=*/{1, 1, 1, 1, 1},
5345        /*padding=*/"SAME",
5346        /*data_format=*/"NCDHW",
5347        /*dilations=*/{1, 1, 1, 1, 1},
5348        /*is_conv3d_backprop=*/false,
5349        /*expected_output_dims=*/{1, 1, 2, 3, 2},
5350        // Diff in first 2 depths is const 6.
5351        /*expected_output=*/{6, 6, 6, 6, 6, 6, -6, -7, -8, -9, -10, -11},
5352        /*allow_dynamic_channel_dim=*/false,
5353        /*validation_status=*/Status::OK()},
5354       // SAME padding (Symmetric)
5355       {/*input_dims=*/{1, 1, 2, 3, 2},  // CDHW
5356        /*input=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
5357        /*filter_dims=*/{3, 1, 1, 1, 1},  // DRSCK
5358        /*filter=*/{-1, 0, 1},
5359        /*strides=*/{1, 1, 1, 1, 1},
5360        /*padding=*/"SAME",
5361        /*data_format=*/"NCDHW",
5362        /*dilations=*/{1, 1, 1, 1, 1},
5363        /*is_conv3d_backprop=*/false,
5364        /*expected_output_dims=*/{1, 1, 2, 3, 2},
5365        // Swaps front two depths, negates
5366        /*expected_output=*/{6, 7, 8, 9, 10, 11, 0, -1, -2, -3, -4, -5},
5367        /*allow_dynamic_channel_dim=*/false,
5368        /*validation_status=*/Status::OK()},
5371       // NDHWC (multi-channel)
5372       {/*input_dims=*/{1, 2, 3, 2, 2},  // DHWC
5373        /*input=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
5374                   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
5375        /*filter_dims=*/{2, 1, 1, 2, 1},  // DRSCK
5376        /*filter=*/{-1, 1, 1, -1},
5377        /*strides=*/{1, 1, 1, 1, 1},
5378        /*padding=*/"VALID",
5379        /*data_format=*/"NDHWC",
5380        /*dilations=*/{1, 1, 1, 1, 1},
5381        /*is_conv3d_backprop=*/false,
5382        /*expected_output_dims=*/{1, 1, 3, 2, 1},
5383        /*expected_output=*/{0, 0, 0, 0, 0, 0},  // Filters oppose each other
5384        /*allow_dynamic_channel_dim=*/false,
5385        /*validation_status=*/Status::OK()},
5386       // Dilated
5387       {/*input_dims=*/{1, 1, 3, 3, 3},  // CDHW
5388        /*input=*/{1,   1,   1,   1,   1, 1, 1, 1, 1, -10, -10, -10, -10, -10,
5389                   -10, -10, -10, -10, 7, 7, 7, 7, 7, 7,   7,   7,   7},
5390        /*filter_dims=*/{2, 1, 1, 1, 1},  // DRSCK
5391        /*filter=*/{1, 1},
5392        /*strides=*/{1, 1, 1, 1, 1},
5393        /*padding=*/"VALID",
5394        /*data_format=*/"NCDHW",
5395        /*dilations=*/{1, 1, 2, 1, 1},
5396        /*is_conv3d_backprop=*/false,
5397        /*expected_output_dims=*/{1, 1, 1, 3, 3},
5398        // Only front depth is valid, skips neg values
5399        /*expected_output=*/{8, 8, 8, 8, 8, 8, 8, 8, 8},
5400        /*allow_dynamic_channel_dim=*/false,
5401        /*validation_status=*/Status::OK()},
5402       // Strided
5403       {/*input_dims=*/{1, 1, 3, 3, 3},
5404        /*input=*/{1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0,
5405                   0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 7, 0, 8},
5406        /*filter_dims=*/{1, 1, 1, 1, 1},
5407        /*filter=*/{1},
5408        /*strides=*/{1, 1, 2, 2, 2},
5409        /*padding=*/"VALID",
5410        /*data_format=*/"NCDHW",
5411        /*dilations=*/{1, 1, 1, 1, 1},
5412        /*is_conv3d_backprop=*/false,
5413        /*expected_output_dims=*/{1, 1, 2, 2, 2},
5414        // Should only pick up the corners
5415        /*expected_output=*/{1, 2, 3, 4, 5, 6, 7, 8},
5416        /*allow_dynamic_channel_dim=*/false,
5417        /*validation_status=*/Status::OK()},
5418       // Transpose Strided
5419       {/*input_dims=*/{1, 1, 2, 2, 2},  // CDHW
5420        /*input=*/{1, 2, 3, 4, 5, 6, 7, 8},
5421        /*filter_dims=*/{1, 1, 1, 1, 1},
5422        /*filter=*/{1},
5423        /*strides=*/{1, 1, 2, 2, 2},
5424        /*padding=*/"VALID",
5425        /*data_format=*/"NCDHW",
5426        /*dilations=*/{1, 1, 1, 1, 1},
5427        /*is_conv3d_backprop=*/true,
5428        /*expected_output_dims=*/{1, 1, 3, 3, 3},
5429        /*expected_output=*/{1, 0, 2, 0, 0, 0, 3, 0, 4,   // Cube expands and
5430                             0, 0, 0, 0, 0, 0, 0, 0, 0,   // fills center
5431                             5, 0, 6, 0, 0, 0, 7, 0, 8},  // with zeroes
5432        /*allow_dynamic_channel_dim=*/false,
5433        /*validation_status=*/Status::OK()},
5434   };
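  // For reference, the "2x1 filter" case above sums adjacent depth slices:
  // output[d] = input[d] + input[d+1]. With an all-ones input whose last
  // element is 6, every output is 2 except the last, which is 1 + 6 = 7.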
5435 
5436   if (trt_mode_ == TrtTestMode::kDynamicShape) {
5437     ok_params.reserve(ok_params.size() + 2);
5438     const std::vector<float> common_input = InitTestVector<float>(3 * 3 * 3);
5439     // NCDHW - Dynamic Channel - Should fail in kDynamicShape
5440     ok_params.push_back(Conv3DTestParams{
5441         /*input_dims=*/{1, 1, 3, 3, 3},
5442         /*input=*/common_input,
5443         /*filter_dims=*/{1, 1, 1, 1, 1},
5444         /*filter=*/{1},
5445         /*strides=*/{1, 1, 2, 2, 2},
5446         /*padding=*/"VALID",
5447         /*data_format=*/"NCDHW",
5448         /*dilations=*/{1, 1, 1, 1, 1},
5449         /*is_conv3d_backprop=*/false,
5450         /*expected_output_dims=*/{},  // ignore, will fail anyway
5451         /*expected_output=*/{},       // ignore, will fail anyway
5452         /*allow_dynamic_channel_dim=*/true,
5453         /*validation_status=*/
5454         Status{error::INVALID_ARGUMENT,
5455                "Channel dimension must be static, at my_conv3d"}});
5456     // NDHWC - Dynamic Channel - Should fail in kDynamicShape
5457     ok_params.push_back(Conv3DTestParams{
5458         /*input_dims=*/{1, 3, 3, 3, 1},
5459         /*input=*/common_input,
5460         /*filter_dims=*/{1, 1, 1, 1, 1},
5461         /*filter=*/{1},
5462         /*strides=*/{1, 2, 2, 2, 1},
5463         /*padding=*/"VALID",
5464         /*data_format=*/"NDHWC",
5465         /*dilations=*/{1, 1, 1, 1, 1},
5466         /*is_conv3d_backprop=*/false,
5467         /*expected_output_dims=*/{},  // ignore, will fail anyway
5468         /*expected_output=*/{},       // ignore, will fail anyway
5469         /*allow_dynamic_channel_dim=*/true,
5470         /*validation_status=*/
5471         Status{error::INVALID_ARGUMENT,
5472                "Channel dimension must be static, at my_conv3d"}});
5473   }
5474 
5475   for (auto p : ok_params) {
5476     TestConv3D(this, p);
5477   }
5478 }
5479 
5480 template <typename T>
5481 NodeDef CreatePoolOp(DataType tf_type, std::vector<int> ksize,
5482                      std::vector<int> strides, string padding,
5483                      string data_format) {
5484   Scope s = Scope::NewRootScope();
5485   auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
5486   typename T::Attrs attrs;
5487   attrs.data_format_ = data_format;
5488   return T(s.WithOpName("my_pool"), input, ksize, strides, padding, attrs)
5489       .operation.node()
5490       ->def();
5491 }
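// A minimal usage sketch of CreatePoolOp (illustrative only):
//   NodeDef max_pool = CreatePoolOp<ops::MaxPool>(
//       DT_FLOAT, /*ksize=*/{1, 1, 1, 1}, /*strides=*/{1, 1, 1, 1}, "SAME",
//       "NCHW");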
5492 TEST_P(OpConverter_FP32_Test, ConvertPool) {
5493   // Get nodedef for MaxPool and AvgPool layers (2D or 3D).
5494   auto get_pool_nodedef =
5495       [](DataType tf_type, int nDim, std::vector<int> ksize = {},
5496          std::vector<int> strides = {}, string padding = "SAME",
5497          string data_format = "", const bool is_max_pooling = true) -> NodeDef {
5498     if (ksize.empty()) {
5499       ksize = nDim == 2 ? std::vector<int>{1, 1, 1, 1}
5500                         : std::vector<int>{1, 1, 1, 1, 1};
5501     }
5502     if (strides.empty()) {
5503       strides = nDim == 2 ? std::vector<int>{1, 1, 1, 1}
5504                           : std::vector<int>{1, 1, 1, 1, 1};
5505     }
5506     if (data_format == "") {
5507       data_format = nDim == 2 ? "NCHW" : "NCDHW";
5508     }
5509     if (is_max_pooling) {
5510       if (nDim == 3) {
5511         return CreatePoolOp<ops::MaxPool3D>(tf_type, ksize, strides, padding,
5512                                             data_format);
5513       } else {
5514         return CreatePoolOp<ops::MaxPool>(tf_type, ksize, strides, padding,
5515                                           data_format);
5516       }
5517     } else {
5518       if (nDim == 3) {
5519         return CreatePoolOp<ops::AvgPool3D>(tf_type, ksize, strides, padding,
5520                                             data_format);
5521       } else {
5522         return CreatePoolOp<ops::AvgPool>(tf_type, ksize, strides, padding,
5523                                           data_format);
5524       }
5525     }
5526   };
5527 
5528   std::vector<int> test_nDims{2, 3};
5529 
5530   for (int nDim : test_nDims) {
5531     // Input is weights, should fail.
5532     Reset();
5533     NodeDef node_def = get_pool_nodedef(tf_type_, nDim);
5534 
5535     AddTestWeights<float>("input", {1, 1, 1, 2, 3}, {1, 2, 3, 4, 5, 6});
5536     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
5537                                StrCat("The input \"input\" for ", node_def.op(),
5538                                       " must be a tensor, at my_pool")
5539                                    .c_str());
5540   }
5541 
5542   struct TestParams {
5543     std::vector<int> input_dims;
5544     std::vector<float> input;
5545     std::vector<int> ksize;
5546     std::vector<int> strides;
5547     string padding;
5548     string data_format;
5549     std::vector<int> expected_output_dims;
5550     // The expected outputs for the following operations: MaxPool2D, AvgPool2D,
5551     // MaxPool3D, AvgPool3D
5552     std::vector<std::vector<float>> expected_outputs;
5553   };
5554 
5555   // We use common_input as the input to test both 2D and 3D pooling operations,
5556   // to simplify TestParams. For 2D operations, only the first 1/3 of the values
5557   // are used.
5558   const std::vector<float> common_input{-4, 2,  15, 3, 6,   -3, 22, 1,   88,
5559                                         56, 36, 1,  1, 105, 1,  16, -28, 1,
5560                                         42, 9,  3,  1, 7,   1,  11, 61,  5};
5561   // The output of 2D ops for the case where the op is equivalent to the
5562   // identity op.
5563   const std::vector<float> common_2d_output{-4, 2, 15, 3, 6, -3, 22, 1, 88};
5564   std::vector<TestParams> ok_params = {
5565       // Basic - just 1x1 pooling - input = output
5566       TestParams{
5567           /*input_dims=*/{1, 1, 3, 3, 3},
5568           /*input=*/common_input,
5569           /*ksize=*/{1, 1, 1, 1, 1},
5570           /*strides=*/{1, 1, 1, 1, 1},
5571           /*padding=*/"VALID",
5572           /*data_format=*/"NCDHW",
5573           /*expected_output_dims=*/{1, 1, 3, 3, 3},
5574           /*expected_outputs=*/
5575           {common_2d_output, common_2d_output, common_input, common_input}},
5576       // Basic - just 1x1 pooling - input = output, SAME padding
5577       TestParams{
5578           /*input_dims=*/{1, 1, 3, 3, 3},
5579           /*input=*/common_input,
5580           /*ksize=*/{1, 1, 1, 1, 1},
5581           /*strides=*/{1, 1, 1, 1, 1},
5582           /*padding=*/"SAME",
5583           /*data_format=*/"NCDHW",
5584           /*expected_output_dims=*/{1, 1, 3, 3, 3},
5585           /*expected_outputs=*/
5586           {common_2d_output, common_2d_output, common_input, common_input}},
5587       // 3x3 pooling NCDHW
5588       TestParams{/*input_dims=*/{1, 1, 3, 3, 3},
5589                  /*input=*/common_input,
5590                  /*ksize=*/{1, 1, 3, 3, 3},
5591                  /*strides=*/{1, 1, 1, 1, 1},
5592                  /*padding=*/"VALID",
5593                  /*data_format=*/"NCDHW",
5594                  /*expected_output_dims=*/{1, 1, 1, 1, 1},
5595                  /*expected_outputs=*/{{88}, {14.444445}, {105}, {17}}},
5596       // 3x3 pooling, NDHWC
5597       TestParams{/*input_dims=*/{1, 3, 3, 3, 1},
5598                  /*input=*/common_input,
5599                  /*ksize=*/{1, 3, 3, 3, 1},
5600                  /*strides=*/{1, 1, 1, 1, 1},
5601                  /*padding=*/"VALID",
5602                  /*data_format=*/"NDHWC",
5603                  /*expected_output_dims=*/{1, 1, 1, 1, 1},
5604                  /*expected_outputs=*/{{88}, {14.444445}, {105}, {17}}},
5605       // Strided
5606       TestParams{/*input_dims=*/{1, 1, 3, 3, 3},
5607                  /*input=*/{1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0,
5608                             0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 7, 0, 8},
5609                  /*ksize=*/{1, 1, 1, 1, 1},
5610                  /*strides=*/{1, 1, 2, 2, 2},
5611                  /*padding=*/"VALID",
5612                  /*data_format=*/"NCDHW",
5613                  /*expected_output_dims=*/{1, 1, 2, 2, 2},
5614                  /*expected_outputs=*/
5615                  {{1, 2, 3, 4},  // Should only pick up the corners
5616                   {1, 2, 3, 4},
5617                   {1, 2, 3, 4, 5, 6, 7, 8},
5618                   {1, 2, 3, 4, 5, 6, 7, 8}}},
5619   };
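  // For reference, the 3x3 pooling expectations can be checked by hand: the
  // first 9 values of common_input sum to 130, so AvgPool2D yields 130 / 9 =
  // 14.444445 and MaxPool2D yields 88, while all 27 values sum to 459, so
  // AvgPool3D yields 459 / 27 = 17 and MaxPool3D yields 105.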
5620 
5621   for (auto p : ok_params) {
5622     int test_counter = 0;
5623     for (int nDim : test_nDims) {
5624       auto input = p.input;
5625       auto input_dims = p.input_dims;
5626       auto ksize = p.ksize;
5627       auto strides = p.strides;
5628       auto expected_output_dims = p.expected_output_dims;
5629       std::string data_format = p.data_format;
5630       if (nDim == 2) {
5631         input.resize(9);
5632         data_format = p.data_format == "NDHWC" ? "NHWC" : "NCHW";
5633         // Remove one of the spatial dimensions
5634         input_dims.erase(input_dims.begin() + 2);
5635         ksize.erase(ksize.begin() + 2);
5636         strides.erase(strides.begin() + 2);
5637         expected_output_dims.erase(expected_output_dims.begin() + 2);
5638       }
5639       for (bool is_max_pooling : {true, false}) {
5640         Reset();
5641         NodeDef node_def =
5642             get_pool_nodedef(tf_type_, nDim, ksize, strides, p.padding,
5643                              data_format, is_max_pooling);
5644         AddTestTensor("input", input_dims, input);
5645         TestOpConverter("my_pool", node_def, expected_output_dims, Status::OK(),
5646                         Status::OK(),
5647                         ElementsAreArray(p.expected_outputs.at(test_counter)));
5648         test_counter++;
5649       }
5650     }
5651   }
5652 }
5653 
5654 TEST_P(OpConverter_FP32_FP16_Test, ConvertTopK) {
5655   // Get the NodeDef for TopKV2.
5656   Scope s = Scope::NewRootScope();
5657   auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
5658   auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
5659   auto topk = ops::TopK(s.WithOpName("my_topk"), input, weights);
5660   const NodeDef& node_def = topk.operation.node()->def();
5661   {
5662     // K is a tensor, should fail.
5663     Reset();
5664     AddTestTensor("input", {1, 1, 2, 3});
5665     AddTestTensor("weights", {1}, DT_INT32, {});
5666     RunValidationAndConversion(
5667         node_def, error::UNIMPLEMENTED,
5668         "The input \"k\" for TopKV2 must be a constant, at my_topk");
5669   }
5670   {
5671     // Ok.
5672     Reset();
5673     AddTestTensor("input", {1, 1, 2, 5}, {-9, 3, 5, 1, 6, -5, 7, 1, 0, -1});
5674     AddTestWeights<int32>("weights", {1}, {2});
5675     std::vector<std::vector<int>> expected_output_dims{{1, 1, 2, 2},
5676                                                        {1, 1, 2, 2}};
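    // Per 5-element row, the top-2 of {-9, 3, 5, 1, 6} are values {6, 5} at
    // indices {4, 2}, and the top-2 of {-5, 7, 1, 0, -1} are values {7, 1} at
    // indices {1, 2}, matching the matchers below.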
5677     TestOpConverterMultiOut("my_topk", node_def, expected_output_dims,
5678                             Status::OK(), Status::OK(),
5679                             {ElementsAre(6, 5, 7, 1), ElementsAre(4, 2, 1, 2)},
5680                             {tf_type_, DT_INT32});
5681   }
5682 }
5683 
5684 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertGather) {
5685   // Get the NodeDef for GatherV2.
5686   Scope s = Scope::NewRootScope();
5687   auto params = ops::Placeholder(s.WithOpName("params"), tf_type_);
5688   auto indices = ops::Placeholder(s.WithOpName("indices"), DT_INT32);
5689   auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
5690   auto gather = ops::GatherV2(s.WithOpName("my_gather"), params, indices, axis);
5691   const NodeDef& node_def = gather.operation.node()->def();
5692   {
5693     // Axis is a tensor, should fail.
5694     Reset();
5695     AddTestTensor("params", {1, 1, 2, 3}, tf_type_, {});
5696     AddTestTensor("indices", {1, 2}, DT_INT32, {});
5697     AddTestTensor("axis", {1}, DT_INT32, {});
5698     RunValidationAndConversion(
5699         node_def, error::UNIMPLEMENTED,
5700         "The input \"axis\" for GatherV2 must be a constant, at my_gather");
5701   }
5702   {
5703     // Axis is out of bounds, should fail.
5704     Reset();
5705     AddTestTensor("params", {1, 1, 2, 3});
5706     AddTestTensor("indices", {1, 2}, DT_INT32, {});
5707     AddTestWeights<int32>("axis", {1}, {4});
5708     RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
5709                                "Axis value of 4 is out of bounds, must be in "
5710                                "range [-4, 4), at my_gather");
5711   }
5712 
5713   struct TestParams {
5714     // TF shape of the input 'params' (including batch dimension).
5715     std::vector<int> params_shape;
5716     // TF shape of the input 'indices' (including batch dimension).
5717     std::vector<int> indices_shape;
5718     std::vector<int> indices;
5719     int axis;
5720     // Expected TF shape of the output (including batch dimension).
5721     std::vector<int> expected_output_shape;
5722     std::vector<int> expected_output;
5723     bool params_is_tensor;
5724     Status status;
5725     Status runtime_status;
5726     Status add_index_status;
5727   };
5728 
5729   // Input is the same {1, 2, 3, 4, 5, 6} for all cases.
5730   const std::vector<int> params_input = {1, 2, 3, 4, 5, 6};
5731   std::vector<TestParams> test_params = {
5732       // Axis is batch dimension, should fail in implicit batch mode.
5733       TestParams{/*params_shape=*/{2, 1, 1, 3},
5734                  /*indices_shape=*/{2},
5735                  /*indices=*/{1, 0},
5736                  /*axis=*/0,
5737                  /*expected_output_shape=*/{2, 1, 1, 3},
5738                  /*expected_output=*/{4, 5, 6, 1, 2, 3},
5739                  /*params_is_tensor=*/true,
5740                  trt_mode_ == TrtTestMode::kImplicitBatch
5741                      ? Status{error::UNIMPLEMENTED,
5742                               "TensorRT does not allow manipulation of the"
5743                               " batch dimension, at my_gather"}
5744                      : Status::OK()},
5745       // Batch size of indices is not 1 when params is a tensor.
5746       TestParams{/*params_shape=*/{2, 1, 3},
5747                  /*indices_shape=*/{2, 1},
5748                  /*indices=*/{2, 0},
5749                  /*axis=*/2,
5750                  /*expected_output_shape=*/{2, 1, 2, 1},
5751                  /*expected_output=*/{3, 1, 6, 4},
5752                  /*params_is_tensor=*/true,
5753                  trt_mode_ == TrtTestMode::kImplicitBatch
5754                      ? Status{error::UNIMPLEMENTED,
5755                               "Indices must have a batch size of 1 when params"
5756                               " is a tensor."}
5757                      : Status::OK()},
5758       // Axis is not zero when params is a weight, should fail in implicit batch
5759       // mode.
5760       TestParams{/*params_shape=*/{2, 1, 3},
5761                  /*indices_shape=*/{2},
5762                  /*indices=*/{1, 2},
5763                  /*axis=*/2,
5764                  /*expected_output_shape=*/{2, 1, 2},
5765                  /*expected_output=*/{2, 3, 5, 6},
5766                  /*params_is_tensor=*/false,
5767                  trt_mode_ == TrtTestMode::kImplicitBatch
5768                      ? Status{error::UNIMPLEMENTED,
5769                               "The input axis must be zero when params is a"
5770                               " weight."}
5771                      : Status::OK()},
5772       // Params with only batch dimension.
5773       TestParams{/*params_shape=*/{6},
5774                  /*indices_shape=*/{2},
5775                  /*indices=*/{1, 3},
5776                  /*axis=*/0,
5777                  /*expected_output_shape=*/{2},
5778                  /*expected_output=*/{2, 4},
5779                  /*params_is_tensor=*/true,
5780                  trt_mode_ == TrtTestMode::kImplicitBatch  // conversion_status
5781                      ? Status{error::UNIMPLEMENTED,
5782                               "TensorRT does not allow manipulation of the "
5783                               "batch dimension, at my_gather"}
5784                      : Status::OK(),
5785                  Status::OK(),                             // runtime_status
5786                  trt_mode_ == TrtTestMode::kImplicitBatch  // add_index_status
5787                      ? Status{error::INVALID_ARGUMENT,
5788                               "Batch size doesn't match for tensor indices: "
5789                               "Provided batch size does not match converter "
5790                               "batch size: 2 vs 6"}
5791                      : Status::OK()},
5792       // Vector indices, and output rank is rank(params).
5793       TestParams{
5794           /*params_shape=*/{1, 1, 2, 3},
5795           /*indices_shape=*/{1},
5796           /*indices=*/{0},
5797           /*axis=*/3,
5798           /*expected_output_shape=*/{1, 1, 2, 1},
5799           /*expected_output=*/{1, 4},
5800           /*params_is_tensor=*/true,
5801       },
5802       TestParams{
5803           /*params_shape=*/{1, 1, 2, 3},
5804           /*indices_shape=*/{1},
5805           /*indices=*/{1},
5806           /*axis=*/2,
5807           /*expected_output_shape=*/{1, 1, 1, 3},
5808           /*expected_output=*/{4, 5, 6},
5809           /*params_is_tensor=*/true,
5810       },
5811       // Indices with rank > 1; output rank is
5812       // rank(params) + rank(indices) - 1.
5813       TestParams{
5814           /*params_shape=*/{1, 1, 2, 3},
5815           /*indices_shape=*/{1, 1},
5816           /*indices=*/{0},
5817           /*axis=*/3,
5818           /*expected_output_shape=*/{1, 1, 2, 1, 1},
5819           /*expected_output=*/{1, 4},
5820           /*params_is_tensor=*/true,
5821       },
5822       TestParams{
5823           /*params_shape=*/{1, 1, 2, 3},
5824           /*indices_shape=*/{1, 1},
5825           /*indices=*/{1},
5826           /*axis=*/3,
5827           /*expected_output_shape=*/{1, 1, 2, 1, 1},
5828           /*expected_output=*/{2, 5},
5829           /*params_is_tensor=*/true,
5830       },
5831       TestParams{
5832           /*params_shape=*/{1, 1, 2, 3},
5833           /*indices_shape=*/{1, 1},
5834           /*indices=*/{2},
5835           /*axis=*/-1,
5836           /*expected_output_shape=*/{1, 1, 2, 1, 1},
5837           /*expected_output=*/{3, 6},
5838           /*params_is_tensor=*/true,
5839       },
5840       TestParams{
5841           /*params_shape=*/{1, 1, 2, 3},
5842           /*indices_shape=*/{1, 3},
5843           /*indices=*/{2, 0, 1},
5844           /*axis=*/3,
5845           /*expected_output_shape=*/{1, 1, 2, 1, 3},
5846           /*expected_output=*/{3, 1, 2, 6, 4, 5},
5847           /*params_is_tensor=*/true,
5848       },
5849       TestParams{
5850           /*params_shape=*/{1, 3, 2},
5851           /*indices_shape=*/{1, 2, 2},
5852           /*indices=*/{0, 0, 1, 0},
5853           /*axis=*/2,
5854           /*expected_output_shape=*/{1, 3, 1, 2, 2},
5855           /*expected_output=*/{1, 1, 2, 1, 3, 3, 4, 3, 5, 5, 6, 5},
5856           /*params_is_tensor=*/true,
5857       },
5858       TestParams{
5859           /*params_shape=*/{1, 2, 3},
5860           /*indices_shape=*/{1},
5861           /*indices=*/{0},
5862           /*axis=*/0,
5863           /*expected_output_shape=*/{1, 2, 3},
5864           /*expected_output=*/{1, 2, 3, 4, 5, 6},
5865           /*params_is_tensor=*/false,
5866       },
5867       TestParams{
5868           /*params_shape=*/{3, 2},
5869           /*indices_shape=*/{1, 2},
5870           /*indices=*/{0, 1},
5871           /*axis=*/0,
5872           /*expected_output_shape=*/{1, 2, 2},
5873           /*expected_output=*/{1, 2, 3, 4},
5874           /*params_is_tensor=*/false,
5875       },
5876       TestParams{
5877           /*params_shape=*/{2, 3},
5878           /*indices_shape=*/{1, 1, 2},
5879           /*indices=*/{0, 1},
5880           /*axis=*/0,
5881           /*expected_output_shape=*/{1, 1, 2, 3},
5882           /*expected_output=*/{1, 2, 3, 4, 5, 6},
5883           /*params_is_tensor=*/false,
5884       },
5885       TestParams{
5886           /*params_shape=*/{3, 2},
5887           /*indices_shape=*/{2, 2},
5888           /*indices=*/{0, 2, 1, 0},
5889           /*axis=*/0,
5890           /*expected_output_shape=*/{2, 2, 2},
5891           /*expected_output=*/{1, 2, 5, 6, 3, 4, 1, 2},
5892           /*params_is_tensor=*/false,
5893       },
5894   };
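  // For reference, one worked case from above: with params {1, 2, 3, 4, 5, 6}
  // viewed as shape {1, 1, 2, 3} (rows {1, 2, 3} and {4, 5, 6}), gathering
  // indices {2, 0, 1} along axis 3 picks columns 2, 0 and 1 from each row,
  // giving {3, 1, 2, 6, 4, 5}.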
5895 
5896   for (auto p : test_params) {
5897     Reset();
5898     if (p.params_is_tensor) {
5899       AddTestTensor("params", p.params_shape, params_input);
5900     } else {
5901       AddTestWeights("params", p.params_shape, params_input, tf_type_);
5902     }
5903     AddTestTensor("indices", p.indices_shape, DT_INT32, p.indices, {},
5904                   p.add_index_status);
5905     AddTestWeights<int32>("axis", {1}, {p.axis});
5906     TestOpConverter("my_gather", node_def, p.expected_output_shape, p.status,
5907                     p.runtime_status, ElementsAreArray(p.expected_output));
5908   }
5909 }
5910 
5911 template <typename OpType>
5912 NodeDef CreateReduceOp(DataType tf_type, bool keep_dims) {
5913   Scope s = Scope::NewRootScope();
5914   auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
5915   auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
5916   typename OpType::Attrs op_attrs;
5917   op_attrs.keep_dims_ = keep_dims;
5918   auto op = OpType(s.WithOpName("my_reduce"), input, axis, op_attrs);
5919   return op.operation.node()->def();
5920 }
5921 
5922 // Applies the reduction op on sub-sequences of the input:
5923 //   output[i] = reduce(input[m * i : m * (i + 1)])
5924 std::vector<float> CalcReduce(string op_name, std::vector<float> input, int m,
5925                               float (*op)(float, float), float init) {
5926   std::vector<float> output(input.size() / m);
5927   for (int i = 0; i < output.size(); i++) {
5928     auto begin = input.begin() + i * m;
5929     auto end = input.begin() + (i + 1) * m;
5930     output[i] = std::accumulate(begin, end, init, op);
5931     if (op_name == "Mean") {
5932       output[i] /= m;
5933     }
5934   }
5935   return output;
5936 }
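// For example (illustrative), reducing {1, 2, 3, 4, 5, 6} in chunks of m = 3
// with a hypothetical `add` of type float(*)(float, float) returning x + y:
//   CalcReduce("Sum", {1, 2, 3, 4, 5, 6}, 3, add, 0);   // -> {6, 15}
//   CalcReduce("Mean", {1, 2, 3, 4, 5, 6}, 3, add, 0);  // -> {2, 5}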
5937 TEST_P(OpConverter_FP32_Test, ConvertReduce) {
5938   {
5939     // Input is weights, should fail.
5940     Reset();
5941     const NodeDef node_def = CreateReduceOp<ops::Sum>(tf_type_, false);
5942     AddTestWeights<float>("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
5943     AddTestWeights<int32>("axis", {1}, {1});
5944     RunValidationAndConversion(
5945         node_def, error::UNIMPLEMENTED,
5946         "The input \"input\" for Sum must be a tensor, at my_reduce");
5947   }
5948   {
5949     // Axis is weights, should fail.
5950     Reset();
5951     const NodeDef node_def = CreateReduceOp<ops::Sum>(tf_type_, false);
5952     AddTestTensor("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
5953     AddTestTensor("axis", {1}, DT_INT32, {1});
5954     RunValidationAndConversion(
5955         node_def, error::UNIMPLEMENTED,
5956         "The input \"axis\" for Sum must be a constant, at my_reduce");
5957   }
5958   using OpFunc = std::function<NodeDef(DataType, bool)>;
5959   using ValFunc = float (*)(float, float);
5960   struct ReduceTestDescriptor {
5961     string name;
5962     OpFunc get_node;
5963     ValFunc val_func;
5964     float init_val;
5965   };
5966   std::vector<ReduceTestDescriptor> op_test_info{
5967       {"Sum", CreateReduceOp<ops::Sum>, [](float x, float y) { return x + y; },
5968        0},
5969       {"Prod", CreateReduceOp<ops::Prod>,
5970        [](float x, float y) { return x * y; }, 1},
5971       {"Mean", CreateReduceOp<ops::Mean>,
5972        [](float x, float y) { return x + y; }, 0},
5973       {"Min", CreateReduceOp<ops::Min>,
5974        [](float x, float y) { return y < x ? y : x; }, 1000},
5975       {"Max", CreateReduceOp<ops::Max>,
5976        [](float x, float y) { return x < y ? y : x; }, -1000}};
5977 
5978   std::vector<float> input_values{1, 2, 3, 4, 5, 6};
5979   struct TestParams {
5980     std::vector<int> input_dims;
5981     std::vector<float> input_values;
5982     // Helper array containing the same elements as the input, but permuted
5983     // so that the reduction can be calculated over contiguous elements using
5984     // CalcReduce.
5985     std::vector<float> helper_array;
5986     std::vector<int> axis;
5987     int stride;  // product of input_dims along axis
5988     Status conversion_status;
5989   };
5990   std::vector<TestParams> params{
5991       // Out of range tests
5992       TestParams{{2, 3, 1}, input_values, input_values, {3}, 3},
5993       TestParams{{2, 3, 1}, input_values, input_values, {-4}, 3},
5994       // Ok tests
5995       TestParams{{2, 3, 1}, input_values, {1, 4, 2, 5, 3, 6}, {0}, 2},
5996       TestParams{{2, 3, 1}, input_values, input_values, {1}, 3},
5997       TestParams{{2, 3, 1}, input_values, input_values, {2}, 1},
5998       TestParams{{2, 3, 1}, input_values, input_values, {0, 1}, 6},
5999       // Ok tests with negative axis values
6000       TestParams{{2, 3, 1}, input_values, {1, 4, 2, 5, 3, 6}, {-3}, 2},
6001       TestParams{{2, 3, 1}, input_values, input_values, {-2}, 3},
6002       TestParams{{2, 3, 1}, input_values, input_values, {-1}, 1},
6003       TestParams{{2, 3, 1}, input_values, input_values, {-3, 1}, 6},
6004   };
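  // For reference, the first "Ok" case above reduces over axis 0 (size 2):
  // the helper array {1, 4, 2, 5, 3, 6} places each reduced pair (1,4), (2,5),
  // (3,6) contiguously, so with stride m = 2, Sum yields {5, 7, 9} and Mean
  // yields {2.5, 3.5, 4.5}.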
6005 
6006   for (bool keep_dims : {false, true}) {
6007     for (auto& op : op_test_info) {
6008       for (auto p : params) {
6009         SCOPED_TRACE(StrCat(op.name, keep_dims ? " keep_dims" : ""));
6010         Reset();
6011         NodeDef node_def = op.get_node(tf_type_, keep_dims);
6012 
6013         AddTestTensor("input", p.input_dims, p.input_values);
6014         AddTestWeights<int32>("axis", {static_cast<int>(p.axis.size())},
6015                               p.axis);
6016         std::vector<int> expected_output_dims(p.input_dims);
6017 
6018         // Set expected output dim and conversion error messages
6019         for (int ax : p.axis) {
6020           int rank = p.input_dims.size();
6021           if (ax >= rank || ax < -rank) {
6022             p.conversion_status =
6023                 errors::InvalidArgument("Axis value of ", ax,
6024                                         " is out of bounds, must be in "
6025                                         "range [",
6026                                         -rank, ", ", rank, "), at my_reduce");
6027           } else {
6028             int ax_positive = ax >= 0 ? ax : ax + rank;
6029             // Zero marks elements that we will remove later.
6030             expected_output_dims[ax_positive] = keep_dims ? 1 : 0;
6031             if (trt_mode_ == TrtTestMode::kImplicitBatch &&
6032                 (ax == 0 || ax == -rank)) {
6033               p.conversion_status = errors::Unimplemented(
6034                   "TensorRT does not allow manipulation of the batch "
6035                   "dimension, at my_reduce");
6036             }
6037           }
6038         }
6039         expected_output_dims.erase(std::remove(expected_output_dims.begin(),
6040                                                expected_output_dims.end(), 0),
6041                                    expected_output_dims.end());
6042         VLOG(2) << "out dims "
6043                 << absl::StrCat("[", absl::StrJoin(expected_output_dims, ","),
6044                                 "]");
6045         std::vector<float> expected_values = CalcReduce(
6046             op.name, p.helper_array, p.stride, op.val_func, op.init_val);
6047         TestOpConverter("my_reduce", node_def, expected_output_dims,
6048                         p.conversion_status, Status::OK(),
6049                         ArrayFloatNear(expected_values));
6050       }
6051     }
6052   }
6053 }
6054 
6055 NodeDef CreateCastOp(DataType tf_type) {
6056   Scope s = Scope::NewRootScope();
6057   auto input = ops::Placeholder(s.WithOpName("input"), DT_HALF);
6058   return ops::Cast(s.WithOpName("my_unary"), input, DT_FLOAT)
6059       .operation.node()
6060       ->def();
6061 }
6062 
6063 TEST_P(OpConverter_FP32_Test, ConvertUnary) {
6064   {
6065     // Input is weights, should fail.
6066     Reset();
6067     const NodeDef node_def = CreateUnaryOp<ops::Neg>(tf_type_);
6068     AddTestWeights<float>("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
6069     RunValidationAndConversion(
6070         node_def, error::UNIMPLEMENTED,
6071         "The input \"x\" for Neg must be a tensor, at my_unary");
6072   }
6073   using OpFunc = std::function<NodeDef(DataType)>;
6074   using ValFunc = float (*)(float);
6075   std::map<std::string, std::pair<OpFunc, ValFunc>> op_map;
6076 #define ADD_OP(name, op, compute) \
6077   op_map[name] =                  \
6078       std::make_pair(CreateUnaryOp<op>, static_cast<ValFunc>(compute))
  ADD_OP("Abs", ops::Abs, std::abs);
  ADD_OP("Acos", ops::Acos, std::acos);
  ADD_OP("Acosh", ops::Acosh, std::acosh);
  ADD_OP("Asin", ops::Asin, std::asin);
  ADD_OP("Asinh", ops::Asinh, std::asinh);
  ADD_OP("Atan", ops::Atan, std::atan);
  ADD_OP("Atanh", ops::Atanh, std::atanh);
  op_map["Cast"] = std::make_pair(CreateCastOp, [](float x) { return x; });
  ADD_OP("Ceil", ops::Ceil, std::ceil);
  ADD_OP("Cos", ops::Cos, std::cos);
  ADD_OP("Cosh", ops::Cosh, std::cosh);
  ADD_OP("Exp", ops::Exp, std::exp);
  ADD_OP("Erf", ops::Erf, std::erf);
  ADD_OP("Floor", ops::Floor, std::floor);
  ADD_OP("Log", ops::Log, std::log);
  ADD_OP("Neg", ops::Neg, [](float x) { return -x; });
  ADD_OP("Reciprocal", ops::Reciprocal, [](float x) { return 1.0f / x; });
  ADD_OP("Rsqrt", ops::Rsqrt, [](float x) { return 1.0f / std::sqrt(x); });
  ADD_OP("Sin", ops::Sin, std::sin);
  ADD_OP("Sinh", ops::Sinh, std::sinh);
  ADD_OP("Sqrt", ops::Sqrt, std::sqrt);
  ADD_OP("Tan", ops::Tan, std::tan);
#undef ADD_OP
  // Get list of ops to test.
  std::vector<string> ops_to_test;
  // Add all ops supported by ConvertUnary.
  auto* map = UnaryOperationMap();
  ops_to_test.reserve(map->size());
  for (auto& pair : *map) {
    ops_to_test.push_back(pair.first);
  }
  // Add other unary ops to test.
  ops_to_test.push_back("Rsqrt");
  // Prepare test parameters.
  auto p = TestParamBase{
      {1, 1, 2, 3},  // input dims
      {},            // input partial dims
      {1, 1, 2, 3},  // expected output dims
  };
  for (const string& op_name : ops_to_test) {
    SCOPED_TRACE(op_name);
    Reset();
    if (!op_map.count(op_name)) {
      FAIL() << "Unary op test map does not contain op " << op_name;
    }
    NodeDef node_def = op_map[op_name].first(tf_type_);

    // TODO(bixia): we assume this test is only instantiated for DT_FLOAT for
    // now. Need to find a better way to express input and output types.
    //
    // TODO(tfeher): improve tests by defining an expected output data type and
    // check that. Currently only the shape and values of the output are
    // checked.
    DataType input_tf_type = op_name == "Cast" ? DT_HALF : tf_type_;

    std::vector<float> input_values{-0.9f, 0.6f, 0.0f, -3.5f, 100.0f, 2.9f};
    AddTestTensor("input", p.input_dims, input_tf_type, input_values);
    std::vector<float> output;
    std::transform(input_values.begin(), input_values.end(),
                   std::back_inserter(output), op_map[op_name].second);
    TestOpConverter("my_unary", node_def, p.expected_output_dims, Status::OK(),
                    p.runtime_status, ArrayFloatNear(output, 0.0001, true));
  }
}

// Get the NodeDef for ConcatV2.
// TODO(hinsu): Consider switching this to a static function.
auto get_concat_nodedef = [](DataType dtype, int num_inputs) -> NodeDef {
  Scope s = Scope::NewRootScope();
  std::vector<Input> values;
  for (int i = 0; i < num_inputs; ++i) {
    const string input_name = StrCat("values_", i);
    values.push_back(ops::Placeholder(s.WithOpName(input_name), dtype));
  }
  auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
  auto concat = ops::Concat(s.WithOpName("my_concat"),
                            absl::Span<const Input>(values), axis);
  return concat.operation.node()->def();
};
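// Note that ops::Concat emits a ConcatV2 node (num_inputs value tensors
// followed by the axis input), which is why the error messages below mention
// ConcatV2.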

TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertConcat) {
  {
    // Axis is a tensor, should fail.
    Reset();
    NodeDef node_def = get_concat_nodedef(tf_type_, 2);
    AddTestTensor("values_0", {1, 1, 2, 3});
    AddTestTensor("values_1", {1, 1, 2, 3});
    AddTestTensor("axis", {1});
    RunValidationAndConversion(
        node_def, error::UNIMPLEMENTED,
        "The input \"axis\" for ConcatV2 must be a constant, at my_concat");
  }
  {
    // Axis is out of bounds, should fail.
    Reset();
    NodeDef node_def = get_concat_nodedef(tf_type_, 2);
    AddTestTensor("values_0", {1, 1, 2, 3});
    AddTestTensor("values_1", {1, 1, 2, 3});
    AddTestWeights<int32>("axis", {1}, {4});
    RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
                               "Axis value of 4 is out of bounds, must be in "
                               "range [-4, 4), at my_concat");
  }
  {
    // Inputs have inconsistent ranks, should fail.
    Reset();
    NodeDef node_def = get_concat_nodedef(tf_type_, 2);
    AddTestTensor("values_0", {1, 1, 2, 3});
    AddTestTensor("values_1", {1, 1, 6});
    AddTestWeights<int32>("axis", {1}, {1});
    RunValidationAndConversion(
        node_def, error::INVALID_ARGUMENT,
        "Received inputs with inconsistent rank, at my_concat");
  }

  struct TestParams {
    std::vector<std::vector<int>> input_shapes;
    std::vector<std::vector<int>> input_values;
    int axis;
    std::vector<int> expected_output_dims;
    std::vector<int> expected_output;
    Status conversion_status;
    Status run_status;
    bool input_as_weight;
  };

  const std::vector<std::vector<int>> common_input{InitTestVector<int>(6),
                                                   InitTestVector<int>(6, 6)};
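  // common_input holds {0,...,5} and {6,...,11}: as used here,
  // InitTestVector<T>(n, start) fills n consecutive values beginning at
  // start (0 by default, cf. the /*start_value=*/ call below).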
  std::vector<TestParams> params = {
      {
          /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
          /*input_values=*/common_input,
          /*axis=*/1,
          /*expected_output_dims=*/{1, 2, 2, 3},
          /*expected_output=*/InitTestVector<int>(12),
      },
      {
          /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
          /*input_values=*/common_input,
          /*axis=*/2,
          /*expected_output_dims=*/{1, 1, 4, 3},
          /*expected_output=*/InitTestVector<int>(12),
      },
      {
          /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
          /*input_values=*/common_input,
          /*axis=*/3,
          /*expected_output_dims=*/{1, 1, 2, 6},
          /*expected_output=*/
          {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11},
      },
      {
          /*input_shapes=*/{{1, 1}, {1, 2}, {1, 3}, {1, 1}, {1, 1}, {1, 2}},
          /*input_values=*/
          {{1}, {2, 3}, {4, 5, 6}, {7}, {8}, {9, 10}},
          /*axis=*/1,
          /*expected_output_dims=*/{1, 10},
          /*expected_output=*/
          InitTestVector<int>(10, /*start_value=*/1),
      },
      {
          // An input is a weight.
          /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
          /*input_values=*/common_input,
          /*axis=*/1,
          /*expected_output_dims=*/{1, 2, 2, 3},
          /*expected_output=*/InitTestVector<int>(12),
          /*conversion_status=*/
          errors::Unimplemented("The input \"values_1\" for ConcatV2 "
                                "must be a tensor, at my_concat"),
          /*run_status=*/Status::OK(),
          /*input_as_weight=*/true,
      },
      {
          // Axis is batch dimension, should fail in implicit batch mode.
          /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
          /*input_values=*/common_input,
          /*axis=*/0,
          /*expected_output_dims=*/{2, 1, 2, 3},
          /*expected_output=*/InitTestVector<int>(12),
          /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
              ? errors::Unimplemented(
                    "TensorRT does not allow manipulation of the "
                    "batch dimension, at my_concat")
              : Status::OK(),
      },
      {
          // Inconsistent input shape, runtime error in dynamic shape mode.
          /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 3, 2}},
          /*input_values=*/common_input,
          /*axis=*/1,
          /*expected_output_dims=*/{2, 1, 2, 3},
          /*expected_output=*/InitTestVector<int>(12),
          trt_mode_ != TrtTestMode::kDynamicShape
              ? errors::InvalidArgument(
                    "Received inputs with inconsistent shape, at my_concat")
              : Status::OK(),
          errors::InvalidArgument(""),
      }};

  for (auto p : params) {
    Reset();
    const int num_inputs = p.input_shapes.size();
    EXPECT_EQ(num_inputs, p.input_values.size());
    NodeDef node_def = get_concat_nodedef(tf_type_, num_inputs);
    // Create inputs.
    for (int j = 0; j < num_inputs; ++j) {
      string name = StrCat("values_", j);
      if (j == 1 && p.input_as_weight) {
        AddTestWeights(name, p.input_shapes[j], p.input_values[j], tf_type_);
      } else {
        AddTestTensor(name, p.input_shapes[j], p.input_values[j]);
      }
    }
    AddTestWeights<int32>("axis", {1}, {p.axis});

    TestOpConverter("my_concat", node_def, p.expected_output_dims,
                    p.conversion_status, p.run_status,
                    ElementsAreArray(p.expected_output));
  }
}

// Get the NodeDef for Split.
auto get_split_nodedef = [](DataType dtype, int num_split) -> NodeDef {
  Scope s = Scope::NewRootScope();
  auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
  auto value = ops::Placeholder(s.WithOpName("value"), dtype);
  auto split = ops::Split(s.WithOpName("my_split"), axis, value, num_split);
  return split.operation.node()->def();
};
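// Unlike ConcatV2 above, Split takes the split axis as its first input,
// ahead of the value tensor.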

template <DataType dtype>
void TestConvertSplit(OpConverterTest* test) {
  typedef typename EnumToDataType<dtype>::Type CType;

  struct TestParams {
    std::vector<int> input_shape;
    std::vector<CType> value;
    int axis;
    int num_split;
    std::vector<int> expected_output_dims;
    std::vector<std::vector<CType>> expected_outputs;
  };

  const std::vector<CType> common_input = InitTestVector<CType>(6);
  std::vector<TestParams> ok_params = {
      // Identity (num_split = 1)
      {/*input_shape=*/{1, 2, 3}, /*value=*/common_input, /*axis=*/1,
       /*num_split=*/1, /*expected_output_dims=*/{1, 2, 3},
       /*expected_outputs=*/{InitTestVector<CType>(6)}},
      {/*input_shape=*/{1, 2, 3},
       /*value=*/common_input,
       /*axis=*/3,
       /*num_split=*/3,
       /*expected_output_dims=*/{1, 2, 1},
       /*expected_outputs=*/
       {{CType(0), CType(3)}, {CType(1), CType(4)}, {CType(2), CType(5)}}},
      {/*input_shape=*/{1, 6},
       /*value=*/common_input,
       /*axis=*/2,
       /*num_split=*/6,
       /*expected_output_dims=*/{1, 1},
       /*expected_outputs=*/
       {{CType(0)},
        {CType(1)},
        {CType(2)},
        {CType(3)},
        {CType(4)},
        {CType(5)}}},
      {/*input_shape=*/{1, 6},
       /*value=*/common_input,
       /*axis=*/-1,
       /*num_split=*/2,
       /*expected_output_dims=*/{1, 3},
       /*expected_outputs=*/
       {InitTestVector<CType>(3), InitTestVector<CType>(3, CType(3))}},
  };
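  // For instance, the last entry splits a {1,6} input along its final
  // dimension (axis=-1) into num_split=2 pieces, giving two {1,3} outputs.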

  for (int i = 0; i < ok_params.size(); ++i) {
    test->Reset();
    NodeDef node_def = get_split_nodedef(dtype, ok_params[i].num_split);
    // Create inputs.
    test->AddTestWeights<int32>("axis", {1}, {ok_params[i].axis});
    nvinfer1::DataType trt_type;
    TF_ASSERT_OK(TfTypeToTrtType(dtype, &trt_type));
    test->AddTestTensor("value", ok_params[i].input_shape, 1, trt_type);
    // Convert.
    test->RunValidationAndConversion(node_def);

    // Get output tensors and verify output dims.
    EXPECT_EQ(ok_params[i].expected_outputs.size(), ok_params[i].num_split);
    std::vector<TRT_TensorOrWeights> outputs(ok_params[i].num_split);
    DataVec output_data;
    for (int j = 0; j < outputs.size(); ++j) {
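      // TF names output tensors "<node>:<index>"; the ":0" suffix of the
      // first output may be omitted, hence the bare "my_split" for j == 0.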
      const string name = j == 0 ? StrCat("my_split") : StrCat("my_split:", j);
      TF_EXPECT_OK(test->GetTensorOrWeights(name, &outputs[j]));
      EXPECT_TRUE(outputs[j].is_tensor());
      ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
                               outputs[j].tensor()->getDimensions());
      // Create buffer to store output.
      output_data.push_back(
          {name, test->ConstructTensor<CType>(
                     ok_params[i].expected_outputs[j].size())});
    }

    // Verify output values are correct.
    const DataVec input_data{
        {"value", test->AsTensor<CType>(ok_params[i].value)}};
    TF_EXPECT_OK(test->BuildAndRun(input_data, &output_data));
    for (int j = 0; j < outputs.size(); ++j) {
      EXPECT_THAT(GetSpanForData<CType>(output_data[j]),
                  ElementsAreArray(ok_params[i].expected_outputs[j]));
    }
  }
}

TEST_F(OpConverterTest, ConvertSplit) {
  {
    // Axis is a tensor, should fail.
    Reset();
    NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
    AddTestTensor("axis", {1});
    AddTestTensor("value", {1, 2, 3});
    RunValidationAndConversion(
        node_def, error::UNIMPLEMENTED,
        "The input \"axis\" for Split must be a constant, at my_split");
  }
  {
    // Axis is out of bounds, should fail.
    Reset();
    NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
    AddTestWeights<int32>("axis", {1}, {4});
    AddTestTensor("value", {1, 2, 3});
    RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
                               "Axis value of 4 is out of bounds, must be in "
                               "range [-4, 4), at my_split");
  }
  {
    // Axis is out of bounds (negative), should fail.
    Reset();
    NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
    AddTestWeights<int32>("axis", {1}, {-5});
    AddTestTensor("value", {1, 2, 3});
    RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
                               "Axis value of -5 is out of bounds, must be in "
                               "range [-4, 4), at my_split");
  }
  {
    // Axis is batch dimension, should fail.
    Reset();
    NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
    AddTestWeights<int32>("axis", {1}, {0});
    AddTestTensor("value", {1, 2, 3});
    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
                               "TensorRT does not allow manipulation of the "
                               "batch dimension, at my_split");
  }
  {
    // Value is a weight, should fail.
    Reset();
    NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
    AddTestWeights<int32>("axis", {1}, {1});
    AddTestWeights<float>("value", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
    RunValidationAndConversion(
        node_def, error::UNIMPLEMENTED,
        "The input \"value\" for Split must be a tensor, at my_split");
  }
  {
    // Dim is not evenly divisible by num_split, should fail.
    Reset();
    NodeDef node_def = get_split_nodedef(DT_FLOAT, 2);
    AddTestWeights<int32>("axis", {1}, {3});
    AddTestTensor("value", {1, 2, 3});
    RunValidationAndConversion(
        node_def, error::INVALID_ARGUMENT,
        "Dimension 3 of size 3 is not evenly divisible by 2, at my_split");
  }
  {
    // num_split > dim size, should fail.
    Reset();
    NodeDef node_def = get_split_nodedef(DT_FLOAT, 4);
    AddTestWeights<int32>("axis", {1}, {3});
    AddTestTensor("value", {1, 2, 3});
    RunValidationAndConversion(
        node_def, error::INVALID_ARGUMENT,
        "Dimension 3 of size 3 is not evenly divisible by 4, at my_split");
  }

  TestConvertSplit<DT_FLOAT>(this);
  TestConvertSplit<DT_HALF>(this);
  TestConvertSplit<DT_INT32>(this);
}

// Get the NodeDef for Unpack (Unstack in TF API).
auto get_unpack_nodedef = [](DataType dtype, int num, int axis) -> NodeDef {
  Scope s = Scope::NewRootScope();
  auto value = ops::Placeholder(s.WithOpName("value"), dtype);
  auto unstack_attrs = ops::Unstack::Axis(axis);
  auto unstack =
      ops::Unstack(s.WithOpName("my_unpack"), value, num, unstack_attrs);
  return unstack.operation.node()->def();
};
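// Unpack with num=N along `axis` yields N outputs whose rank is one less
// than the input's; e.g. unpacking a {1,1,2,3} tensor along axis=1 with
// num=1 produces a single {1,2,3} output (see the params below).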

struct UnpackTestParams {
  std::vector<int> input_shape;
  std::vector<float> input_value;
  int axis;
  int num;
  std::vector<int> expected_output_dims;
  std::vector<std::vector<float>> expected_outputs;
  Status run_status;
};

void TestConvertUnpack(ParameterizedOpConverterTestBase* test,
                       UnpackTestParams& p) {
  test->Reset();
  NodeDef node_def = get_unpack_nodedef(test->get_tf_type(), p.num, p.axis);
  // Create inputs.
  test->AddTestTensor("value", p.input_shape, test->get_tf_type(),
                      p.input_value);

  std::vector<Matcher<std::vector<float>>> matcher_vec;
  std::vector<DataType> datatype_vec;
  std::vector<std::vector<int>> expected_output_dims;

  for (int j = 0; j < p.expected_outputs.size(); ++j) {
    matcher_vec.push_back(ElementsAreArray(p.expected_outputs[j]));
    datatype_vec.push_back(test->get_tf_type());
    expected_output_dims.push_back(p.expected_output_dims);
  }

  test->TestOpConverterMultiOut(/*name=*/"my_unpack",
                                /*node_def=*/node_def,
                                /*expected_output_dims=*/expected_output_dims,
                                /*expected_conversion_status=*/p.run_status,
                                /*expected_runtime_status=*/p.run_status,
                                /*matcher=*/matcher_vec,
                                /*out_tf_type=*/datatype_vec);
}

// TODO: Reactivate when the INT32 segfault is fixed.
TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertUnpack) {
  // We need to skip error testing for dynamic shape mode, as it is impossible
  // to convert Unpack in dynamic shape mode.
  if (trt_mode_ != TrtTestMode::kDynamicShape) {
    {
      // Value is weights, should fail.
      Reset();
      NodeDef node_def = get_unpack_nodedef(tf_type_, /*num=*/3, /*axis=*/3);
      AddTestWeights<float>("value", {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6});
      RunValidationAndConversion(
          node_def, error::UNIMPLEMENTED,
          "The input \"value\" for Unpack must be a tensor, at my_unpack");
    }
    {
      // Axis is out of bounds, should fail.
      Reset();
      NodeDef node_def = get_unpack_nodedef(tf_type_, /*num=*/1, /*axis=*/4);
      AddTestTensor("value", {1, 1, 2, 3});
      RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
                                 "Axis value of 4 is out of bounds, must be in "
                                 "range [-4, 4), at my_unpack");
    }
    {
      // Axis is out of bounds (negative), should fail.
      Reset();
      NodeDef node_def = get_unpack_nodedef(tf_type_, /*num=*/1, /*axis=*/-5);
      AddTestTensor("value", {1, 1, 2, 3});
      RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
                                 "Axis value of -5 is out of bounds, must be "
                                 "in range [-4, 4), at my_unpack");
    }
    {
      if (trt_mode_ != TrtTestMode::kExplicitBatch) {
        // Axis is batch dimension, should fail.
        Reset();
        NodeDef node_def = get_unpack_nodedef(tf_type_, /*num=*/1, /*axis=*/0);
        AddTestTensor("value", {1, 2, 3});
        RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
                                   "TensorRT does not allow manipulation of "
                                   "the batch dimension, at my_unpack");
      }
    }
    {
      // Dim size does not match num, should fail.
      Reset();
      NodeDef node_def = get_unpack_nodedef(tf_type_, /*num=*/5, /*axis=*/2);
      AddTestTensor("value", {1, 1, 6});
      RunValidationAndConversion(
          node_def, error::INVALID_ARGUMENT,
          "Dimension 2 has size 6 which is not equal to num of 5, at "
          "my_unpack");
    }
    {
      // Output would be TF scalar, should fail.
      Reset();
      NodeDef node_def = get_unpack_nodedef(tf_type_, /*num=*/1, /*axis=*/0);
      AddTestTensor("value", {}, tf_type_, {}, {},
                    trt_mode_ == TrtTestMode::kImplicitBatch
                        ? errors::Internal("Scalars cannot be represented in "
                                           "implicit batch mode")
                        : Status::OK());

      if (trt_mode_ == TrtTestMode::kImplicitBatch) {
        RunValidationAndConversion(
            node_def, error::INTERNAL,
            "Failed to convert input value to a TRT_TensorOrWeights: Scalar "
            "input tensor is not supported since the first dimension is "
            "treated "
            "as batch dimension by TRT");
      } else {
        RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
                                   "Input \"value\" for Unpack must be rank 2 "
                                   "or greater, at my_unpack");
      }
    }
  }

  const std::vector<float> common_input = InitTestVector<float>(6);

  Status run_status = trt_mode_ == TrtTestMode::kDynamicShape
                          ? errors::Unimplemented(
                                "Strided slice op not implemented for dynamic "
                                "shape input")
                          : Status::OK();

  std::vector<UnpackTestParams> params = {
      {/*input_shape=*/{1, 1, 2, 1, 3, 1},
       /*input_value=*/common_input,
       /*axis=*/4,
       /*num=*/3,
       /*expected_output_dims=*/{1, 1, 2, 1, 1},
       /*expected_outputs=*/{{0, 3}, {1, 4}, {2, 5}},
       /*run_status=*/run_status},
      {/*input_shape=*/{1, 1, 2, 1, 3},
       /*input_value=*/common_input,
       /*axis=*/4,
       /*num=*/3,
       /*expected_output_dims=*/{1, 1, 2, 1},
       /*expected_outputs=*/{{0, 3}, {1, 4}, {2, 5}},
       /*run_status=*/run_status},
      {/*input_shape=*/{1, 1, 2, 3},
       /*input_value=*/common_input,
       /*axis=*/1,
       /*num=*/1,
       /*expected_output_dims=*/{1, 2, 3},
       /*expected_outputs=*/{InitTestVector<float>(6)},
       /*run_status=*/run_status},
      {/*input_shape=*/{1, 6, 1},
       /*input_value=*/common_input,
       /*axis=*/-2,
       /*num=*/6,
       /*expected_output_dims=*/{1, 1},
       /*expected_outputs=*/{{0}, {1}, {2}, {3}, {4}, {5}},
       /*run_status=*/run_status},
      {/*input_shape=*/{1, 6},
       /*input_value=*/common_input,
       /*axis=*/1,
       /*num=*/6,
       /*expected_output_dims=*/{1},
       /*expected_outputs=*/{{0}, {1}, {2}, {3}, {4}, {5}},
       /*run_status=*/run_status},
  };
  for (auto p : params) {
    TestConvertUnpack(this, p);
  }
}

// Get the NodeDef for Pack.
NodeDef GetPackNodeDef(DataType dtype, int num_inputs, int axis) {
  Scope s = Scope::NewRootScope();
  std::vector<Input> values;
  for (int i = 0; i < num_inputs; ++i) {
    const string input_name = StrCat("values_", i);
    values.push_back(ops::Placeholder(s.WithOpName(input_name), dtype));
  }
  // The Pack op is exposed as Stack in the C++ API.
  auto pack =
      ops::Stack(s.WithOpName("my_pack"), absl::Span<const Input>(values),
                 ops::Stack::Axis(axis));
  return pack.operation.node()->def();
}
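// Pack/Stack inserts a new dimension at `axis`; e.g. stacking two {1,2,3}
// inputs along axis 1 produces a {1,2,2,3} output (see the params below).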

TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertPack) {
  struct TestParams {
    std::vector<std::vector<int>> input_shapes;
    std::vector<std::vector<int>> partial_input_shapes;
    std::vector<std::vector<float>> input_values;
    int axis;
    std::vector<int> expected_output_dims;
    std::vector<float> expected_output;
    Status conversion_status;
    Status runtime_status;
    bool input_1_is_weight;
  };

  const std::vector<std::vector<float>> common_input{
      InitTestVector<float>(6), InitTestVector<float>(6, /*start_value=*/6)};
  std::vector<TestParams> params = {
      // Second input is weight, should fail in implicit batch mode.
      {/*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
       /*partial_input_shapes=*/{{}, {}},
       /*input_values=*/common_input,
       /*axis=*/1,
       /*expected_output_dims=*/{1, 2, 2, 3},
       /*expected_output=*/InitTestVector<float>(12),
       trt_mode_ == TrtTestMode::kImplicitBatch
           ? Status{error::UNIMPLEMENTED,
                    "The input \"values_1\" for Pack must be a tensor, at "
                    "my_pack"}
           : Status::OK(),
       /*runtime_status=*/Status::OK(),
       /*input_1_is_weight=*/true},
      // Axis is out of bounds, should fail.
      {
          /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
          /*partial_input_shapes=*/{{}, {}},
          /*input_values=*/common_input,
          /*axis=*/-5,
          /*expected_output_dims=*/{},
          /*expected_output=*/{},
          Status{error::INVALID_ARGUMENT,
                 "Axis value of -5 is out of bounds, must be in"
                 " range [-4, 4), at my_pack"},
      },
      // Axis is batch dimension, should fail in implicit batch mode.
      {/*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
       /*partial_input_shapes=*/{{}, {}},
       /*input_values=*/common_input,
       /*axis=*/-4,
       /*expected_output_dims=*/{2, 1, 2, 3},
       /*expected_output=*/InitTestVector<float>(12),
       trt_mode_ == TrtTestMode::kImplicitBatch
           ? Status{error::UNIMPLEMENTED,
                    "TensorRT does not allow manipulation of the batch "
                    "dimension, at my_pack"}
           : Status::OK()},
      // Inconsistent rank, should fail.
      {
          /*input_shapes=*/{{1, 2, 3}, {1, 6}},
          /*partial_input_shapes=*/{{}, {}},
          /*input_values=*/common_input,
          /*axis=*/1,
          /*expected_output_dims=*/{},
          /*expected_output=*/{},
          Status{error::INVALID_ARGUMENT,
                 "Received inputs with inconsistent rank, at my_pack"},
      },
      {
          /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
          /*partial_input_shapes=*/{{}, {}},
          /*input_values=*/common_input,
          /*axis=*/1,
          /*expected_output_dims=*/{1, 2, 2, 3},
          /*expected_output=*/InitTestVector<float>(12),
      },
      {
          /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
          /*partial_input_shapes=*/{{}, {}},
          /*input_values=*/common_input,
          /*axis=*/2,
          /*expected_output_dims=*/{1, 2, 2, 3},
          /*expected_output=*/
          {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11},
      },
      {
          /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
          /*partial_input_shapes=*/{{}, {}},
          /*input_values=*/common_input,
          /*axis=*/3,
          /*expected_output_dims=*/{1, 2, 3, 2},
          /*expected_output=*/
          {0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11},
      },
      {
          /*input_shapes=*/{{1, 2, 3}},
          /*partial_input_shapes=*/{{}},
          /*input_values=*/{InitTestVector<float>(6)},
          /*axis=*/1,
          /*expected_output_dims=*/{1, 1, 2, 3},
          /*expected_output=*/InitTestVector<float>(6),
      },
      {
          /*input_shapes=*/{{1, 2, 3}},
          /*partial_input_shapes=*/{{}},
          /*input_values=*/{InitTestVector<float>(6)},
          /*axis=*/2,
          /*expected_output_dims=*/{1, 2, 1, 3},
          /*expected_output=*/InitTestVector<float>(6),
      },
  };
  // Inputs have inconsistent shapes, should fail.
  if (trt_mode_ != TrtTestMode::kDynamicShape) {
    params.push_back(TestParams{
        /*input_shapes=*/{{1, 2, 3}, {1, 3, 2}},
        /*partial_input_shapes=*/{{}, {}},
        /*input_values=*/common_input,
        /*axis=*/1,
        /*expected_output_dims=*/{},
        /*expected_output=*/InitTestVector<float>(12),
        Status{error::INVALID_ARGUMENT,
               "Received inputs with inconsistent shape, at my_pack"}});
  } else {
    // In dynamic shape mode we cannot catch inconsistent shapes at conversion
    // time, only during runtime. But TensorRT does not raise a proper runtime
    // error; instead, it aborts the program with the following message:
    //  Assertion failed: t->start.d[i] + t->extent.d[i] <= r.dims.d[i]
    // ../builder/cudnnBuilderGraph.cpp:862
    // Aborting...
    // TODO(tfeher): Add a dynamic shapes test once TRT handles shape errors
    // decently.
  }
  if (trt_mode_ == TrtTestMode::kDynamicShape) {
    // Test with mixed dynamic / static shape input tensors.
    params.push_back(
        TestParams{/*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
                   /*partial_input_shapes=*/{{-1, -1, -1}, {1, 2, 3}},
                   /*input_values=*/common_input,
                   /*axis=*/2,
                   /*expected_output_dims=*/{1, 2, 2, 3},
                   /*expected_output=*/
                   {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11}});
  }
  for (auto p : params) {
    Reset();
    const int num_inputs = p.input_shapes.size();
    EXPECT_EQ(num_inputs, p.input_values.size());

    NodeDef node_def = GetPackNodeDef(tf_type_, num_inputs, p.axis);
    // Create inputs.
    for (int j = 0; j < num_inputs; ++j) {
      if (j == 1 && p.input_1_is_weight) {
        AddTestWeights(StrCat("values_", j), p.input_shapes[j],
                       p.input_values[j], tf_type_);
      } else {
        AddTestTensor(StrCat("values_", j), p.input_shapes[j], tf_type_,
                      p.input_values[j], p.partial_input_shapes[j]);
      }
    }
    TestOpConverter("my_pack", node_def, p.expected_output_dims,
                    p.conversion_status, p.runtime_status,
                    ElementsAreArray(p.expected_output));
  }
}

// Get the NodeDef for ArgMin or ArgMax.
template <typename OpType>
NodeDef GetArgMinMaxNodeDef(DataType input_dtype, DataType output_dtype) {
  Scope s = Scope::NewRootScope();
  auto input = ops::Placeholder(s.WithOpName("input"), input_dtype);
  auto dimension = ops::Placeholder(s.WithOpName("dimension"), DT_INT32);
  auto attrs = OpType::OutputType(output_dtype);
  auto arg = OpType(s.WithOpName("my_arg"), input, dimension, attrs);
  return arg.operation.node()->def();
}
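// The OutputType attr selects the dtype of the returned indices. The tests
// below request DT_INT32, since the converter rejects int64 output (see the
// error test in ConvertArgMinMax).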

struct ArgMinMaxTestParams {
  std::vector<int> input_shape;
  std::vector<float> input_value;
  int axis;
  std::vector<int> expected_output_dims;
  std::vector<int> expected_argmax_output;
  std::vector<int> expected_argmin_output;
  Status status;
};

template <typename OpType>
void TestConvertArgMinMax(ParameterizedOpConverterTestBase* test,
                          DataType _tf_type, ArgMinMaxTestParams& p) {
  test->Reset();

  NodeDef node_def = GetArgMinMaxNodeDef<OpType>(_tf_type,
                                                 /*output_dtype=*/DT_INT32);

  std::vector<int> expected_out;
  if (node_def.op() == "ArgMax") {
    expected_out = p.expected_argmax_output;
  } else if (node_def.op() == "ArgMin") {
    expected_out = p.expected_argmin_output;
  } else {
    ASSERT_TRUE(false);
  }

  test->AddTestTensor("input", p.input_shape, _tf_type, p.input_value);
  test->AddTestWeights("dimension", {1}, {p.axis}, DT_INT32);

  test->TestOpConverter("my_arg", node_def, p.expected_output_dims,
                        /*expected_conversion_status=*/p.status,
                        /*expected_runtime_status=*/Status::OK(),
                        /*matcher=*/ElementsAreArray(expected_out), {DT_INT32});
}

TEST_P(OpConverter_FP32_FP16_Test, ConvertArgMinMax) {
  {
    // Dimension is a tensor, should fail.
    Reset();
    NodeDef node_def =
        GetArgMinMaxNodeDef<ops::ArgMax>(tf_type_,
                                         /*output_dtype=*/DT_INT32);
    AddTestTensor("input", {1, 2, 3});
    AddTestTensor("dimension", {1});
    RunValidationAndConversion(
        node_def, error::UNIMPLEMENTED,
        "The input \"dimension\" for ArgMax must be a constant, at my_arg");
  }
  {
    // Output type is INT64, should fail.
    Reset();
    NodeDef node_def =
        GetArgMinMaxNodeDef<ops::ArgMax>(tf_type_,
                                         /*output_dtype=*/DT_INT64);
    AddTestTensor("input", {1, 2, 3});
    AddTestWeights("dimension", {1}, {3}, DT_INT32);
    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
                               "Output type int64 is not supported, at my_arg");
  }

  const std::vector<float> common_input = InitTestVector<float>(6);
  std::vector<ArgMinMaxTestParams> params = {
      {/*input_shape=*/{2, 3},
       /*input_value=*/common_input,
       /*axis=*/0,
       /*expected_output_dims=*/{3},
       /*expected_argmax_output=*/{1, 1, 1},
       /*expected_argmin_output=*/{0, 0, 0},
       trt_mode_ == TrtTestMode::kImplicitBatch
           ? errors::Unimplemented("TensorRT does not allow manipulation of "
                                   "the batch dimension, at my_arg")
           : Status::OK()},
      {
          /*input_shape=*/{1, 6},
          /*input_value=*/common_input,
          /*axis=*/1,
          /*expected_output_dims=*/{1},
          /*expected_argmax_output=*/{5},
          /*expected_argmin_output=*/{0},
      },
      {
          /*input_shape=*/{1, 10},
          /*input_value=*/
          {-5.0f, 3.0f, 5.0f, 1.0f, 6.0f, -9.0f, 7.0f, 1.0f, 0.0f, -1.0f},
          /*axis=*/-1,
          /*expected_output_dims=*/{1},
          /*expected_argmax_output=*/{6},
          /*expected_argmin_output=*/{5},
      },
      {
          /*input_shape=*/{1, 2, 3},
          /*input_value=*/common_input,
          /*axis=*/2,
          /*expected_output_dims=*/{1, 2},
          /*expected_argmax_output=*/{2, 2},
          /*expected_argmin_output=*/{0, 0},
      },
      {
          /*input_shape=*/{1, 2, 3},
          /*input_value=*/common_input,
          /*axis=*/-2,
          /*expected_output_dims=*/{1, 3},
          /*expected_argmax_output=*/{1, 1, 1},
          /*expected_argmin_output=*/{0, 0, 0},
      },
      {
          /*input_shape=*/{1, 2, 1, 3},
          /*input_value=*/common_input,
          /*axis=*/3,
          /*expected_output_dims=*/{1, 2, 1},
          /*expected_argmax_output=*/{2, 2},
          /*expected_argmin_output=*/{0, 0},
      },
      {
          /*input_shape=*/{1, 2, 1, 3},
          /*input_value=*/common_input,
          /*axis=*/-3,
          /*expected_output_dims=*/{1, 1, 3},
          /*expected_argmax_output=*/{1, 1, 1},
          /*expected_argmin_output=*/{0, 0, 0},
      },
      {/*input_shape=*/{1, 2, 1, 1, 3},
       /*input_value=*/common_input,
       /*axis=*/4,
       /*expected_output_dims=*/{1, 2, 1, 1},
       /*expected_argmax_output=*/{2, 2},
       /*expected_argmin_output=*/{0, 0},
#if !IS_TRT_VERSION_GE(7, 0, 0, 11)
       errors::Unimplemented("op is not able to support tensors with 4+"
                             " dimensions (excluding batch size)")
#else
       Status::OK()
#endif
      },
      {/*input_shape=*/{1, 2, 1, 1, 3},
       /*input_value=*/common_input,
       /*axis=*/-4,
       /*expected_output_dims=*/{1, 1, 1, 3},
       /*expected_argmax_output=*/{1, 1, 1},
       /*expected_argmin_output=*/{0, 0, 0},
#if !IS_TRT_VERSION_GE(7, 0, 0, 11)
       errors::Unimplemented("op is not able to support tensors with 4+"
                             " dimensions (excluding batch size)")
#else
       Status::OK()
#endif
      },
  };

  for (auto p : params) {
    TestConvertArgMinMax<ops::ArgMin>(this, tf_type_, p);
    TestConvertArgMinMax<ops::ArgMax>(this, tf_type_, p);
  }
}

// Get the NodeDef for DepthToSpace or SpaceToDepth.
template <typename OpType>
NodeDef GetDepthSpaceShuffleNodeDef(DataType dtype, int block_size,
                                    string data_format) {
  Scope s = Scope::NewRootScope();
  auto input = ops::Placeholder(s.WithOpName("input"), dtype);
  auto attrs = OpType::DataFormat(data_format);
  auto shuffle = OpType(s.WithOpName("my_shuffle"), input, block_size, attrs);
  return shuffle.operation.node()->def();
}

struct DepthSpaceShuffleTestParams {
  std::vector<int> input_dims;
  std::vector<int> input_value;
  int block_size;
  string data_format;
  std::vector<int> expected_output_dims;
  std::vector<int> expected_output;
};
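// DepthToSpace moves block_size^2 channels into a block_size x block_size
// spatial tile, e.g. NCHW {1,4,2,2} -> {1,1,4,4} for block_size=2;
// SpaceToDepth is the inverse shuffle (cf. the params in the two tests
// below).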

template <typename OpType>
void TestConvertDepthSpaceShuffle(
    ParameterizedOpConverterTestBase* test,
    const std::vector<DepthSpaceShuffleTestParams>& params) {
  Status status = Status::OK();

  {
    // Input is a weight, should fail.
    test->Reset();
    NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(
        test->get_tf_type(), 2, "NCHW");
    test->AddTestWeights<float>("input", {1, 4, 1, 1}, {1, 2, 3, 4});
    test->RunValidationAndConversion(
        node_def, error::UNIMPLEMENTED,
        StrCat("The input \"input\" for ", node_def.op(),
               " must be a tensor, at my_shuffle")
            .c_str());
  }
  {
    // Input rank != 4, should fail.
    test->Reset();
    NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(
        test->get_tf_type(), 2, "NCHW");
    test->AddTestTensor("input", {1, 16, 32});
    test->RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
                                     StrCat("The input to ", node_def.op(),
                                            " must be rank 4, at "
                                            "my_shuffle")
                                         .c_str());
  }
  {
    // Unsupported format, should fail.
    test->Reset();
    NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(
        test->get_tf_type(), 2, "NCHW_VECT_C");
    test->AddTestTensor("input", {1, 16, 32, 32});
    test->RunValidationAndConversion(
        node_def, error::UNIMPLEMENTED,
        "Data format NCHW_VECT_C is not supported, at my_shuffle");
  }
  if (test->get_trt_mode() != TrtTestMode::kDynamicShape) {
    // In dynamic shape mode, we cannot check input dimension values at
    // conversion time; therefore, we cannot confirm block_size vs. input dim
    // consistency. We rely on the user to provide a valid TF graph. Otherwise
    // TRT will fail with a runtime error.
    if (std::is_same<OpType, ops::DepthToSpace>::value) {
      // Channels not divisible by block_size, should fail.
      test->Reset();
      NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(
          test->get_tf_type(), 3, "NCHW");
      test->AddTestTensor("input", {1, 16, 32, 32});
      test->RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
                                       "Number of channels must be divisible by"
                                       " block_size*block_size, at my_shuffle");
    } else {
      {  // Width not divisible by block_size, should fail.
        test->Reset();
        NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(
            test->get_tf_type(), 3, "NCHW");
        test->AddTestTensor("input", {1, 16, 9, 32});
        test->RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
                                         "Width and height must be divisible by"
                                         " block_size, at my_shuffle");
      }
      {
        // Height not divisible by block_size, should fail.
        test->Reset();
        NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(
            test->get_tf_type(), 3, "NCHW");
        test->AddTestTensor("input", {1, 16, 32, 9});
        test->RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
                                         "Width and height must be divisible by"
                                         " block_size, at my_shuffle");
      }
    }
  }

  for (auto p : params) {
    test->Reset();
    NodeDef node_def = GetDepthSpaceShuffleNodeDef<OpType>(
        test->get_tf_type(), p.block_size, p.data_format);
    test->AddTestTensor("input", p.input_dims, p.input_value);
    test->TestOpConverter("my_shuffle", node_def, p.expected_output_dims,
                          status, Status::OK(),
                          ElementsAreArray(p.expected_output));
  }
}

TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertDepthToSpace) {
  const std::vector<int> common_input = InitTestVector<int>(16);
  std::vector<DepthSpaceShuffleTestParams> params = {
      {
          /*input_dims=*/{1, 4, 2, 2},
          /*input_value=*/common_input,
          /*block_size=*/2,
          /*data_format=*/"NCHW",
          /*expected_output_dims=*/{1, 1, 4, 4},
          /*expected_output=*/
          {0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15},
      },
      {
          /*input_dims=*/{1, 2, 2, 4},
          /*input_value=*/common_input,
          /*block_size=*/2,
          /*data_format=*/"NHWC",
          /*expected_output_dims=*/{1, 4, 4, 1},
          /*expected_output=*/
          {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15},
      },
      {
          /*input_dims=*/{1, 16, 1, 1},
          /*input_value=*/common_input,
          /*block_size=*/4,
          /*data_format=*/"NCHW",
          /*expected_output_dims=*/{1, 1, 4, 4},
          /*expected_output=*/InitTestVector<int>(16),
      },
      {
          /*input_dims=*/{1, 2, 2, 8},
          /*input_value=*/InitTestVector<int>(32),
          /*block_size=*/2,
          /*data_format=*/"NHWC",
          /*expected_output_dims=*/{1, 4, 4, 2},
          /*expected_output=*/{0,  1,  2,  3,  8,  9,  10, 11, 4,  5,  6,
                               7,  12, 13, 14, 15, 16, 17, 18, 19, 24, 25,
                               26, 27, 20, 21, 22, 23, 28, 29, 30, 31},
      }};

  TestConvertDepthSpaceShuffle<ops::DepthToSpace>(this, params);
}

TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertSpaceToDepth) {
  const std::vector<int> common_input = InitTestVector<int>(16);
  std::vector<DepthSpaceShuffleTestParams> params = {
      {
          /*input_dims=*/{1, 1, 4, 4},
          /*input_value=*/common_input,
          /*block_size=*/2,
          /*data_format=*/"NCHW",
          /*expected_output_dims=*/{1, 4, 2, 2},
          /*expected_output=*/
          {0, 2, 8, 10, 1, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15},
      },
      {
          /*input_dims=*/{1, 4, 4, 1},
          /*input_value=*/common_input,
          /*block_size=*/2,
          /*data_format=*/"NHWC",
          /*expected_output_dims=*/{1, 2, 2, 4},
          /*expected_output=*/
          {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15},
      },
      {
          /*input_dims=*/{1, 1, 4, 4},
          /*input_value=*/common_input,
          /*block_size=*/4,
          /*data_format=*/"NCHW",
          /*expected_output_dims=*/{1, 16, 1, 1},
          /*expected_output=*/InitTestVector<int>(16),
      },
      {
          /*input_dims=*/{1, 4, 4, 2},
          /*input_value=*/InitTestVector<int>(32),
          /*block_size=*/2,
          /*data_format=*/"NHWC",
          /*expected_output_dims=*/{1, 2, 2, 8},
          /*expected_output=*/{0,  1,  2,  3,  8,  9,  10, 11, 4,  5,  6,
                               7,  12, 13, 14, 15, 16, 17, 18, 19, 24, 25,
                               26, 27, 20, 21, 22, 23, 28, 29, 30, 31},
      },
  };
  TestConvertDepthSpaceShuffle<ops::SpaceToDepth>(this, params);
}

TEST_P(OpConverter_FP32_FP16_Test, ConvertClipByValue) {
  Scope s = Scope::NewRootScope();
  auto t = ops::Placeholder(s.WithOpName("t"), tf_type_);
  auto clip_value_min =
      ops::Placeholder(s.WithOpName("clip_value_min"), tf_type_);
  auto clip_value_max =
      ops::Placeholder(s.WithOpName("clip_value_max"), tf_type_);
  auto clip = ops::ClipByValue(s.WithOpName("my_clip"), t, clip_value_min,
                               clip_value_max);
  const NodeDef& node_def = clip.operation.node()->def();

  nvinfer1::DataType trt_type_;
  TF_ASSERT_OK(TfTypeToTrtType(tf_type_, &trt_type_));

  {
    // Input is a weight, should fail.
    Reset();
    AddTestWeights("t", {1, 2, 3}, {1, 2, 3, 4, 5, 6}, tf_type_);
    AddTestWeights("clip_value_min", {1}, {1}, tf_type_);
    AddTestWeights("clip_value_max", {1}, {5}, tf_type_);
    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
                               "The input \"t\" for ClipByValue must be a "
                               "tensor, at my_clip");
  }
  {
    // Clip min is a tensor, should fail.
    Reset();
    AddTestTensor("t", {1, 2, 3});
    AddTestTensor("clip_value_min", {1});
    AddTestWeights("clip_value_max", {1}, {1}, tf_type_);
    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
                               "The input \"clip_value_min\" for ClipByValue "
                               "must be a constant, at my_clip");
  }
  {
    // Clip max is a tensor, should fail.
    Reset();
    AddTestTensor("t", {1, 2, 3});
    AddTestWeights("clip_value_min", {1}, {1}, tf_type_);
    AddTestTensor("clip_value_max", {1});
    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
                               "The input \"clip_value_max\" for ClipByValue "
                               "must be a constant, at my_clip");
  }

  struct TestParams {
    std::vector<int> dims;
    int clip_value_min;
    int clip_value_max;
    std::vector<float> expected_output;
  };

  const std::vector<float> common_input = InitTestVector<float>(6);
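  // Every param below clips common_input = {0,...,5}; e.g. with
  // clip_value_min=2 and clip_value_max=4 the result is {2, 2, 2, 3, 4, 4}.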

  std::vector<TestParams> params = {{
                                        /*dims=*/{6},
                                        /*clip_value_min=*/2,
                                        /*clip_value_max=*/4,
                                        /*expected_output=*/{2, 2, 2, 3, 4, 4},
                                    },
                                    {
                                        /*dims=*/{1, 6},
                                        /*clip_value_min=*/2,
                                        /*clip_value_max=*/4,
                                        /*expected_output=*/{2, 2, 2, 3, 4, 4},
                                    },
                                    {
                                        /*dims=*/{1, 2, 3},
                                        /*clip_value_min=*/2,
                                        /*clip_value_max=*/4,
                                        /*expected_output=*/{2, 2, 2, 3, 4, 4},
                                    },
                                    {
                                        /*dims=*/{1, 2, 3, 1},
                                        /*clip_value_min=*/2,
                                        /*clip_value_max=*/4,
                                        /*expected_output=*/{2, 2, 2, 3, 4, 4},
                                    },
                                    {
                                        /*dims=*/{1, 1, 3, 1, 2},
                                        /*clip_value_min=*/2,
                                        /*clip_value_max=*/4,
                                        /*expected_output=*/{2, 2, 2, 3, 4, 4},
                                    },
                                    {
                                        /*dims=*/{1, 1, 3, 1, 2, 1},
                                        /*clip_value_min=*/2,
                                        /*clip_value_max=*/4,
                                        /*expected_output=*/{2, 2, 2, 3, 4, 4},
                                    },
                                    {
                                        /*dims=*/{2, 1, 3},
                                        /*clip_value_min=*/-1,
                                        /*clip_value_max=*/8,
                                        /*expected_output=*/common_input,
                                    }};

  for (auto p : params) {
    Reset();

    AddTestTensor("t", p.dims, tf_type_, common_input);
    AddTestWeights("clip_value_min", {1}, {p.clip_value_min}, tf_type_);
    AddTestWeights("clip_value_max", {1}, {p.clip_value_max}, tf_type_);

    TestOpConverter("my_clip", node_def, p.dims,
                    /*expected_conversion_status=*/Status::OK(),
                    /*expected_runtime_status=*/Status::OK(),
                    /*matcher=*/ElementsAreArray(p.expected_output));
  }
}

// Get the NodeDef for SquaredDifference.
NodeDef GetSquaredDifferenceNodeDef(DataType dtype) {
  Scope s = Scope::NewRootScope();
  auto x = ops::Placeholder(s.WithOpName("x"), dtype);
  auto y = ops::Placeholder(s.WithOpName("y"), dtype);
  auto squared_diff =
      ops::SquaredDifference(s.WithOpName("my_squared_diff"), x, y);
  return squared_diff.operation.node()->def();
}
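// SquaredDifference computes (x - y)^2 elementwise with broadcasting; e.g.
// x={0,1,2,3,4,5} and y={0,-1,3,0,10,-7} give {0,4,1,9,36,144} in the params
// below.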

TEST_P(OpConverter_FP32_FP16_Test, ConvertSquaredDifference) {
  {
    // Input is a weight, should fail.
    Reset();
    NodeDef node_def = GetSquaredDifferenceNodeDef(tf_type_);
    AddTestWeights<float>("x", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
    AddTestTensor("y", {1, 1, 2, 3});
    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
                               "The input \"x\" for SquaredDifference must be "
                               "a tensor, at my_squared_diff");
  }

  struct TestParams {
    std::vector<int> dims_x;
    std::vector<int> dims_y;
    std::vector<float> value_x;
    std::vector<float> value_y;
    std::vector<int> expected_output_dims;
    std::vector<float> expected_output;
    Status status;
    Status runtime_status;
  };

  const std::vector<float> common_input = InitTestVector<float>(6);
  std::vector<TestParams> params = {
      {/*dims_x=*/{1, 2, 3},
       /*dims_y=*/{1, 7, 5},
       /*value_x=*/common_input,
       /*value_y=*/std::vector<float>(7 * 5, 0),
       /*expected_output_dims=*/{1, 1, 2, 3},
       /*expected_output=*/common_input,
       trt_mode_ == TrtTestMode::kDynamicShape
           ? Status::OK()
           : errors::InvalidArgument("Infeasible broadcast scheme"),
       errors::Internal(
           "Binding index out of range. This can happen if profile is not set, "
           "or the network is invalid for the current profile.")},
      {
          /*dims_x=*/{1, 1, 2, 3},
          /*dims_y=*/{1, 1, 2, 3},
          /*value_x=*/common_input,
          /*value_y=*/{0, -1, 3, 0, 10, -7},
          /*expected_output_dims=*/{1, 1, 2, 3},
          /*expected_output=*/{0, 4, 1, 9, 36, 144},
      },
      {
          /*dims_x=*/{1, 1, 2, 3},
          /*dims_y=*/{1, 1, 1, 3},
          /*value_x=*/common_input,
          /*value_y=*/{0, 1, 2},
          /*expected_output_dims=*/{1, 1, 2, 3},
          /*expected_output=*/{0, 0, 0, 9, 9, 9},
      },
  };

  for (auto p : params) {
    Reset();
    NodeDef node_def = GetSquaredDifferenceNodeDef(tf_type_);
    AddTestTensor("x", p.dims_x, p.value_x);
    AddTestTensor("y", p.dims_y, p.value_y);
    TestOpConverter("my_squared_diff", node_def, p.expected_output_dims,
                    p.status, p.runtime_status,
                    ElementsAreArray(p.expected_output));
  }
}
7372 
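// Builds a NodeDef for ResizeBilinear or ResizeNearestNeighbor (selected via
// the OpType template parameter) with the given align_corners attribute.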
template <typename OpType>
NodeDef MakeResizeNodeDef(DataType dtype, bool align_corners) {
  Scope s = Scope::NewRootScope();
  auto input = ops::Placeholder(s.WithOpName("input"), dtype);
  auto size = ops::Placeholder(s.WithOpName("size"), DT_INT32);
  auto attrs = typename OpType::Attrs().AlignCorners(align_corners);
  auto resize = OpType(s.WithOpName("my_resize"), input, size, attrs);
  return resize.operation.node()->def();
}

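// Parameters for a single resize test case. The same input is checked against
// separate expected outputs for nearest-neighbor and bilinear resizing.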
struct ResizeTestParams {
  std::vector<int> input_dims;
  std::vector<int> output_resize_dims;
  std::vector<float> input_value;
  bool align_corners;
  std::vector<int> expected_output_dims;
  std::vector<float> expected_nearest_output_values;
  std::vector<float> expected_bilinear_output_values;
  Status status;
};

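// Runs one resize test case for OpType: builds the resize node, feeds the
// input tensor and the constant output size, then checks the output against
// the expected values that correspond to OpType.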
template <typename OpType>
void TestConvertResize(ParameterizedOpConverterTestBase* test,
                       ResizeTestParams& p) {
  test->Reset();
  // Create resize node.
  NodeDef node_def =
      MakeResizeNodeDef<OpType>(test->get_tf_type(), p.align_corners);

  test->AddTestTensor("input", p.input_dims, test->get_tf_type(),
                      p.input_value);
  // Create output size.
  test->AddTestWeights("size", {2}, p.output_resize_dims, DT_INT32);

  std::vector<float> expected_out;

  if (node_def.op() == "ResizeBilinear") {
    expected_out = p.expected_bilinear_output_values;
  } else if (node_def.op() == "ResizeNearestNeighbor") {
    expected_out = p.expected_nearest_output_values;
  } else {
    ASSERT_TRUE(false);
  }

  test->TestOpConverter("my_resize", node_def, p.expected_output_dims,
                        /*expected_conversion_status=*/p.status,
                        /*expected_runtime_status=*/p.status,
                        /*matcher=*/ElementsAreArray(expected_out),
                        /*out_tf_types=*/{DT_FLOAT});
}

TEST_P(OpConverter_FP32_FP16_Test, ConvertResize) {
  {
    // First input is weight, should fail.
    Reset();
    NodeDef node_def = MakeResizeNodeDef<ops::ResizeBilinear>(tf_type_,
                                                              /*align_corners=*/
                                                              true);
    AddTestWeights<float>("input", {1, 2}, {1, 2});
    AddTestWeights<int>("size", {1, 2}, {1, 2});
    RunValidationAndConversion(
        node_def, error::UNIMPLEMENTED,
        "The input \"input\" for ResizeBilinear must be a "
        "tensor, at my_resize");
  }
  {
    // Output dimension is a tensor, should fail.
    Reset();
    NodeDef node_def = MakeResizeNodeDef<ops::ResizeBilinear>(tf_type_,
                                                              /*align_corners=*/
                                                              true);
    AddTestTensor("input", {1, 2});
    AddTestTensor("size", {1, 2});
    RunValidationAndConversion(
        node_def, error::UNIMPLEMENTED,
        "The input \"size\" for ResizeBilinear must be a "
        "constant, at my_resize");
  }

  const auto job_status =
      trt_mode_ == TrtTestMode::kDynamicShape
          ? errors::Unimplemented(
                "TensorRT IResizeLayer requires input with static "
                "shape")
          : Status::OK();

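  // For a 1x2 input {2, -1} resized to 2x3: nearest-neighbor copies the
  // nearest source pixel per output column. For bilinear, align_corners=true
  // maps output corners onto input corners, so the middle column is the
  // midpoint (2 + (-1)) / 2 = 0.5; align_corners=false scales coordinates by
  // in/out = 2/3, placing the middle column at x = 2/3 and yielding
  // 2 * (1/3) + (-1) * (2/3) = 0.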
  std::vector<ResizeTestParams> params{
      {/*input_dims=*/{1, 1, 2, 1},    // N, H, W, C
       /*output_resize_dims=*/{2, 3},  // H_out, W_out
       /*input_value=*/{2.0f, -1.0f},
       /*align_corners=*/false,
       /*expected_output_dims=*/{1, 2, 3, 1},  // N, H, W, C
       /*expected_nearest_output_values=*/
       {2.0f, 2.0f, -1.0f, 2.0f, 2.0f, -1.0f},
       /*expected_bilinear_output_values=*/
       {2.0f, 0.f, -1.0f, 2.0f, 0.f, -1.0f},
       /*status=*/job_status},
      {/*input_dims=*/{1, 1, 2, 1},    // N, H, W, C
       /*output_resize_dims=*/{2, 3},  // H_out, W_out
       /*input_value=*/{2.0f, -1.0f},
       /*align_corners=*/true,
       /*expected_output_dims=*/{1, 2, 3, 1},  // N, H, W, C
       /*expected_nearest_output_values=*/
       {2.0f, 2.0f, -1.0f, 2.0f, 2.0f, -1.0f},
       /*expected_bilinear_output_values=*/
       {2.0f, 0.5f, -1.0f, 2.0f, 0.5f, -1.0f},
       /*status=*/job_status}};

  for (auto p : params) {
    TestConvertResize<ops::ResizeNearestNeighbor>(this, p);

// This use case is not supported as of TRT version 7.1
#if IS_TRT_VERSION_GE(7, 1, 0, 0)
    if (!p.align_corners) {
      p.status = errors::InvalidArgument(
          "Cannot Convert Bilinear Resize when align_corners=False");
    }
#endif

    TestConvertResize<ops::ResizeBilinear>(this, p);
  }
}

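// Builds a NodeDef for the Pad op with a placeholder "input" tensor and an
// int32 "padding" placeholder for the paddings matrix.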
NodeDef MakePadNodeDef(std::string name, DataType dtype) {
  Scope s = Scope::NewRootScope();
  auto input = ops::Placeholder(s.WithOpName("input"), dtype);
  auto padding = ops::Placeholder(s.WithOpName("padding"), DT_INT32);
  auto pad = ops::Pad(s.WithOpName(name), input, padding);
  return pad.operation.node()->def();
}

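// Parameters for a single Pad test case. pad_values has shape pad_dims =
// {#dims, 2}: one {pad_before, pad_after} pair per input dimension.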
struct PadTestParams {
  std::vector<int> input_dims;
  std::vector<int> pad_dims;
  std::vector<int> pad_values;
  std::vector<float> input_values;
  std::vector<int> expected_output_dims;
  std::vector<float> expected_output_values;
  Status status;
};

TEST_P(OpConverter_FP32_FP16_Test, ConvertPad) {
  {
    // First input is weight, should fail.
    Reset();
    NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
    AddTestWeights("input", {1, 2}, {1, 2}, tf_type_);
    AddTestWeights<int>("padding", {1, 2}, {1, 2});
    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
                               "The input \"tensor\" for Pad must be a "
                               "tensor");
  }
  {
    // padding is a tensor, should fail.
    Reset();
    NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
    AddTestTensor("input", {1, 2});
    AddTestTensor("padding", {1, 2});
    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
                               "The input \"paddings\" for Pad must be a "
                               "constant");
  }
  {
    // Make sure that ranges are inferred across a Pad.
    Reset();
    NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
    AddTestTensor("input", {1, 1, 2, 1});
    AddTestWeights<int>("padding", {4, 2}, {0, 0, 1, 0, 0, 1, 0, 0});
    TRT_TensorOrWeights input;
    TRT_TensorOrWeights output;
    RunValidationAndConversion(node_def);
    TF_EXPECT_OK(GetTensorOrWeights("input", &input));
    TF_EXPECT_OK(GetTensorOrWeights("my_pad", &output));
    ITensorProxyPtr input_tensor = input.tensor();
    converter_->ProvideQuantizationRange(&input_tensor, -5.0f, 5.0f);
    auto ranges = quantization_ranges();
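    // The symmetric range [-5, 5] is recorded as a single max-abs value keyed
    // by the underlying ITensor.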
    EXPECT_EQ(5.0f, ranges[input.tensor()->trt_tensor()]);
  }

  std::vector<PadTestParams> params{
      // 1 padding dim
      {
          /*input_dims=*/{1, 1, 3, 2},  // N, H, W, C
          /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
          /*pad_values=*/{0, 0, 0, 0, 0, 1, 0, 0},
          /*input_values=*/{1, 2, 3, 4, 5, 6},
          /*expected_output_dims=*/{1, 1, 4, 2},  // N, H, W, C
          /*expected_output_values=*/
          {1, 2, 3, 4, 5, 6, 0, 0},
      },
      {
          /*input_dims=*/{1, 1, 3, 2},  // N, H, W, C
          /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
          /*pad_values=*/{0, 0, 0, 0, 0, 0, 0, 1},
          /*input_values=*/{1, 2, 3, 4, 5, 6},
          /*expected_output_dims=*/{1, 1, 3, 3},  // N, H, W, C
          /*expected_output_values=*/
          {1, 2, 0, 3, 4, 0, 5, 6, 0},
      },
      {
          /*input_dims=*/{1, 1, 3, 2},  // N, H, W, C
          /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
          /*pad_values=*/{0, 0, 1, 0, 0, 0, 0, 0},
          /*input_values=*/{1, 2, 3, 4, 5, 6},
          /*expected_output_dims=*/{1, 2, 3, 2},  // N, H, W, C
          /*expected_output_values=*/
          {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6},
      },
      // 2 padding dims
      {
          /*input_dims=*/{1, 1, 2, 1},  // N, H, W, C
          /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
          /*pad_values=*/{0, 0, 1, 0, 0, 1, 0, 0},
          /*input_values=*/{2.0f, -1.0f},
          /*expected_output_dims=*/{1, 2, 3, 1},  // N, H, W, C
          /*expected_output_values=*/
          {0.0, 0.0, 0.0, 2.0f, -1.0f, 0.0},
      },
      PadTestParams{
          /*input_dims=*/{1, 1, 2, 2},  // N, H, W, C
          /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
          /*pad_values=*/{0, 0, 1, 0, 0, 1, 0, 0},
          /*input_values=*/{2, -1, 3., 4},
          /*expected_output_dims=*/{1, 2, 3, 2},  // N, H, W, C
          /*expected_output_values=*/
          {0, 0, 0, 0, 0, 0, 2, -1, 3, 4, 0, 0},
      },
      PadTestParams{
          /*input_dims=*/{1, 1, 2, 1, 2},  // N, C, H, W, D
          /*pad_dims=*/{5, 2},             // #dims, {pad_before, pad_after}
          /*pad_values=*/{0, 0, 1, 0, 0, 1, 0, 0, 0, 0},
          /*input_values=*/{2, -1, 3., 4},
          /*expected_output_dims=*/{1, 2, 3, 1, 2},  // N, C, H, W, D
          /*expected_output_values=*/
          {0, 0, 0, 0, 0, 0, 2, -1, 3, 4, 0, 0},
      },
      PadTestParams{
          /*input_dims=*/{1, 1, 2, 1, 2},  // N, C, H, W, D
          /*pad_dims=*/{5, 2},             // #dims, {pad_before, pad_after}
          /*pad_values=*/{0, 0, 0, 1, 0, 0, 1, 1, 0, 0},
          /*input_values=*/{2, -1, 3., 4},
          /*expected_output_dims=*/{1, 2, 2, 3, 2},  // N, C, H, W, D
          /*expected_output_values=*/
          {0., 0., 2., -1., 0., 0., 0., 0., 3., 4., 0., 0.,
           0., 0., 0., 0.,  0., 0., 0., 0., 0., 0., 0., 0},
      },
      PadTestParams{
          /*input_dims=*/{1, 1, 2, 1},  // N, H, W, C
          /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
          /*pad_values=*/{1, 0, 0, 0, 0, 1, 0, 0},
          /*input_values=*/{2.0f, -1.0f},
          /*expected_output_dims=*/{2, 1, 3, 1},  // N, H, W, C
          /*expected_output_values=*/{0.0, 0.0, 0.0, 2.0f, -1.0f, 0.0},
          trt_mode_ == TrtTestMode::kImplicitBatch
              ? errors::InvalidArgument("Padding layer does not support "
                                        "padding on batch dimension")
              : Status::OK()},
      PadTestParams{
          /*input_dims=*/{1, 1, 2, 1},  // N, H, W, C
          /*pad_dims=*/{4, 2},          // #dims, {pad_before, pad_after}
          /*pad_values=*/{0, 0, 1, 0, 0, 1, 1, 1},
          /*input_values=*/{2.0f, -1.0f},
          /*expected_output_dims=*/{},
          /*expected_output_values=*/{},
          errors::InvalidArgument("Padding layer does not support padding on "
                                  "> 2")},
      PadTestParams{
          /*input_dims=*/{1, 2, 2},  // N, H, W
          /*pad_dims=*/{3, 2},       // #dims, {pad_before, pad_after}
          /*pad_values=*/{0, 0, 1, 0, 0, 1},
          /*input_values=*/{2, -1, 3., 4},
          /*expected_output_dims=*/{1, 3, 3},  // N, H, W
          /*expected_output_values=*/
          {0., 0., 0., 2., -1., 0., 3., 4., 0.},
          errors::InvalidArgument("Convertpad requires at least 4D input, at "
                                  "my_pad")}};

  for (auto p : params) {
    Reset();
    // Create pad node.
    NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
    // Create input tensor.
    AddTestTensor("input", p.input_dims, p.input_values);
    // Create padding weights.
    AddTestWeights<int32>("padding", p.pad_dims, p.pad_values);
    TestOpConverter("my_pad", node_def, p.expected_output_dims, p.status,
                    p.status, ElementsAreArray(p.expected_output_values));
  }
}
}  // namespace convert
}  // namespace tensorrt
}  // namespace tensorflow

#endif  // GOOGLE_CUDA && GOOGLE_TENSORRT