/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h"

#include <algorithm>
#include <functional>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include <vector>

#if GOOGLE_CUDA && GOOGLE_TENSORRT

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/algorithm/container.h"
#include "absl/strings/match.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "third_party/gpus/cuda/include/cuda.h"
#include "third_party/gpus/cuda/include/cuda_runtime_api.h"
#include "tensorflow/cc/framework/ops.h"
#include "tensorflow/cc/framework/scope.h"
#include "tensorflow/cc/ops/nn_ops_internal.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
#include "tensorflow/core/common_runtime/gpu/gpu_managed_allocator.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/node_def.pb.h"  // NOLINT
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor.pb.h"  // NOLINT
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/grappler/costs/graph_properties.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/protobuf/config.pb.h"  // NOLINT
#include "tensorflow/core/public/session.h"
#include "third_party/tensorrt/NvInfer.h"

namespace tensorflow {
namespace tensorrt {
// TensorRT modes for testing. We define the following three modes:
// 1. Implicit batch mode: The tensors have static (known) input shape and
//    the batch dimension (first dim) is removed from the TRT tensor shape. In
//    a loose notation: trt_shape = tf_shape[1:]. This is the standard mode of
//    a TensorRT network definition before TensorRT 6.
// 2. Explicit batch mode: static (known) input shape, but the batch dimension
//    is part of the trt tensor shape. (trt_shape = tf_shape)
// 3. Dynamic shape mode allows unknown input shapes, and requires explicit
//    batch size definition (trt_shape = tf_shape).
//
// Note that the Converter only distinguishes between two modes:
// - use_implicit_batch == true, which corresponds to kImplicitBatch,
// - use_implicit_batch == false, which includes both kExplicitBatch and
//   kDynamicShape.
//
// For the converter, the distinction between explicit batch and dynamic shape
// mode follows from the input tensors of the network: dynamic shape input
// implies dynamic shape mode, while static shape input tensors imply explicit
// batch mode. We want to test all these modes, therefore we define the
// TrtTestMode enum with the following three options.
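//
// As an illustrative example (not tied to any particular test below), for a
// TF tensor of shape [8, 28, 28]:
// - kImplicitBatch: the TRT tensor shape is [28, 28] (batch dim stripped),
// - kExplicitBatch: the TRT tensor shape is [8, 28, 28],
// - kDynamicShape: the TRT tensor shape may be e.g. [-1, 28, 28], where -1
//   marks a dimension that is only known at execution time.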
86 enum class TrtTestMode {
87 kImplicitBatch = 0,
88 kExplicitBatch = 1,
89 kDynamicShape = 2
90 };
91
DebugString(const TrtTestMode mode)92 string DebugString(const TrtTestMode mode) {
93 switch (mode) {
94 case TrtTestMode::kImplicitBatch:
95 return "kImplicitBatch";
96 case TrtTestMode::kExplicitBatch:
97 return "kExplicitBatch";
98 case TrtTestMode::kDynamicShape:
99 return "kDynamicShape";
100 default:
101 return "Invalid TrtTestMode";
102 }
103 }
104
105 namespace convert {
106
107 using absl::StrCat;
108 using ::testing::ElementsAre;
109 using ::testing::ElementsAreArray;
110 using ::testing::Matcher;
111
112 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
113 constexpr std::array<TrtTestMode, 3> ValidTrtModes = {
114 TrtTestMode::kImplicitBatch, TrtTestMode::kExplicitBatch,
115 TrtTestMode::kDynamicShape};
116 #else
117 constexpr std::array<TrtTestMode, 1> ValidTrtModes = {
118 TrtTestMode::kImplicitBatch};
119 #endif
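
// Note: kExplicitBatch and kDynamicShape are only exercised when building
// against TensorRT 6 or newer (which is what the #if above encodes), since
// earlier TensorRT versions only support implicit batch network definitions.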

// TODO(laigd): put this into some test utils file.
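// Checks that `status` carries the expected error code and, when `substr` is
// non-null, that its error message contains `substr`.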
void ExpectStatus(Status status, error::Code code = error::OK,
                  const char* substr = nullptr) {
  EXPECT_EQ(code, status.code())
      << status << " vs expected error code \"" << error::Code_Name(code)
      << "\" and message \"" << substr << "\"";
  if (substr) {
    EXPECT_THAT(status.error_message(), ::testing::HasSubstr(substr)) << status;
  }
}

nvinfer1::Dims GetTestDims(const std::vector<int>& d) {
  nvinfer1::Dims dims;
  dims.nbDims = d.size();
  for (int i = 0; i < d.size(); ++i) {
    dims.d[i] = d[i];
  }
  return dims;
}

// Prints the vector to the output stream.
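// Elements are printed in the form "[1, 2, 3]"; the two backspace characters
// below erase the trailing ", " emitted by the ostream_iterator.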
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v) {
  if (!v.empty()) {
    os << '[';
    std::copy(v.begin(), v.end(), std::ostream_iterator<T>(os, ", "));
    os << "\b\b]";
  }
  return os;
}

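// Constructs a NodeDef with the given name, op type, inputs and attributes,
// for building the test graphs converted below.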
NodeDef MakeNodeDef(const string& name, const string& op,
                    const std::vector<string>& inputs,
                    const std::map<string, AttrValue> attrs = {}) {
  NodeDef node_def;
  node_def.set_name(name);
  node_def.set_op(op);
  for (const string& input : inputs) {
    node_def.add_input(input);
  }
  for (const auto& attr : attrs) {
    (*node_def.mutable_attr())[attr.first] = attr.second;
  }
  return node_def;
}

template <typename T>
NodeDef MakeConstNodeDef(const string& name, const std::vector<T>& vals,
                         const TensorShape& shape) {
  Scope s = Scope::NewRootScope();
  Tensor t = test::AsTensor<T>(vals, shape);
  auto const_op = ops::Const(s.WithOpName(name), t);
  return const_op.node()->def();
}

template <typename T>
NodeDef MakeConstNodeDef(const string& name, const std::vector<T>& vals) {
  TensorShape shape;
  const std::vector<int32> shape_dims = {static_cast<int32>(vals.size())};
  TF_EXPECT_OK(TensorShapeUtils::MakeShape(shape_dims, &shape));
  return MakeConstNodeDef(name, vals, shape);
}

bool TrtDimsEquals(const nvinfer1::Dims& lhs, const nvinfer1::Dims& rhs) {
  if (lhs.nbDims != rhs.nbDims) return false;
  for (int i = 0; i < lhs.nbDims; ++i) {
    if (lhs.d[i] != rhs.d[i]) return false;
    // We don't check the types in the tests.
  }
  return true;
}

bool TrtDimsEqualsArray(const std::vector<int>& lhs,
                        const nvinfer1::Dims& rhs) {
  return TrtDimsEquals(GetTestDims(lhs), rhs);
}

// TODO(laigd): define a parameterized matcher that can compare against the
// vector.
void ExpectTrtDimsEqualsArray(const std::vector<int>& lhs,
                              const nvinfer1::Dims& rhs) {
  EXPECT_TRUE(TrtDimsEqualsArray(lhs, rhs))
      << "expected: " << DebugString(GetTestDims(lhs)) << "\n"
      << "  actual: " << DebugString(rhs);
}

void ExpectTrtLayerNames(absl::Span<const std::string> names,
                         nvinfer1::INetworkDefinition* network) {
  EXPECT_EQ(network->getNbLayers(), names.size());

  for (int i = 0; i < network->getNbLayers(); i++) {
    auto layer = network->getLayer(i);
    EXPECT_EQ(layer->getName(), names[i]);
  }
}

void VerifyTrtLayerNameNotEmpty(nvinfer1::INetworkDefinition* network) {
  for (int i = 0; i < network->getNbLayers(); i++) {
    auto layer = network->getLayer(i);
    EXPECT_NE(layer->getName(), nullptr);
  }
}

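// Returns a gmock matcher that checks each element of a float vector against
// the corresponding expected value within max_abs_error. Typically used with
// EXPECT_THAT, e.g. (with hypothetical values):
//   EXPECT_THAT(actual_values, ArrayFloatNear({1.0f, 2.0f}, 1e-3));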
Matcher<std::vector<float>> ArrayFloatNear(const std::vector<float>& values,
                                           float max_abs_error = 1e-5,
                                           bool nan_sensitive = false) {
  std::vector<Matcher<float>> matchers;
  matchers.reserve(values.size());
  for (const float& v : values) {
    if (nan_sensitive) {
      matchers.emplace_back(::testing::NanSensitiveFloatNear(v, max_abs_error));
    } else if (max_abs_error == 0) {
      matchers.emplace_back(::testing::FloatEq(v));
    } else {
      EXPECT_GE(max_abs_error, 0);
      matchers.emplace_back(::testing::FloatNear(v, max_abs_error));
    }
  }
  return ElementsAreArray(matchers);
}

template <typename T>
void ExpectArrayNear(const std::vector<T>& lhs, absl::Span<const T> rhs) {
  ASSERT_EQ(lhs.size(), rhs.size());
  for (int i = 0; i < lhs.size(); i++) {
    EXPECT_FLOAT_EQ(lhs[i], rhs[i]);
  }
}

// Eigen::half cannot implicitly convert to float which is required for
// EXPECT_FLOAT_EQ.
template <>
void ExpectArrayNear(const std::vector<Eigen::half>& lhs,
                     absl::Span<const Eigen::half> rhs) {
  ASSERT_EQ(lhs.size(), rhs.size());
  for (int i = 0; i < lhs.size(); i++) {
    EXPECT_FLOAT_EQ(Eigen::half_impl::half_to_float(lhs[i]),
                    Eigen::half_impl::half_to_float(rhs[i]));
  }
}

template <typename T>
void ExpectArrayAlmostEqual(const std::vector<T>& lhs, absl::Span<const T> rhs,
                            T tolerance) {
  ASSERT_EQ(lhs.size(), rhs.size());
  for (int i = 0; i < lhs.size(); i++) {
    EXPECT_NEAR(lhs[i], rhs[i], tolerance);
  }
}

// Eigen::half cannot implicitly convert to float which is required for
// EXPECT_NEAR.
template <>
void ExpectArrayAlmostEqual(const std::vector<Eigen::half>& lhs,
                            absl::Span<const Eigen::half> rhs,
                            Eigen::half tolerance) {
  ASSERT_EQ(lhs.size(), rhs.size());
  for (int i = 0; i < lhs.size(); i++) {
    EXPECT_NEAR(Eigen::half_impl::half_to_float(lhs[i]),
                Eigen::half_impl::half_to_float(rhs[i]),
                Eigen::half_impl::half_to_float(tolerance));
  }
}

bool TrtShapedWeightsEquals(const TRT_ShapedWeights& lhs,
                            const TRT_ShapedWeights& rhs) {
  return TrtDimsEquals(lhs.shape_, rhs.shape_) &&
         lhs.TrtDType() == rhs.TrtDType() && lhs.GetValues() == rhs.GetValues();
}

template <typename T>
void ValidateWeights(const TRT_ShapedWeights& weights,
                     const std::vector<int>& expected_dims,
                     const std::vector<T>& expected_value) {
  ExpectTrtDimsEqualsArray(expected_dims, weights.shape_);
  ASSERT_EQ(expected_value.size(), weights.count()) << weights.DebugString();
  const T* actual_values = static_cast<const T*>(weights.GetValues());
  for (int i = 0; i < expected_value.size(); ++i) {
    EXPECT_EQ(expected_value[i], actual_values[i]);
  }
}

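// Returns {start_value, start_value + 1, ..., start_value + size - 1}; for
// example, InitTestVector<float>(3) yields {0.0f, 1.0f, 2.0f}.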
template <typename CType>
std::vector<CType> InitTestVector(int size, CType start_value = CType(0)) {
  std::vector<CType> res;
  res.reserve(size);
  for (int i = 0; i < size; ++i) {
    res.push_back(start_value + CType(i));
  }
  return res;
}

template <typename InCType, typename OutCType>
struct StaticCaster {
  OutCType operator()(InCType in) const { return static_cast<OutCType>(in); }
};

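// Casts each element of `vals` to OutCType using StaticCaster; for example,
// CastTestVector<int, float>({1, 2}) yields {1.0f, 2.0f}.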
template <typename InCType, typename OutCType>
std::vector<OutCType> CastTestVector(
    const gtl::ArraySlice<InCType>& vals) {  // non-absl ok
  std::vector<OutCType> res(vals.size());
  std::transform(vals.begin(), vals.end(), res.begin(),
                 StaticCaster<InCType, OutCType>());
  return res;
}

// Fake ITensor implementation for testing purposes.
class FakeITensor : public nvinfer1::ITensor {
 public:
  FakeITensor() : dynamic_range_(0.0f) {}

  FakeITensor(const nvinfer1::Dims& dims) : dims_(dims), dynamic_range_(0.0f) {}

  FakeITensor(const std::vector<int>& dims)
      : dims_(GetTestDims(dims)), dynamic_range_(0.0f) {}

  void setName(const char* name) override { name_ = name; }

  const char* getName() const override { return name_.c_str(); }

  void setDimensions(nvinfer1::Dims dimensions) override { dims_ = dimensions; }

  nvinfer1::Dims getDimensions() const override { return dims_; }

  void setType(nvinfer1::DataType type) override { type_ = type; }

  nvinfer1::DataType getType() const override { return type_; }

  bool isNetworkInput() const override { return false; }

  bool isNetworkOutput() const override { return false; }

  void setBroadcastAcrossBatch(bool broadcastAcrossBatch) override {}

  bool getBroadcastAcrossBatch() const override { return false; }

  nvinfer1::TensorLocation getLocation() const override { return location_; }

  void setLocation(nvinfer1::TensorLocation location) override {
    location_ = location;
  }

#if IS_TRT_VERSION_GE(5, 0, 0, 0)
  bool setDynamicRange(float min, float max) override {
    dynamic_range_ = std::max(std::abs(min), std::abs(max));
    return true;
  }

  float getDynamicRange() const override { return dynamic_range_; }
#endif

#if IS_TRT_VERSION_GE(5, 1, 0, 0)
  bool dynamicRangeIsSet() const override { return true; }

  void resetDynamicRange() override {}

  float getDynamicRangeMin() const override { return 0.f; }

  float getDynamicRangeMax() const override { return 0.f; }
#endif

#if IS_TRT_VERSION_GE(6, 0, 0, 0)
  void setAllowedFormats(nvinfer1::TensorFormats formats) override {}

  nvinfer1::TensorFormats getAllowedFormats() const override { return 1; }

  bool isShapeTensor() const override { return false; }
  bool isExecutionTensor() const override { return true; }

#endif

 private:
  string name_;
  nvinfer1::Dims dims_;
  nvinfer1::DataType type_;
  nvinfer1::TensorLocation location_;
  float dynamic_range_;
};

TEST(TRT_ShapedWeights_Test, Basic) {
  // Test constructor with no arguments.
  {
    TRT_ShapedWeights weights;
    TRT_ShapedWeights copy(weights);
    for (auto ptr : {&weights, &copy}) {
      nvinfer1::Weights trt_weights = ptr->GetTrtWeights();
      EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
      EXPECT_EQ(nullptr, trt_weights.values);
      EXPECT_EQ(0, trt_weights.count);

      EXPECT_EQ(nullptr, ptr->GetValues());
      EXPECT_EQ(0, ptr->count());
      EXPECT_EQ(0, ptr->size_bytes());
    }
  }
  // Test constructor with DataType argument.
  {
    TRT_ShapedWeights weights(nvinfer1::DataType::kFLOAT);
    TRT_ShapedWeights copy(weights);
    for (auto ptr : {&weights, &copy}) {
      nvinfer1::Weights trt_weights = ptr->GetTrtWeights();
      EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
      EXPECT_EQ(nullptr, trt_weights.values);
      EXPECT_EQ(0, trt_weights.count);

      EXPECT_EQ(nullptr, ptr->GetValues());
      EXPECT_EQ(0, ptr->count());
      EXPECT_EQ(0, ptr->size_bytes());
    }
  }
  // Test constructor with DataType and nvinfer1::Dims arguments.
  {
    TrtWeightStore store;
    TRT_ShapedWeights weights =
        store.GetTempWeights(nvinfer1::DataType::kFLOAT, GetTestDims({2, 5}));
    TRT_ShapedWeights copy(weights);
    for (auto ptr : {&weights, &copy}) {
      nvinfer1::Weights trt_weights = ptr->GetTrtWeights();
      EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
      EXPECT_NE(nullptr, trt_weights.values);
      EXPECT_EQ(10, trt_weights.count);

      EXPECT_EQ(trt_weights.values, ptr->GetValues());
      EXPECT_EQ(10, ptr->count());
      EXPECT_EQ(40, ptr->size_bytes());
    }
    // Test that it doesn't copy the underlying buffer.
    EXPECT_EQ(weights.GetValues(), copy.GetValues());
  }
}

TEST(TRT_TensorOrWeights_Test, Basic) {
  // Test constructor with no arguments.
  {
    TRT_TensorOrWeights tw;
    TRT_TensorOrWeights copy(tw);
    TRT_TensorOrWeights assigned;
    assigned = tw;
    for (auto ptr : {&tw, &copy, &assigned}) {
      EXPECT_EQ(false, ptr->is_tensor());
      EXPECT_EQ(false, ptr->is_weights());
      EXPECT_EQ(-1, ptr->batch_size());
    }
  }

  // Test constructor with ITensor and batch size argument.
  {
    nvinfer1::Dims dims;
    dims.nbDims = 1;
    dims.d[0] = 1;
    FakeITensor itensor(dims);
    TRT_TensorOrWeights tw(&itensor);
    TRT_TensorOrWeights tw1(&itensor, /*batch_size=*/1);

    for (auto original_ptr : {&tw, &tw1}) {
      TRT_TensorOrWeights copy(*original_ptr);
      TRT_TensorOrWeights assigned;
      assigned = *original_ptr;

      for (auto ptr : {original_ptr, &copy, &assigned}) {
        ASSERT_TRUE(ptr->is_tensor());
        EXPECT_EQ(false, ptr->is_weights());
        if (original_ptr == &tw) {
          EXPECT_EQ(-1, ptr->batch_size());
        } else {
          EXPECT_EQ(1, ptr->batch_size());
        }
        EXPECT_EQ(&itensor, ptr->tensor());
        ExpectTrtDimsEqualsArray({1}, ptr->GetTrtDims());
      }
    }
  }
  // Test constructor which creates and owns an ITensor.
  {
    nvinfer1::Dims dims;
    dims.nbDims = 1;
    dims.d[0] = 1;
    TRT_TensorOrWeights tw(nvinfer1::DataType::kFLOAT, dims, /*batch_size=*/1);
    TRT_TensorOrWeights copy(tw);
    TRT_TensorOrWeights assigned;
    assigned = tw;

    for (auto ptr : {&tw, &copy, &assigned}) {
      ASSERT_TRUE(ptr->is_tensor());
      EXPECT_EQ(false, ptr->is_weights());
      EXPECT_EQ(1, ptr->batch_size());
      EXPECT_NE(nullptr, ptr->tensor());
      ExpectTrtDimsEqualsArray({1}, ptr->GetTrtDims());
    }
  }
  // Test constructor with TRT_ShapedWeights argument.
  {
    TRT_ShapedWeights weights;
    TRT_TensorOrWeights tw(weights);
    TRT_TensorOrWeights copy(tw);
    TRT_TensorOrWeights assigned;
    assigned = tw;
    for (auto ptr : {&tw, &copy, &assigned}) {
      EXPECT_EQ(false, ptr->is_tensor());
      EXPECT_EQ(true, ptr->is_weights());
      EXPECT_TRUE(TrtShapedWeightsEquals(weights, ptr->weights()));
      ExpectTrtDimsEqualsArray({}, ptr->GetTrtDims());
    }
  }
}

class ValidatorTest : public ::testing::Test {
 public:
  std::unordered_map<string, OpConverter>& op_validators(
      TrtNodeValidator* validator) {
    return validator->op_validators_;
  }

  Status ConvertToTensorOrWeights(const Scope& scope, const Node* node,
                                  int output_port,
                                  TRT_TensorOrWeights* tensor_or_weights) {
    grappler::GrapplerItem item;
    TF_EXPECT_OK(scope.ToGraphDef(&item.graph));
    grappler::GraphProperties graph_properties(item);
    TF_EXPECT_OK(graph_properties.InferStatically(true));

    TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32,
                               /*use_calibration=*/false,
                               /*use_implicit_batch=*/true);
    return validator.ConvertToTensorOrWeights(node->def(), output_port,
                                              tensor_or_weights);
  }

  const std::set<string>* GetQuantizeOps(TrtNodeValidator* validator) {
    return validator->quantize_ops;
  }
};

TEST_F(ValidatorTest, QuantizeOpsAreRegistered) {
  grappler::GrapplerItem item;
  grappler::GraphProperties graph_properties(item);
  TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32,
                             /*use_calibration=*/false,
                             /*use_implicit_batch=*/true);
  for (const string& quantize_op : *GetQuantizeOps(&validator)) {
    QCHECK(op_validators(&validator).count(quantize_op));
  }
}

TEST_F(ValidatorTest, ConvertToTensorOrWeights) {
  // Convert Const.
  {
    Scope s = Scope::NewRootScope();
    auto node =
        ops::Const(s.WithOpName("my_const"), {1.0f, 2.0f}, TensorShape({2}));
    TRT_TensorOrWeights output;
    ExpectStatus(ConvertToTensorOrWeights(s, node.op().node(),
                                          /*output_port=*/0, &output));
    ValidateWeights<float>(output.weights(), {2}, {1.0, 2.0});
  }

  // Helper method to run ConvertToTensorOrWeights() with predefined parameters.
  auto convert_to_tensor_or_weights = [this](const std::vector<int64>& dims,
                                             TRT_TensorOrWeights* output) {
    Scope s = Scope::NewRootScope();
    const auto attrs = ops::Placeholder::Shape(PartialTensorShape{dims});
    auto feed = ops::Placeholder(s.WithOpName("feed"), DT_FLOAT, attrs);
    auto add = ops::Add(s.WithOpName("add"), feed, feed);
    return this->ConvertToTensorOrWeights(s, add.operation.node(),
                                          /*output_port=*/0, output);
  };
  // Convert non-Const with #dims > nvinfer1::Dims::MAX_DIMS+1.
  {
    TRT_TensorOrWeights output;
    ExpectStatus(
        convert_to_tensor_or_weights(
            std::vector<int64>(nvinfer1::Dims::MAX_DIMS + 2, 1), &output),
        error::OUT_OF_RANGE, "Input tensor rank is greater than 9");
  }
  // Convert non-Const with #dims < 1.
  {
    TRT_TensorOrWeights output;
    ExpectStatus(
        convert_to_tensor_or_weights({}, &output), error::INVALID_ARGUMENT,
        "Scalar input tensor is not supported since the first dimension "
        "is treated as batch dimension by TRT");
  }
  // Convert non-Const. We test the case where the non-batch dimension is
  // unknown as well, to make sure the validator allows that.
  for (const int32 non_batch_dim : {-1, 2}) {
    const int32 batch_size = 12;
    TRT_TensorOrWeights output;
    ExpectStatus(
        convert_to_tensor_or_weights({batch_size, non_batch_dim}, &output));
    ASSERT_TRUE(output.is_tensor());
    EXPECT_EQ(batch_size, output.batch_size());
    EXPECT_NE(nullptr, output.tensor());
    ExpectTrtDimsEqualsArray({non_batch_dim}, output.GetTrtDims());
  }
}

TEST_F(ValidatorTest, IsTensorRTCandidate_Basics) {
  Scope s = Scope::NewRootScope();
  auto input =
      ops::Const(s.WithOpName("const"), {1.0f, 2.0f}, TensorShape({2}));
  auto add = ops::Add(s.WithOpName("add"), input, input);
  const Node* add_node = add.operation.node();

  grappler::GrapplerItem item;
  TF_EXPECT_OK(s.ToGraphDef(&item.graph));
  grappler::GraphProperties graph_properties(item);
  TF_EXPECT_OK(graph_properties.InferStatically(true));
  TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32,
                             /*use_calibration=*/false,
                             /*use_implicit_batch=*/true);

  bool start_conversion = false;
  bool should_fail = false;
  auto op_converter = [&start_conversion,
                       &should_fail](OpConverterParams* params) -> Status {
    if (should_fail) return errors::InvalidArgument("");
    if (!params->validation_only) start_conversion = true;
    return Status::OK();
  };

  // Validator not registered.
  ASSERT_EQ(1, op_validators(&validator).erase("Add"));
  ExpectStatus(validator.IsTensorRTCandidate(add_node), error::UNIMPLEMENTED,
               "Op type Add is not supported.");

  // Register validator.
  op_validators(&validator)["Add"] = op_converter;
  TF_EXPECT_OK(validator.IsTensorRTCandidate(add_node));
  EXPECT_EQ(false, start_conversion);

  // Let the converter return error.
  should_fail = true;
  ExpectStatus(validator.IsTensorRTCandidate(add_node),
               error::INVALID_ARGUMENT);
}

TEST(TrtNodeValidator, IsTensorRTCandidate) {
  // Create a graph containing both TRT-compatible and TRT-incompatible nodes
  // and use it to test TrtNodeValidator::IsTensorRTCandidate().
  const std::vector<int32> input_shape_array{2, 2};
  TensorShape input_shape;
  TF_EXPECT_OK(TensorShapeUtils::MakeShape(input_shape_array, &input_shape));

  Scope s = Scope::NewRootScope();
  ops::Placeholder::Attrs feed_attrs;
  TF_EXPECT_OK(
      TensorShapeUtils::MakeShape(input_shape_array, &feed_attrs.shape_));

  // Compatible input.
  auto feed = ops::Placeholder(s.WithOpName("feed"), DT_FLOAT, feed_attrs);
  auto const_1 = ops::Const(s.WithOpName("const_1"), 1.0f, input_shape);

  // Compatible MatMul.
  auto matmul = ops::MatMul(s.WithOpName("matmul"), feed, const_1);

  // Incompatible MatMul.
  ops::MatMul::Attrs matmul_attrs;
  matmul_attrs.transpose_a_ = true;
  auto incompatible_matmul = ops::MatMul(s.WithOpName("incompatible_matmul"),
                                         feed, const_1, matmul_attrs);

  // Unsupported op.
  auto unsupported_op = ops::Erf(s.WithOpName("sin"), feed);

  // Incompatible input.
  auto incompatible_feed = ops::Placeholder(s.WithOpName("feed"), DT_DOUBLE);
  auto const_2 = ops::Const(s.WithOpName("const_2"), 1.0, input_shape);
  // Compatible op with incompatible input.
  auto matmul_with_incompatible_input =
      ops::MatMul(s.WithOpName("matmul_with_incompatible_input"),
                  incompatible_feed, const_2);

  // Quantize ops.
  auto quantize_attrs = ops::FakeQuantWithMinMaxArgs::Min(-6.0f).Max(6.0f);
  auto quantize = ops::FakeQuantWithMinMaxArgs(s.WithOpName("quantize"), feed,
                                               quantize_attrs);

  // Get GrapplerItem and GraphProperties.
  grappler::GrapplerItem item;
  TF_EXPECT_OK(s.ToGraphDef(&item.graph));
  Tensor feed_tensor(DT_FLOAT, input_shape);
  item.feed.push_back(std::make_pair("feed", feed_tensor));
  grappler::GraphProperties graph_properties(item);
  TF_EXPECT_OK(graph_properties.InferStatically(true));

  for (const TrtPrecisionMode precision_mode :
       {TrtPrecisionMode::FP32, TrtPrecisionMode::INT8}) {
    TrtNodeValidator validator(graph_properties, precision_mode,
                               /*use_calibration=*/false,
                               /*use_implicit_batch=*/true);
    TF_EXPECT_OK(validator.IsTensorRTCandidate(matmul.operation.node()));
    ExpectStatus(
        validator.IsTensorRTCandidate(incompatible_matmul.operation.node()),
        error::INVALID_ARGUMENT,
        "Cannot transpose first input if it is a tensor with fewer than 2 "
        "non-batch dimensions.");
    ExpectStatus(validator.IsTensorRTCandidate(unsupported_op.operation.node()),
                 error::UNIMPLEMENTED, "Op type Erf is not supported");
    ExpectStatus(validator.IsTensorRTCandidate(
                     matmul_with_incompatible_input.operation.node()),
                 error::INTERNAL,
                 "Failed to convert input feed_1 to a TRT_TensorOrWeights");
    if (precision_mode == TrtPrecisionMode::INT8) {
      TF_EXPECT_OK(validator.IsTensorRTCandidate(quantize.operation.node()));
    } else {
      ExpectStatus(validator.IsTensorRTCandidate(quantize.operation.node()),
                   error::UNIMPLEMENTED,
                   "Op type FakeQuantWithMinMaxArgs is not supported");
    }
  }
}

class ConverterTest : public ::testing::Test {
 public:
  ConverterTest() { Reset(); }

  void Reset() {
    converter_ =
        std::move(Converter::Create(TrtPrecisionMode::FP32,
                                    /*use_calibration=*/false, &logger_,
                                    /*use_implicit_batch=*/true,
                                    /*engine_name=*/"TRTEngineOp_0_0")
                      .ValueOrDie());
    weight_store_ = &converter_->weight_store_;
  }

  void AddOpConverter(const string& op_name, OpConverter op_converter) {
    converter_->op_registry_[op_name] = op_converter;
  }

  // Below we expose private methods of Converter for testing.

  Status MaybeUpdateBatchSize(int batch_size) {
    return converter_->MaybeUpdateBatchSize(batch_size);
  }

  Status AddTensorOrWeights(const string& name, TRT_TensorOrWeights input) {
    return converter_->AddTensorOrWeights(name, input);
  }

  Status GetTensorOrWeights(const string& name, TRT_TensorOrWeights* output) {
    return converter_->GetTensorOrWeights(name, output);
  }

  Status GetInputs(const NodeDef& node_def,
                   std::vector<TRT_TensorOrWeights>* inputs) const {
    return converter_->GetInputs(node_def, inputs);
  }

  Status GetWeightRange(const TRT_ShapedWeights& weights, float* out_min,
                        float* out_max) const {
    return converter_->GetWeightRange(weights, out_min, out_max);
  }

  void PropagateQuantizationRanges() {
    converter_->PropagateQuantizationRanges();
  }

  int batch_size() const { return converter_->batch_size_; }

  std::unordered_map<nvinfer1::ITensor*, float>& quantization_ranges() {
    return converter_->quantization_ranges_;
  }

 private:
  Logger logger_;

 protected:
  std::unique_ptr<Converter> converter_;
  TrtWeightStore* weight_store_;
};

TEST_F(ConverterTest, ConvertNode) {
  FakeITensor output_tensors[2];
  auto op_converter = [&output_tensors](OpConverterParams* params) -> Status {
    nvinfer1::Dims dims = params->inputs[0].tensor()->getDimensions();
    for (int i = 0; i < 2; ++i) {
      dims.d[0] += 1;
      output_tensors[i].setDimensions(dims);
      params->outputs->push_back(TRT_TensorOrWeights(&output_tensors[i]));
    }
    return Status::OK();
  };
  NodeDef node_def = MakeNodeDef("my_op", "MyOp", {"my_input"});
  TF_EXPECT_OK(converter_->AddInputTensor(
      "my_input", nvinfer1::DataType::kFLOAT, GetTestDims({123}), 1));

  // Converter not registered.
  ExpectStatus(converter_->ConvertNode(node_def), error::UNIMPLEMENTED,
               "No converter registered for op: MyOp");

  // Register the converter and retry.
  AddOpConverter("MyOp", op_converter);
  TF_EXPECT_OK(converter_->ConvertNode(node_def));

  TRT_TensorOrWeights actual_output_1;
  TF_EXPECT_OK(GetTensorOrWeights("my_op", &actual_output_1));
  EXPECT_EQ(&output_tensors[0], actual_output_1.tensor());
  EXPECT_EQ(124, actual_output_1.tensor()->getDimensions().d[0]);

  TRT_TensorOrWeights actual_output_2;
  TF_EXPECT_OK(GetTensorOrWeights("my_op:1", &actual_output_2));
  EXPECT_EQ(&output_tensors[1], actual_output_2.tensor());
  EXPECT_EQ(125, actual_output_2.tensor()->getDimensions().d[0]);

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

TEST_F(ConverterTest, AddAndGetInputs) {
  NodeDef node_def;
  node_def.add_input("^control_input");
  node_def.add_input("input");
  node_def.add_input("input:0");
  node_def.add_input("input:1");
  node_def.add_input("weird_input:2:3:4:0");

  TF_EXPECT_OK(converter_->AddInputTensor("input", nvinfer1::DataType::kFLOAT,
                                          GetTestDims({1}), 1));
  TF_EXPECT_OK(converter_->AddInputTensor("input:1", nvinfer1::DataType::kINT32,
                                          GetTestDims({2, 3}), 1));
  TF_EXPECT_OK(converter_->AddInputTensor(
      "weird_input:2:3:4", nvinfer1::DataType::kHALF, GetTestDims({5, 3}), 1));

  std::vector<TRT_TensorOrWeights> inputs;
  TF_EXPECT_OK(GetInputs(node_def, &inputs));

  EXPECT_EQ(4, inputs.size());
  EXPECT_EQ(inputs[0].tensor(), inputs[1].tensor());

  EXPECT_EQ(nvinfer1::DataType::kFLOAT, inputs[0].tensor()->getType());
  EXPECT_EQ(nvinfer1::DataType::kINT32, inputs[2].tensor()->getType());
  EXPECT_EQ(nvinfer1::DataType::kHALF, inputs[3].tensor()->getType());
  ExpectTrtDimsEqualsArray({1}, inputs[0].tensor()->getDimensions());
  ExpectTrtDimsEqualsArray({2, 3}, inputs[2].tensor()->getDimensions());
  ExpectTrtDimsEqualsArray({5, 3}, inputs[3].tensor()->getDimensions());

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

TEST_F(ConverterTest, RenameAndMarkOutputTensors) {
  // Test that the tensors are actually renamed and marked as outputs after
  // Converter::RenameAndMarkOutputTensors() is called.

  // Register a custom converter which shuffles the input. We use it to build a
  // TRT network whose output will be later marked.
  std::vector<nvinfer1::ITensor*> output_tensors;
  auto op_converter = [&output_tensors](OpConverterParams* params) -> Status {
    nvinfer1::Permutation perm;
    perm.order[0] = 1;
    perm.order[1] = 0;
    for (int i = 0; i < 2; ++i) {
      nvinfer1::ITensor* input_tensor = params->inputs[0].tensor();
      nvinfer1::IShuffleLayer* layer =
          params->converter->network()->addShuffle(*input_tensor);
      layer->setFirstTranspose(perm);
      nvinfer1::ITensor* output_tensor = layer->getOutput(0);
      params->outputs->emplace_back(output_tensor);
      output_tensors.push_back(output_tensor);
    }
    TRT_ShapedWeights output_weights(nvinfer1::DataType::kFLOAT);
    params->outputs->emplace_back(output_weights);
    return Status::OK();
  };
  AddOpConverter("MyOp", op_converter);

  // Run the conversion.
  NodeDef node_def = MakeNodeDef("my_op", "MyOp", {"my_input"});
  TF_EXPECT_OK(converter_->AddInputTensor(
      "my_input", nvinfer1::DataType::kFLOAT, GetTestDims({1, 2}), 1));
  TF_EXPECT_OK(converter_->ConvertNode(node_def));

  // Mark a weight as output, should fail.
  ExpectStatus(
      converter_->RenameAndMarkOutputTensors({{"my_op:2", "my_output"}}),
      error::INVALID_ARGUMENT, "Output my_op:2 is weights not tensor");

  // Mark tensors as output, should pass.
  TF_EXPECT_OK(converter_->RenameAndMarkOutputTensors(
      {{"my_op", "my_output"}, {"my_op:1", "my_output_1"}}));
  EXPECT_EQ(2, output_tensors.size());
  for (auto output_tensor : output_tensors) {
    ExpectTrtDimsEqualsArray({2, 1}, output_tensor->getDimensions());
  }
  EXPECT_EQ("my_output", string(output_tensors[0]->getName()));
  EXPECT_EQ("my_output_1", string(output_tensors[1]->getName()));

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

TEST_F(ConverterTest, TransposeTensor) {
  nvinfer1::ITensor* input_tensor = converter_->network()->addInput(
      "", nvinfer1::DataType::kFLOAT, GetTestDims({2, 3, 5}));
  nvinfer1::ITensor* output_tensor = nullptr;
  NodeDef dummy_node_def = MakeNodeDef("dummy_op", "DummyOp", {});
  // Rank doesn't match.
  ExpectStatus(
      converter_->TransposeTensor(input_tensor, {0, 1}, &output_tensor,
                                  dummy_node_def, "sub1"),
      error::INVALID_ARGUMENT,
      "Rank of perm for transpose does not match with that of the input");

  // Transpose at batch dimension.
  ExpectStatus(
      converter_->TransposeTensor(input_tensor, {1, 0, 2, 3}, &output_tensor,
                                  dummy_node_def, "sub2"),
      error::UNIMPLEMENTED, "Transpose at batch dimension is not supported.");

  // OK.
  TF_EXPECT_OK(converter_->TransposeTensor(
      input_tensor, {0, 3, 1, 2}, &output_tensor, dummy_node_def, "sub3"));
  ExpectTrtDimsEqualsArray({5, 2, 3}, output_tensor->getDimensions());
  ExpectTrtLayerNames({"TRTEngineOp_0_0/dummy_op-sub3:SHUFFLE"},
                      converter_->network());
}

void TestPrepareTensorForShape(
    const std::vector<int>& input_dims, const std::vector<int>& reshape_dims,
    const std::vector<int>& expected_tensor_dims, bool input_is_tensor,
    Converter* converter, TrtWeightStore* weight_store,
    error::Code expected_code = error::OK,
    const char* expected_error_msg_substr = nullptr) {
  TRT_TensorOrWeights input;
  if (input_is_tensor) {
    input = TRT_TensorOrWeights(converter->network()->addInput(
        "", nvinfer1::DataType::kFLOAT, GetTestDims(input_dims)));
  } else {
    input = TRT_TensorOrWeights(weight_store->GetTempWeights(
        nvinfer1::DataType::kFLOAT, GetTestDims(input_dims)));
  }
  nvinfer1::ITensor* output_tensor = nullptr;

  NodeDef dummy_node_def = MakeNodeDef("dummy_op", "DummyOp", {});
  for (bool validation_only : {false, true}) {
    const Status status =
        PrepareTensorForShape(converter, input, GetTestDims(reshape_dims),
                              validation_only, &output_tensor, dummy_node_def);
    if (expected_code == error::OK) {
      TF_EXPECT_OK(status);
      if (validation_only) {
        EXPECT_EQ(nullptr, output_tensor);
      } else {
        ExpectTrtDimsEqualsArray(expected_tensor_dims,
                                 output_tensor->getDimensions());
      }
    } else {
      ExpectStatus(status, expected_code, expected_error_msg_substr);
    }
  }
}

TEST_F(ConverterTest, PrepareTensorForShape) {
  for (bool input_is_tensor : {true, false}) {
    // Shape size doesn't match.
    Reset();
    TestPrepareTensorForShape({2, 3, 5}, {2, 3, 6}, {}, input_is_tensor,
                              converter_.get(), weight_store_,
                              error::INVALID_ARGUMENT, "Incompatible shapes");

    // Regular shape.
    Reset();
    TestPrepareTensorForShape({2, 3, 5}, {10, 3}, {10, 3}, input_is_tensor,
                              converter_.get(), weight_store_);

    // Reshape to zero rank.
    Reset();
    TestPrepareTensorForShape({1, 1}, {}, {}, input_is_tensor, converter_.get(),
                              weight_store_);
  }

  // Tensor input with zero rank.
  Reset();
  TestPrepareTensorForShape({}, {1, 1}, {1, 1}, /*input_is_tensor=*/true,
                            converter_.get(), weight_store_);

  // TODO(aaroey): we should check the case where uninferred dimensions are
  // not an exact divisor of input dimensions, e.g. for dims {-1, 7}.

  // Infer tensor shape, ok.
  Reset();
  TestPrepareTensorForShape({2, 3, 5}, {-1, 2}, {15, 2},
                            /*input_is_tensor=*/true, converter_.get(),
                            weight_store_);

  // Infer weight shape, should fail.
  Reset();
  TestPrepareTensorForShape({2, 3, 5}, {-1, 2}, {15, 2},
                            /*input_is_tensor=*/false, converter_.get(),
                            weight_store_, error::INVALID_ARGUMENT,
                            "Shape is not fully defined");

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

TEST_F(ConverterTest, MaybeUpdateBatchSize) {
  EXPECT_EQ(-1, batch_size());

  TF_EXPECT_OK(MaybeUpdateBatchSize(-1));
  EXPECT_EQ(-1, batch_size());

  TF_EXPECT_OK(MaybeUpdateBatchSize(123));
  EXPECT_EQ(123, batch_size());

  TF_EXPECT_OK(MaybeUpdateBatchSize(123));
  EXPECT_EQ(123, batch_size());

  TF_EXPECT_OK(MaybeUpdateBatchSize(-1));
  EXPECT_EQ(123, batch_size());

  ExpectStatus(MaybeUpdateBatchSize(124), error::INVALID_ARGUMENT,
               "Provided batch size does not match converter batch size");
}

TEST_F(ConverterTest, AddAndGetTensorOrWeights) {
  // Add a tensor.
  FakeITensor fake_tensor;
  TRT_TensorOrWeights tensor(&fake_tensor);
  EXPECT_EQ(-1, tensor.batch_size());
  TF_EXPECT_OK(MaybeUpdateBatchSize(123));
  TF_EXPECT_OK(AddTensorOrWeights("my_tensor", tensor));

  // Get the added tensor.
  TRT_TensorOrWeights added_tensor;
  TF_EXPECT_OK(GetTensorOrWeights("my_tensor", &added_tensor));
  EXPECT_EQ(123, added_tensor.batch_size());

  // Add the same tensor again.
  ExpectStatus(AddTensorOrWeights("my_tensor", tensor), error::ALREADY_EXISTS,
               "tensor/weights my_tensor already exist");
}

template <typename T>
void TestGetWeightRange(ConverterTest* test, TrtWeightStore* weight_store) {
  nvinfer1::DataType trt_type;
  TF_ASSERT_OK(TfTypeToTrtType(DataTypeToEnum<T>::v(), &trt_type));
  TRT_ShapedWeights weights =
      weight_store->GetTempWeights(trt_type, GetTestDims({2, 3}));
  const std::vector<T> values = {T(3), T(1), T(2), T(6), T(5), T(4)};
  memcpy(weights.GetValues(), values.data(), weights.size_bytes());

  float out_min = 0.0f;
  float out_max = 0.0f;
  TF_EXPECT_OK(test->GetWeightRange(weights, &out_min, &out_max));
  EXPECT_EQ(1.0f, out_min);
  EXPECT_EQ(6.0f, out_max);
}

TEST_F(ConverterTest, GetWeightRange) {
  TestGetWeightRange<float>(this, weight_store_);
  TestGetWeightRange<Eigen::half>(this, weight_store_);
  TestGetWeightRange<int32>(this, weight_store_);
}

TEST_F(ConverterTest, ProvideQuantizationRange) {
  FakeITensor fake_tensor;
  // Asymmetric range
  converter_->ProvideQuantizationRange(&fake_tensor, 0.0f, 6.0f);
  EXPECT_EQ(6.0f, quantization_ranges()[&fake_tensor]);
  converter_->ProvideQuantizationRange(&fake_tensor, 1.0f, 6.0f);
  EXPECT_EQ(6.0f, quantization_ranges()[&fake_tensor]);
  converter_->ProvideQuantizationRange(&fake_tensor, -8.0f, 6.0f);
  EXPECT_EQ(8.0f, quantization_ranges()[&fake_tensor]);
  converter_->ProvideQuantizationRange(&fake_tensor, -8.123f, -6.123f);
  EXPECT_EQ(8.123f, quantization_ranges()[&fake_tensor]);
  // Symmetric range
  converter_->ProvideQuantizationRange(&fake_tensor, -6.123f, 6.123f);
  EXPECT_EQ(6.123f, quantization_ranges()[&fake_tensor]);

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

TEST_F(ConverterTest, MaybeApplyQuantizationRanges) {
  // input -> infer1 -> infer2 -> infer3
  FakeITensor input, infer_1, infer_2, infer_3;
  FakeITensor not_infer;
  Logger logger;
  auto int8_converter = Converter::Create(TrtPrecisionMode::INT8,
                                          /*use_calibration=*/true, &logger,
                                          /*use_implicit_batch=*/true,
                                          /*engine_name=*/"")
                            .ValueOrDie();
  int8_converter->ProvideQuantizationRange(&input, -5.0f, 5.0f);
  int8_converter->ProvideQuantizationRange(&not_infer, -100.0f, 100.0f);
  int8_converter->MarkQuantizationRangesAsInferrable(&input, &infer_1);
  int8_converter->MarkQuantizationRangesAsInferrable(&infer_1, &infer_2);
  int8_converter->MarkQuantizationRangesAsInferrable(&infer_2, &infer_3);

  // Input range should be inferred along the chain and applied to tensors.
  int8_converter->MaybeApplyQuantizationRanges();
#if IS_TRT_VERSION_GE(5, 0, 0, 0)
  EXPECT_EQ(input.getDynamicRange(), 5.0f);
  EXPECT_EQ(infer_1.getDynamicRange(), 5.0f);
  EXPECT_EQ(infer_2.getDynamicRange(), 5.0f);
  EXPECT_EQ(infer_3.getDynamicRange(), 5.0f);
  EXPECT_EQ(not_infer.getDynamicRange(), 100.0f);
#endif

  VerifyTrtLayerNameNotEmpty(int8_converter->network());
}

TEST_F(ConverterTest, PropagateQuantizationRanges) {
  // infer0 <-> infer1 <-> infer2 <-> infer3
  //              |
  //            infer4 <-> infer5
  FakeITensor infer[6];
  FakeITensor not_infer;
  converter_->ProvideQuantizationRange(&infer[4], -5.0f, 5.0f);
  converter_->MarkQuantizationRangesAsInferrable(&infer[0], &infer[1]);
  converter_->MarkQuantizationRangesAsInferrable(&infer[1], &infer[2]);
  converter_->MarkQuantizationRangesAsInferrable(&infer[3], &infer[2]);
  converter_->MarkQuantizationRangesAsInferrable(&infer[4], &infer[1]);
  converter_->MarkQuantizationRangesAsInferrable(&infer[4], &infer[5]);

  // Input range should be inferred along the chain.
  PropagateQuantizationRanges();
  auto ranges = quantization_ranges();
  for (int i = 0; i < 6; ++i) {
    EXPECT_EQ(5.0f, ranges[&infer[i]]);
  }
  EXPECT_EQ(ranges.count(&not_infer), 0);

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

TEST_F(ConverterTest, GetTrtBroadcastShape) {
  const bool kIsTensor = true;
  const bool kIsNotTensor = false;
  auto symmetric_test = [this](const std::vector<int>& operand_1_shape,
                               const std::vector<int>& operand_2_shape,
                               const bool operand_1_is_tensor,
                               const bool operand_2_is_tensor,
                               const std::vector<int>& expected_operand_1_shape,
                               const std::vector<int>& expected_operand_2_shape,
                               error::Code expected_code = error::OK,
                               const char* expected_error_msg_substr = nullptr,
                               const int operand_1_batch_size = -1,
                               const int operand_2_batch_size = -1) {
    auto create_tensor_or_weights = [](const std::vector<int>& shape,
                                       bool is_tensor, int batch_size = -1) {
      if (is_tensor) {
        return TRT_TensorOrWeights{nvinfer1::DataType::kFLOAT,
                                   GetTestDims(shape), batch_size};
      }
      TRT_ShapedWeights weights;
      weights.shape_ = GetTestDims(shape);
      return TRT_TensorOrWeights(weights);
    };

    nvinfer1::Dims operand_1_new_dims, operand_2_new_dims;
    TRT_TensorOrWeights operand_1 = create_tensor_or_weights(
        operand_1_shape, operand_1_is_tensor, operand_1_batch_size);
    TRT_TensorOrWeights operand_2 = create_tensor_or_weights(
        operand_2_shape, operand_2_is_tensor, operand_2_batch_size);

    // operand_1 broadcast operand_2
    ExpectStatus(
        GetTrtBroadcastShape(operand_1, operand_2, /*check_feasibility=*/true,
                             /*use_implicit_batch=*/true, &operand_1_new_dims,
                             &operand_2_new_dims),
        expected_code, expected_error_msg_substr);
    if (expected_code == error::OK) {
      ExpectTrtDimsEqualsArray(expected_operand_1_shape, operand_1_new_dims);
      ExpectTrtDimsEqualsArray(expected_operand_2_shape, operand_2_new_dims);
    }
    // operand_2 broadcast operand_1
    ExpectStatus(
        GetTrtBroadcastShape(operand_2, operand_1, /*check_feasibility=*/true,
                             /*use_implicit_batch=*/true, &operand_2_new_dims,
                             &operand_1_new_dims),
        expected_code, expected_error_msg_substr);
    if (expected_code == error::OK) {
      ExpectTrtDimsEqualsArray(expected_operand_1_shape, operand_1_new_dims);
      ExpectTrtDimsEqualsArray(expected_operand_2_shape, operand_2_new_dims);
    }
  };

  // Both inputs are weights.
  symmetric_test(
      {1}, {1}, kIsNotTensor, kIsNotTensor, {}, {}, error::INVALID_ARGUMENT,
      "Broadcasting requires at least one of the operands be tensors");

  // One tensor and one weights.
  symmetric_test({1, 1, 1}, {2}, kIsTensor, kIsNotTensor, {1, 1, 1}, {1, 1, 2});
  symmetric_test({1, 1, 2}, {2}, kIsTensor, kIsNotTensor, {1, 1, 2}, {1, 1, 2});
  symmetric_test({1, 3, 2}, {1}, kIsTensor, kIsNotTensor, {1, 3, 2}, {1, 1, 1});
  symmetric_test({1, 1, 1}, {2, 3}, kIsTensor, kIsNotTensor, {1, 1, 1},
                 {1, 2, 3});
  symmetric_test({1, 1, 1}, {2, 3, 4}, kIsTensor, kIsNotTensor, {1, 1, 1},
                 {2, 3, 4});
  symmetric_test({1, 1, 1}, {1, 2, 3, 4}, kIsTensor, kIsNotTensor, {1, 1, 1},
                 {2, 3, 4});
  symmetric_test({1, 3, 4}, {1, 2, 1, 4}, kIsTensor, kIsNotTensor, {1, 3, 4},
                 {2, 1, 4});
  symmetric_test({1, 1, 1}, {2, 1, 1, 1}, kIsTensor, kIsNotTensor, {}, {},
                 error::INVALID_ARGUMENT, "Infeasible broadcast scheme");
  symmetric_test({1, 1, 1}, {2, 1, 1, 1}, kIsTensor, kIsNotTensor, {}, {},
                 error::INVALID_ARGUMENT, "Infeasible broadcast scheme",
                 /*operand_1_batch_size=*/2);
  symmetric_test({1, 1, 1}, {1, 1, 1, 1, 1}, kIsTensor, kIsNotTensor, {}, {},
                 error::INVALID_ARGUMENT,
                 "Broadcasting beyond batch dimension is not supported "
                 "(tensor #dims 4 vs broadcast #dims 5)");
  symmetric_test({3}, {1, 1, 3}, kIsTensor, kIsNotTensor, {}, {},
                 error::INVALID_ARGUMENT,
                 "Broadcasting beyond batch dimension is not supported "
                 "(tensor #dims 2 vs broadcast #dims 3)",
                 /*operand_1_batch_size=*/2);

  // Both inputs are tensors.
  symmetric_test({1, 1, 1}, {1, 1}, kIsTensor, kIsTensor, {}, {},
                 error::INVALID_ARGUMENT,
                 "Broadcasting beyond batch dimension is not supported "
                 "(tensor #dims 3 vs broadcast #dims 4)");
  symmetric_test({1, 3}, {3}, kIsTensor, kIsTensor, {}, {},
                 error::INVALID_ARGUMENT,
                 "Broadcasting beyond batch dimension is not supported "
                 "(tensor #dims 2 vs broadcast #dims 3)");
  symmetric_test({1, 3, 4}, {2, 1, 4}, kIsTensor, kIsTensor, {1, 3, 4},
                 {2, 1, 4});
  symmetric_test({1, 1, 1}, {1, 1, 1, 1}, kIsTensor, kIsTensor, {}, {},
                 error::INVALID_ARGUMENT,
                 "Broadcasting beyond batch dimension is not supported "
                 "(tensor #dims 4 vs broadcast #dims 5)");
  symmetric_test({2, 3}, {7, 5}, kIsTensor, kIsTensor, {}, {},
                 error::INVALID_ARGUMENT, "Infeasible broadcast scheme");

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

TEST_F(ConverterTest, CreateConstantLayer) {
  for (auto dtype : {nvinfer1::DataType::kFLOAT, nvinfer1::DataType::kINT32}) {
    TRT_ShapedWeights weights =
        weight_store_->GetTempWeights(dtype, GetTestDims({2, 3, 5}));
    nvinfer1::ITensor* tensor =
        converter_->CreateConstantLayer(weights, GetTestDims({3, 10}));
    ASSERT_NE(nullptr, tensor);
    EXPECT_EQ(dtype, tensor->getType())
        << "Expected " << DebugString(dtype) << " vs. actual "
        << DebugString(tensor->getType());
    ExpectTrtDimsEqualsArray({3, 10}, tensor->getDimensions());
  }

  VerifyTrtLayerNameNotEmpty(converter_->network());
}

class ConvertGraphDefToEngineTest : public ::testing::Test {
 public:
  Status RunConvertGraphDefToEngine(Scope* s) {
    GraphDef gdef;
    TF_EXPECT_OK(s->ToGraphDef(&gdef));
    std::vector<PartialTensorShape> input_shapes;
    int batch_size = -1;
    for (const NodeDef& node : gdef.node()) {
      absl::string_view node_name(node.name());
      if (absl::ConsumePrefix(&node_name, IONamePrefixes::kInputPHName)) {
        int port = -1;
        EXPECT_TRUE(absl::SimpleAtoi(node_name, &port)) << node.name();
        if (input_shapes.size() < port + 1) input_shapes.resize(port + 1);
        input_shapes[port] =
            PartialTensorShape(node.attr().at("shape").shape());
        if (batch_size == -1) {
          batch_size = input_shapes[port].dim_size(0);
        } else {
          EXPECT_EQ(batch_size, input_shapes[port].dim_size(0));
        }
      }
    }
    // TODO(laigd): execute the engine and get outputs.
    return ConvertGraphDefToEngine(
        gdef, TrtPrecisionMode::FP32, /*max_batch_size=*/1,
        /*max_workspace_size_bytes=*/64 << 20, input_shapes, &logger_,
        /*allocator=*/nullptr, /*calibrator=*/nullptr, &engine_,
        /*use_calibration=*/false, /*use_implicit_batch=*/true,
        /*convert_successfully=*/nullptr, /*profiles=*/nullptr,
        "TRTEngineOp_0_0");
  }

 protected:
  TrtUniquePtrType<nvinfer1::ICudaEngine> engine_;

 private:
  Logger logger_;
};

TEST_F(ConvertGraphDefToEngineTest, IdentityGraph) {
  Scope s = Scope::NewRootScope();
  auto input =
      ops::Placeholder(s.WithOpName(StrCat(IONamePrefixes::kInputPHName, 0)),
                       DT_FLOAT, ops::Placeholder::Shape({1, 1}));
  auto output = ops::Identity(s.WithOpName("identity1"), input);
  output = ops::Identity(s.WithOpName("identity2"), output);
  output = ops::Identity(s.WithOpName(StrCat(IONamePrefixes::kOutputPHName, 0)),
                         output);
  // If the converter marks the input tensor as output tensor, the conversion
  // below will fail with:
  // > TensorRTOutputPH_0 cannot be both input and output
  // > Network must have at least one output
  TF_EXPECT_OK(RunConvertGraphDefToEngine(&s));
}
1319
1320 // Returns a vector of shapes from a vector of input tensors. This can be used
1321 // to create optimization profiles.
GetShapeFromDataVec(DataVec input_data,std::vector<TensorShape> * shape_vec)1322 Status GetShapeFromDataVec(DataVec input_data,
1323 std::vector<TensorShape>* shape_vec) {
1324 shape_vec->reserve(input_data.size());
1325 std::transform(input_data.begin(), input_data.end(),
1326 std::back_inserter(*shape_vec),
1327 [](InputOutputData x) { return x.tensor.shape(); });
1328 return Status::OK();
1329 }
1330
1331 template <typename T>
GetSpanForData(const InputOutputData & data)1332 inline absl::Span<const T> GetSpanForData(const InputOutputData& data) {
1333 const auto& tensor_map = data.tensor.flat<T>();
1334 return absl::Span<const T>(tensor_map.data(), tensor_map.size());
1335 }
1336
GetDataAsFloat(InputOutputData & data)1337 std::vector<float> GetDataAsFloat(InputOutputData& data) {
1338 if (data.tensor.dtype() == DT_FLOAT) {
1339 auto span = GetSpanForData<float>(data);
1340 return std::vector<float>(span.begin(), span.end());
1341 }
1342 if (data.tensor.dtype() == DT_HALF) {
1343 return CastTestVector<Eigen::half, float>(
1344 GetSpanForData<Eigen::half>(data));
1345 }
1346 if (data.tensor.dtype() == DT_INT32) {
1347 return CastTestVector<int32, float>(GetSpanForData<int32>(data));
1348 }
1349 LOG(FATAL) << "DataType not supported for testing "
1350 << DataTypeString(data.tensor.dtype());
1351 }
1352 // Class to test various op converters, using both a TrtNodeValidator and
1353 // Converter.
1354 class OpConverterTest : public ::testing::Test {
1355 public:
OpConverterTest()1356 OpConverterTest()
1357 : tensor_buffer_allocator_(new GpuManagedAllocator()),
1358 scope_(Scope::NewRootScope()) {
1359 QCHECK_EQ(0, cudaStreamCreate(&stream_));
1360 Reset();
1361 }
1362
~OpConverterTest()1363 ~OpConverterTest() override { QCHECK_EQ(0, cudaStreamDestroy(stream_)); }
1364
GetTensorOrWeights(const string & name,TRT_TensorOrWeights * output)1365 Status GetTensorOrWeights(const string& name, TRT_TensorOrWeights* output) {
1366 return converter_->GetTensorOrWeights(name, output);
1367 }
1368
Reset(TrtPrecisionMode precision_mode_to_test=TrtPrecisionMode::FP32,TrtTestMode trt_mode=TrtTestMode::kImplicitBatch)1369 void Reset(TrtPrecisionMode precision_mode_to_test = TrtPrecisionMode::FP32,
1370 TrtTestMode trt_mode = TrtTestMode::kImplicitBatch) {
1371 // Destroy existing TRT objects in a proper order.
1372 converter_.reset(nullptr);
1373 engine_.reset(nullptr);
1374
1375 // Re-create them in proper order.
1376 converter_ =
1377 std::move(Converter::Create(precision_mode_to_test,
1378 /*use_calibration=*/false, &logger_,
1379 /*use_implicit_batch=*/trt_mode ==
1380 TrtTestMode::kImplicitBatch,
1381 /*engine_name=*/"")
1382 .ValueOrDie());
1383
1384 // Reset other related artifacts.
1385 scope_ = Scope::NewRootScope();
1386 }
1387
1388 // Constructs a flat tensor with 'vals' in Unified Memory.
1389 template <typename T>
1390 Tensor AsTensor(gtl::ArraySlice<T> vals) { // non-absl ok
1391 Tensor ret(tensor_buffer_allocator_.get(), DataTypeToEnum<T>::value,
1392 {static_cast<int64>(vals.size())});
1393 std::copy_n(vals.data(), vals.size(), ret.flat<T>().data());
1394 return ret;
1395 }
1396
1397 // Constructs a tensor of "shape" with values "vals" in Unified Memory.
1398 template <typename T>
1399 Tensor AsTensor(gtl::ArraySlice<T> vals, // non-absl ok
1400 const TensorShape& shape) {
1401 Tensor ret(tensor_buffer_allocator_.get(), DataTypeToEnum<T>::value,
1402 {static_cast<int64>(vals.size())});
1403 CHECK(ret.CopyFrom(AsTensor(vals), shape));
1404 return ret;
1405 }
1406
1407 // Constructs a tensor with given values (vals). The tensor type is defined by
1408 // the tf_type argument, its shape is given by input_dims. The tensor is
1409 // constructed using the allocator of OpConverterTest in Unified Memory.
1410 template <typename T>
1411 Tensor AsTensor(std::vector<T> vals, const std::vector<int> input_dims,
1412 DataType tf_type) {
1413 Tensor ret(tensor_buffer_allocator_.get(), tf_type,
1414 {static_cast<int64>(vals.size())});
1415 if (tf_type == DT_FLOAT) {
1416 auto conv_vals = CastTestVector<T, float>(vals);
1417 std::copy_n(conv_vals.data(), conv_vals.size(), ret.flat<float>().data());
1418 } else if (tf_type == DT_HALF) {
1419 auto conv_vals = CastTestVector<T, Eigen::half>(vals);
1420 std::copy_n(conv_vals.data(), conv_vals.size(),
1421 ret.flat<Eigen::half>().data());
1422 } else if (tf_type == DT_INT32) {
1423 auto conv_vals = CastTestVector<T, int32>(vals);
1424 std::copy_n(conv_vals.data(), conv_vals.size(), ret.flat<int32>().data());
1425 } else {
1426 LOG(FATAL) << "Cannot create tensor with type "
1427 << DataTypeString(tf_type);
1428 }
1429 TensorShape shape;
1430 TF_EXPECT_OK(TensorShapeUtils::MakeShape(input_dims, &shape));
1431 CHECK(ret.CopyFrom(ret, shape));
1432 return ret;
1433 }
1434
1435 // Constructs a flat tensor in Unified Memory.
1436 template <typename T>
1437 Tensor ConstructTensor(int data_size, const T& value = T()) {
1438 std::vector<T> values(data_size, value);
1439 return AsTensor<T>(values);
1440 }
1441
1442 // Constructs a flat tensor in Unified Memory.
1443 template <typename T>
1444 Tensor ConstructTensor(int data_size, const T& value, DataType tf_type) {
1445 std::vector<T> values(data_size, value);
1446 return AsTensor<T>(values, {data_size}, tf_type);
1447 }
1448
1449 void CheckDataTypeMatches(const DataVec& datas) {
1450 for (const auto& data : datas) {
1451 const int input_index = engine_->getBindingIndex(data.name.c_str());
1452 ASSERT_NE(-1, input_index);
1453 const nvinfer1::DataType trt_dtype =
1454 engine_->getBindingDataType(input_index);
1455 DataType tf_type;
1456 TF_ASSERT_OK(TrtTypeToTfType(trt_dtype, &tf_type));
1457 ASSERT_EQ(data.tensor.dtype(), tf_type)
1458 << DataTypeString(data.tensor.dtype()) << " vs. "
1459 << DataTypeString(tf_type);
1460 }
1461 }
1462
1463 Status BuildAndRun(const DataVec& input_data, DataVec* output_data,
1464 const int batch_size = 1) {
1465 // Mark the output tensor as TRT engine output.
1466 std::vector<Converter::EngineOutputInfo> output_info;
1467 for (const auto& data : *output_data) {
1468 nvinfer1::DataType trt_type;
1469 TF_RETURN_IF_ERROR(TfTypeToTrtType(data.tensor.dtype(), &trt_type));
1470 output_info.push_back({data.name, data.name, trt_type});
1471 }
1472 TF_RETURN_IF_ERROR(converter_->RenameAndMarkOutputTensors(output_info));
1473
1474 // Build the TRT engine.
1475 if (engine_.get() != nullptr) {
1476 return errors::Internal("Engine already exists");
1477 }
1478 TrtShapeOptimizationProfile profiles(
1479 ProfileStrategy::kImplicitBatchModeCompatible);
1480 if (!converter_->use_implicit_batch()) {
1481 // Create a single optimization profile for explicit batch mode
1482 std::vector<TensorShape> input_shapes;
1483 TF_RETURN_IF_ERROR(GetShapeFromDataVec(input_data, &input_shapes));
1484 profiles.AddShape(input_shapes);
1485 std::vector<PartialTensorShape> input_partial_shapes;
1486 TF_RETURN_IF_ERROR(
1487 GetNetworkInputShapes(converter_->network(), &input_partial_shapes));
1488 profiles.InitProfiles(input_partial_shapes);
1489 }
1490 TF_RETURN_IF_ERROR(
1491 converter_->BuildCudaEngine(&engine_,
1492 /*max_batch_size=*/batch_size,
1493 /*max_workspace_size_bytes=*/1 << 26,
1494 /*allocator=*/nullptr,
1495 /*calibrator=*/nullptr,
1496 /*profiles=*/&profiles));
1497 CHECK_NOTNULL(engine_.get());
1498 CheckDataTypeMatches(input_data);
1499 CheckDataTypeMatches(*output_data);
1500
1501 const int num_bindings = input_data.size() + output_data->size();
1502 std::vector<void*> buffers(num_bindings);
1503
1504 if (engine_->getNbBindings() != num_bindings) {
1505 return errors::Internal("Number of bindings do not match");
1506 }
1507 // Since we have only 1 optimization profile (which is enabled by default)
1508 // it is fine to create execution context directly, instead of calling
1509 // profiles.CreateExecutionContexts()
1510 TrtUniquePtrType<nvinfer1::IExecutionContext> execution_context(
1511 engine_->createExecutionContext());
1512
1513 // Prepare input bindings.
1514 TF_RETURN_IF_ERROR(SetTrtEngineInputs(
1515 engine_.get(), execution_context.get(), 0, buffers,
1516 converter_->use_implicit_batch(), batch_size, nullptr, &input_data));
1517 // Prepare output bindings.
1518 TF_RETURN_IF_ERROR(SetTrtEngineOutputs(
1519 engine_.get(), execution_context.get(), 0, buffers,
1520 converter_->use_implicit_batch(), batch_size, nullptr, output_data));
1521 // Execute the TRT engine.
1522 TF_RETURN_IF_ERROR(TrtEnqueue(execution_context.get(), buffers, stream_,
1523 converter_->use_implicit_batch(),
1524 batch_size));
1525 cudaStreamSynchronize(stream_);
1526 return Status::OK();
1527 }
1528
1529 // Adds ITensor for both validation and conversion, assuming explicit batch
1530 // dimension is included in dims (i.e. for an NCHW tensor dims = {N, C, H, W}).
1531 void AddTestTensorWithTFDims(
1532 const string& name, const std::vector<int32>& dims,
1533 nvinfer1::DataType trt_type = nvinfer1::DataType::kFLOAT,
1534 Status add_input_status = Status::OK()) {
1535 DataType tf_type;
1536 TF_ASSERT_OK(TrtTypeToTfType(trt_type, &tf_type));
1537 ops::Placeholder::Attrs attrs;
1538 TF_EXPECT_OK(TensorShapeUtils::MakeShape(dims, &attrs.shape_));
1539
1540 auto input = ops::Placeholder(scope_.WithOpName(name), tf_type, attrs);
1541 node_inputs_[name] = input.output;
1542
1543 // Add a real ITensor for conversion conditionally.
1544 const nvinfer1::Dims trt_dims =
1545 TensorShapeToTrtDims(attrs.shape_, converter_->use_implicit_batch());
1546 if (!converter_->use_implicit_batch() || HasStaticShape(trt_dims)) {
1547 int batch_size = dims[0];
1548 Status status =
1549 converter_->AddInputTensor(name, trt_type, trt_dims, batch_size);
1550 ASSERT_EQ(add_input_status, status);
1551 }
1552 }
1553
1554 // Adds ITensor for both validation and conversion. The difference compared to
1555 // AddTestTensorWithTFDims is in the meaning of the dims parameter. To define
1556 // a tensor with NCHW shape, here we set dims = {C,H,W} and batch_size = N.
1557 // TODO(tfeher) remove this function once all tests are updated to use the
1558 // other version of AddTestTensor (defined by
1559 // ParameterizedOpConverterTestBase).
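// A usage sketch (hypothetical call, matching the implementation below):
//   AddTestTensor("input", {1, 2, 3}, /*batch_size=*/2);
// adds a TF placeholder of shape {2, 1, 2, 3}; in implicit batch mode the
// corresponding ITensor gets shape {1, 2, 3} and the batch size is passed
// separately to the converter.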
1560 void AddTestTensor(
1561 const string& name, const std::vector<int32>& dims, int batch_size = 1,
1562 nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT) {
1563 std::vector<int32> dims_with_batch(dims.size() + 1);
1564 dims_with_batch[0] = batch_size;
1565 std::copy(dims.begin(), dims.end(), dims_with_batch.begin() + 1);
1566 AddTestTensorWithTFDims(name, dims_with_batch, trt_dtype);
1567 if (HasStaticShape(dims)) {
1568 ASSERT_EQ(batch_size, converter_->batch_size_);
1569 }
1570 }
1571
1572 // Add weights for both validation and conversion.
1573 template <typename T>
1574 void AddTestWeights(const string& name, const std::vector<int>& dims,
1575 const std::vector<T>& values) {
1576 // Add weights for validation.
1577 TensorShape shape;
1578 TF_EXPECT_OK(TensorShapeUtils::MakeShape(dims, &shape));
1579 Tensor t = AsTensor<T>(values, shape);
1580 node_inputs_[name] = ops::Const(scope_.WithOpName(name), t);
1581
1582 // Add weights for conversion.
1583 nvinfer1::DataType dtype;
1584 TF_ASSERT_OK(TfTypeToTrtType(DataTypeToEnum<T>::v(), &dtype));
1585 const nvinfer1::Dims trt_dims = GetTestDims(dims);
1586 const int64_t num_elements = TrtWeightDimsNumElements(trt_dims);
1587 QCHECK_EQ(num_elements, values.size())
1588 << num_elements << " vs " << values.size();
1589 TRT_ShapedWeights weights(dtype);
1590 if (num_elements) {
1591 weights = converter_->weight_store_.GetTempWeights(dtype, trt_dims);
1592 QCHECK_EQ(weights.size_bytes(), sizeof(T) * values.size())
1593 << weights.size_bytes() << " vs " << sizeof(T) * values.size();
1594 memcpy(weights.GetValues(), values.data(), weights.size_bytes());
1595 }
1596 TF_EXPECT_OK(
1597 converter_->AddTensorOrWeights(name, TRT_TensorOrWeights{weights}));
1598 }
1599
1600 template <typename T = int32>
1601 void AddTestWeights(const string& name, const std::vector<int>& dims,
1602 const std::vector<T>& values, DataType tf_type) {
1603 if (tf_type == DT_FLOAT) {
1604 AddTestWeights(name, dims, CastTestVector<T, float>(values));
1605 } else if (tf_type == DT_HALF) {
1606 AddTestWeights(name, dims, CastTestVector<T, Eigen::half>(values));
1607 } else if (tf_type == DT_INT32) {
1608 AddTestWeights(name, dims, CastTestVector<T, int32>(values));
1609 } else {
1610 FAIL() << "Cannot create test weights with type "
1611 << DataTypeString(tf_type);
1612 }
1613 }
1614
1615 // Test validation in validation-only mode.
1616 Status RunValidation(const Node* node) {
1617 grappler::GrapplerItem item;
1618 TF_EXPECT_OK(scope_.ToGraphDef(&item.graph));
1619 grappler::GraphProperties graph_properties(item);
1620 TF_EXPECT_OK(graph_properties.InferStatically(true));
1621
1622 TrtNodeValidator validator(graph_properties, converter_->precision_mode(),
1623 /*use_calibration=*/false,
1624 converter_->use_implicit_batch());
1625 return validator.IsTensorRTCandidate(node);
1626 }
1627
1628 void RunConversion(const Node* node, error::Code expected_code = error::OK,
1629 const char* expected_msg_substr = nullptr) {
1630 ExpectStatus(converter_->ConvertNode(node->def()), expected_code,
1631 expected_msg_substr);
1632 if (expected_code == error::OK) {
1633 VerifyTrtLayerNameNotEmpty(converter_->network());
1634 }
1635 }
1636
1637 // Helper method to run both validation and conversion, when the expected
1638 // outputs are the same.
1639 void RunValidationAndConversion(const NodeDef& node_def,
1640 error::Code expected_code = error::OK,
1641 const char* expected_msg_substr = nullptr,
1642 bool should_run_conversion = true) {
1643 // Add the node to the graph.
1644 // TODO(laigd): we should accept a function that adds the node using
1645 // `scope_`, so individual test cases can reuse the scope object and we don't
1646 // need to add the edges here ourselves.
1647 Graph* graph = scope_.graph();
1648 Status status;
1649 Node* node = graph->AddNode(std::move(node_def), &status);
1650 TF_EXPECT_OK(status);
1651 for (int i = 0; i < node_def.input().size(); ++i) {
1652 const string& input_name = node_def.input(i);
1653 const auto& itr = node_inputs_.find(input_name);
1654 QCHECK(itr != node_inputs_.end());
1655 const Output& input = itr->second;
1656 graph->AddEdge(input.node(), input.index(), node, i);
1657 }
1658
1659 status = RunValidation(node);
1660 if (should_run_conversion && status.ok()) {
1661 RunConversion(node, expected_code, expected_msg_substr);
1662 } else {
1663 ExpectStatus(status, expected_code, expected_msg_substr);
1664 }
1665 }
1666
1667 // Helper method to run both validation and conversion, and check the output
1668 // shapes.
1669 void RunValidationAndConversion(
1670 const NodeDef& node_def, const Status& status, const char* output_name,
1671 const std::vector<std::vector<int>>& exp_out_dims) {
1672 RunValidationAndConversion(node_def, status.code(),
1673 status.error_message().c_str(), true);
1674 if (status.ok()) {
1675 // TODO(tfeher): Enable this check in explicit_batch_mode.
1676 // In dynamic shape mode the output dims cannot be tested here. In that
1677 // case we need to wait for the concrete input shapes to be defined (by
1678 // setBindingDimensions before enqueue) before we can check the output
1679 // dims.
1680 if (converter_->use_implicit_batch()) {
1681 for (int i = 0; i < exp_out_dims.size(); i++) {
1682 TRT_TensorOrWeights output;
1683 string name = i == 0 ? output_name : StrCat(output_name, ":", i);
1684 TF_EXPECT_OK(GetTensorOrWeights(name.c_str(), &output));
1685 ASSERT_TRUE(output.is_tensor());
1686 if (!exp_out_dims[i].empty()) {
1687 // Removing batch dim.
1688 auto out_dims = std::vector<int>(exp_out_dims[i].begin() + 1,
1689 exp_out_dims[i].end());
1690 VLOG(2) << "Testing output shape for tensor " << name;
1691 ExpectTrtDimsEqualsArray(out_dims,
1692 output.tensor()->getDimensions());
1693 }
1694 }
1695 }
1696 }
1697 }
1698
1699 // Expose quantization_ranges_ for tests
1700 std::unordered_map<nvinfer1::ITensor*, float>& quantization_ranges() {
1701 return converter_->quantization_ranges_;
1702 }
1703
1704 void PropagateQuantizationRanges() {
1705 converter_->PropagateQuantizationRanges();
1706 }
1707 std::unique_ptr<Converter> converter_;
1708
1709 private:
1710 Logger logger_;
1711 TrtUniquePtrType<nvinfer1::ICudaEngine> engine_;
1712 cudaStream_t stream_;
1713 std::unique_ptr<Allocator> tensor_buffer_allocator_;
1714 // The scope that contains the graph being converted. Because
1715 // tensor_buffer_allocator_ provides the storage for tensor contents that are
1716 // represented as attributes for graph nodes within scope_,
1717 // tensor_buffer_allocator_ needs to be available when destructing scope_.
1718 // Therefore, scope_ comes after tensor_buffer_allocator_ in the class member
1719 // field list.
1720 Scope scope_;
1721 std::unordered_map<string, Output> node_inputs_;
1722 };
1723
1724 // General test parameters to be used with ops that take a single input tensor.
1725 struct TestParamBase {
1726 // Concrete input dimensions for the test (including the batch dim)
1727 std::vector<int> input_dims;
1728
1729 // Dimensions to define an input with PartialTensorShape. This can be used to
1730 // define networks with dynamic input shape. It can be left empty, in that
1731 // case AddTestTensor sets partial shapes that are appropriate to TrtTestMode.
1732 std::vector<int> partial_input_dims;
1733
1734 // Concrete (static) output dimensions, including batch size as first dim
1735 std::vector<int> expected_output_dims;
1736
1737 // Parameter vector, has converter specific meaning.
1738 std::vector<int> param;
1739
1740 // Expected status of conversion (with concrete error message)
1741 Status status;
1742
1743 // Expected status of BuildAndRun
1744 Status runtime_status;
1745 };
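// A construction sketch (mirroring the Transpose test below); unspecified
// fields default-initialize, with the Status members defaulting to OK:
//   TestParamBase{/*input_dims=*/{1, 1, 2, 3}, /*partial_input_dims=*/{},
//                 /*expected_output_dims=*/{1, 3, 1, 2}, /*param=*/{0, 3, 1, 2}};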
1746
1747 std::ostream& operator<<(std::ostream& os, const TestParamBase& p) {
1748 os << "input_dims" << p.input_dims;
1749 if (!p.partial_input_dims.empty()) {
1750 os << ", partial_input_dims" << p.partial_input_dims;
1751 }
1752 if (!p.expected_output_dims.empty()) {
1753 os << ", exp_out_dims" << p.expected_output_dims;
1754 }
1755 if (!p.param.empty()) {
1756 os << ", param" << p.param;
1757 }
1758 os << ", " << p.status;
1759 return os;
1760 }
1761
1762 // Parameterized version of OpConverterTest. We have the following parameters:
1763 // 1. TrtTestMode: implicit batch, explicit batch, dynamic shape modes
1764 // 2. DataType of the input TF tensors: DT_FLOAT, DT_HALF, DT_INT32
1765 // 3. TrtPrecisionMode argument for the Converter: FP32, FP16, INT8
1766 // We will introduce subclasses that will be instantiated using different
1767 // combinations of the DataType and TrtPrecisionMode parameters.
1768 class ParameterizedOpConverterTestBase
1769 : public OpConverterTest,
1770 public ::testing::WithParamInterface<
1771 std::tuple<TrtTestMode, DataType, TrtPrecisionMode>> {
1772 public:
1773 ParameterizedOpConverterTestBase()
1774 : trt_mode_(std::get<0>(GetParam())),
1775 tf_type_(std::get<1>(GetParam())),
1776 converter_precision_(std::get<2>(GetParam())) {
1777 LOG(INFO) << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%";
1778 LOG(INFO) << "tf_type_: " << DebugString(tf_type_);
1779 LOG(INFO) << "trt_mode_: " << DebugString(trt_mode_);
1780 LOG(INFO) << "converter_precision_: " << DebugString(converter_precision_);
1781 LOG(INFO) << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%";
1782 }
1783
1784 void Reset() {
1785 OpConverterTest::Reset(converter_precision_, trt_mode_);
1786 input_data_.clear();
1787 }
1788
1789 // Getters of protected attributes
1790 DataType get_tf_type() { return tf_type_; }
1791 TrtTestMode get_trt_mode() { return trt_mode_; }
1792 TrtPrecisionMode get_converter_precision() { return converter_precision_; }
1793
1794 // Adds an input ITensor for TRT network. Also creates the corresponding TF
1795 // tensor, and stores it in the list of inputs (input_data_).
1796 //
1797 // The TF tensor is always created with concrete static input shape given by
1798 // dims. The ITensor can have static or dynamic shape based on the trt_mode
1799 // attribute. The ITensor shape is set automatically according to the trt_mode
1800 // parameter, unless the user overrides it with an explicit
1801 // partial_input_shape_dims argument.
1802 //
1803 // Parameters:
1804 // - name of the input node
1805 // - dims actual dimensions of the tensor that we will use during the test
1806 // (including explicit batch dim)
1807 // - values initial values for the TF tensor
1808 // - dtype data type of the tensor
1809 // - partial_input_shape dimensions which can include unknown shapes. This can
1810 // be empty, in that case the partial_input_shape will be set automatically
1811 // depending on the trt_mode argument. (This argument also includes explicit
1812 // batch dim).
1813 // - add_input_status adding ITensor to the network can fail in implicit batch
1814 // mode if the batch size is inconsistent. Using the add_input_status arg we
1815 // can test such errors.
1816 //
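// An illustrative (hypothetical) call in dynamic shape mode:
//   AddTestTensor("input", {1, 2, 3}, DT_FLOAT, {1, 2, 3, 4, 5, 6});
// creates a TF tensor of shape {1, 2, 3} holding the given values, while the
// ITensor is added with all dimensions unknown ({-1, -1, -1}) because no
// partial_input_shape_dims override is given.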
1817 template <typename T = int>
1818 void AddTestTensor(const string& name, const std::vector<int32>& dims,
1819 DataType tf_type, const std::vector<T>& values,
1820 const std::vector<int32>& partial_input_shape_dims = {},
1821 Status add_input_status = Status::OK()) {
1822 if (!dims.empty()) {
1823 const auto num_elements = std::accumulate(
1824 std::begin(dims), std::end(dims), 1, std::multiplies<double>());
1825 if (num_elements != values.size()) {
1826 LOG(WARNING) << "Expected Test Tensor Shape: " << DebugString(dims)
1827 << ", Received Input Tensor: " << DebugString(values);
1828 }
1829 }
1830
1831 std::vector<int32> partial_shape;
1832 if (!partial_input_shape_dims.empty()) {
1833 partial_shape = partial_input_shape_dims;
1834 } else {
1835 if (trt_mode_ == TrtTestMode::kDynamicShape) {
1836 // In dynamic shape mode we make all dims unknown.
1837 partial_shape = std::vector<int32>(dims.size(), -1);
1838 } else {
1839 // Use static (known) input shapes.
1840 partial_shape = dims;
1841 }
1842 }
1843 nvinfer1::DataType trt_type;
1844 TF_ASSERT_OK(TfTypeToTrtType(tf_type, &trt_type));
1845 AddTestTensorWithTFDims(name, partial_shape, trt_type, add_input_status);
1846 if (!values.empty()) {
1847 VLOG(2) << "Adding test tensor: " << name << " "
1848 << DataTypeString(tf_type);
1849 InputOutputData data{name, AsTensor(values, dims, tf_type)};
1850 VLOG(2) << "Added tensor: " << data.name
1851 << DataTypeString(data.tensor.dtype());
1852 input_data_.push_back(data);
1853 }
1854 }
1855
1856 // Adds test tensor (same as above) but with the default tf_type defined by
1857 // the test params.
1858 template <typename T = int>
1859 void AddTestTensor(const string& name, const std::vector<int32>& dims,
1860 const std::vector<T>& values = {},
1861 const std::vector<int32>& partial_input_shape_dims = {}) {
1862 AddTestTensor<T>(name, dims, tf_type_, values, partial_input_shape_dims);
1863 }
1864
1865 // Builds and runs the converted network. Checks output tensor shape. Tests
1866 // output values using a matcher. The network can have multiple input and
1867 // output tensors. The inputs are defined by the input_data_ member variable.
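// Usage sketch (hypothetical, single output tensor):
//   BuildAndRun("my_op", /*expected_output_dims=*/{{1, 2, 3}},
//               /*expected_runtime_status=*/Status::OK(),
//               /*matcher=*/{ElementsAreArray(expected_values)});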
1868 void BuildAndRun(const string& name,
1869 const std::vector<std::vector<int>>& expected_output_dims,
1870 const Status& expected_runtime_status,
1871 const std::vector<Matcher<std::vector<float>>>& matcher,
1872 const std::vector<DataType>& out_tf_types = {}) {
1873 TensorShape shape;
1874 const int n_output = expected_output_dims.size();
1875 ASSERT_EQ(n_output, matcher.size());
1876 DataVec output_data;
1877 for (int i = 0; i < n_output; i++) {
1878 TF_EXPECT_OK(
1879 TensorShapeUtils::MakeShape(expected_output_dims[i], &shape));
1880 string out_name = (i == 0) ? name : StrCat(name, ":", i);
1881 DataType out_tf_type =
1882 out_tf_types.size() > i ? out_tf_types[i] : tf_type_;
1883 InputOutputData data{
1884 out_name, ConstructTensor(shape.num_elements(), 0, out_tf_type)};
1885 output_data.push_back(data);
1886 }
1887 const int batch_size =
1888 input_data_.empty() ? 1 : input_data_[0].tensor.shape().dim_size(0);
1889 Status stat =
1890 OpConverterTest::BuildAndRun(input_data_, &output_data, batch_size);
1891 ASSERT_EQ(expected_runtime_status.ok(), stat.ok())
1892 << "expected status: " << expected_runtime_status
1893 << ", actual status: " << stat;
1894 if (expected_runtime_status.ok() && stat.ok()) {
1895 for (int i = 0; i < n_output; i++) {
1896 // Check the shape of the actual output tensors
1897 TF_EXPECT_OK(
1898 TensorShapeUtils::MakeShape(expected_output_dims[i], &shape));
1899 EXPECT_TRUE(output_data[i].tensor.shape() == shape)
1900 << "Expected shape: " << shape.DebugString() << ", actual shape"
1901 << output_data[i].tensor.shape().DebugString();
1902 EXPECT_THAT(GetDataAsFloat(output_data[i]), matcher[i]);
1903 }
1904 }
1905 }
1906
1907 // Runs validation and conversion. If conversion is successful then builds
1908 // the TRT network, executes it and checks the output. Handles multiple output
1909 // tensors.
1910 void TestOpConverterMultiOut(
1911 const string& name, const NodeDef node_def,
1912 const std::vector<std::vector<int>>& expected_output_dims,
1913 const Status& expected_conversion_status,
1914 const Status& expected_runtime_status,
1915 const std::vector<Matcher<std::vector<float>>>& matcher,
1916 const std::vector<DataType>& out_tf_type = {}) {
1917 RunValidationAndConversion(node_def, expected_conversion_status,
1918 name.c_str(), expected_output_dims);
1919 if (expected_conversion_status.ok()) {
1920 BuildAndRun(name, expected_output_dims, expected_runtime_status, matcher,
1921 out_tf_type);
1922 }
1923 }
1924
1925 // Runs validation and conversion. If conversion is successful then builds
1926 // the TRT network, executes it and checks the output.
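// Usage sketch (mirroring the Transpose test below):
//   TestOpConverter("my_transpose", node_def,
//                   /*expected_output_dims=*/{1, 3, 1, 2}, Status::OK(),
//                   Status::OK(), ElementsAreArray({1, 4, 2, 5, 3, 6}));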
1927 void TestOpConverter(const string& name, const NodeDef node_def,
1928 const std::vector<int>& expected_output_dims,
1929 const Status& expected_conversion_status,
1930 const Status& expected_runtime_status,
1931 const Matcher<std::vector<float>>& matcher,
1932 const std::vector<DataType>& out_tf_types = {}) {
1933 RunValidationAndConversion(
1934 node_def, expected_conversion_status, name.c_str(),
1935 std::vector<std::vector<int>>({expected_output_dims}));
1936 if (expected_conversion_status.ok()) {
1937 BuildAndRun(name, std::vector<std::vector<int>>({expected_output_dims}),
1938 expected_runtime_status,
1939 std::vector<Matcher<std::vector<float>>>({matcher}),
1940 out_tf_types);
1941 }
1942 }
1943
1944 protected:
1945 const TrtTestMode trt_mode_;
1946 const DataType tf_type_;
1947 const TrtPrecisionMode converter_precision_;
1948 DataVec input_data_;
1949 };
1950
1951 // Op converter test in FP32 mode. While for debugging purposes it might make
1952 // sense to run over all possible combinations, normally a subset of them
1953 // would be sufficient:
1954 // - All valid options to TrtTestMode (implicit, explicit, dynamic shape)
1955 // - DataType: is the TF data type of the input tensors. This usually only
1956 // influences the data type added by Converter::AddInputTensor. We test the
1957 // valid combinations of input data types in AddAndGetInputs, therefore
1958 // for most of the OpConverterTest it is sufficient to test for DT_FLOAT.
1959 // - TrtPrecisionMode: valid options are FP32, FP16 and INT8. This influences
1960 // how TRT handles the precision inside the TRT network, but should not matter
1961 // for the TF -> TRT conversion. Therefore it should be sufficient to test
1962 // for FP32.
1963 class OpConverter_FP32_Test : public ParameterizedOpConverterTestBase {};
1964 // Base class for tests that need to be tested for both FP32 and FP16.
1965 class OpConverter_FP32_FP16_Test : public ParameterizedOpConverterTestBase {};
1966 // Base class for tests that need to be tested for FP32, FP16, and INT32
1967 class OpConverter_FP32_FP16_INT32_Test
1968 : public ParameterizedOpConverterTestBase {};
1969
1970 // Instantiate parameter combinations to OpConverter_<DT_X...>_Test
1971 INSTANTIATE_TEST_CASE_P(
1972 OpConvTestInstantiation, OpConverter_FP32_Test,
1973 ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
1974 ::testing::Values(DT_FLOAT),
1975 ::testing::Values(TrtPrecisionMode::FP32)));
1976
1977 INSTANTIATE_TEST_CASE_P(
1978 OpConvTestInstantiation, OpConverter_FP32_FP16_Test,
1979 ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
1980 ::testing::Values(DT_FLOAT, DT_HALF),
1981 ::testing::Values(TrtPrecisionMode::FP32)));
1982
1983 INSTANTIATE_TEST_CASE_P(
1984 OpConvTestInstantiation, OpConverter_FP32_FP16_INT32_Test,
1985 ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
1986 ::testing::Values(DT_FLOAT, DT_HALF, DT_INT32),
1987 ::testing::Values(TrtPrecisionMode::FP32)));
1988
1989 template <typename T>
1990 void CopyTensorElements(const Tensor& tensor, protobuf::RepeatedField<T>* out) {
1991 out->Clear();
1992 if (tensor.NumElements() == 0) return;
1993
1994 // TensorProto does not need to have all the elements present and can truncate
1995 // trailing elements with the same value for compressed representation. Such
1996 // elements are derived based on the tensor shape.
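// For example, a tensor holding {2, 2, 1, 1, 1, 1} may be stored as
// {2, 2, 1}; the trailing 1s are implied by the tensor shape.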
1997 const auto flat = tensor.flat<T>();
1998 int64 last_index = 0;
1999 for (int64 i = 0; i < tensor.NumElements(); ++i) {
2000 if (flat(i) != flat(last_index)) {
2001 last_index = i;
2002 }
2003 }
2004
2005 int num_out_elements = last_index + 1;
2006 out->Reserve(num_out_elements);
2007 out->AddNAlreadyReserved(num_out_elements);
2008 const T* src = flat.data();
2009 T* dst = out->mutable_data();
2010 std::copy(src, src + num_out_elements, dst);
2011 }
2012
2013 template <DataType dtype, typename InputCType, typename OutputCType>
2014 void TestConvertConst(OpConverterTest* test) {
2015 NodeDef node_def;
2016 node_def.set_name("my_const");
2017 node_def.set_op("Const");
2018
2019 auto reset_and_test = [&node_def, test](
2020 const Tensor& tensor, const bool as_tensor_content,
2021 const std::vector<int>& expected_dims,
2022 const std::vector<OutputCType>& expected_value) {
2023 test->Reset();
2024
2025 TensorProto* tensor_attr =
2026 (*node_def.mutable_attr())["value"].mutable_tensor();
2027 tensor_attr->Clear();
2028
2029 if (as_tensor_content) {
2030 tensor.AsProtoTensorContent(tensor_attr);
2031 } else {
2032 tensor.shape().AsProto(tensor_attr->mutable_tensor_shape());
2033 tensor_attr->set_dtype(tensor.dtype());
2034
2035 if (tensor.dtype() == DT_FLOAT) {
2036 CopyTensorElements<float>(tensor, tensor_attr->mutable_float_val());
2037 } else if (tensor.dtype() == DT_INT32) {
2038 CopyTensorElements<int32>(tensor, tensor_attr->mutable_int_val());
2039 } else {
2040 tensor.AsProtoField(tensor_attr);
2041 }
2042 }
2043 test->RunValidationAndConversion(node_def);
2044 TRT_TensorOrWeights output;
2045 TF_EXPECT_OK(test->GetTensorOrWeights("my_const", &output));
2046 ValidateWeights(output.weights(), expected_dims, expected_value);
2047 };
2048
2049 auto& attr = *node_def.mutable_attr();
2050 attr["dtype"].set_type(dtype);
2051 {
2052 // By default an empty tensor will pick DT_FLOAT as its data type; we fix
2053 // it here.
2054 Tensor t(dtype); // Empty tensor.
2055 reset_and_test(t, false, {}, {});
2056 }
2057 {
2058 Tensor t = test::AsScalar<InputCType>(12);
2059 reset_and_test(t, false, {1}, {12});
2060 reset_and_test(t, true, {1}, {12});
2061 }
2062 {
2063 Tensor t = test->AsTensor<InputCType>({1, 2});
2064 reset_and_test(t, false, {2}, {1, 2});
2065 reset_and_test(t, true, {2}, {1, 2});
2066 }
2067 {
2068 Tensor t =
2069 test->AsTensor<InputCType>({1, 2, 3, 4, 5, 6}, TensorShape({2, 3}));
2070 reset_and_test(t, false, {2, 3}, {1, 2, 3, 4, 5, 6});
2071 reset_and_test(t, true, {2, 3}, {1, 2, 3, 4, 5, 6});
2072 }
2073 {
2074 // Set all tensor elements to the same value. Such tensors are encoded
2075 // using a single element list in tensor proto.
2076 Tensor t =
2077 test->AsTensor<InputCType>({1, 1, 1, 1, 1, 1}, TensorShape({2, 3}));
2078 reset_and_test(t, false, {2, 3}, {1, 1, 1, 1, 1, 1});
2079 reset_and_test(t, true, {2, 3}, {1, 1, 1, 1, 1, 1});
2080 }
2081 {
2082 // Set trailing tensor elements to the same value. Such tensors are
2083 // encoded by truncating all equal elements except the first one.
2084 Tensor t =
2085 test->AsTensor<InputCType>({2, 2, 1, 1, 1, 1}, TensorShape({2, 3}));
2086 reset_and_test(t, false, {2, 3}, {2, 2, 1, 1, 1, 1});
2087 reset_and_test(t, true, {2, 3}, {2, 2, 1, 1, 1, 1});
2088 }
2089 }
2090
2091 TEST_F(OpConverterTest, ConvertConst) {
2092 {
2093 Reset();
2094 NodeDef node_def = MakeConstNodeDef<double>("my_const", {});
2095 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
2096 "Unsupported tensorflow data type double");
2097 }
2098 {
2099 Reset();
2100 Tensor tensor = AsTensor<int64>({1, std::numeric_limits<int64>::max(), 1, 1,
2101 1, std::numeric_limits<int64>::lowest()},
2102 TensorShape({2, 3}));
2103 NodeDef node_def;
2104 node_def.set_name("my_const");
2105 node_def.set_op("Const");
2106 (*node_def.mutable_attr())["dtype"].set_type(DT_INT64);
2107 TensorProto* tensor_attr =
2108 (*node_def.mutable_attr())["value"].mutable_tensor();
2109 tensor_attr->Clear();
2110 tensor.AsProtoTensorContent(tensor_attr);
2111 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
2112 "outside the range of int32");
2113 }
2114
2115 TestConvertConst<DT_FLOAT, float, float>(this);
2116 TestConvertConst<DT_INT8, int8, int32>(this);
2117 TestConvertConst<DT_UINT8, uint8, int32>(this);
2118 TestConvertConst<DT_INT16, int16, int32>(this);
2119 TestConvertConst<DT_UINT16, uint16, int32>(this);
2120 TestConvertConst<DT_INT32, int32, int32>(this);
2121 TestConvertConst<DT_UINT32, uint32, int32>(this);
2122 TestConvertConst<DT_INT64, int64, int32>(this);
2123 TestConvertConst<DT_UINT64, uint64, int32>(this);
2124 }
2125
2126 template <typename T>
2127 NodeDef CreateFusedBatchNormOp(DataType tf_type, std::string data_format,
2128 bool is_training, float epsilon) {
2129 Scope s = Scope::NewRootScope();
2130 auto x = ops::Placeholder(s.WithOpName("x"), tf_type);
2131 auto scale = ops::Placeholder(s.WithOpName("scale"), tf_type);
2132 auto offset = ops::Placeholder(s.WithOpName("offset"), tf_type);
2133 auto mean = ops::Placeholder(s.WithOpName("mean"), tf_type);
2134 auto variance = ops::Placeholder(s.WithOpName("variance"), tf_type);
2135 typename T::Attrs attrs;
2136 attrs.data_format_ = data_format;
2137 attrs.is_training_ = is_training;
2138 if (epsilon > 0) {
2139 attrs.epsilon_ = epsilon;
2140 } else {
2141 EXPECT_GE(epsilon, 0);
2142 }
2143 return T(s.WithOpName("my_batchnorm"), x, scale, offset, mean, variance,
2144 attrs)
2145 .operation.node()
2146 ->def();
2147 }
2148
2149 TEST_P(OpConverter_FP32_Test, ConvertFusedBatchNorm) {
2150 using OpFunc = std::function<NodeDef(DataType, std::string, bool, float)>;
2151 std::vector<OpFunc> get_node_def_vec{
2152 CreateFusedBatchNormOp<ops::FusedBatchNorm>,
2153 CreateFusedBatchNormOp<ops::FusedBatchNormV2>,
2154 CreateFusedBatchNormOp<ops::FusedBatchNormV3>};
2155
2156 struct TestParam {
2157 std::string data_format;
2158 int tensor_input_idx; // Index of an input that will be provided as tensor.
2159 bool is_training;
2160 float epsilon;
2161 Status conversion_status;
2162 bool keep_channel_unknown;
2163 };
2164
2165 struct NodeInput {
2166 std::string name;
2167 std::vector<int> dims;
2168 std::vector<float> val;
2169 };
2170 std::vector<NodeInput> node_input{
2171 {"x", {2, 3, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}},
2172 {"scale", {3}, {7, 8, 9}},
2173 {"offset", {3}, {10, 20, 30}},
2174 {"mean", {3}, {1, 2, 3}},
2175 {"variance", {3}, {4, 5, 6}}};
2176
2177 std::vector<float> expected_output{10.0, 13.495633, 23.574135, 27.148273,
2178 37.342354, 41.013527, 30.9738, 34.469433,
2179 45.018955, 48.59309, 59.369415, 63.04059};
2180 for (auto get_node_def : get_node_def_vec) {
2181 NodeDef tmp_node_def = get_node_def(tf_type_, "NCHW", true, 0);
2182 std::string op_name = tmp_node_def.op();
2183 std::vector<TestParam> test_param{
2184 {"NHWC", 0, false, 0,
2185 errors::Unimplemented(StrCat(
2186 op_name, " only supports data_format=NCHW, at my_batchnorm"))},
2187 {"NCHW", 0, true, 0,
2188 errors::Unimplemented(StrCat(
2189 op_name, " only supports is_training=false, at my_batchnorm"))},
2190 {"NCHW", 1, false, 0,
2191 errors::Unimplemented(StrCat("The input \"scale\" for ", op_name,
2192 " must be a constant, at my_batchnorm"))},
2193 {"NCHW", 2, false, 0,
2194 errors::Unimplemented(StrCat("The input \"offset\" for ", op_name,
2195 " must be a constant, at my_batchnorm"))},
2196 {"NCHW", 3, false, 0,
2197 errors::Unimplemented(StrCat("The input \"mean\" for ", op_name,
2198 " must be a constant, at my_batchnorm"))},
2199 {"NCHW", 4, false, 0,
2200 errors::Unimplemented(StrCat("The input \"variance\" for ", op_name,
2201 " must be a constant, at my_batchnorm"))},
2202 {"NCHW", 0, false, 0.01}}; // The last one is the only test that runs.
2203 if (trt_mode_ == TrtTestMode::kDynamicShape) {
2204 test_param.push_back(
2205 {"NCHW", 0, false, 0.01,
2206 errors::InvalidArgument(
2207 "Channel dimension must be static, at my_batchnorm"),
2208 true});
2209 }
2210 for (auto p : test_param) {
2211 Reset();
2212 NodeDef node_def =
2213 get_node_def(tf_type_, p.data_format, p.is_training, p.epsilon);
2214 for (int i = 0; i < node_input.size(); i++) {
2215 if (i == 0 || i == p.tensor_input_idx) {
2216 // The first input (x) is always added as a tensor, and it has shape
2217 // NCHW. The other inputs are per channel values (1D, size C).
2218 //
2219 // In implicit batch mode, it is not possible to add any of the 1D
2220 // inputs as a tensor: the first dim is always treated as batch dim in
2221 // implicit batch mode, and that has to agree for all tensors. We have
2222 // two input tensors with shapes NCHW and C and in general N != C.
2223 // The converter already picked up N from the first input, and reports
2224 // an error when we try to add any other tensor with a non-matching
2225 // first dim.
2226 //
2227 // This restriction does not apply in explicit batch mode: the tensors
2228 // can have different first dim. The converter still expects that only
2229 // the first arg is a tensor. TODO(tfeher) Check if one can relax this
2230 // restriction.
2231 Status expected_status =
2232 (i != 0 && trt_mode_ == TrtTestMode::kImplicitBatch)
2233 ? errors::InvalidArgument(
2234 StrCat("Batch size doesn't match for tensor ",
2235 node_input[i].name,
2236 ": Provided batch size does not match "
2237 "converter batch size: 3 vs 2"))
2238 : Status::OK();
2239 std::vector<int> partial_input_shape;
2240 if (i == 0 && trt_mode_ == TrtTestMode::kDynamicShape &&
2241 !p.keep_channel_unknown) {
2242 // keep channel dim static (known)
2243 partial_input_shape.resize(4, -1);
2244 partial_input_shape[1] = node_input[i].dims[1];
2245 }
2246 AddTestTensor(node_input[i].name, node_input[i].dims, tf_type_,
2247 node_input[i].val, partial_input_shape,
2248 expected_status);
2249
2250 } else {
2251 AddTestWeights(node_input[i].name, node_input[i].dims,
2252 node_input[i].val, tf_type_);
2253 }
2254 }
2255 TestOpConverter("my_batchnorm", node_def, node_input[0].dims,
2256 p.conversion_status, Status::OK(),
2257 ArrayFloatNear(expected_output));
2258 }
2259 }
2260 }
2261
2262 TEST_P(OpConverter_FP32_Test, ConvertTranspose) {
2263 // Get the NodeDef for Transpose.
2264 Scope s = Scope::NewRootScope();
2265 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
2266 auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
2267 auto transpose = ops::Transpose(s.WithOpName("my_transpose"), input, weights);
2268 const NodeDef& node_def = transpose.operation.node()->def();
2269
2270 std::vector<TestParamBase> test_params = {
2271 // For the first test we leave param empty. This signals to add the perm
2272 // input as a tensor instead of a weight, which is invalid.
2273 TestParamBase{{3, 1, 2, 1},
2274 {},
2275 {},
2276 {},
2277 Status(error::UNIMPLEMENTED,
2278 "The input \"perm\" for Transpose must be a "
2279 "constant, at my_transpose")},
2280 TestParamBase{{1, 1, 2, 3},
2281 {},
2282 {},
2283 {0, 1, 2},
2284 Status(error::INVALID_ARGUMENT,
2285 "Rank of perm for transpose does not match with "
2286 "that of the input.")},
2287 // Transpose batch dim
2288 TestParamBase{
2289 {1, 1, 2, 3},
2290 {},
2291 {3, 2, 1, 1},
2292 {3, 2, 1, 0},
2293 (trt_mode_ == TrtTestMode::kImplicitBatch)
2294 ? Status(error::UNIMPLEMENTED,
2295 "Transpose at batch dimension is not supported")
2296 : Status::OK()},
2297 TestParamBase{{1, 1, 2, 3}, {}, {1, 3, 1, 2}, {0, 3, 1, 2}},
2298 };
2299 if (trt_mode_ == TrtTestMode::kDynamicShape) {
2300 // Dynamic shape tests where some shapes are known
2301 test_params.push_back(TestParamBase{
2302 {1, 1, 2, 3}, {-1, 1, 2, -1}, {1, 3, 1, 2}, {0, 3, 1, 2}});
2303 }
2304 std::vector<float> expected_values{1, 4, 2, 5, 3, 6};
2305 for (auto p : test_params) {
2306 SCOPED_TRACE(p);
2307 Reset();
2308 AddTestTensor("input", p.input_dims, {1, 2, 3, 4, 5, 6},
2309 p.partial_input_dims);
2310 if (p.param.empty()) {
2311 AddTestTensor("weights", {3});
2312 } else {
2313 AddTestWeights<int32>("weights", {static_cast<int>(p.param.size())},
2314 p.param);
2315 }
2316 TestOpConverter("my_transpose", node_def, p.expected_output_dims, p.status,
2317 p.runtime_status, ElementsAreArray(expected_values));
2318 }
2319 }
2320
2321 TEST_F(OpConverterTest, ConvertReshape) {
2322 // Get the NodeDef for Reshape.
2323 Scope s = Scope::NewRootScope();
2324 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
2325 auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
2326 auto reshape = ops::Reshape(s.WithOpName("my_reshape"), input, weights);
2327 const NodeDef& node_def = reshape.operation.node()->def();
2328
2329 {
2330 // Shape is a tensor, should fail.
2331 Reset();
2332 AddTestTensor("input", {1, 2, 3});
2333 AddTestTensor("weights", {3});
2334 RunValidationAndConversion(
2335 node_def, error::UNIMPLEMENTED,
2336 "The input \"shape\" for Reshape must be a constant, at my_reshape");
2337 }
2338 {
2339 // Reshape to scalar, should fail.
2340 Reset();
2341 AddTestTensor("input", {1, 2, 3});
2342 AddTestWeights<int32>("weights", {0}, {});
2343 RunValidationAndConversion(
2344 node_def, error::UNIMPLEMENTED,
2345 "Reshape to shape=[] is not supported, at my_reshape");
2346 }
2347 {
2348 // Reshape tensor with zero rank to empty tensor, should fail.
2349 Reset();
2350 AddTestTensor("input", {});
2351 AddTestWeights<int32>("weights", {1, 0, 1}, {});
2352 RunValidationAndConversion(
2353 node_def, error::UNIMPLEMENTED,
2354 "Reshape to shape=[] is not supported, at my_reshape");
2355 }
2356
2357 struct TestParams {
2358 int batch_size;
2359 std::vector<int> tensor_dims;
2360 std::vector<int> shape;
2361 };
2362
2363 // Reshape at batch dimension, should fail.
2364 std::vector<TestParams> params = {
2365 TestParams{1, {1, 2, 3}, {3, 1, 1, 2}},
2366 TestParams{1, {1, 2, -1}, {-1, 1, 1, 2}},
2367 TestParams{1, {1, 2, 3}, {-1, 1, 1, 2}},
2368 TestParams{-1, {1, 2, 3}, {1, 1, 1, 2}},
2369 TestParams{-1, {-1, 2, 3}, {1, 1, 1, 6}}, // TODO(laigd): it should pass.
2370 };
2371 for (int i = 0; i < params.size(); ++i) {
2372 Reset();
2373 const std::vector<int>& dims = params[i].tensor_dims;
2374 AddTestTensor("input", dims, params[i].batch_size);
2375 AddTestWeights<int32>("weights", {4}, params[i].shape);
2376 RunValidationAndConversion(
2377 node_def, error::UNIMPLEMENTED,
2378 "Reshape on batch dimension is not supported, at my_reshape",
2379 /*should_run_conversion=*/(dims[0] > 0 && dims[1] > 0 && dims[2] > 0));
2380 }
2381
2382 // Reshape on non batch dimensions, ok.
2383 std::vector<TestParams> ok_params = {
2384 TestParams{-1, {1, 2, 3}, {-1, 1, 3, 2}},
2385 TestParams{1, {1, 2, 3}, {-1, 1, 3, 2}},
2386 TestParams{1, {1, 2, 3}, {1, 1, 3, 2}},
2387 TestParams{2, {1, 2, 3}, {2, 1, 3, 2}},
2388 TestParams{1, {1, 1}, {1}},
2389 TestParams{1, {}, {1, 1}},
2390 TestParams{2, {1, 1}, {2}},
2391 TestParams{2, {}, {2, 1}},
2392 };
2393 for (int i = 0; i < ok_params.size(); ++i) {
2394 const int batch_size = std::max(1, ok_params[i].batch_size);
2395 const auto& shape = ok_params[i].shape;
2396 Reset();
2397 AddTestTensor("input", ok_params[i].tensor_dims, batch_size);
2398 AddTestWeights<int32>("weights", {static_cast<int>(shape.size())}, shape);
2399 RunValidationAndConversion(node_def);
2400
2401 TRT_TensorOrWeights output;
2402 TF_EXPECT_OK(GetTensorOrWeights("my_reshape", &output));
2403 ASSERT_TRUE(output.is_tensor());
2404 const std::vector<int> expected_output_dims(shape.begin() + 1, shape.end());
2405 const nvinfer1::Dims actual_output_dims = output.tensor()->getDimensions();
2406 ExpectTrtDimsEqualsArray(expected_output_dims, actual_output_dims);
2407
2408 std::vector<float> input_vec(TrtTensorDimsNumElements(actual_output_dims) *
2409 batch_size);
2410 std::iota(input_vec.begin(), input_vec.end(), 1);
2411 const DataVec input_data{{"input", AsTensor<float>(input_vec)}};
2412 DataVec output_data{
2413 {"my_reshape", ConstructTensor<float>(input_vec.size())}};
2414 TF_EXPECT_OK(BuildAndRun(input_data, &output_data, batch_size));
2415 EXPECT_THAT(GetSpanForData<float>(output_data[0]),
2416 ElementsAreArray(input_vec));
2417 }
2418 }
2419
2420 TEST_P(OpConverter_FP32_Test, ConvertShape) {
2421 // Get the NodeDef for Shape op.
2422 Scope s = Scope::NewRootScope();
2423 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
2424 auto shape = ops::Shape(s.WithOpName("my_shape"), input);
2425 const NodeDef& node_def = shape.operation.node()->def();
2426
2427 Status conversion_status =
2428 (trt_mode_ == TrtTestMode::kImplicitBatch)
2429 ? errors::Unimplemented(
2430 "Shape is only supported for explicit batch mode.")
2431 : Status::OK();
2432 std::vector<TestParamBase> test_params = {
2433 // TODO(b/166274212): Enable the test parameter for TensorRT 7.1.3.
2434 #if !IS_TRT_VERSION_GE(7, 1, 3, 0)
2435 TestParamBase{{1, 2, 3}, {}, {3}, {}, conversion_status},
2436 #endif
2437 // Add input as weight (we use a non-empty param ({1}) to trigger this).
2438 TestParamBase{{1, 2, 3}, {}, {3}, {1}, conversion_status},
2439 };
2440
2441 auto input_is_weight = [](const TestParamBase p) { return !p.param.empty(); };
2442 for (auto p : test_params) {
2443 SCOPED_TRACE(p);
2444 Reset();
2445 // The number of elements of the input tensor. We leave it 0 in case we do
2446 // not need to add an input tensor. This happens in explicit batch mode: the
2447 // shape is known at conversion time and therefore the shape is added to the
2448 // network as a constant layer. In this case the single node network that
2449 // we use for the unit test has no actual input tensor when it is converted
2450 // to a TensorRT network.
2451 int n_elements = 0;
2452 if (input_is_weight(p) || trt_mode_ != TrtTestMode::kExplicitBatch) {
2453 // Calculate the number of elements for adding input data.
2454 n_elements = std::accumulate(p.input_dims.begin(), p.input_dims.end(), 1,
2455 std::multiplies<int>());
2456 }
2457 std::vector<float> input_val(n_elements, 1);
2458 if (!input_is_weight(p)) {
2459 AddTestTensor("input", p.input_dims, input_val);
2460 } else {
2461 AddTestWeights("input", p.input_dims, input_val, tf_type_);
2462 }
2463 TestOpConverter("my_shape", node_def, p.expected_output_dims, p.status,
2464 p.runtime_status, ElementsAreArray(p.input_dims),
2465 {DT_INT32});
2466 }
2467 }
2468
2469 // Helper function for testing MatMul and BatchMatMul
2470 // get_matmul corresponds to the function used to generate the node. It should
2471 // accept (DataType, transpose_a, transpose_b) as parameters.
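// A caller sketch (see ConvertMatMul below), where get_matmul_nodedef is a
// lambda that builds a MatMul NodeDef:
//   TestMatMulHelper(this, get_matmul_nodedef, "MatMul");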
2472 void TestMatMulHelper(
2473 OpConverterTest* test,
2474 const std::function<NodeDef(DataType, bool, bool)>& get_matmul,
2475 const std::string& op_name) {
2476 // HACK: This needs to be done in a better way.
2477 const bool is_batch_matmul = op_name == "BatchMatMul";
2478 {
2479 // Unsupported data type.
2480 test->Reset();
2481 NodeDef node_def = get_matmul(DT_INT32, false, false);
2482 test->AddTestTensor("input", {2}, /*batch_size=*/1,
2483 nvinfer1::DataType::kINT32);
2484 test->AddTestWeights<int32>("weights", {2, 1}, {3, 5});
2485 test->RunValidationAndConversion(
2486 node_def, error::UNIMPLEMENTED,
2487 StrCat("Data type int32 is not supported for ", op_name,
2488 ", must be one of [float, half], at my_matmul")
2489 .c_str());
2490 }
2491 // OK.
2492 for (bool transpose_a : {false, true}) {
2493 for (bool transpose_b : {false, true}) {
2494 test->Reset();
2495 NodeDef node_def = get_matmul(DT_FLOAT, transpose_a, transpose_b);
2496 test->AddTestTensor("input", {2}, /*batch_size=*/1);
2497 test->AddTestWeights<float>("weights", {2, 2}, {0, 1, 2, 3});
2498 if (is_batch_matmul) {
2499 test->RunValidationAndConversion(
2500 node_def, error::UNIMPLEMENTED,
2501 "TensorRT does not support batched constants.");
2502 continue;
2503 } else if (transpose_a) {
2504 test->RunValidationAndConversion(
2505 node_def, error::INVALID_ARGUMENT,
2506 "Cannot transpose first input if it is a tensor with fewer than 2 "
2507 "non-batch dimensions");
2508 continue;
2509 }
2510 test->RunValidationAndConversion(node_def);
2511 TRT_TensorOrWeights output;
2512 TF_EXPECT_OK(test->GetTensorOrWeights("my_matmul", &output));
2513 ASSERT_TRUE(output.is_tensor());
2514 ExpectTrtDimsEqualsArray({2}, output.tensor()->getDimensions());
2515
2516 const DataVec input_data{{"input", test->AsTensor<float>({0, 1})}};
2517 DataVec output_data{{"my_matmul", test->ConstructTensor<float>(2)}};
2518 TF_EXPECT_OK(test->BuildAndRun(input_data, &output_data));
2519 if (transpose_b) {
2520 EXPECT_THAT(GetSpanForData<float>(output_data[0]), ElementsAre(1, 3));
2521 } else {
2522 EXPECT_THAT(GetSpanForData<float>(output_data[0]), ElementsAre(2, 3));
2523 }
2524 }
2525 }
2526 // OK, 3D inputs
2527 for (bool transpose_b : {false, true}) {
2528 test->Reset();
2529 NodeDef node_def = get_matmul(DT_FLOAT, /*transpose_a=*/false, transpose_b);
2530 test->AddTestTensor("input", {2}, /*batch_size=*/1);
2531 test->AddTestWeights<float>("weights", {2, 2}, {0, 1, 2, 3});
2532 if (is_batch_matmul) {
2533 test->RunValidationAndConversion(
2534 node_def, error::UNIMPLEMENTED,
2535 "TensorRT does not support batched constants.");
2536 continue;
2537 }
2538 test->RunValidationAndConversion(node_def);
2539 TRT_TensorOrWeights output;
2540 TF_EXPECT_OK(test->GetTensorOrWeights("my_matmul", &output));
2541 ASSERT_TRUE(output.is_tensor());
2542 ExpectTrtDimsEqualsArray({2}, output.tensor()->getDimensions());
2543 const DataVec input_data{{"input", test->AsTensor<float>({0, 1})}};
2544 DataVec output_data{{"my_matmul", test->ConstructTensor<float>(2)}};
2545 TF_EXPECT_OK(test->BuildAndRun(input_data, &output_data));
2546 if (transpose_b) {
2547 EXPECT_THAT(GetSpanForData<float>(output_data[0]), ElementsAre(1, 3));
2548 } else {
2549 EXPECT_THAT(GetSpanForData<float>(output_data[0]), ElementsAre(2, 3));
2550 }
2551 }
2552 }
2553
2554 template <typename LayerType>
2555 void CheckAddedLayers(OpConverterTest* test, bool expect_found) {
2556 bool layer_found = false;
2557 for (int i = 0; i < test->converter_->network()->getNbLayers(); i++) {
2558 nvinfer1::ILayer* layer = test->converter_->network()->getLayer(i);
2559 if (dynamic_cast<LayerType*>(layer)) {
2560 layer_found = true;
2561 }
2562 }
2563 EXPECT_EQ(expect_found, layer_found);
2564 }
2565
2566 TEST_F(OpConverterTest, ConvertMatMul) {
2567 // Get the NodeDef for MatMul.
2568 auto get_matmul_nodedef = [](DataType dtype, bool transpose_a,
2569 bool transpose_b) -> NodeDef {
2570 Scope s = Scope::NewRootScope();
2571 auto input = ops::Placeholder(s.WithOpName("input"), dtype);
2572 auto weights = ops::Placeholder(s.WithOpName("weights"), dtype);
2573 const auto matmul_attrs =
2574 ops::MatMul::TransposeA(transpose_a).TransposeB(transpose_b);
2575 auto matmul =
2576 ops::MatMul(s.WithOpName("my_matmul"), input, weights, matmul_attrs);
2577 return matmul.operation.node()->def();
2578 };
2579
2580 // Additional test cases specific to MatMul
2581 {
2582 // Can only transpose A if it is 2D in TRT
2583 Reset();
2584 NodeDef node_def = get_matmul_nodedef(DT_FLOAT, true, false);
2585 AddTestTensor("input", {2}, /*batch_size=*/1);
2586 AddTestWeights<float>("weights", {2, 2}, {0, 1, 2, 3});
2587 RunValidationAndConversion(
2588 node_def, error::INVALID_ARGUMENT,
2589 "Cannot transpose first input if it is a tensor with fewer than 2 "
2590 "non-batch dimensions.");
2591 }
2592 {
2593 // B must always have 2 non-batch dimensions
2594 Reset();
2595 NodeDef node_def = get_matmul_nodedef(DT_FLOAT, false, false);
2596 AddTestTensor("input", {2}, /*batch_size=*/1);
2597 AddTestTensor("weights", {2}, /*batch_size=*/1);
2598 RunValidationAndConversion(
2599 node_def, error::INVALID_ARGUMENT,
2600 "Second input must either be a constant, or contain at least 2 "
2601 "non-batch dimensions.");
2602 }
2603 {
2604 // We can never transpose weights that are not 2D.
2605 Reset();
2606 NodeDef node_def = get_matmul_nodedef(DT_FLOAT, true, false);
2607 AddTestWeights<float>("input", {1, 1, 2}, {0, 1});
2608 AddTestTensor("weights", {2, 2}, /*batch_size=*/1);
2609 RunValidationAndConversion(
2610 node_def, error::INVALID_ARGUMENT,
2611 "Cannot currently transpose constant input if it is not 2 dimensional");
2612 }
2613 {
2614 // Make sure that INT8 mode uses IFullyConnectedLayer when possible.
2615 Reset(TrtPrecisionMode::INT8);
2616 NodeDef node_def = get_matmul_nodedef(DT_FLOAT, false, false);
2617 AddTestTensor("input", {2, 1, 1});
2618 AddTestWeights<float>("weights", {2, 2}, {0, 1, 2, 3});
2619 RunValidationAndConversion(node_def);
2620 CheckAddedLayers<nvinfer1::IMatrixMultiplyLayer>(this, false);
2621 CheckAddedLayers<nvinfer1::IFullyConnectedLayer>(this, true);
2622 }
2623 {
2624 // Make sure that INT8 mode doesn't try to use IFullyConnectedLayer when not
2625 // compatible. In this case we can't use FC because weights is a tensor.
2626 Reset(TrtPrecisionMode::INT8);
2627 NodeDef node_def = get_matmul_nodedef(DT_FLOAT, false, false);
2628 AddTestTensor("input", {2, 1, 1});
2629 AddTestTensor("weights", {2, 2});
2630 RunValidationAndConversion(node_def);
2631 CheckAddedLayers<nvinfer1::IMatrixMultiplyLayer>(this, true);
2632 CheckAddedLayers<nvinfer1::IFullyConnectedLayer>(this, false);
2633 }
2634 TestMatMulHelper(this, get_matmul_nodedef, "MatMul");
2635 }
2636
2637 TEST_F(OpConverterTest, ConvertBatchMatMul) {
2638 // Get the NodeDef for BatchMatMul.
2639 auto get_batch_matmul_nodedef = [](DataType dtype, bool transpose_a,
2640 bool transpose_b) -> NodeDef {
2641 Scope s = Scope::NewRootScope();
2642 auto input = ops::Placeholder(s.WithOpName("input"), dtype);
2643 auto weights = ops::Placeholder(s.WithOpName("weights"), dtype);
2644 const auto matmul_attrs =
2645 ops::BatchMatMul::AdjX(transpose_a).AdjY(transpose_b);
2646 auto matmul = ops::BatchMatMul(s.WithOpName("my_matmul"), input, weights,
2647 matmul_attrs);
2648 return matmul.operation.node()->def();
2649 };
2650
2651 {
2652 // Can't broadcast two tensor inputs of different rank.
2653 Reset();
2654 NodeDef node_def = get_batch_matmul_nodedef(DT_FLOAT, false, false);
2655 AddTestTensor("input", {1, 2, 2}, /*batch_size=*/2);
2656 AddTestTensor("weights", {2}, /*batch_size=*/2);
2657 RunValidationAndConversion(
2658 node_def, error::UNIMPLEMENTED,
2659 "Inputs must have the same rank if they are both tensors.");
2660 }
2661 {
2662 // Make sure that INT8 mode doesn't try to use IFullyConnectedLayer when not
2663 // compatible. In this case we can't use FC because transpose_a is true.
2664 Reset(TrtPrecisionMode::INT8);
2665 NodeDef node_def = get_batch_matmul_nodedef(DT_FLOAT, true, false);
2666 AddTestTensor("input", {1, 2, 2});
2667 AddTestWeights<float>("weights", {2, 2}, {0, 1, 2, 3});
2668 RunValidationAndConversion(node_def);
2669 CheckAddedLayers<nvinfer1::IMatrixMultiplyLayer>(this, true);
2670 CheckAddedLayers<nvinfer1::IFullyConnectedLayer>(this, false);
2671 }
2672
2673 for (bool transpose_a : {false, true}) {
2674 for (bool transpose_b : {false, true}) {
2675 Reset();
2676 NodeDef node_def =
2677 get_batch_matmul_nodedef(DT_FLOAT, transpose_a, transpose_b);
2678 AddTestTensor("input", {2, 2}, /*batch_size=*/1);
2679 AddTestWeights<float>("weights", {1, 2, 2}, {1, 2, 3, 4});
2680
2681 RunValidationAndConversion(node_def);
2682 TRT_TensorOrWeights output;
2683 TF_EXPECT_OK(GetTensorOrWeights("my_matmul", &output));
2684 ASSERT_TRUE(output.is_tensor());
2685 ExpectTrtDimsEqualsArray({2, 2}, output.tensor()->getDimensions());
2686 const DataVec input_data{{"input", AsTensor<float>({0, 1, 2, 3})}};
2687 DataVec output_data{{"my_matmul", ConstructTensor<float>(4)}};
2688 TF_EXPECT_OK(BuildAndRun(input_data, &output_data));
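      // Worked expectations, with A = [[0, 1], [2, 3]] (input) and
      // B = [[1, 2], [3, 4]] (weights): A*B = [[3, 4], [11, 16]],
      // A^T*B^T = [[4, 8], [7, 15]], A^T*B = [[6, 8], [10, 14]],
      // A*B^T = [[2, 4], [8, 18]].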
2689 if (!transpose_a && !transpose_b) {
2690 EXPECT_THAT(GetSpanForData<float>(output_data[0]),
2691 ElementsAre(3, 4, 11, 16));
2692 } else if (transpose_a && transpose_b) {
2693 EXPECT_THAT(GetSpanForData<float>(output_data[0]),
2694 ElementsAre(4, 8, 7, 15));
2695 } else if (transpose_a) {
2696 EXPECT_THAT(GetSpanForData<float>(output_data[0]),
2697 ElementsAre(6, 8, 10, 14));
2698 } else if (transpose_b) {
2699 EXPECT_THAT(GetSpanForData<float>(output_data[0]),
2700 ElementsAre(2, 4, 8, 18));
2701 }
2702 }
2703 }
2704
2705 TestMatMulHelper(this, get_batch_matmul_nodedef, "BatchMatMul");
2706 }
2707
2708 TEST_P(OpConverter_FP32_FP16_Test, ConvertBiasAdd) {
2709 // Note that kINT32 is not supported by IScaleLayer, so we don't test
2710 // DT_INT32 type here. DT_FLOAT and DT_HALF are tested.
2711 // Get the NodeDef for BiasAdd.
2712 auto get_biasadd_nodedef = [](const string& data_format,
2713 DataType tf_type) -> NodeDef {
2714 Scope s = Scope::NewRootScope();
2715 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
2716 auto weights = ops::Placeholder(s.WithOpName("weights"), tf_type);
2717 const auto biasadd_attrs = ops::BiasAdd::DataFormat(data_format);
2718 auto biasadd =
2719 ops::BiasAdd(s.WithOpName("my_biasadd"), input, weights, biasadd_attrs);
2720 return biasadd.operation.node()->def();
2721 };
2722
2723 for (const string& data_format : {"NHWC", "NCHW"}) {
2724 for (const int trt_input_rank : {1, 2, 3, 4}) {
2725 Reset();
2726 NodeDef node_def = get_biasadd_nodedef(data_format, tf_type_);
2727
2728       // Add input; dims_array will be like {1, 2, 1, ..., 1, 3}
2729 std::vector<int32> dims_array(trt_input_rank + 1, 1);
2730 if (trt_input_rank == 1) {
2731 dims_array[1] = (data_format == "NHWC" ? 3 : 2);
2732 } else {
2733 dims_array[1] = 2;
2734 dims_array[trt_input_rank] = 3;
2735 }
2736 const int num_input = TrtTensorDimsNumElements(GetTestDims(dims_array));
2737 ASSERT_EQ(trt_input_rank > 1 ? 6 : (data_format == "NHWC" ? 3 : 2),
2738 num_input);
2739 std::vector<float> input_data(num_input, 0);
2740
2741 AddTestTensor("input", dims_array, input_data);
2742
2743 const int channel_size = (data_format == "NHWC" ? 3 : 2);
2744 std::vector<float> bias(channel_size);
2745 for (int i = 0; i < channel_size; ++i) {
2746 bias[i] = i + 1; // bias will be {1, 2, 3, ...}
2747 }
2748 AddTestWeights("weights", {channel_size}, bias, tf_type_);
2749
2750 // Build and run the engine.
2751 std::vector<float> output_data;
2752
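      // The input tensor is all zeros, so the expected output is simply the
      // bias broadcast along the channel dimension (the last dimension for
      // NHWC, dimension 1 for NCHW).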
2753 if (trt_input_rank == 1) {
2754 if (data_format == "NHWC") {
2755 output_data = {1, 2, 3};
2756 } else {
2757 output_data = {1, 2};
2758 }
2759 } else {
2760 if (data_format == "NHWC") {
2761 output_data = {1, 2, 3, 1, 2, 3};
2762 } else {
2763 output_data = {1, 1, 1, 2, 2, 2};
2764 }
2765 }
2766 TestOpConverter("my_biasadd", node_def, dims_array, Status::OK(),
2767 Status::OK(), ElementsAreArray(output_data));
2768 }
2769 }
2770 }
2771
2772 template <typename OpType>
2773 NodeDef GetBinaryOpNodeDef(DataType dtype) {
2774 Scope s = Scope::NewRootScope();
2775 auto input_l = ops::Placeholder(s.WithOpName("input1"), dtype);
2776 auto input_r = ops::Placeholder(s.WithOpName("input2"), dtype);
2777 auto op = OpType(s.WithOpName("my_binary"), input_l, input_r);
2778 return op.operation.node()->def();
2779 }
2780
2781 TEST_P(OpConverter_FP32_FP16_Test, ConvertBinary) {
2782 {
2783 AttrValue dtype;
2784 dtype.set_type(tf_type_);
2785 // Both inputs are weights.
2786 Reset();
2787 NodeDef node_def =
2788 MakeNodeDef("my_add", "Add", {"weights1", "weights2"}, {{"T", dtype}});
2789 AddTestWeights<float>("weights1", {1}, {1});
2790 AddTestWeights<float>("weights2", {1}, {1});
2791 RunValidationAndConversion(
2792 node_def, error::UNIMPLEMENTED,
2793 "Constant folding is falled back to TensorFlow, binary op received "
2794 "both input as constant at: my_add");
2795 }
2796
2797 using OpFunc = std::function<NodeDef(DataType)>;
2798 std::map<std::string, std::pair<OpFunc, std::vector<float>>> op_test_info;
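  // The expected values below assume input1 provides the values {3, 6}
  // (varying along the last dimension) and input2 the values {2, 3} (varying
  // along the second dimension), broadcast to a {2, 2, 2} result; e.g. Add
  // yields {3+2, 6+2, 3+3, 6+3} = {5, 8, 6, 9}, repeated for the first
  // dimension.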
2799 #define ADD_OP(name, op, v1, v2, v3, v4, v5, v6, v7, v8) \
2800 op_test_info[name] = \
2801 std::make_pair(GetBinaryOpNodeDef<op>, \
2802 std::vector<float>(v1, v2, v3, v4, v5, v6, v7, v8))
2803 ADD_OP("Add", ops::Add, {5, 8, 6, 9, 5, 8, 6, 9});
2804 ADD_OP("AddV2", ops::AddV2, {5, 8, 6, 9, 5, 8, 6, 9});
2805 ADD_OP("Sub", ops::Sub, {1, 4, 0, 3, 1, 4, 0, 3});
2806 ADD_OP("Mul", ops::Mul, {6, 12, 9, 18, 6, 12, 9, 18});
2807 ADD_OP("Div", ops::Div, {1.5, 3, 1, 2, 1.5, 3, 1, 2});
2808 ADD_OP("RealDiv", ops::RealDiv, {1.5, 3, 1, 2, 1.5, 3, 1, 2});
2809 ADD_OP("FloorDiv", ops::FloorDiv, {1, 3, 1, 2, 1, 3, 1, 2});
2810 ADD_OP("Minimum", ops::Minimum, {2, 2, 3, 3, 2, 2, 3, 3});
2811 ADD_OP("Maximum", ops::Maximum, {3, 6, 3, 6, 3, 6, 3, 6});
2812 ADD_OP("Pow", ops::Pow, {9, 36, 27, 216, 9, 36, 27, 216});
2813 #undef ADD_OP
2814 // Add all ops supported by ConvertBinary.
2815 auto* supported_ops = BinaryOperationMap();
2816 // Test combinations of tensor vs weight inputs (except when both inputs are
2817 // weights).
2818 for (const bool operand_1_is_tensor : {true, false}) {
2819 for (const bool operand_2_is_tensor : {true, false}) {
2820 if (!operand_1_is_tensor && !operand_2_is_tensor) continue;
2821 for (auto& iter : *supported_ops) {
2822 string op_name = iter.first;
2823 SCOPED_TRACE(StrCat(op_name, "_", operand_1_is_tensor ? "T" : "W",
2824 operand_2_is_tensor ? "T" : "W"));
2825 Reset();
2826 if (!op_test_info.count(op_name)) {
2827 FAIL() << "Binary op test map does not contain op " << op_name;
2828 }
2829 NodeDef node_def = op_test_info[op_name].first(tf_type_);
2830 std::vector<std::string> input_names;
2831 std::vector<std::vector<int>> input_dims;
2832 std::vector<std::vector<float>> input_values;
2833 if (operand_1_is_tensor) {
2834 AddTestTensor("input1", {2, 1, 2}, {3, 6, 3, 6});
2835 } else {
2836 AddTestWeights("input1", {1, 2}, std::vector<float>{3, 6}, tf_type_);
2837 }
2838 if (operand_2_is_tensor) {
2839 AddTestTensor("input2", {2, 2, 1}, {2, 3, 2, 3});
2840 } else {
2841 AddTestWeights("input2", {2, 1}, std::vector<float>{2, 3}, tf_type_);
2842 }
2843 TestOpConverter("my_binary", node_def, {2, 2, 2}, Status::OK(),
2844 Status::OK(),
2845 ElementsAreArray(op_test_info[op_name].second));
2846 }
2847 }
2848 }
2849 }
2850
2851 NodeDef GetAddNNodeDef(const std::vector<string>& input_names, DataType dtype) {
2852 Scope s = Scope::NewRootScope();
2853 OutputList inputs;
2854 for (const string& name : input_names) {
2855 inputs.push_back(ops::Placeholder(s.WithOpName(name), dtype));
2856 }
2857 auto op = ops::AddN(s.WithOpName("my_addn"), inputs);
2858 return op.operation.node()->def();
2859 }
2860
2861 struct AddNTestParams {
2862 std::vector<float> input_values;
2863 std::vector<string> input_names;
2864 std::vector<int> dimensions;
2865 std::vector<float> expected_output;
2866 Status status;
2867 };
2868
2869 void TestAddN(ParameterizedOpConverterTestBase* test, AddNTestParams& p) {
2870 // All inputs are tensors.
2871 test->Reset();
2872 const NodeDef node_def = GetAddNNodeDef(p.input_names, test->get_tf_type());
2873
2874 if (p.input_values.size() % p.input_names.size() != 0) {
2875 LOG(ERROR) << "The number of input values: `" << p.input_values.size()
2876 << "` is not a multiple of the number of inputs: `"
2877 << p.input_names.size() << "`";
2878 ASSERT_TRUE(false);
2879 }
2880
2881 DataVec input_data;
2882 int input_offset = 0;
2883 const int window_size = p.input_values.size() / p.input_names.size();
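  // Feed each input an equally sized, consecutive window of p.input_values;
  // e.g. six values split across three inputs become {v0, v1}, {v2, v3},
  // {v4, v5}, and AddN sums them element-wise.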
2884 for (const string& name : p.input_names) {
2885 std::vector<float>::const_iterator start_pos =
2886 p.input_values.begin() + input_offset;
2887 std::vector<float>::const_iterator end_pos = start_pos + window_size;
2888 std::vector<float> sub_input_val(start_pos, end_pos);
2889 input_offset += window_size;
2890
2891 test->AddTestTensor(name, p.dimensions, test->get_tf_type(), sub_input_val);
2892 }
2893
2894 test->TestOpConverter("my_addn", node_def, p.dimensions,
2895 /*expected_conversion_status=*/p.status,
2896 /*expected_runtime_status=*/p.status,
2897 /*matcher=*/ElementsAreArray(p.expected_output),
2898 /*out_tf_types=*/{test->get_tf_type()});
2899 }
2900
2901 TEST_P(OpConverter_FP32_FP16_Test, ConvertAddN) {
2902 {
2903 // Weights with batch dim that is not 1.
2904 Reset();
2905 const NodeDef node_def = GetAddNNodeDef({"tensor", "weights"}, tf_type_);
2906 AddTestTensor("tensor", /*dims=*/{1, 2});
2907 AddTestWeights<float>("weights", {2, 1, 2}, {0, 1, 2, 3});
2908 RunValidationAndConversion(
2909 node_def, error::INVALID_ARGUMENT,
2910 "Weights input to AddN is required to have batch dimension 1.");
2911 }
2912
2913 const std::vector<float> common_input = InitTestVector<float>(6);
2914
2915 std::vector<AddNTestParams> params = {
2916 {/*input_values=*/common_input,
2917 /*input_names=*/{"inp1", "inp2", "inp3"},
2918 /*dimensions=*/{1, 1, 2, 1, 1},
2919 /*expected_output=*/{6, 9},
2920 /*status=*/Status::OK()},
2921 {/*input_values=*/common_input,
2922 /*input_names=*/{"inp1", "inp2"},
2923 /*dimensions=*/{1, 1, 3, 1, 1},
2924 /*expected_output=*/{3, 5, 7},
2925 /*status=*/Status::OK()},
2926 {/*input_values=*/common_input,
2927 /*input_names=*/{"inp1", "inp2", "inp3"},
2928 /*dimensions=*/{1, 2, 1, 1},
2929 /*expected_output=*/{6, 9},
2930 /*status=*/Status::OK()},
2931 {/*input_values=*/common_input,
2932 /*input_names=*/{"inp1", "inp2"},
2933 /*dimensions=*/{1, 1, 3, 1},
2934 /*expected_output=*/{3, 5, 7},
2935 /*status=*/Status::OK()},
2936 {/*input_values=*/common_input,
2937 /*input_names=*/{"inp1", "inp2", "inp3"},
2938 /*dimensions=*/{1, 2, 1},
2939 /*expected_output=*/{6, 9},
2940 /*status=*/Status::OK()},
2941 {/*input_values=*/common_input,
2942 /*input_names=*/{"inp1", "inp2"},
2943 /*dimensions=*/{1, 1, 3},
2944 /*expected_output=*/{3, 5, 7},
2945 /*status=*/Status::OK()},
2946       {/*input_values=*/common_input,
2947 /*input_names=*/{"inp1", "inp2", "inp3"},
2948 /*dimensions=*/{2, 1},
2949 /*expected_output=*/{6, 9},
2950 /*status=*/Status::OK()},
2951 {/*input_values=*/common_input,
2952 /*input_names=*/{"inp1", "inp2"},
2953 /*dimensions=*/{1, 3},
2954 /*expected_output=*/{3, 5, 7},
2955 /*status=*/Status::OK()},
2956 {/*input_values=*/common_input,
2957 /*input_names=*/{"inp1", "inp2", "inp3"},
2958 /*dimensions=*/{2},
2959 /*expected_output=*/{6, 9},
2960 /*status=*/Status::OK()},
2961 {/*input_values=*/common_input,
2962 /*input_names=*/{"inp1", "inp2"},
2963 /*dimensions=*/{3},
2964 /*expected_output=*/{3, 5, 7},
2965 /*status=*/Status::OK()},
2966 {/*input_values=*/common_input,
2967 /*input_names=*/{"inp1", "inp2", "inp3", "inp4", "inp5", "inp6"},
2968 /*dimensions=*/{1},
2969 /*expected_output=*/{15},
2970 /*status=*/Status::OK()},
2971 };
2972
2973 for (auto p : params) {
2974 TestAddN(this, p);
2975 }
2976 }
2977
2978 TEST_F(OpConverterTest, ConvertQuantize) {
2979 {
2980 // FakeQuantWithMinMaxArgs attributes are empty, should fail.
2981 Reset(TrtPrecisionMode::INT8);
2982 NodeDef node_def =
2983 MakeNodeDef("my_quantize", "FakeQuantWithMinMaxArgs", {"input"});
2984 AddTestTensor("input", {1, 2, 3});
2985 RunValidationAndConversion(
2986 node_def, error::INVALID_ARGUMENT,
2987 "Min or max attribute not found for FakeQuantWithMinMaxArgs "
2988 "at my_quantize");
2989 }
2990 {
2991 // FakeQuantWithMinMaxArgs ranges set via attributes, ok.
2992 Reset(TrtPrecisionMode::INT8);
2993 Scope s = Scope::NewRootScope();
2994 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
2995 auto quantize_attrs = ops::FakeQuantWithMinMaxArgs::Min(-6.0f).Max(6.0f);
2996 auto quantize = ops::FakeQuantWithMinMaxArgs(s.WithOpName("my_quantize"),
2997 input, quantize_attrs);
2998 const NodeDef& node_def = quantize.operation.node()->def();
2999 AddTestTensor("input", {1, 2, 3});
3000 RunValidationAndConversion(node_def);
3001 TRT_TensorOrWeights output;
3002 TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
3003 ASSERT_TRUE(output.is_tensor());
3004 auto ranges = quantization_ranges();
3005 EXPECT_EQ(1, ranges.count(output.tensor()));
3006 EXPECT_EQ(6.0f, ranges[output.tensor()]);
3007 }
3008 {
3009 // FakeQuantWithMinMaxVars ranges set via inputs, ok.
3010 Reset(TrtPrecisionMode::INT8);
3011 Scope s = Scope::NewRootScope();
3012 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3013 auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
3014 auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
3015 auto quantize = ops::FakeQuantWithMinMaxVars(
3016 s.WithOpName("my_quantize"), input, weights_min, weights_max);
3017 const NodeDef& node_def = quantize.operation.node()->def();
3018 AddTestTensor("input", {1, 2, 3});
3019 AddTestWeights<float>("weights_min", {1}, {-6.0f});
3020 AddTestWeights<float>("weights_max", {1}, {6.0f});
3021 RunValidationAndConversion(node_def);
3022 TRT_TensorOrWeights output;
3023 TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
3024 ASSERT_TRUE(output.is_tensor());
3025 auto ranges = quantization_ranges();
3026 EXPECT_EQ(1, ranges.count(output.tensor()));
3027 EXPECT_EQ(6.0f, ranges[output.tensor()]);
3028 }
3029 {
3030 // QuantizeAndDequantizeV2 ranges set via inputs, ok.
3031 Reset(TrtPrecisionMode::INT8);
3032 Scope s = Scope::NewRootScope();
3033 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3034 auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
3035 auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
3036 auto quantize = ops::QuantizeAndDequantizeV2(
3037 s.WithOpName("my_quantize"), input, weights_min, weights_max);
3038 const NodeDef& node_def = quantize.operation.node()->def();
3039 AddTestTensor("input", {1, 2, 3});
3040 AddTestWeights<float>("weights_min", {1}, {-6.0f});
3041 AddTestWeights<float>("weights_max", {1}, {6.0f});
3042 RunValidationAndConversion(node_def);
3043 TRT_TensorOrWeights output;
3044 TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
3045 ASSERT_TRUE(output.is_tensor());
3046 auto ranges = quantization_ranges();
3047 EXPECT_EQ(1, ranges.count(output.tensor()));
3048 EXPECT_EQ(6.0f, ranges[output.tensor()]);
3049 }
3050 {
3051 // QuantizeAndDequantizeV2 Range inputs are tensors, should fail.
3052 Reset(TrtPrecisionMode::INT8);
3053 Scope s = Scope::NewRootScope();
3054 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3055 auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
3056 auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
3057 auto quantize = ops::QuantizeAndDequantizeV2(
3058 s.WithOpName("my_quantize"), input, weights_min, weights_max);
3059 const NodeDef& node_def = quantize.operation.node()->def();
3060 AddTestTensor("input", {1, 2, 3});
3061 AddTestTensor("weights_min", {1});
3062 AddTestTensor("weights_max", {1});
3063 RunValidationAndConversion(
3064 node_def, error::UNIMPLEMENTED,
3065 "The input \"input_min\" for QuantizeAndDequantizeV2 must be a constant"
3066 ", at my_quantize");
3067 }
3068 {
3069 // QuantizeAndDequantizeV3 ranges set via inputs, ok.
3070 Reset(TrtPrecisionMode::INT8);
3071 Scope s = Scope::NewRootScope();
3072 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3073 auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
3074 auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
3075 auto num_bits = ops::Placeholder(s.WithOpName("num_bits"), DT_INT32);
3076 auto quantize = ops::QuantizeAndDequantizeV3(
3077 s.WithOpName("my_quantize"), input, weights_min, weights_max, num_bits);
3078 const NodeDef& node_def = quantize.operation.node()->def();
3079 AddTestTensor("input", {1, 2, 3});
3080 AddTestWeights<float>("weights_min", {1}, {-6.0f});
3081 AddTestWeights<float>("weights_max", {1}, {6.0f});
3082 AddTestWeights<int>("num_bits", {1}, {8});
3083 RunValidationAndConversion(node_def);
3084 TRT_TensorOrWeights output;
3085 TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
3086 ASSERT_TRUE(output.is_tensor());
3087 auto ranges = quantization_ranges();
3088 EXPECT_EQ(1, ranges.count(output.tensor()));
3089 EXPECT_EQ(6.0f, ranges[output.tensor()]);
3090 }
3091 }
3092
3093 TEST_P(OpConverter_FP32_FP16_Test, ConvertSquare) {
3094 {
3095 // Input is weights, should fail.
3096 Reset();
3097 Scope s = Scope::NewRootScope();
3098 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
3099 auto square = ops::Square(s.WithOpName("my_square"), input);
3100 NodeDef node_def = square.operation.node()->def();
3101 AddTestWeights("input", {1, 2, 3}, {1, 2, 3, 4, -5, 6}, tf_type_);
3102 RunValidationAndConversion(
3103 node_def, error::UNIMPLEMENTED,
3104 "The input \"x\" for Square must be a tensor, at my_square");
3105 }
3106
3107 Reset();
3108
3109 Scope s = Scope::NewRootScope();
3110 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
3111 auto square = ops::Square(s.WithOpName("my_square"), input);
3112 NodeDef node_def = square.operation.node()->def();
3113
3114 const int num_inputs = 20;
3115 std::vector<float> inputs(num_inputs);
3116 std::vector<float> expected_outputs(num_inputs);
3117
3118 for (int i = 0; i < num_inputs; ++i) {
3119 const float value = (i - 9);
3120 inputs[i] = value;
3121 expected_outputs[i] = value * value;
3122 }
3123 AddTestTensor("input", {1, 1, 20}, tf_type_, inputs);
3124
3125 TestOpConverter("my_square", node_def, {1, 1, 20}, Status::OK(), Status::OK(),
3126 ArrayFloatNear(expected_outputs, 0));
3127 }
3128
3129 #if IS_TRT_VERSION_GE(7, 1, 3, 0)
3130 TEST_P(OpConverter_FP32_Test, ConvertCombinedNMS) {
3131 // Get the NodeDef for CombinedNMS.
3132 auto get_nms_nodedef = [](DataType tf_type, bool clip_boxes = true,
3133 bool pad_per_class = false) -> NodeDef {
3134 Scope s = Scope::NewRootScope();
3135 auto boxes_tensor = ops::Placeholder(s.WithOpName("boxes"), tf_type);
3136 auto scores_tensor = ops::Placeholder(s.WithOpName("scores"), tf_type);
3137 auto max_output_size_per_class =
3138 ops::Placeholder(s.WithOpName("max_output_size_per_class"), DT_INT32);
3139 auto max_total_size =
3140 ops::Placeholder(s.WithOpName("max_total_size"), DT_INT32);
3141 auto iou_threshold =
3142 ops::Placeholder(s.WithOpName("iou_threshold"), tf_type);
3143 auto score_threshold =
3144 ops::Placeholder(s.WithOpName("score_threshold"), tf_type);
3145 auto nms_attrs = ops::CombinedNonMaxSuppression::Attrs()
3146 .PadPerClass(pad_per_class)
3147 .ClipBoxes(clip_boxes);
3148
3149 auto nms_op = ops::CombinedNonMaxSuppression(
3150 s.WithOpName("my_nms"), boxes_tensor, scores_tensor,
3151 max_output_size_per_class, max_total_size, iou_threshold,
3152 score_threshold, nms_attrs);
3153 return nms_op.operation.node()->def();
3154 };
3155
3156 struct TestParams {
3157 const std::string description;
3158 const std::vector<int32> boxes_tensor_dims;
3159 const std::vector<int32> scores_tensor_dims;
3160 const std::vector<float> boxes_values;
3161 const std::vector<float> scores_values;
3162 const int32 max_output_size_per_class;
3163 const int32 max_total_size;
3164 const float iou_threshold;
3165 const float score_threshold;
3166 bool pad_per_class;
3167 bool clip_boxes;
3168 const std::vector<std::vector<int32>> expected_output_dims;
3169 const std::vector<float> exp_boxes;
3170 const std::vector<float> exp_scores;
3171 const std::vector<float> exp_classes;
3172 const std::vector<float> exp_num_detections;
3173 Status conversion_status;
3174 Status runtime_status;
3175 };
3176
3177 Status conv_status =
3178 trt_mode_ == TrtTestMode::kDynamicShape
3179 ? errors::Unimplemented(
3180 "TensorRT BatchedNMS Plugin requires input with static shape")
3181 : Status::OK();
3182
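  // In dynamic shape mode every test case below is expected to fail conversion
  // with the error above, so the expected output values only apply to the
  // static shape modes.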
3183 std::vector<TestParams> params = {
3184       // TODO(aaroey): there is a bug in TRT's CombinedNonMaxSuppression
3185       // implementation such that the extra output classes outside the range
3186       // specified by valid_detections[i] are not zeros but -1s.
3187 TestParams{
3188 "Test 1: Original test",
3189 {1, 1, 3, 4}, // boxes dims
3190 {1, 1, 3}, // scores dims
3191 {0, 0, 0.3, 0.4, 0, 0, 0.3, 0.4, 0, 0, 0.3, 0.4}, // boxes values
3192 {0.4, 0.7, 0.3}, // scores values
3193 3, // max_output_size_per_class
3194 2, // max_total_size
3195 .5f, // IOU threshold
3196 0, // score_threshold
3197 false, // pad_per_class
3198 true, // clip_boxes
3199 {{1, 2, 4}, // expected_nmsed_boxes_dims
3200 {1, 2}, // expected_nmsed_scores_dims
3201 {1, 2}, // expected_nmsed_classes_dims
3202 {1}}, // expected_valid_detections_dims
3203 {0, 0, 0.3, 0.4, 0, 0, 0.3, 0.4}, // exp_boxes_values
3204 {0.7, 0.4}, // exp_scores
3205 {1, 0}, // exp_classes
3206 {2}, // exp_num_detections
3207 conv_status},
3208 // Test with clip_boxes = False
3209 TestParams{
3210 "Test 2: clip_boxes",
3211 {1, 5, 1, 4}, // boxes dims
3212 {1, 5, 1}, // scores dims
3213 // boxes values:
3214 {0, 0, 5, 10, 0, 4, 5, 14, 8, 0, 12, 4, 6, 2, 10, 6, 8, 9, 11, 12},
3215 {5, 4, 3, 2, 1}, // scores values
3216 4, // max_output_size_per_class
3217 4, // max_total_size
3218 0.1, // IOU threshold
3219 0, // score threshold
3220 false, // pad_per_class
3221 false, // clip_boxes
3222 {{1, 4, 4}, // expected nmsed_boxes_dims
3223 {1, 4}, // expected nmsed_scores_dims
3224 {1, 4}, // expected_nmsed_classes_dims
3225 {1}}, // expected_valid_detections_dims
3226 // exp_boxes_values:
3227 {0, 0, 5, 10, 8, 0, 12, 4, 8, 9, 11, 12, 0, 0, 0, 0},
3228 {5, 3, 1, 0}, // exp_scores
3229 {0, 0, 0, -1}, // exp_classes
3230 {3}, // exp_num_detections
3231 conv_status},
3232 // Test with clip_boxes = False, and nonzero score threshold
3233 TestParams{
3234 "Test 3: score threshold",
3235 {1, 5, 1, 4}, // boxes dims
3236 {1, 5, 1}, // scores dims
3237 // boxes values:
3238 {0, 0, 5, 10, 0, 4, 5, 14, 8, 0, 12, 4, 6, 2, 10, 6, 8, 9, 11, 12},
3239 {5, 4, 3, 2, 1}, // scores values
3240 4, // max_output_size_per_class
3241 4, // max_total_size
3242 0.1, // IOU threshold
3243 2, // score threshold
3244 false, // pad_per_class
3245 false, // clip_boxes
3246 {{1, 4, 4}, // expected nmsed_boxes_dims
3247 {1, 4}, // expected nmsed_scores_dims
3248 {1, 4}, // expected_nmsed_classes_dims
3249 {1}}, // expected_valid_detections_dims
3250 // exp_boxes_values:
3251 {0, 0, 5, 10, 8, 0, 12, 4, 0, 0, 0, 0, 0, 0, 0, 0},
3252 {5, 3, 0, 0}, // exp_scores
3253 {0, 0, -1, -1}, // exp_classes
3254 {2}, // exp_num_detections
3255 conv_status},
3256       // Test where the box coordinates are given with the max value first.
3257       // This test fails before TRT 7.1.3.
3258 TestParams{
3259 "Test 4: max coord first",
3260 {1, 5, 1, 4}, // boxes dims
3261 {1, 5, 1}, // scores dims
3262 // boxes values:
3263 {5, 10, 0, 0, 5, 14, 0, 4, 12, 4, 8, 0, 10, 6, 6, 2, 11, 12, 8, 9},
3264 {5, 4, 3, 2, 1}, // scores values
3265 4, // max_output_size_per_class
3266 4, // max_total_size
3267 0.1, // IOU threshold
3268 0, // score threshold
3269 false, // pad_per_class
3270 false, // clip_boxes
3271 {{1, 4, 4}, // expected nmsed_boxes_dims
3272 {1, 4}, // expected nmsed_scores_dims
3273 {1, 4}, // expected_nmsed_classes_dims
3274 {1}}, // expected_valid_detections_dims
3275 // exp_boxes_values:
3276 {5, 10, 0, 0, 12, 4, 8, 0, 11, 12, 8, 9, 0, 0, 0, 0},
3277 {5, 3, 1, 0}, // exp_scores
3278 {0, 0, 0, -1}, // exp_classes
3279 {3}, // exp_num_detections
3280 conv_status},
3281 };
3282
3283 for (auto p : params) {
3284 Reset();
3285 SCOPED_TRACE(p.description);
3286 AddTestTensor("boxes", p.boxes_tensor_dims, p.boxes_values);
3287 AddTestTensor("scores", p.scores_tensor_dims, p.scores_values);
3288 AddTestWeights<int32>("max_output_size_per_class", {1},
3289 {p.max_output_size_per_class});
3290 AddTestWeights<int32>("max_total_size", {1}, {p.max_total_size});
3291 AddTestWeights<float>("iou_threshold", {1}, {p.iou_threshold}, tf_type_);
3292 AddTestWeights<float>("score_threshold", {1}, {p.score_threshold},
3293 tf_type_);
3294
3295 auto node_def = get_nms_nodedef(tf_type_, p.clip_boxes, p.pad_per_class);
3296
3297 TestOpConverterMultiOut("my_nms", node_def, p.expected_output_dims,
3298 p.conversion_status, p.runtime_status,
3299 {
3300 ElementsAreArray(p.exp_boxes),
3301 ElementsAreArray(p.exp_scores),
3302 ElementsAreArray(p.exp_classes),
3303 ElementsAreArray(p.exp_num_detections),
3304 },
3305 {tf_type_, tf_type_, tf_type_, DT_INT32});
3306 }
3307 }
3308 #endif // IS_TRT_VERSION_GE(7, 1, 3, 0)
3309
3310 template <typename T>
3311 NodeDef CreateUnaryOp(DataType tf_type) {
3312 Scope s = Scope::NewRootScope();
3313 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
3314 return T(s.WithOpName("my_unary"), input).operation.node()->def();
3315 }
3316
3317 constexpr float kLeakyReluAlpha = 0.2f;
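// Specialization for LeakyRelu: the generic template above cannot set the
// Alpha attribute, so the op is built here with an explicit alpha.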
3318 template <>
3319 NodeDef CreateUnaryOp<ops::internal::LeakyRelu>(DataType tf_type) {
3320 Scope s = Scope::NewRootScope();
3321 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
3322 return ops::internal::LeakyRelu(
3323 s.WithOpName("my_unary"), input,
3324 ops::internal::LeakyRelu::Alpha(kLeakyReluAlpha))
3325 .operation.node()
3326 ->def();
3327 }
3328
3329 TEST_P(OpConverter_FP32_Test, ConvertActivation) {
3330 {
3331 // Input is weights, should fail.
3332 Reset();
3333 const NodeDef& node_def = CreateUnaryOp<ops::Relu>(tf_type_);
3334 AddTestWeights<int32>("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
3335 RunValidationAndConversion(
3336 node_def, error::UNIMPLEMENTED,
3337 "The input \"input\" for Relu must be a tensor, at my_unary");
3338 }
3339
3340 constexpr float kSeluAlpha = 1.7580993408473768599402175208123f;
3341 constexpr float kSeluScale = 1.0507009873554804934193349852946f;
3342 using OpFunc = std::function<NodeDef(DataType)>;
3343 using ValFunc = float (*)(float);
3344 std::map<std::string, std::pair<OpFunc, ValFunc>> op_map;
3345
3346 #define ADD_OP(name, op, compute) \
3347 op_map[name] = std::make_pair(CreateUnaryOp<op>, compute)
3348 ADD_OP("LeakyRelu", ops::internal::LeakyRelu,
3349 [](float x) { return (x > 0.0f) ? x : x * kLeakyReluAlpha; });
3350 ADD_OP("Relu", ops::Relu, [](float x) { return (x > 0.0f) ? x : 0.0f; });
3351 ADD_OP("Relu6", ops::Relu6,
3352 [](float x) { return std::min(std::max(x, 0.0f), 6.0f); });
3353 ADD_OP("Sigmoid", ops::Sigmoid,
3354 [](float x) { return 1.0f / (1.0f + std::exp(-x)); });
3355 ADD_OP("Tanh", ops::Tanh, static_cast<ValFunc>(std::tanh));
3356 ADD_OP("Elu", ops::Elu,
3357 [](float x) { return (x > 0.0f) ? x : std::exp(x) - 1; });
3358 ADD_OP("Selu", ops::Selu, [](float x) {
3359 return (x > 0.0f) ? kSeluScale * x
3360 : kSeluScale * kSeluAlpha * (std::exp(x) - 1);
3361 });
3362 ADD_OP("Softsign", ops::Softsign,
3363 [](float x) { return x / (std::abs(x) + 1); });
3364 ADD_OP("Softplus", ops::Softplus,
3365 [](float x) { return std::log(std::exp(x) + 1); });
3366 #undef ADD_OP
3367
3368 // Get list of ops to test.
3369 std::vector<string> ops_to_test;
3370 // Add all ops supported by ConvertActivation.
3371 auto* map = ActivationTypeMap();
3372 ops_to_test.reserve(map->size());
3373 for (auto& pair : *map) {
3374 ops_to_test.push_back(pair.first);
3375 }
3376 // Add other activation ops to test.
3377 ops_to_test.push_back("Relu6");
3378 ops_to_test.push_back("LeakyRelu");
3379 auto p = TestParamBase{
3380 {1, 1, 2, 3}, // input dims
3381 {}, // input partial dims
3382 {1, 1, 2, 3}, // expected output dims
3383 };
3384 // Ok.
3385 for (const string& op_name : ops_to_test) {
3386 if (!op_map.count(op_name)) {
3387 FAIL() << "Activation op test map does not contain op " << op_name;
3388 }
3389 Reset();
3390 NodeDef node_def = op_map[op_name].first(tf_type_);
3391 const std::vector<float> input = {-100, -2, -1, 0, 1, 88};
3392 AddTestTensor("input", p.input_dims, input);
3393
3394 // std::exp in Softplus will overflow for input > 88
3395 std::vector<float> output_values;
3396 std::transform(input.begin(), input.end(),
3397 std::back_inserter(output_values), op_map[op_name].second);
3398 TestOpConverter("my_unary", node_def, p.expected_output_dims, Status::OK(),
3399 Status::OK(), ArrayFloatNear(output_values, 0, false));
3400
3401 TRT_TensorOrWeights output;
3402 TF_EXPECT_OK(GetTensorOrWeights("my_unary", &output));
3403
3404 // Certain activations should set quantization range automatically.
3405 auto ranges = quantization_ranges();
3406 if (op_name == "Relu6") {
3407 EXPECT_EQ(ranges[output.tensor()], 6.0f);
3408 } else if (op_name == "Sigmoid" || op_name == "Tanh" ||
3409 op_name == "Softsign") {
3410 EXPECT_EQ(ranges[output.tensor()], 1.0f);
3411 }
3412 }
3413 }
3414
3415 TEST_P(OpConverter_FP32_Test, ConvertExpandDims) {
3416 // Get the NodeDef for ExpandDims.
3417 Scope s = Scope::NewRootScope();
3418 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
3419 auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
3420 auto expanddims =
3421 ops::ExpandDims(s.WithOpName("my_expanddims"), input, weights);
3422 const NodeDef& node_def = expanddims.operation.node()->def();
3423 {
3424 // Input is weights, should fail.
3425 Reset();
3426 AddTestWeights<int32>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
3427 AddTestWeights<int32>("weights", {1}, {1});
3428 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3429 "The input \"input\" for ExpandDims must be a "
3430 "tensor, at my_expanddims");
3431 }
3432 {
3433 // Axis is a tensor, should fail.
3434 Reset();
3435 AddTestTensor("input", {3, 2, 1});
3436 AddTestTensor("weights", {3});
3437 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3438 "The input \"axis\" for ExpandDims must be a "
3439 "constant, at my_expanddims");
3440 }
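  // Each TestParamBase below lists, in order: input dims, partial input dims,
  // expected output dims, the axis to expand, and the expected conversion
  // status (OK unless stated otherwise).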
3441 std::vector<TestParamBase> test_params = {
3442 TestParamBase{{1, 1, 2, 3},
3443 {},
3444 {1, 1, 1, 2, 3},
3445 {0},
3446 trt_mode_ == TrtTestMode::kImplicitBatch
3447 ? Status(error::UNIMPLEMENTED,
3448 "TensorRT does not allow manipulation of the "
3449 "batch dimension, at my_expanddims")
3450 : Status::OK()},
3451 TestParamBase{{1, 1, 2, 3},
3452 {},
3453 {1, 1, 1, 2, 3},
3454 {-5},
3455 trt_mode_ == TrtTestMode::kImplicitBatch
3456 ? Status(error::UNIMPLEMENTED,
3457 "TensorRT does not allow manipulation of the "
3458 "batch dimension, at my_expanddims")
3459 : Status::OK()},
3460 TestParamBase{{1, 1, 2, 3},
3461 {},
3462 {},
3463 {5},
3464 Status(error::INVALID_ARGUMENT,
3465 "Axis value of 5 is out of bounds, must be in range"
3466 " [-5, 5), at my_expanddims")},
3467 TestParamBase{{1, 1, 2, 3},
3468 {},
3469 {},
3470 {-6},
3471 Status(error::INVALID_ARGUMENT,
3472 "Axis value of -6 is out of bounds, must be in range"
3473 " [-5, 5), at my_expanddims")},
3474 TestParamBase{{1, 2, 3}, {}, {1, 1, 2, 3}, {1}},
3475 TestParamBase{{1, 2, 3}, {}, {1, 1, 2, 3}, {-3}},
3476 TestParamBase{{1, 2, 3}, {}, {1, 2, 3, 1}, {3}},
3477 TestParamBase{{1, 2, 3}, {}, {1, 2, 3, 1}, {-1}},
3478 TestParamBase{{1, 2, 3}, {}, {1, 2, 1, 3}, {2}},
3479 TestParamBase{{1, 2, 3}, {}, {1, 2, 1, 3}, {-2}},
3480 TestParamBase{{1, 6}, {}, {1, 1, 6}, {1}},
3481 TestParamBase{{1, 6}, {}, {1, 6, 1}, {-1}},
3482 };
3483 for (auto p : test_params) {
3484 Reset();
3485 AddTestTensor("input", p.input_dims, {1, 2, 3, 4, 5, 6});
3486 AddTestWeights<int32>("weights", {1}, {p.param[0]});
3487 TestOpConverter("my_expanddims", node_def, p.expected_output_dims, p.status,
3488 p.runtime_status, ElementsAreArray({1, 2, 3, 4, 5, 6}));
3489 }
3490 }
3491
3492 TEST_P(OpConverter_FP32_Test, ConvertSqueeze) {
3493 const bool use_implicit_batch = (trt_mode_ == TrtTestMode::kImplicitBatch);
3494 // Get the NodeDef for Squeeze.
3495 auto get_squeeze_nodedef = [](std::vector<int> axes,
3496 DataType tf_type) -> NodeDef {
3497 Scope s = Scope::NewRootScope();
3498 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
3499 if (!axes.empty()) {
3500 ops::Squeeze::Attrs squeeze_attrs;
3501 squeeze_attrs.axis_ = gtl::ArraySlice<int>(axes); // non-absl ok
3502 auto squeeze =
3503 ops::Squeeze(s.WithOpName("my_squeeze"), input, squeeze_attrs);
3504 return squeeze.operation.node()->def();
3505 } else {
3506 auto squeeze = ops::Squeeze(s.WithOpName("my_squeeze"), input);
3507 return squeeze.operation.node()->def();
3508 }
3509 };
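  // Each TestParamBase below lists, in order: input dims, partial input dims,
  // expected output dims, the axes to squeeze, and the expected conversion
  // status.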
3510 std::vector<TestParamBase> test_params = {
3511 TestParamBase{
3512 {1, 2, 1, 3}, // input dims
3513 {}, // input partial dims
3514 {2, 3}, // expected output dims
3515 {}, // axis
3516 trt_mode_ == TrtTestMode::kExplicitBatch
3517 ? Status::OK()
3518 : Status{error::UNIMPLEMENTED,
3519 "Squeeze is not implemented for empty squeeze_dims, at "
3520 "my_squeeze"}},
3521 TestParamBase{{1, 2, 1, 3},
3522 {},
3523 {2, 1, 3},
3524 {0},
3525 use_implicit_batch
3526 ? Status{error::UNIMPLEMENTED,
3527 "TensorRT does not allow manipulation of the "
3528 "batch dimension, at my_squeeze"}
3529 : Status::OK()},
3530 TestParamBase{{1, 2, 1, 3},
3531 {},
3532 {2, 1, 3},
3533 {-4},
3534 use_implicit_batch
3535 ? Status{error::UNIMPLEMENTED,
3536 "TensorRT does not allow manipulation of the "
3537 "batch dimension, at my_squeeze"}
3538 : Status::OK()},
3539 TestParamBase{
3540 {1, 1, 2, 3},
3541 {},
3542 {},
3543 {4},
3544 Status{error::INVALID_ARGUMENT,
3545 "Axis value of 4 is out of bounds, must be in range [-4, 4), "
3546 "at my_squeeze"}},
3547 TestParamBase{
3548 {1, 1, 2, 3},
3549 {},
3550 {},
3551 {-5},
3552 Status{error::INVALID_ARGUMENT,
3553 "Axis value of -5 is out of bounds, must be in range [-4, 4), "
3554 "at my_squeeze"}},
3555 TestParamBase{{1, 1, 2, 3}, {}, {1, 2, 3}, {1}},
3556 TestParamBase{{1, 1, 2, 3}, {}, {1, 2, 3}, {-3}},
3557 TestParamBase{{1, 2, 3, 1}, {}, {1, 2, 3}, {3}},
3558 TestParamBase{{1, 2, 3, 1}, {}, {1, 2, 3}, {-1}},
3559 TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {1, 3, 5}},
3560 TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {3, 1, 5}},
3561 TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {-1, -3, -5}},
3562 TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {1, -3, 5}},
3563 TestParamBase{{1, 1, 6}, {}, {1, 6}, {1}},
3564 TestParamBase{{1, 6, 1}, {}, {1, 6}, {2}},
3565 };
3566 auto squeeze_non_singleton = TestParamBase{
3567 {1, 1, 2, 3},
3568 {},
3569 {},
3570 {2},
3571 Status{error::INVALID_ARGUMENT,
3572 "Dimension 2 with size 2 cannot be squeezed because it must be "
3573 "size 1, at my_squeeze"}};
3574
3575 if (trt_mode_ == TrtTestMode::kDynamicShape) {
3576 // In this test we try to squeeze axis=2 which has size > 1. In dynamic
3577 // shape mode the converter sees only -1, so it cannot catch this error.
3578 squeeze_non_singleton.status = Status::OK(); // conversion status
3579 squeeze_non_singleton.runtime_status =
3580 errors::InvalidArgument("Negative number of dimensions -1");
3581 // Dynamic shape tests with partially known input shape
3582 test_params.push_back(TestParamBase{{2, 1, 3}, {2, -1, 3}, {2, 3}, {1}});
3583 test_params.push_back(TestParamBase{{2, 1, 3}, {2, 1, -1}, {2, 3}, {1}});
3584 }
3585 test_params.push_back(squeeze_non_singleton);
3586
3587 for (TestParamBase p : test_params) {
3588 SCOPED_TRACE(p);
3589 Reset();
3590 NodeDef node_def = get_squeeze_nodedef(p.param, tf_type_);
3591 AddTestTensor("input", p.input_dims, {1, 2, 3, 4, 5, 6},
3592 p.partial_input_dims);
3593 TestOpConverter("my_squeeze", node_def, p.expected_output_dims, p.status,
3594 p.runtime_status, ElementsAreArray({1, 2, 3, 4, 5, 6}));
3595 }
3596 }
3597
3598 TEST_F(OpConverterTest, ConvertStridedSlice) {
3599 // Get nodedef for StridedSlice layer.
3600 auto get_strided_slice_nodedef =
3601 [](int64 begin_mask = 0, int64 end_mask = 0, int64 ellipsis_mask = 0,
3602 int64 new_axis_mask = 0, int64 shrink_axis_mask = 0) -> NodeDef {
3603 Scope s = Scope::NewRootScope();
3604 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3605 auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32);
3606 auto end = ops::Placeholder(s.WithOpName("end"), DT_INT32);
3607 auto strides = ops::Placeholder(s.WithOpName("strides"), DT_INT32);
3608 ops::StridedSlice::Attrs attrs = ops::StridedSlice::Attrs()
3609 .BeginMask(begin_mask)
3610 .EndMask(end_mask)
3611 .EllipsisMask(ellipsis_mask)
3612 .NewAxisMask(new_axis_mask)
3613 .ShrinkAxisMask(shrink_axis_mask);
3614 auto strided_slice = ops::StridedSlice(s.WithOpName("my_strided_slice"),
3615 input, begin, end, strides, attrs);
3616 return strided_slice.operation.node()->def();
3617 };
3618
3619 {
3620 // Input is weights, should fail.
3621 Reset();
3622 NodeDef node_def = get_strided_slice_nodedef();
3623 AddTestWeights<int32>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
3624 AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
3625 AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
3626 AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
3627 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3628 "The input \"input\" for StridedSlice must be a "
3629 "tensor, at my_strided_slice");
3630 }
3631 {
3632 // Begin, end, strides are tensors, should fail.
3633 Reset();
3634 NodeDef node_def = get_strided_slice_nodedef();
3635 AddTestTensor("input", {1, 2, 3});
3636 AddTestTensor("begin", {4});
3637 AddTestTensor("end", {4});
3638 AddTestTensor("strides", {4});
3639 RunValidationAndConversion(
3640 node_def, error::UNIMPLEMENTED,
3641 "The input \"begin\" for StridedSlice must be a constant, at "
3642 "my_strided_slice");
3643 }
3644 {
3645 // Modify batch dim, should fail.
3646 Reset();
3647 NodeDef node_def = get_strided_slice_nodedef();
3648 AddTestTensor("input", {1, 2, 3});
3649 AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
3650 AddTestWeights<int32>("end", {4}, {0, 1, 2, 3});
3651 AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
3652 RunValidationAndConversion(
3653 node_def, error::UNIMPLEMENTED,
3654 "TensorRT does not allow modifications to the batch dimension, at "
3655 "my_strided_slice");
3656 }
3657 {
3658 // Dynamic batch size without end_mask, should fail.
3659 Reset();
3660 NodeDef node_def = get_strided_slice_nodedef();
3661 AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1);
3662 AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
3663 AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
3664 AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
3665 RunValidationAndConversion(
3666 node_def, error::UNIMPLEMENTED,
3667 "TensorRT does not allow modifications to the batch dimension, at "
3668 "my_strided_slice");
3669 }
3670 {
3671 // Dynamic batch size but using end_mask, ok.
3672 Reset();
3673 NodeDef node_def = get_strided_slice_nodedef(/*begin_mask=*/0,
3674 /*end_mask=*/1);
3675 AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1);
3676 AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
3677 AddTestWeights<int32>("end", {4}, {0, 1, 2, 2});
3678 AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
3679 RunValidationAndConversion(node_def);
3680 }
3681 // TRT 5.1+ supports strides (disabled until 5.1.3.1 due to bugs)
3682 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
3683 {
3684 // Negative strides, should fail.
3685 Reset();
3686 NodeDef node_def = get_strided_slice_nodedef();
3687 AddTestTensor("input", {1, 2, 3});
3688 AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
3689 AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
3690 AddTestWeights<int32>("strides", {4}, {1, 1, 1, -1});
3691 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3692 "Negative or zero stride values are not "
3693 "supported for StridedSlice, at "
3694 "my_strided_slice");
3695 }
3696 #else
3697 {
3698 // Stride is not 1, should fail.
3699 Reset();
3700 NodeDef node_def = get_strided_slice_nodedef();
3701 AddTestTensor("input", {1, 2, 3});
3702 AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
3703 AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
3704 AddTestWeights<int32>("strides", {4}, {1, 2, 1, 3});
3705 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3706 "Strides other than 1 are not supported with "
3707 "this version of TRT, at my_strided_slice");
3708 }
3709 #endif
3710 {
3711 // Size of sliced dim is negative, should fail.
3712 Reset();
3713 NodeDef node_def = get_strided_slice_nodedef();
3714 AddTestTensor("input", {1, 2, 3});
3715 AddTestWeights<int32>("begin", {4}, {0, 0, 2, 0});
3716 AddTestWeights<int32>("end", {4}, {1, 1, 0, 3});
3717 AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
3718 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
3719 "\"size\" cannot be negative or zero for "
3720 "StridedSlice, at my_strided_slice");
3721 }
3722
3723 struct TestParams {
3724 std::vector<int> input_dims;
3725 std::vector<int> begin;
3726 std::vector<int> end;
3727 std::vector<int> strides;
3728 int begin_mask;
3729 int end_mask;
3730 int ellipsis_mask;
3731 int new_axis_mask;
3732 int shrink_axis_mask;
3733 std::vector<int> expected_output_dims;
3734 std::vector<float> expected_output;
3735 };
3736
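  // get_mask packs a per-dimension flag vector into the bitmask format used by
  // the StridedSlice attributes: bit i is set when mask[i] is nonzero, so
  // get_mask({1, 1, 0, 0}) == 0b0011 == 3.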
3737 auto get_mask = [](const std::vector<int>& mask) {
3738 int result = 0;
3739 for (int i = 0; i < mask.size(); i++) {
3740 if (mask[i]) result += (1 << i);
3741 }
3742 return result;
3743 };
3744
3745 // Same input is used for all tests.
3746 const std::vector<float> ok_input = {1, 2, 3, 4, 5, 6};
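  // ok_input is reshaped according to each test's input_dims; e.g. for
  // {1, 2, 3} it represents [[[1, 2, 3], [4, 5, 6]]].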
3747
3748 // Ok.
3749 std::vector<TestParams> ok_params = {
3750 // 2D Crop.
3751 TestParams{
3752 /*input_dims=*/{1, 2, 3},
3753 /*begin=*/{0, 0, 0, 0},
3754 /*end=*/{0, 0, 1, 2},
3755 /*strides=*/{1, 1, 1, 1},
3756 /*begin_mask=*/get_mask({0, 0, 0, 0}),
3757 /*end_mask=*/get_mask({1, 1, 0, 0}),
3758 /*ellipsis_mask=*/0,
3759 /*new_axis_mask=*/0,
3760 /*shrink_axis_mask=*/0,
3761 /*expected_output_dims=*/{1, 1, 2},
3762 /*expected_output=*/{1, 2},
3763 },
3764 TestParams{
3765 /*input_dims=*/{1, 2, 3},
3766 /*begin=*/{0, 0, 1, 1},
3767 /*end=*/{0, 0, 0, 0},
3768 /*strides=*/{1, 1, 1, 1},
3769 /*begin_mask=*/get_mask({0, 0, 0, 0}),
3770 /*end_mask=*/get_mask({1, 1, 1, 1}),
3771 /*ellipsis_mask=*/0,
3772 /*new_axis_mask=*/0,
3773 /*shrink_axis_mask=*/0,
3774 /*expected_output_dims=*/{1, 1, 2},
3775 /*expected_output=*/{5, 6},
3776 },
3777 TestParams{
3778 /*input_dims=*/{1, 2, 3},
3779 /*begin=*/{0, 0, 1, 1},
3780 /*end=*/{0, 1, 2, 3},
3781 /*strides=*/{1, 1, 1, 1},
3782 /*begin_mask=*/get_mask({0, 0, 0, 0}),
3783 /*end_mask=*/get_mask({1, 1, 0, 0}),
3784 /*ellipsis_mask=*/0,
3785 /*new_axis_mask=*/0,
3786 /*shrink_axis_mask=*/0,
3787 /*expected_output_dims=*/{1, 1, 2},
3788 /*expected_output=*/{5, 6},
3789 },
3790 // 2D Crop, with transpose.
3791 TestParams{
3792 /*input_dims=*/{2, 3, 1},
3793 /*begin=*/{0, 0, 0, 0},
3794 /*end=*/{0, 1, 2, 1},
3795 /*strides=*/{1, 1, 1, 1},
3796 /*begin_mask=*/get_mask({0, 0, 0, 0}),
3797 /*end_mask=*/get_mask({1, 0, 0, 0}),
3798 /*ellipsis_mask=*/0,
3799 /*new_axis_mask=*/0,
3800 /*shrink_axis_mask=*/0,
3801 /*expected_output_dims=*/{1, 2, 1},
3802 /*expected_output=*/{1, 2},
3803 },
3804 TestParams{
3805 /*input_dims=*/{2, 3, 1},
3806 /*begin=*/{0, 1, 1, 0},
3807 /*end=*/{0, 2, 3, 1},
3808 /*strides=*/{1, 1, 1, 1},
3809 /*begin_mask=*/get_mask({0, 0, 0, 0}),
3810 /*end_mask=*/get_mask({1, 0, 0, 0}),
3811 /*ellipsis_mask=*/0,
3812 /*new_axis_mask=*/0,
3813 /*shrink_axis_mask=*/0,
3814 /*expected_output_dims=*/{1, 2, 1},
3815 /*expected_output=*/{5, 6},
3816 },
3817 TestParams{
3818 /*input_dims=*/{2, 1, 3},
3819 /*begin=*/{0, 0, 0, 0},
3820 /*end=*/{0, 1, 1, 2},
3821 /*strides=*/{1, 1, 1, 1},
3822 /*begin_mask=*/get_mask({0, 0, 0, 0}),
3823 /*end_mask=*/get_mask({1, 0, 0, 0}),
3824 /*ellipsis_mask=*/0,
3825 /*new_axis_mask=*/0,
3826 /*shrink_axis_mask=*/0,
3827 /*expected_output_dims=*/{1, 1, 2},
3828 /*expected_output=*/{1, 2},
3829 },
3830 TestParams{
3831 /*input_dims=*/{2, 1, 3},
3832 /*begin=*/{0, 1, 0, 1},
3833 /*end=*/{0, 2, 1, 3},
3834 /*strides=*/{1, 1, 1, 1},
3835 /*begin_mask=*/get_mask({0, 0, 0, 0}),
3836 /*end_mask=*/get_mask({1, 0, 0, 0}),
3837 /*ellipsis_mask=*/0,
3838 /*new_axis_mask=*/0,
3839 /*shrink_axis_mask=*/0,
3840 /*expected_output_dims=*/{1, 1, 2},
3841 /*expected_output=*/{5, 6},
3842 },
3843 // 2D Crop, with reshape.
3844 TestParams{
3845 /*input_dims=*/{2, 3},
3846 /*begin=*/{0, 0, 0},
3847 /*end=*/{0, 1, 2},
3848 /*strides=*/{1, 1, 1},
3849 /*begin_mask=*/get_mask({0, 0, 0}),
3850 /*end_mask=*/get_mask({1, 0, 0}),
3851 /*ellipsis_mask=*/0,
3852 /*new_axis_mask=*/0,
3853 /*shrink_axis_mask=*/0,
3854 /*expected_output_dims=*/{1, 2},
3855 /*expected_output=*/{1, 2},
3856 },
3857 TestParams{
3858 /*input_dims=*/{2, 3},
3859 /*begin=*/{0, 1, 1},
3860 /*end=*/{0, 0, 0},
3861 /*strides=*/{1, 1, 1},
3862 /*begin_mask=*/get_mask({0, 0, 0}),
3863 /*end_mask=*/get_mask({1, 1, 1}),
3864 /*ellipsis_mask=*/0,
3865 /*new_axis_mask=*/0,
3866 /*shrink_axis_mask=*/0,
3867 /*expected_output_dims=*/{1, 2},
3868 /*expected_output=*/{5, 6},
3869 },
3870 // 1D Crop.
3871 TestParams{
3872 /*input_dims=*/{1, 2, 3},
3873 /*begin=*/{0, 0, 0, 0},
3874 /*end=*/{0, 0, 0, 2},
3875 /*strides=*/{1, 1, 1, 1},
3876 /*begin_mask=*/get_mask({0, 0, 0, 0}),
3877 /*end_mask=*/get_mask({1, 1, 1, 0}),
3878 /*ellipsis_mask=*/0,
3879 /*new_axis_mask=*/0,
3880 /*shrink_axis_mask=*/0,
3881 /*expected_output_dims=*/{1, 2, 2},
3882 /*expected_output=*/{1, 2, 4, 5},
3883 },
3884 TestParams{
3885 /*input_dims=*/{1, 2, 3},
3886 /*begin=*/{0, 0, 1, 0},
3887 /*end=*/{0, 0, 0, 0},
3888 /*strides=*/{1, 1, 1, 1},
3889 /*begin_mask=*/get_mask({0, 0, 0, 0}),
3890 /*end_mask=*/get_mask({1, 1, 1, 1}),
3891 /*ellipsis_mask=*/0,
3892 /*new_axis_mask=*/0,
3893 /*shrink_axis_mask=*/0,
3894 /*expected_output_dims=*/{1, 1, 3},
3895 /*expected_output=*/{4, 5, 6},
3896 },
3897 // 1D Crop, with transpose.
3898 TestParams{
3899 /*input_dims=*/{2, 3, 1},
3900 /*begin=*/{0, 0, 0, 0},
3901 /*end=*/{0, 1, 0, 0},
3902 /*strides=*/{1, 1, 1, 1},
3903 /*begin_mask=*/get_mask({0, 0, 0, 0}),
3904 /*end_mask=*/get_mask({1, 0, 1, 1}),
3905 /*ellipsis_mask=*/0,
3906 /*new_axis_mask=*/0,
3907 /*shrink_axis_mask=*/0,
3908 /*expected_output_dims=*/{1, 3, 1},
3909 /*expected_output=*/{1, 2, 3},
3910 },
3911 TestParams{
3912 /*input_dims=*/{2, 3, 1},
3913 /*begin=*/{0, 1, 0, 0},
3914 /*end=*/{0, 0, 0, 0},
3915 /*strides=*/{1, 1, 1, 1},
3916 /*begin_mask=*/get_mask({0, 0, 0, 0}),
3917 /*end_mask=*/get_mask({1, 1, 1, 1}),
3918 /*ellipsis_mask=*/0,
3919 /*new_axis_mask=*/0,
3920 /*shrink_axis_mask=*/0,
3921 /*expected_output_dims=*/{1, 3, 1},
3922 /*expected_output=*/{4, 5, 6},
3923 },
3924 // 1D Crop, with reshape.
3925 TestParams{
3926 /*input_dims=*/{6},
3927 /*begin=*/{0, 0},
3928 /*end=*/{0, 3},
3929 /*strides=*/{1, 1},
3930 /*begin_mask=*/get_mask({0, 0}),
3931 /*end_mask=*/get_mask({1, 0}),
3932 /*ellipsis_mask=*/0,
3933 /*new_axis_mask=*/0,
3934 /*shrink_axis_mask=*/0,
3935 /*expected_output_dims=*/{3},
3936 /*expected_output=*/{1, 2, 3},
3937 },
3938 TestParams{
3939 /*input_dims=*/{1, 6},
3940 /*begin=*/{0, 0, 2},
3941 /*end=*/{0, 0, 5},
3942 /*strides=*/{1, 1, 1},
3943 /*begin_mask=*/get_mask({0, 0, 0}),
3944 /*end_mask=*/get_mask({1, 1, 0}),
3945 /*ellipsis_mask=*/0,
3946 /*new_axis_mask=*/0,
3947 /*shrink_axis_mask=*/0,
3948 /*expected_output_dims=*/{1, 3},
3949 /*expected_output=*/{3, 4, 5},
3950 },
3951 TestParams{
3952 /*input_dims=*/{6, 1},
3953 /*begin=*/{0, 2, 0},
3954 /*end=*/{0, 5, 0},
3955 /*strides=*/{1, 1, 1},
3956 /*begin_mask=*/get_mask({0, 0, 0}),
3957 /*end_mask=*/get_mask({1, 0, 1}),
3958 /*ellipsis_mask=*/0,
3959 /*new_axis_mask=*/0,
3960 /*shrink_axis_mask=*/0,
3961 /*expected_output_dims=*/{3, 1},
3962 /*expected_output=*/{3, 4, 5},
3963 },
3964 // Negative axis.
3965 TestParams{
3966 /*input_dims=*/{6, 1},
3967 /*begin=*/{0, -6, 0},
3968 /*end=*/{0, -3, 0},
3969 /*strides=*/{1, 1, 1},
3970 /*begin_mask=*/get_mask({0, 0, 0}),
3971 /*end_mask=*/get_mask({1, 0, 1}),
3972 /*ellipsis_mask=*/0,
3973 /*new_axis_mask=*/0,
3974 /*shrink_axis_mask=*/0,
3975 /*expected_output_dims=*/{3, 1},
3976 /*expected_output=*/{1, 2, 3},
3977 },
3978 TestParams{
3979 /*input_dims=*/{6, 1},
3980 /*begin=*/{0, 0, 0},
3981 /*end=*/{0, -1, 0},
3982 /*strides=*/{1, 1, 1},
3983 /*begin_mask=*/get_mask({0, 0, 0}),
3984 /*end_mask=*/get_mask({1, 0, 1}),
3985 /*ellipsis_mask=*/0,
3986 /*new_axis_mask=*/0,
3987 /*shrink_axis_mask=*/0,
3988 /*expected_output_dims=*/{5, 1},
3989 /*expected_output=*/{1, 2, 3, 4, 5},
3990 },
3991 // Clamp out of bounds begin and end.
3992 TestParams{
3993 /*input_dims=*/{1, 2, 3},
3994 /*begin=*/{0, 0, -9999, -9},
3995 /*end=*/{0, 1, 1000, 4},
3996 /*strides=*/{1, 1, 1, 1},
3997 /*begin_mask=*/get_mask({0, 0, 0, 0}),
3998 /*end_mask=*/get_mask({1, 0, 0, 0}),
3999 /*ellipsis_mask=*/0,
4000 /*new_axis_mask=*/0,
4001 /*shrink_axis_mask=*/0,
4002 /*expected_output_dims=*/{1, 2, 3},
4003 /*expected_output=*/{1, 2, 3, 4, 5, 6},
4004 },
4005 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
4006 // Strides
4007 TestParams{
4008 /*input_dims=*/{6},
4009 /*begin=*/{0, 0},
4010 /*end=*/{0, 5},
4011 /*strides=*/{1, 2},
4012 /*begin_mask=*/get_mask({0, 0}),
4013 /*end_mask=*/get_mask({1, 0}),
4014 /*ellipsis_mask=*/0,
4015 /*new_axis_mask=*/0,
4016 /*shrink_axis_mask=*/0,
4017 /*expected_output_dims=*/{3},
4018 /*expected_output=*/{1, 3, 5},
4019 },
4020 TestParams{
4021 /*input_dims=*/{6},
4022 /*begin=*/{0, 0},
4023 /*end=*/{0, 6},
4024 /*strides=*/{1, 2},
4025 /*begin_mask=*/get_mask({0, 0}),
4026 /*end_mask=*/get_mask({1, 0}),
4027 /*ellipsis_mask=*/0,
4028 /*new_axis_mask=*/0,
4029 /*shrink_axis_mask=*/0,
4030 /*expected_output_dims=*/{3},
4031 /*expected_output=*/{1, 3, 5},
4032 },
4033 TestParams{
4034 /*input_dims=*/{6},
4035 /*begin=*/{0, 1},
4036 /*end=*/{0, 6},
4037 /*strides=*/{1, 2},
4038 /*begin_mask=*/get_mask({0, 0}),
4039 /*end_mask=*/get_mask({1, 0}),
4040 /*ellipsis_mask=*/0,
4041 /*new_axis_mask=*/0,
4042 /*shrink_axis_mask=*/0,
4043 /*expected_output_dims=*/{3},
4044 /*expected_output=*/{2, 4, 6},
4045 },
4046 TestParams{
4047 /*input_dims=*/{6},
4048 /*begin=*/{0, 2},
4049 /*end=*/{0, 6},
4050 /*strides=*/{1, 3},
4051 /*begin_mask=*/get_mask({0, 0}),
4052 /*end_mask=*/get_mask({1, 0}),
4053 /*ellipsis_mask=*/0,
4054 /*new_axis_mask=*/0,
4055 /*shrink_axis_mask=*/0,
4056 /*expected_output_dims=*/{2},
4057 /*expected_output=*/{3, 6},
4058 },
4059 #endif
4060 // ellipsis_mask
4061 TestParams{
4062 /*input_dims=*/{1, 2, 3},
4063 /*begin=*/{0, 1},
4064 /*end=*/{0, 2},
4065 /*strides=*/{1, 1},
4066 /*begin_mask=*/get_mask({0, 0, 0, 0}),
4067 /*end_mask=*/get_mask({0, 0, 0, 0}),
4068 /*ellipsis_mask=*/get_mask({1, 0, 0, 0}),
4069 /*new_axis_mask=*/0,
4070 /*shrink_axis_mask=*/0,
4071 /*expected_output_dims=*/{1, 2, 1},
4072 /*expected_output=*/{2, 5},
4073 },
4074 TestParams{
4075 /*input_dims=*/{1, 2, 3},
4076 /*begin=*/{0, 0, 1},
4077 /*end=*/{0, 0, 2},
4078 /*strides=*/{1, 1, 1},
4079 /*begin_mask=*/get_mask({1, 0, 0, 0}),
4080 /*end_mask=*/get_mask({1, 0, 0, 0}),
4081 /*ellipsis_mask=*/get_mask({0, 1, 0, 0}),
4082 /*new_axis_mask=*/0,
4083 /*shrink_axis_mask=*/0,
4084 /*expected_output_dims=*/{1, 2, 1},
4085 /*expected_output=*/{2, 5},
4086 },
4087 TestParams{
4088 /*input_dims=*/{1, 2, 3},
4089 /*begin=*/{0, 0, 0, 1},
4090 /*end=*/{0, 1, 2, 2},
4091 /*strides=*/{1, 1, 1, 1},
4092 /*begin_mask=*/get_mask({0, 0, 0, 0}),
4093 /*end_mask=*/get_mask({0, 0, 0, 0}),
4094 /*ellipsis_mask=*/get_mask({1, 0, 0, 0}),
4095 /*new_axis_mask=*/0,
4096 /*shrink_axis_mask=*/0,
4097 /*expected_output_dims=*/{1, 2, 1},
4098 /*expected_output=*/{2, 5},
4099 },
4100 TestParams{
4101 /*input_dims=*/{1, 2, 3},
4102 /*begin=*/{0, 0, 0, 1},
4103 /*end=*/{1, 1, 2, 2},
4104 /*strides=*/{1, 1, 1, 1},
4105 /*begin_mask=*/get_mask({0, 0, 0, 0}),
4106 /*end_mask=*/get_mask({0, 0, 0, 0}),
4107 /*ellipsis_mask=*/get_mask({0, 1, 0, 0}),
4108 /*new_axis_mask=*/0,
4109 /*shrink_axis_mask=*/0,
4110 /*expected_output_dims=*/{1, 2, 1},
4111 /*expected_output=*/{2, 5},
4112 },
4113 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
4114 TestParams{
4115 /*input_dims=*/{1, 2, 3},
4116 /*begin=*/{0, 0, 0, 0, 1},
4117 /*end=*/{0, 1, 1, 2, 2},
4118 /*strides=*/{1, 1, 1, 1, 1},
4119 /*begin_mask=*/get_mask({0, 0, 0, 0}),
4120 /*end_mask=*/get_mask({0, 0, 0, 0}),
4121 /*ellipsis_mask=*/get_mask({1, 0, 0, 0}),
4122 /*new_axis_mask=*/0,
4123 /*shrink_axis_mask=*/0,
4124 /*expected_output_dims=*/{1, 2, 1},
4125 /*expected_output=*/{2, 5},
4126 },
4127 // shrink_axis_mask
4128 TestParams{
4129 /*input_dims=*/{1, 2, 3},
4130 /*begin=*/{0, 0, 0, 1},
4131 /*end=*/{0, 0, 0, 2},
4132 /*strides=*/{1, 1, 1, 1},
4133 /*begin_mask=*/get_mask({1, 1, 1, 0}),
4134 /*end_mask=*/get_mask({1, 1, 1, 0}),
4135 /*ellipsis_mask=*/0,
4136 /*new_axis_mask=*/0,
4137 /*shrink_axis_mask=*/get_mask({0, 0, 0, 1}),
4138 /*expected_output_dims=*/{1, 2},
4139 /*expected_output=*/{2, 5},
4140 },
4141 TestParams{
4142 /*input_dims=*/{1, 2, 3},
4143 /*begin=*/{0, 0, 0, 1},
4144 /*end=*/{0, 1, 2, 2},
4145 /*strides=*/{1, 1, 1, 1},
4146 /*begin_mask=*/get_mask({1, 0, 0, 0}),
4147 /*end_mask=*/get_mask({1, 0, 0, 0}),
4148 /*ellipsis_mask=*/0,
4149 /*new_axis_mask=*/0,
4150 /*shrink_axis_mask=*/get_mask({0, 1, 0, 1}),
4151 /*expected_output_dims=*/{2},
4152 /*expected_output=*/{2, 5},
4153 },
4154 TestParams{
4155 /*input_dims=*/{6},
4156 /*begin=*/{0, 0},
4157 /*end=*/{0, 1},
4158 /*strides=*/{1, 1},
4159 /*begin_mask=*/get_mask({1, 0}),
4160 /*end_mask=*/get_mask({1, 0}),
4161 /*ellipsis_mask=*/0,
4162 /*new_axis_mask=*/0,
4163 /*shrink_axis_mask=*/get_mask({0, 1}),
4164 /*expected_output_dims=*/{},
4165 /*expected_output=*/{1},
4166 },
4167 #endif // IS_TRT_VERSION_GE(5, 1, 3, 1)
4168 };
4169
4170 for (int i = 0; i < ok_params.size(); i++) {
4171 Reset();
4172 NodeDef node_def = get_strided_slice_nodedef(
4173 ok_params[i].begin_mask, ok_params[i].end_mask,
4174 ok_params[i].ellipsis_mask, ok_params[i].new_axis_mask,
4175 ok_params[i].shrink_axis_mask);
4176 AddTestTensor("input", ok_params[i].input_dims);
4177 AddTestWeights<int32>("begin",
4178 {static_cast<int>(ok_params[i].begin.size())},
4179 ok_params[i].begin);
4180 AddTestWeights<int32>("end", {static_cast<int>(ok_params[i].end.size())},
4181 ok_params[i].end);
4182 AddTestWeights<int32>("strides",
4183 {static_cast<int>(ok_params[i].strides.size())},
4184 ok_params[i].strides);
4185 RunValidationAndConversion(node_def);
4186
4187 TRT_TensorOrWeights output;
4188 TF_EXPECT_OK(GetTensorOrWeights("my_strided_slice", &output));
4189 ASSERT_TRUE(output.is_tensor());
4190 ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
4191 output.tensor()->getDimensions());
4192
4193 const DataVec input_data{{"input", AsTensor<float>(ok_input)}};
4194 DataVec output_data{
4195 {"my_strided_slice",
4196 ConstructTensor<float>(ok_params[i].expected_output.size())}};
4197 TF_EXPECT_OK(BuildAndRun(input_data, &output_data));
4198 EXPECT_THAT(GetSpanForData<float>(output_data[0]),
4199 ElementsAreArray(ok_params[i].expected_output));
4200 }
4201 }
4202
4203 TEST_F(OpConverterTest, ConvertSlice) {
4204 // Get nodedef for Slice layer.
4205 auto get_slice_nodedef = []() -> NodeDef {
4206 Scope s = Scope::NewRootScope();
4207 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
4208 auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32);
4209 auto size = ops::Placeholder(s.WithOpName("size"), DT_INT32);
4210 auto slice = ops::Slice(s.WithOpName("my_slice"), input, begin, size);
4211 return slice.operation.node()->def();
4212 };
4213
4214 {
4215 // Begin is below bounds, should fail.
4216 Reset();
4217 NodeDef node_def = get_slice_nodedef();
4218 AddTestTensor("input", {1, 2, 3});
4219 AddTestWeights<int32>("begin", {4}, {0, 0, -1, 0});
4220 AddTestWeights<int32>("size", {4}, {1, 1, 2, 3});
4221 RunValidationAndConversion(
4222 node_def, error::INVALID_ARGUMENT,
4223 "\"begin\" for dimension 2 in Slice is out of range, at my_slice");
4224 }
4225 {
4226 // Begin is above bounds, should fail.
4227 Reset();
4228 NodeDef node_def = get_slice_nodedef();
4229 AddTestTensor("input", {1, 2, 3});
4230 AddTestWeights<int32>("begin", {4}, {0, 0, 3, 0});
4231 AddTestWeights<int32>("size", {4}, {1, 1, 2, 3});
4232 RunValidationAndConversion(
4233 node_def, error::INVALID_ARGUMENT,
4234 "\"begin\" for dimension 2 in Slice is out of range, at my_slice");
4235 }
4236 {
4237 // Size is below bounds, should fail.
4238 Reset();
4239 NodeDef node_def = get_slice_nodedef();
4240 AddTestTensor("input", {1, 2, 3});
4241 AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
4242 AddTestWeights<int32>("size", {4}, {1, 1, 2, -2});
4243 RunValidationAndConversion(
4244 node_def, error::INVALID_ARGUMENT,
4245 "\"begin\" + \"size\" for dimension 3 in Slice is out of range, at "
4246 "my_slice");
4247 }
4248 {
4249 // Size is above bounds, should fail.
4250 Reset();
4251 NodeDef node_def = get_slice_nodedef();
4252 AddTestTensor("input", {1, 2, 3});
4253 AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
4254 AddTestWeights<int32>("size", {4}, {1, 1, 3, 3});
4255 RunValidationAndConversion(
4256 node_def, error::INVALID_ARGUMENT,
4257 "\"begin\" + \"size\" for dimension 2 in Slice is out of range, at "
4258 "my_slice");
4259 }
4260 {
4261 // Modify batch dim, should fail.
4262 Reset();
4263 NodeDef node_def = get_slice_nodedef();
4264 AddTestTensor("input", {1, 2, 3});
4265 AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
4266 AddTestWeights<int32>("size", {4}, {0, 1, 2, 3});
4267 RunValidationAndConversion(
4268 node_def, error::UNIMPLEMENTED,
4269 "TensorRT does not allow modifications to the batch dimension, at "
4270 "my_slice");
4271 }
4272 {
4273 // Dynamic batch size with size[0] not -1, should fail.
4274 Reset();
4275 NodeDef node_def = get_slice_nodedef();
4276 AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1);
4277 AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
4278 AddTestWeights<int32>("size", {4}, {1, 1, 2, 3});
4279 RunValidationAndConversion(
4280 node_def, error::UNIMPLEMENTED,
4281 "TensorRT does not allow modifications to the batch dimension, at "
4282 "my_slice");
4283 }
4284 {
4285 // Dynamic batch size but using size[0] of -1, ok.
4286 Reset();
4287 NodeDef node_def = get_slice_nodedef();
4288 AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1);
4289 AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
4290 AddTestWeights<int32>("size", {4}, {-1, 1, 2, 2});
4291 RunValidationAndConversion(node_def);
4292 }
4293
4294 struct TestParams {
4295 std::vector<int> input_dims;
4296 std::vector<int> begin;
4297 std::vector<int> size;
4298 std::vector<int> expected_output_dims;
4299 std::vector<int> expected_output;
4300 };
4301
4302 // Ok.
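// Note: following TF Slice semantics, a size value of -1 selects all
// remaining elements of that dimension starting from "begin".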
4303 std::vector<TestParams> ok_params = {
4304 TestParams{{1, 2, 3},
4305 {0, 0, 0, 0},
4306 {-1, -1, -1, -1},
4307 {1, 2, 3},
4308 {1, 2, 3, 4, 5, 6}},
4309 TestParams{
4310 {1, 2, 3}, {0, 0, 0, 0}, {1, 1, 2, 3}, {1, 2, 3}, {1, 2, 3, 4, 5, 6}},
4311 TestParams{
4312 {1, 2, 3}, {0, 0, 0, 0}, {1, -1, 2, 2}, {1, 2, 2}, {1, 2, 4, 5}},
4313 TestParams{{6}, {0, 1}, {1, 5}, {5}, {2, 3, 4, 5, 6}},
4314 TestParams{{6}, {0, 1}, {-1, 3}, {3}, {2, 3, 4}},
4315 };
4316
4317 for (int i = 0; i < ok_params.size(); i++) {
4318 Reset();
4319 NodeDef node_def = get_slice_nodedef();
4320 AddTestTensor("input", ok_params[i].input_dims);
4321 AddTestWeights<int32>("begin",
4322 {static_cast<int>(ok_params[i].begin.size())},
4323 ok_params[i].begin);
4324 AddTestWeights<int32>("size", {static_cast<int>(ok_params[i].size.size())},
4325 ok_params[i].size);
4326 RunValidationAndConversion(node_def);
4327
4328 TRT_TensorOrWeights output;
4329 TF_EXPECT_OK(GetTensorOrWeights("my_slice", &output));
4330 ASSERT_TRUE(output.is_tensor());
4331 ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
4332 output.tensor()->getDimensions());
4333
4334 const DataVec input_data{{"input", AsTensor<float>({1, 2, 3, 4, 5, 6})}};
4335 DataVec output_data{{"my_slice", ConstructTensor<float>(
4336 ok_params[i].expected_output.size())}};
4337 TF_EXPECT_OK(BuildAndRun(input_data, &output_data));
4338 EXPECT_THAT(GetSpanForData<float>(output_data[0]),
4339 ElementsAreArray(ok_params[i].expected_output));
4340 }
4341 }
4342
4343 TEST_P(OpConverter_FP32_Test, ConvertConv2D) {
4344 // Get nodedef for Conv2D layer.
4345 DataType tf_type = tf_type_;
4346 auto get_conv2d_nodedef =
4347 [tf_type](std::vector<int> strides = {1, 1, 1, 1},
4348 string padding = "SAME", string data_format = "NCHW",
4349 std::vector<int> dilations = {1, 1, 1, 1}) -> NodeDef {
4350 Scope s = Scope::NewRootScope();
4351 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
4352 auto filter = ops::Placeholder(s.WithOpName("weights"), tf_type);
4353 ops::Conv2D::Attrs attrs =
4354 ops::Conv2D::Attrs().DataFormat(data_format).Dilations(dilations);
4355 auto conv2d = ops::Conv2D(s.WithOpName("my_conv2d"), input, filter, strides,
4356 padding, attrs);
4357 return conv2d.operation.node()->def();
4358 };
4359
4360 {
4361 // Input is weights, should fail.
4362 Reset();
4363 NodeDef node_def = get_conv2d_nodedef();
4364 AddTestWeights<float>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
4365 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4366 RunValidationAndConversion(
4367 node_def, error::UNIMPLEMENTED,
4368 "The input \"input\" for Conv2D must be a tensor, at my_conv2d");
4369 }
4370 {
4371 // Filter is tensor, should fail.
4372 Reset();
4373 NodeDef node_def = get_conv2d_nodedef();
4374 AddTestTensor("input", {3, 1, 2, 1});
4375 AddTestTensor("weights", {3, 3, 1, 1});
4376 RunValidationAndConversion(
4377 node_def, error::UNIMPLEMENTED,
4378 "The input \"filter\" for Conv2D must be a constant, at my_conv2d");
4379 }
4380 {
4381 // Filter is not 4D, should fail.
4382 Reset();
4383 NodeDef node_def = get_conv2d_nodedef();
4384 AddTestTensor("input", {1, 1, 2, 3});
4385 AddTestWeights<float>("weights", {3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4386 RunValidationAndConversion(
4387 node_def, error::INVALID_ARGUMENT,
4388 "Conv2D expects kernel of dimension 4, at my_conv2d");
4389 }
4390 {
4391 // Dilations is not 4D, should fail.
4392 Reset();
4393 NodeDef node_def =
4394 get_conv2d_nodedef({1, 1, 1, 1}, "SAME", "NCHW", {1, 1, 1});
4395 AddTestTensor("input", {1, 1, 2, 3});
4396 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4397 RunValidationAndConversion(
4398 node_def, error::INVALID_ARGUMENT,
4399 "Convolution dilations field must specify 4 dimensions, at my_conv2d");
4400 }
4401 {
4402 // Dilation value is not 1 for channel, should fail.
4403 Reset();
4404 NodeDef node_def =
4405 get_conv2d_nodedef({1, 1, 1, 1}, "SAME", "NCHW", {1, 2, 1, 1});
4406 AddTestTensor("input", {1, 1, 2, 3});
4407 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4408 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4409 "Dilation rate must be 1 for batch and channel "
4410 "dimensions, at my_conv2d");
4411 }
4412 {
4413 // Dilation value is not 1 for channel (NHWC), should fail.
4414 Reset();
4415 NodeDef node_def =
4416 get_conv2d_nodedef({1, 1, 1, 1}, "SAME", "NHWC", {1, 1, 1, 2});
4417 AddTestTensor("input", {1, 2, 3, 1});
4418 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4419 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4420 "Dilation rate must be 1 for batch and channel "
4421 "dimensions, at my_conv2d");
4422 }
4423 {
4424 // Strides is not 4D, should fail.
4425 Reset();
4426 NodeDef node_def =
4427 get_conv2d_nodedef({1, 1, 1}, "SAME", "NCHW", {1, 1, 1, 1});
4428 AddTestTensor("input", {1, 1, 2, 3});
4429 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4430 RunValidationAndConversion(
4431 node_def, error::INVALID_ARGUMENT,
4432 "Convolution strides field must specify 4 dimensions, at my_conv2d");
4433 }
4434 {
4435 // Stride value is not 1 for channel, should fail.
4436 Reset();
4437 NodeDef node_def =
4438 get_conv2d_nodedef({1, 2, 1, 1}, "SAME", "NCHW", {1, 1, 1, 1});
4439 AddTestTensor("input", {1, 1, 2, 3});
4440 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4441 RunValidationAndConversion(
4442 node_def, error::UNIMPLEMENTED,
4443 "Stride must be 1 for batch and channel dimensions, at my_conv2d");
4444 }
4445 if (trt_mode_ == TrtTestMode::kDynamicShape) {
4446 Reset();
4447 NodeDef node_def = get_conv2d_nodedef();
4448 // Channel dim unknown, should fail.
4449 nvinfer1::DataType trt_type;
4450 TF_ASSERT_OK(TfTypeToTrtType(tf_type_, &trt_type));
4451 AddTestTensorWithTFDims("input", {-1, -1, -1, -1}, trt_type);
4452 AddTestWeights<float>("weights", {1, 2, 1, 1}, {-1, 1});
4453 RunValidationAndConversion(
4454 node_def, error::INVALID_ARGUMENT,
4455 "Channel dimension must be static, at my_conv2d");
4456 }
4457
4458 struct TestParams {
4459 std::vector<int> input_dims;
4460 std::vector<float> input;
4461 std::vector<int> filter_dims;
4462 std::vector<float> filter;
4463 std::vector<int> strides;
4464 string padding;
4465 string data_format;
4466 std::vector<int> dilations;
4467 std::vector<int> expected_output_dims;
4468 std::vector<float> expected_output;
4469 };
4470
4471 // Ok.
4472 std::vector<TestParams> ok_params = {
4473 // Basic
4474 TestParams{/*input_dims=*/{1, 1, 2, 3},
4475 /*input=*/{0, 1, 2, 3, 3, 4},
4476 /*filter_dims=*/{1, 2, 1, 1},
4477 /*filter=*/{-1, 1},
4478 /*strides=*/{1, 1, 1, 1},
4479 /*padding=*/"VALID",
4480 /*data_format=*/"NCHW",
4481 /*dilations=*/{1, 1, 1, 1},
4482 /*expected_output_dims=*/{1, 1, 2, 2},
4483 /*expected_output=*/{1, 1, 0, 1}},
4484 // SAME padding (Asymmetric)
4485 TestParams{/*input_dims=*/{1, 1, 2, 3},
4486 /*input=*/{0, 1, 2, 3, 3, 4},
4487 /*filter_dims=*/{1, 2, 1, 1},
4488 /*filter=*/{-1, 1},
4489 /*strides=*/{1, 1, 1, 1},
4490 /*padding=*/"SAME",
4491 /*data_format=*/"NCHW",
4492 /*dilations=*/{1, 1, 1, 1},
4493 /*expected_output_dims=*/{1, 1, 2, 3},
4494 /*expected_output=*/{1, 1, -2, 0, 1, -4}},
4495 // SAME padding (Symmetric)
4496 TestParams{/*input_dims=*/{1, 1, 2, 3},
4497 /*input=*/{0, 1, 2, 3, 3, 4},
4498 /*filter_dims=*/{1, 3, 1, 1},
4499 /*filter=*/{-1, 0, 1},
4500 /*strides=*/{1, 1, 1, 1},
4501 /*padding=*/"SAME",
4502 /*data_format=*/"NCHW",
4503 /*dilations=*/{1, 1, 1, 1},
4504 /*expected_output_dims=*/{1, 1, 2, 3},
4505 /*expected_output=*/{1, 2, -1, 3, 1, -3}},
4506 // NHWC
4507 TestParams{/*input_dims=*/{1, 2, 3, 1},
4508 /*input=*/{0, 1, 2, 3, 3, 4},
4509 /*filter_dims=*/{1, 2, 1, 1},
4510 /*filter=*/{-1, 1},
4511 /*strides=*/{1, 1, 1, 1},
4512 /*padding=*/"VALID",
4513 /*data_format=*/"NHWC",
4514 /*dilations=*/{1, 1, 1, 1},
4515 /*expected_output_dims=*/{1, 2, 2, 1},
4516 /*expected_output=*/{1, 1, 0, 1}},
4517 // Dilated
4518 TestParams{/*input_dims=*/{1, 1, 2, 3},
4519 /*input=*/{0, 1, 2, 3, 3, 4},
4520 /*filter_dims=*/{1, 2, 1, 1},
4521 /*filter=*/{-1, 1},
4522 /*strides=*/{1, 1, 1, 1},
4523 /*padding=*/"VALID",
4524 /*data_format=*/"NCHW",
4525 /*dilations=*/{1, 1, 1, 2},
4526 /*expected_output_dims=*/{1, 1, 2, 1},
4527 /*expected_output=*/{2, 1}},
4528 // Strided
4529 TestParams{/*input_dims=*/{1, 1, 2, 4},
4530 /*input=*/{0, 1, 2, 2, 3, 4, 4, 7},
4531 /*filter_dims=*/{1, 2, 1, 1},
4532 /*filter=*/{-1, 1},
4533 /*strides=*/{1, 1, 1, 2},
4534 /*padding=*/"VALID",
4535 /*data_format=*/"NCHW",
4536 /*dilations=*/{1, 1, 1, 1},
4537 /*expected_output_dims=*/{1, 1, 2, 2},
4538 /*expected_output=*/{1, 0, 1, 3}},
4539 };
4540
4541 for (int i = 0; i < ok_params.size(); i++) {
4542 Reset();
4543 NodeDef node_def =
4544 get_conv2d_nodedef(ok_params[i].strides, ok_params[i].padding,
4545 ok_params[i].data_format, ok_params[i].dilations);
4546 std::vector<int> partial_input_shape;
4547 if (trt_mode_ == TrtTestMode::kDynamicShape) {
4548 // The channel dim cannot have unknown size, fix that.
4549 partial_input_shape.resize(ok_params[i].input_dims.size(), -1);
4550 int channel_id = (ok_params[i].data_format == "NCHW") ? 1 : 3;
4551 partial_input_shape[channel_id] = ok_params[i].input_dims[channel_id];
4552 }
4553
4554 AddTestTensor("input", ok_params[i].input_dims, tf_type_,
4555 ok_params[i].input, partial_input_shape);
4556 AddTestWeights<float>("weights", ok_params[i].filter_dims,
4557 ok_params[i].filter);
4558
4559 TestOpConverter("my_conv2d", node_def, ok_params[i].expected_output_dims,
4560 Status::OK(), Status::OK(),
4561 ElementsAreArray(ok_params[i].expected_output));
4562 }
4563 }
4564
4565 TEST_F(OpConverterTest, ConvertConv2DBackpropInput) {
4566 // Get nodedef for Conv2DBackpropInput layer.
4567 auto get_conv2d_backprop_input_nodedef =
4568 [](std::vector<int> strides = {1, 1, 1, 1}, string padding = "SAME",
4569 string data_format = "NCHW",
4570 std::vector<int> dilations = {1, 1, 1, 1}) -> NodeDef {
4571 Scope s = Scope::NewRootScope();
4572 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
4573 auto filter = ops::Placeholder(s.WithOpName("weights"), DT_FLOAT);
4574 auto input_sizes = ops::Placeholder(s.WithOpName("input_sizes"), DT_INT32);
4575 ops::Conv2DBackpropInput::Attrs attrs = ops::Conv2DBackpropInput::Attrs()
4576 .DataFormat(data_format)
4577 .Dilations(dilations);
4578 auto conv2d = ops::Conv2DBackpropInput(
4579 s.WithOpName("my_conv2d_backprop_input"), input_sizes, filter, input,
4580 strides, padding, attrs);
4581 return conv2d.operation.node()->def();
4582 };
4583
4584 {
4585 // Dilation + Conv2DBackpropInput, should fail.
4586 Reset();
4587 NodeDef node_def = get_conv2d_backprop_input_nodedef({1, 1, 1, 1}, "SAME",
4588 "NHWC", {1, 1, 2, 1});
4589 AddTestTensor("input", {2, 3, 1});
4590 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4591 AddTestWeights<int>("input_sizes", {4}, {1, 2, 3, 1});
4592 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4593 "Dilation with Conv2DBackpropInput "
4594 "(conv2d_transpose) is not supported, "
4595 "at my_conv2d_backprop_input");
4596 }
4597
4598 struct TestParams {
4599 std::vector<int> input_dims;
4600 std::vector<float> input;
4601 std::vector<int> filter_dims;
4602 std::vector<float> filter;
4603 std::vector<int> strides;
4604 string padding;
4605 string data_format;
4606 std::vector<int> dilations;
4607 std::vector<int> expected_output_dims;
4608 std::vector<float> expected_output;
4609 };
4610
4611 // Ok.
4612 std::vector<TestParams> ok_params = {
4613 // Transpose Strided
4614 TestParams{/*input_dims=*/{1, 2, 2},
4615 /*input=*/{0, 1, 2, 3},
4616 /*filter_dims=*/{1, 2, 1, 1},
4617 /*filter=*/{-1, 1},
4618 /*strides=*/{1, 1, 1, 2},
4619 /*padding=*/"SAME",
4620 /*data_format=*/"NCHW",
4621 /*dilations=*/{1, 1, 1, 1},
4622 /*expected_output_dims=*/{1, 2, 4},
4623 /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3}},
4624 // Transpose Strided NHWC
4625 TestParams{/*input_dims=*/{2, 2, 1},
4626 /*input=*/{0, 1, 2, 3},
4627 /*filter_dims=*/{1, 2, 1, 1},
4628 /*filter=*/{-1, 1},
4629 /*strides=*/{1, 1, 2, 1},
4630 /*padding=*/"SAME",
4631 /*data_format=*/"NHWC",
4632 /*dilations=*/{1, 1, 1, 1},
4633 /*expected_output_dims=*/{2, 4, 1},
4634 /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3}},
4635 // Transpose Strided NHWC with VALID padding
4636 TestParams{/*input_dims=*/{3, 1, 1},
4637 /*input=*/{0, 1, 2},
4638 /*filter_dims=*/{2, 1, 1, 1},
4639 /*filter=*/{-1, 1},
4640 /*strides=*/{1, 2, 1, 1},
4641 /*padding=*/"VALID",
4642 /*data_format=*/"NHWC",
4643 /*dilations=*/{1, 1, 1, 1},
4644 /*expected_output_dims=*/{7, 1, 1},
4645 /*expected_output=*/{0, 0, -1, 1, -2, 2, 0}},
4646 };
4647
4648 for (int i = 0; i < ok_params.size(); i++) {
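// Run each case with both ways of specifying "input_sizes": the full 4D
// output shape (including the batch dimension) and only the 2 spatial
// dimensions.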
4649 for (int input_sizes_length : {2, 4}) {
4650 Reset();
4651 NodeDef node_def = get_conv2d_backprop_input_nodedef(
4652 ok_params[i].strides, ok_params[i].padding, ok_params[i].data_format,
4653 ok_params[i].dilations);
4654 AddTestTensor("input", ok_params[i].input_dims);
4655 AddTestWeights<float>("weights", ok_params[i].filter_dims,
4656 ok_params[i].filter);
4657
4658 std::vector<int> tf_input_sizes = ok_params[i].expected_output_dims;
4659 if (input_sizes_length == 4) {
4660 tf_input_sizes.insert(tf_input_sizes.begin(),
4661 1); // Add batch dimension.
4662 QCHECK_EQ(4, tf_input_sizes.size());
4663 AddTestWeights<int>("input_sizes", {4}, tf_input_sizes);
4664 } else {
4665 // Remove the channel dimension.
4666 if (ok_params[i].data_format == "NHWC") {
4667 tf_input_sizes.pop_back();
4668 } else {
4669 tf_input_sizes.erase(tf_input_sizes.begin());
4670 }
4671 QCHECK_EQ(2, tf_input_sizes.size());
4672 AddTestWeights<int>("input_sizes", {2}, tf_input_sizes);
4673 }
4674
4675 RunValidationAndConversion(node_def);
4676 TRT_TensorOrWeights output;
4677 TF_EXPECT_OK(GetTensorOrWeights("my_conv2d_backprop_input", &output));
4678 ASSERT_TRUE(output.is_tensor());
4679 ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
4680 output.tensor()->getDimensions());
4681
4682 const DataVec input_data{{"input", AsTensor<float>(ok_params[i].input)}};
4683 DataVec output_data{
4684 {"my_conv2d_backprop_input",
4685 ConstructTensor<float>(ok_params[i].expected_output.size())}};
4686 TF_EXPECT_OK(BuildAndRun(input_data, &output_data));
4687 EXPECT_THAT(GetSpanForData<float>(output_data[0]),
4688 ElementsAreArray(ok_params[i].expected_output));
4689 }
4690 }
4691 }
4692
4693 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
4694 TEST_F(OpConverterTest, ConvertConv3D) {
4695 // Get nodedef for Conv3D or Conv3DBackpropInputV2 layer.
4696 auto get_conv3d_nodedef =
4697 [](std::vector<int> strides = {1, 1, 1, 1, 1}, string padding = "SAME",
4698 string data_format = "NCDHW",
4699 std::vector<int> dilations = {1, 1, 1, 1, 1},
4700 bool is_conv3d_backprop_input = false) -> NodeDef {
4701 Scope s = Scope::NewRootScope();
4702 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
4703 auto filter = ops::Placeholder(s.WithOpName("weights"), DT_FLOAT);
4704
4705 if (is_conv3d_backprop_input) {
4706 auto input_sizes =
4707 ops::Placeholder(s.WithOpName("input_sizes"), DT_INT32);
4708 ops::Conv3DBackpropInputV2::Attrs attrs =
4709 ops::Conv3DBackpropInputV2::Attrs()
4710 .DataFormat(data_format)
4711 .Dilations(dilations);
4712 auto conv3d =
4713 ops::Conv3DBackpropInputV2(s.WithOpName("my_conv3d"), input_sizes,
4714 filter, input, strides, padding, attrs);
4715 return conv3d.operation.node()->def();
4716 } else {
4717 ops::Conv3D::Attrs attrs =
4718 ops::Conv3D::Attrs().DataFormat(data_format).Dilations(dilations);
4719 auto conv3d = ops::Conv3D(s.WithOpName("my_conv3d"), input, filter,
4720 strides, padding, attrs);
4721 return conv3d.operation.node()->def();
4722 }
4723 };
4724
4725 {
4726 // Input is weights, should fail.
4727 Reset();
4728 NodeDef node_def = get_conv3d_nodedef();
4729
4730 AddTestWeights<float>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
4731 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4732 RunValidationAndConversion(
4733 node_def, error::UNIMPLEMENTED,
4734 "The input \"input\" for Conv3D must be a tensor, at my_conv3d");
4735 }
4736 {
4737 // Filter is tensor, should fail.
4738 Reset();
4739 NodeDef node_def = get_conv3d_nodedef();
4740 AddTestTensor("input", {1, 2, 3});
4741 AddTestTensor("weights", {3, 3, 1, 1, 3, 3, 1, 1});
4742 RunValidationAndConversion(
4743 node_def, error::UNIMPLEMENTED,
4744 "The input \"filter\" for Conv3D must be a constant, at my_conv3d");
4745 }
4746 {
4747 // Filter is not 5D, should fail.
4748 Reset();
4749 NodeDef node_def = get_conv3d_nodedef();
4750 AddTestTensor("input", {1, 2, 3});
4751 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
4752 RunValidationAndConversion(
4753 node_def, error::INVALID_ARGUMENT,
4754 "Conv3D expects kernel of dimension 5, at my_conv3d");
4755 }
4756 {
4757 // Dilations is not 5D, should fail.
4758 Reset();
4759 NodeDef node_def =
4760 get_conv3d_nodedef({1, 1, 1, 1, 1}, "SAME", "NCDHW", {1, 1, 1, 1});
4761 AddTestTensor("input", {1, 2, 3});
4762 AddTestWeights<float>(
4763 "weights", {3, 3, 1, 1, 1},
4764 {1, 2, 3, 4, 5, 6, 7, 8, 9}); // Dimensions, then values
4765 RunValidationAndConversion(
4766 node_def, error::INVALID_ARGUMENT,
4767 "Convolution dilations field must specify 5 dimensions, at my_conv3d");
4768 }
4769 {
4770 // Dilation value is not 1 for channel, should fail.
4771 Reset();
4772 NodeDef node_def =
4773 get_conv3d_nodedef({1, 1, 1, 1, 1}, "SAME", "NCDHW", {1, 2, 1, 1, 1});
4774 AddTestTensor("input", {1, 2, 3});
4775 AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
4776 {1, 2, 3, 4, 5, 6, 7, 8, 9});
4777 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4778 "Dilation rate must be 1 for batch and channel "
4779 "dimensions, at my_conv3d");
4780 }
4781 {
4782 // Dilation value is not 1 for channel (NDHWC), should fail.
4783 Reset();
4784 NodeDef node_def =
4785 get_conv3d_nodedef({1, 1, 1, 1, 1}, "SAME", "NDHWC", {1, 1, 1, 1, 2});
4786 AddTestTensor("input", {2, 3, 1});
4787 AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
4788 {1, 2, 3, 4, 5, 6, 7, 8, 9});
4789 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4790 "Dilation rate must be 1 for batch and channel "
4791 "dimensions, at my_conv3d");
4792 }
4793 {
4794 // Dilation + Conv3DBackpropInputV2, should fail.
4795 Reset();
4796 NodeDef node_def = get_conv3d_nodedef({1, 1, 1, 1, 1}, "SAME", "NDHWC",
4797 {1, 1, 2, 1, 1}, true);
4798 AddTestTensor("input", {2, 3, 1});
4799 AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
4800 {1, 2, 3, 4, 5, 6, 7, 8, 9});
4801 AddTestWeights<int>("input_sizes", {4}, {1, 2, 3, 1});
4802 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4803 "Dilation with Conv3DBackpropInputV2 "
4804 "(conv3d_transpose) is not supported, "
4805 "at my_conv3d");
4806 }
4807 {
4808 // Asymmetric padding + Conv3DBackpropInputV2, should fail.
4809 Reset();
4810 NodeDef node_def = get_conv3d_nodedef({1, 1, 1, 1, 1}, "SAME", "NDHWC",
4811 {1, 1, 1, 1, 1}, true);
4812 AddTestTensor("input", {1, 2, 2, 2});
4813 AddTestWeights<float>("weights", {1, 1, 2, 1, 1}, {1, 1});
4814 AddTestWeights<int>("input_sizes", {8}, {1, 2, 3, 4, 5, 6, 7, 8});
4815 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4816 "Asymmetric padding with Conv3DBackpropInputV2 "
4817 "(conv3d_transpose) is not supported, at "
4818 "my_conv3d");
4819 }
4820 {
4821 // Strides is not 5D, should fail.
4822 Reset();
4823 NodeDef node_def = get_conv3d_nodedef({1, 1, 1, 1, 1, 1}, "SAME", "NCDHW",
4824 {1, 1, 1, 1, 1});
4825 AddTestTensor("input", {1, 2, 2, 2});
4826 AddTestWeights<float>("weights", {1, 1, 2, 1, 1}, {1, 1});
4827 RunValidationAndConversion(
4828 node_def, error::INVALID_ARGUMENT,
4829 "Convolution strides field must specify 5 dimensions, at my_conv3d");
4830 }
4831 {
4832 // Stride value is not 1 for channel, should fail.
4833 Reset();
4834 NodeDef node_def =
4835 get_conv3d_nodedef({1, 2, 1, 1, 1}, "SAME", "NCDHW", {1, 1, 1, 1, 1});
4836 AddTestTensor("input", {1, 2, 3});
4837 AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
4838 {1, 2, 3, 4, 5, 6, 7, 8, 9});
4839 RunValidationAndConversion(
4840 node_def, error::UNIMPLEMENTED,
4841 "Stride must be 1 for batch and channel dimensions, at my_conv3d");
4842 }
4843 struct TestParams {
4844 std::vector<int> input_dims;
4845 std::vector<float> input;
4846 std::vector<int> filter_dims;
4847 std::vector<float> filter;
4848 std::vector<int> strides;
4849 string padding;
4850 string data_format;
4851 std::vector<int> dilations;
4852 bool is_conv3d_backprop_input;
4853 std::vector<int> expected_output_dims;
4854 std::vector<float> expected_output;
4855 };
4856
4857 // Ok.
4858 std::vector<TestParams> ok_params = {
4859 // Basic - just 1x1 conv - input = output
4860 TestParams{
4861 /*input_dims=*/{1, 3, 3, 3}, // CDHW
4862 /*input=*/{1, 2, 15, 3, 6, -3, 22, 1, 88, 56, 36, 1, 1, 105,
4863 1, 16, -28, 1, 42, 9, 3, 1, 7, 1, 11, 61, 5},
4864 /*filter_dims=*/{1, 1, 1, 1, 1}, // DRSCK
4865 /*filter=*/{1},
4866 /*strides=*/{1, 1, 1, 1, 1},
4867 /*padding=*/"VALID",
4868 /*data_format=*/"NCDHW",
4869 /*dilations=*/{1, 1, 1, 1, 1},
4870 /*is_conv3d_backprop_input=*/false,
4871 /*expected_output_dims=*/{1, 3, 3, 3},
4872 /*expected_output=*/{1, 2, 15, 3, 6, -3, 22, 1, 88,
4873 56, 36, 1, 1, 105, 1, 16, -28, 1,
4874 42, 9, 3, 1, 7, 1, 11, 61, 5}},
4875 // Basic - 2x1 filter
4876 TestParams{/*input_dims=*/{1, 3, 3, 3}, // CDHW
4877 /*input=*/{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4878 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6},
4879 /*filter_dims=*/{2, 1, 1, 1, 1}, // DRSCK
4880 /*filter=*/{1, 1},
4881 /*strides=*/{1, 1, 1, 1, 1},
4882 /*padding=*/"VALID",
4883 /*data_format=*/"NCDHW",
4884 /*dilations=*/{1, 1, 1, 1, 1},
4885 /*is_conv3d_backprop_input=*/false,
4886 /*expected_output_dims=*/{1, 2, 3, 3},
4887 /*expected_output=*/
4888 {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7}},
4889 // SAME padding (Asymmetric)
4890 TestParams{
4891 /*input_dims=*/{1, 2, 3, 2}, // CDHW
4892 /*input=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
4893 /*filter_dims=*/{2, 1, 1, 1, 1}, // DRSCK
4894 /*filter=*/{-1, 1},
4895 /*strides=*/{1, 1, 1, 1, 1},
4896 /*padding=*/"SAME",
4897 /*data_format=*/"NCDHW",
4898 /*dilations=*/{1, 1, 1, 1, 1},
4899 /*is_conv3d_backprop_input=*/false,
4900 /*expected_output_dims=*/{1, 2, 3, 2},
4901 /*expected_output=*/
4902 {6, 6, 6, 6, 6, 6, -6, -7, -8, -9, -10,
4903 -11} // Diff in first 2 depths is const 6
4904 },
4905 // SAME padding (Symmetric)
4906 TestParams{
4907 /*input_dims=*/{1, 2, 3, 2}, // CDHW
4908 /*input=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
4909 /*filter_dims=*/{3, 1, 1, 1, 1}, // DRSCK
4910 /*filter=*/{-1, 0, 1},
4911 /*strides=*/{1, 1, 1, 1, 1},
4912 /*padding=*/"SAME",
4913 /*data_format=*/"NCDHW",
4914 /*dilations=*/{1, 1, 1, 1, 1},
4915 /*is_conv3d_backprop_input=*/false,
4916 /*expected_output_dims=*/{1, 2, 3, 2},
4917 /*expected_output=*/
4918 {6, 7, 8, 9, 10, 11, 0, -1, -2, -3, -4,
4919 -5} // Swaps front two depths, negates
4920 },
4921
4922 // NDHWC (multi-channel)
4923 TestParams{
4924 /*input_dims=*/{2, 3, 2, 2}, // DHWC
4925 /*input=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
4926 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
4927 /*filter_dims=*/{2, 1, 1, 2, 1}, // DRSCK
4928 /*filter=*/{-1, 1, 1, -1},
4929 /*strides=*/{1, 1, 1, 1, 1},
4930 /*padding=*/"VALID",
4931 /*data_format=*/"NDHWC",
4932 /*dilations=*/{1, 1, 1, 1, 1},
4933 /*is_conv3d_backprop_input=*/false,
4934 /*expected_output_dims=*/{1, 3, 2, 1},
4935 /*expected_output=*/{0, 0, 0, 0, 0, 0} // Each filter opposes the
4936 // other
4937 },
4938
4939 // Dilated
4940 TestParams{
4941 /*input_dims=*/{1, 3, 3, 3}, // CDHW
4942 /*input=*/{1, 1, 1, 1, 1, 1, 1, 1, 1, -10, -10, -10, -10, -10,
4943 -10, -10, -10, -10, 7, 7, 7, 7, 7, 7, 7, 7, 7},
4944 /*filter_dims=*/{2, 1, 1, 1, 1}, // DRSCK
4945 /*filter=*/{1, 1},
4946 /*strides=*/{1, 1, 1, 1, 1},
4947 /*padding=*/"VALID",
4948 /*data_format=*/"NCDHW",
4949 /*dilations=*/{1, 1, 2, 1, 1},
4950 /*is_conv3d_backprop_input=*/false,
4951 /*expected_output_dims=*/{1, 1, 3, 3},
4952 /*expected_output=*/{8, 8, 8, 8, 8, 8, 8, 8, 8} // Only front depth
4953 // is valid, skips
4954 // neg values
4955 },
4956 // Strided
4957 TestParams{
4958 /*input_dims=*/{1, 3, 3, 3},
4959 /*input=*/{1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0,
4960 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 7, 0, 8},
4961 /*filter_dims=*/{1, 1, 1, 1, 1},
4962 /*filter=*/{1},
4963 /*strides=*/{1, 1, 2, 2, 2},
4964 /*padding=*/"VALID",
4965 /*data_format=*/"NCDHW",
4966 /*dilations=*/{1, 1, 1, 1, 1},
4967 /*is_conv3d_backprop_input=*/false,
4968 /*expected_output_dims=*/{1, 2, 2, 2},
4969 /*expected_output=*/{1, 2, 3, 4, 5, 6, 7, 8} // Should only pick up
4970 // the corners
4971 },
4972 // Transpose Strided
4973 TestParams{/*input_dims=*/{1, 2, 2, 2}, // CDHW
4974 /*input=*/{1, 2, 3, 4, 5, 6, 7, 8},
4975 /*filter_dims=*/{1, 1, 1, 1, 1},
4976 /*filter=*/{1},
4977 /*strides=*/{1, 1, 2, 2, 2},
4978 /*padding=*/"VALID",
4979 /*data_format=*/"NCDHW",
4980 /*dilations=*/{1, 1, 1, 1, 1},
4981 /*is_conv3d_backprop_input=*/true,
4982 /*expected_output_dims=*/{1, 3, 3, 3},
4983 /*expected_output=*/
4984 {1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0,
4985 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 7, 0, 8}}, // Cube
4986 // expands and
4987 // fills
4988 // center with
4989 // zeroes
4990
4991 };
4992
4993 for (int i = 0; i < ok_params.size(); i++) {
4994 Reset();
4995 NodeDef node_def = get_conv3d_nodedef(
4996 ok_params[i].strides, ok_params[i].padding, ok_params[i].data_format,
4997 ok_params[i].dilations, ok_params[i].is_conv3d_backprop_input);
4998 AddTestTensor("input", ok_params[i].input_dims);
4999 AddTestWeights<float>("weights", ok_params[i].filter_dims,
5000 ok_params[i].filter);
5001 if (ok_params[i].is_conv3d_backprop_input) {
5002 AddTestWeights<float>(
5003 "input_sizes",
5004 {static_cast<int>(ok_params[i].expected_output.size())},
5005 ok_params[i].expected_output);
5006 }
5007 RunValidationAndConversion(node_def);
5008 TRT_TensorOrWeights output;
5009 TF_EXPECT_OK(GetTensorOrWeights("my_conv3d", &output));
5010 ASSERT_TRUE(output.is_tensor());
5011 ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
5012 output.tensor()->getDimensions());
5013
5014 const DataVec input_data{{"input", AsTensor<float>(ok_params[i].input)}};
5015 DataVec output_data{
5016 {"my_conv3d",
5017 ConstructTensor<float>(ok_params[i].expected_output.size())}};
5018 TF_EXPECT_OK(BuildAndRun(input_data, &output_data));
5019 EXPECT_THAT(GetSpanForData<float>(output_data[0]),
5020 ElementsAreArray(ok_params[i].expected_output));
5021 }
5022 }
5023 #endif
5024
5025 template <typename T>
5026 NodeDef CreatePoolOp(DataType tf_type, std::vector<int> ksize,
5027 std::vector<int> strides, string padding,
5028 string data_format) {
5029 Scope s = Scope::NewRootScope();
5030 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
5031 typename T::Attrs attrs;
5032 attrs.data_format_ = data_format;
5033 return T(s.WithOpName("my_pool"), input, ksize, strides, padding, attrs)
5034 .operation.node()
5035 ->def();
5036 }
5037 TEST_P(OpConverter_FP32_Test, ConvertPool) {
5038 // Get nodedef for MaxPool and AvgPool layers (2D or 3D).
5039 auto get_pool_nodedef =
5040 [](DataType tf_type, int nDim, std::vector<int> ksize = {},
5041 std::vector<int> strides = {}, string padding = "SAME",
5042 string data_format = "", const bool is_max_pooling = true) -> NodeDef {
5043 if (ksize.empty()) {
5044 ksize = nDim == 2 ? std::vector<int>{1, 1, 1, 1}
5045 : std::vector<int>{1, 1, 1, 1, 1};
5046 }
5047 if (strides.empty()) {
5048 strides = nDim == 2 ? std::vector<int>{1, 1, 1, 1}
5049 : std::vector<int>{1, 1, 1, 1, 1};
5050 }
5051 if (data_format == "") {
5052 data_format = nDim == 2 ? "NCHW" : "NCDHW";
5053 }
5054 if (is_max_pooling) {
5055 if (nDim == 3) {
5056 return CreatePoolOp<ops::MaxPool3D>(tf_type, ksize, strides, padding,
5057 data_format);
5058 } else {
5059 return CreatePoolOp<ops::MaxPool>(tf_type, ksize, strides, padding,
5060 data_format);
5061 }
5062 } else {
5063 if (nDim == 3) {
5064 return CreatePoolOp<ops::AvgPool3D>(tf_type, ksize, strides, padding,
5065 data_format);
5066 } else {
5067 return CreatePoolOp<ops::AvgPool>(tf_type, ksize, strides, padding,
5068 data_format);
5069 }
5070 }
5071 };
5072
5073 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
5074 std::vector<int> test_nDims{2, 3};
5075 #else
5076 std::vector<int> test_nDims{2};
5077 #endif
5078
5079 for (int nDim : test_nDims) {
5080 // Input is weights, should fail.
5081 Reset();
5082 NodeDef node_def = get_pool_nodedef(tf_type_, nDim);
5083
5084 AddTestWeights<float>("input", {1, 1, 1, 2, 3}, {1, 2, 3, 4, 5, 6});
5085 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
5086 StrCat("The input \"input\" for ", node_def.op(),
5087 " must be a tensor, at my_pool")
5088 .c_str());
5089 }
5090
5091 struct TestParams {
5092 std::vector<int> input_dims;
5093 std::vector<float> input;
5094 std::vector<int> ksize;
5095 std::vector<int> strides;
5096 string padding;
5097 string data_format;
5098 std::vector<int> expected_output_dims;
5099 // The expected outputs for the following operations: MaxPool2D, AvgPool2D,
5100 // MaxPool3D, AvgPool3D
5101 std::vector<std::vector<float>> expected_outputs;
5102 };
5103
5104 // We use common_input as the input to test both 2D and 3D pooling operations,
5105 // to simplify TestParams. For 2D operations, only the first 1/3 of the values
5106 // are used.
5107 const std::vector<float> common_input{-4, 2, 15, 3, 6, -3, 22, 1, 88,
5108 56, 36, 1, 1, 105, 1, 16, -28, 1,
5109 42, 9, 3, 1, 7, 1, 11, 61, 5};
5110 // The output of 2D ops for the case where the op is equivalent to the
5111 // identity op.
5112 const std::vector<float> common_2d_output{-4, 2, 15, 3, 6, -3, 22, 1, 88};
5113 std::vector<TestParams> ok_params = {
5114 // Basic - just 1x1 max pooling - input = output
5115 TestParams{
5116 /*input_dims=*/{1, 1, 3, 3, 3},
5117 /*input=*/common_input,
5118 /*ksize=*/{1, 1, 1, 1, 1},
5119 /*strides=*/{1, 1, 1, 1, 1},
5120 /*padding=*/"VALID",
5121 /*data_format=*/"NCDHW",
5122 /*expected_output_dims=*/{1, 1, 3, 3, 3},
5123 /*expected_outputs=*/
5124 {common_2d_output, common_2d_output, common_input, common_input}},
5125 // Basic - just 1x1 max pooling - input = output, SAME padding
5126 TestParams{
5127 /*input_dims=*/{1, 1, 3, 3, 3},
5128 /*input=*/common_input,
5129 /*ksize=*/{1, 1, 1, 1, 1},
5130 /*strides=*/{1, 1, 1, 1, 1},
5131 /*padding=*/"SAME",
5132 /*data_format=*/"NCDHW",
5133 /*expected_output_dims=*/{1, 1, 3, 3, 3},
5134 /*expected_outputs=*/
5135 {common_2d_output, common_2d_output, common_input, common_input}},
5136 // 3x3 pooling NCDHW
5137 TestParams{/*input_dims=*/{1, 1, 3, 3, 3},
5138 /*input=*/common_input,
5139 /*ksize=*/{1, 1, 3, 3, 3},
5140 /*strides=*/{1, 1, 1, 1, 1},
5141 /*padding=*/"VALID",
5142 /*data_format=*/"NCDHW",
5143 /*expected_output_dims=*/{1, 1, 1, 1, 1},
5144 /*expected_outputs=*/{{88}, {14.444445}, {105}, {17}}},
5145 // 3x3 pooling, NDHWC
5146 TestParams{/*input_dims=*/{1, 3, 3, 3, 1},
5147 /*input=*/common_input,
5148 /*ksize=*/{1, 3, 3, 3, 1},
5149 /*strides=*/{1, 1, 1, 1, 1},
5150 /*padding=*/"VALID",
5151 /*data_format=*/"NDHWC",
5152 /*expected_output_dims=*/{1, 1, 1, 1, 1},
5153 /*expected_outputs=*/{{88}, {14.444445}, {105}, {17}}},
5154 // Strided
5155 TestParams{/*input_dims=*/{1, 1, 3, 3, 3},
5156 /*input=*/{1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0,
5157 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 7, 0, 8},
5158 /*ksize=*/{1, 1, 1, 1, 1},
5159 /*strides=*/{1, 1, 2, 2, 2},
5160 /*padding=*/"VALID",
5161 /*data_format=*/"NCDHW",
5162 /*expected_output_dims=*/{1, 1, 2, 2, 2},
5163 /*expected_outputs=*/
5164 {{1, 2, 3, 4}, // Should only pick up the corners
5165 {1, 2, 3, 4},
5166 {1, 2, 3, 4, 5, 6, 7, 8},
5167 {1, 2, 3, 4, 5, 6, 7, 8}}},
5168 };
5169
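// Each entry above describes the 3D pooling case; for the 2D variants the
// loop below drops one spatial dimension (and the matching ksize/stride
// entries) and uses only the first 9 input values.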
5170 for (auto p : ok_params) {
5171 int test_counter = 0;
5172 for (int nDim : test_nDims) {
5173 auto input = p.input;
5174 auto input_dims = p.input_dims;
5175 auto ksize = p.ksize;
5176 auto strides = p.strides;
5177 auto expected_output_dims = p.expected_output_dims;
5178 std::string data_format = p.data_format;
5179 if (nDim == 2) {
5180 input.resize(9);
5181 data_format = p.data_format == "NDHWC" ? "NHWC" : "NCHW";
5182 // Remove one of the spatial dimensions
5183 input_dims.erase(input_dims.begin() + 2);
5184 ksize.erase(ksize.begin() + 2);
5185 strides.erase(strides.begin() + 2);
5186 expected_output_dims.erase(expected_output_dims.begin() + 2);
5187 }
5188 for (bool is_max_pooling : {true, false}) {
5189 Reset();
5190 NodeDef node_def =
5191 get_pool_nodedef(tf_type_, nDim, ksize, strides, p.padding,
5192 data_format, is_max_pooling);
5193 AddTestTensor("input", input_dims, input);
5194 TestOpConverter("my_pool", node_def, expected_output_dims, Status::OK(),
5195 Status::OK(),
5196 ElementsAreArray(p.expected_outputs.at(test_counter)));
5197 test_counter++;
5198 }
5199 }
5200 }
5201 }
5202
5203 TEST_P(OpConverter_FP32_FP16_Test, ConvertTopK) {
5204 // Get the NodeDef for TopKV2.
5205 Scope s = Scope::NewRootScope();
5206 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
5207 auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
5208 auto topk = ops::TopK(s.WithOpName("my_topk"), input, weights);
5209 const NodeDef& node_def = topk.operation.node()->def();
5210 {
5211 // K is a tensor, should fail.
5212 Reset();
5213 AddTestTensor("input", {1, 1, 2, 3});
5214 AddTestTensor("weights", {1}, DT_INT32, {});
5215 RunValidationAndConversion(
5216 node_def, error::UNIMPLEMENTED,
5217 "The input \"k\" for TopKV2 must be a constant, at my_topk");
5218 }
5219 {
5220 // Ok.
5221 Reset();
5222 AddTestTensor("input", {1, 1, 2, 5}, {-9, 3, 5, 1, 6, -5, 7, 1, 0, -1});
5223 AddTestWeights<int32>("weights", {1}, {2});
5224 std::vector<std::vector<int>> expected_output_dims{{1, 1, 2, 2},
5225 {1, 1, 2, 2}};
5226 TestOpConverterMultiOut("my_topk", node_def, expected_output_dims,
5227 Status::OK(), Status::OK(),
5228 {ElementsAre(6, 5, 7, 1), ElementsAre(4, 2, 1, 2)},
5229 {tf_type_, DT_INT32});
5230 }
5231 }
5232
5233 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertGather) {
5234 // Get the NodeDef for GatherV2.
5235 Scope s = Scope::NewRootScope();
5236 auto params = ops::Placeholder(s.WithOpName("params"), tf_type_);
5237 auto indices = ops::Placeholder(s.WithOpName("indices"), DT_INT32);
5238 auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
5239 auto gather = ops::GatherV2(s.WithOpName("my_gather"), params, indices, axis);
5240 const NodeDef& node_def = gather.operation.node()->def();
5241 {
5242 // Axis is a tensor, should fail.
5243 Reset();
5244 AddTestTensor("params", {1, 1, 2, 3}, tf_type_, {});
5245 AddTestTensor("indices", {1, 2}, DT_INT32, {});
5246 AddTestTensor("axis", {1}, DT_INT32, {});
5247 RunValidationAndConversion(
5248 node_def, error::UNIMPLEMENTED,
5249 "The input \"axis\" for GatherV2 must be a constant, at my_gather");
5250 }
5251 {
5252 // Axis is out of bounds, should fail.
5253 Reset();
5254 AddTestTensor("params", {1, 1, 2, 3});
5255 AddTestTensor("indices", {1, 2}, DT_INT32, {});
5256 AddTestWeights<int32>("axis", {1}, {4});
5257 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
5258 "Axis value of 4 is out of bounds, must be in "
5259 "range [-4, 4), at my_gather");
5260 }
5261
5262 struct TestParams {
5263 // TF shape of the input 'params' (including batch dimension).
5264 std::vector<int> params_shape;
5265 // TF shape of the input 'indices' (including batch dimension).
5266 std::vector<int> indices_shape;
5267 std::vector<int> indices;
5268 int axis;
5269 // Expected TF shape of the output (including batch dimension).
5270 std::vector<int> expected_output_shape;
5271 std::vector<int> expected_output;
5272 bool params_is_tensor;
5273 Status status;
5274 Status runtime_status;
5275 Status add_index_status;
5276 };
5277
5278 // Input is the same {1, 2, 3, 4, 5, 6} for all cases.
5279 const std::vector<int> params_input = {1, 2, 3, 4, 5, 6};
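// For GatherV2 the output shape is
// params.shape[:axis] + indices.shape + params.shape[axis+1:],
// i.e. the output rank is rank(params) + rank(indices) - 1.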
5280 std::vector<TestParams> test_params = {
5281 // Axis is batch dimension, should fail in implicit batch mode.
5282 TestParams{/*params_shape=*/{2, 1, 1, 3},
5283 /*indices_shape=*/{2},
5284 /*indices=*/{1, 0},
5285 /*axis=*/0,
5286 /*expected_output_shape=*/{2, 1, 1, 3},
5287 /*expected_output=*/{4, 5, 6, 1, 2, 3},
5288 /*params_is_tensor=*/true,
5289 trt_mode_ == TrtTestMode::kImplicitBatch
5290 ? Status{error::UNIMPLEMENTED,
5291 "TensorRT does not allow manipulation of the"
5292 " batch dimension, at my_gather"}
5293 : Status::OK()},
5294 // Batch size of indices is not 1 when params is a tensor.
5295 TestParams{/*params_shape=*/{2, 1, 3},
5296 /*indices_shape=*/{2, 1},
5297 /*indices=*/{2, 0},
5298 /*axis=*/2,
5299 /*expected_output_shape=*/{2, 1, 2, 1},
5300 /*expected_output=*/{3, 1, 6, 4},
5301 /*params_is_tensor=*/true,
5302 trt_mode_ == TrtTestMode::kImplicitBatch
5303 ? Status{error::UNIMPLEMENTED,
5304 "Indices must have a batch size of 1 when params"
5305 " is a tensor."}
5306 : Status::OK()},
5307 // Axis is not zero when params is a weight, should fail in implicit batch
5308 // mode.
5309 TestParams{/*params_shape=*/{2, 1, 3},
5310 /*indices_shape=*/{2},
5311 /*indices=*/{1, 2},
5312 /*axis=*/2,
5313 /*expected_output_shape=*/{2, 1, 2},
5314 /*expected_output=*/{2, 3, 5, 6},
5315 /*params_is_tensor=*/false,
5316 trt_mode_ == TrtTestMode::kImplicitBatch
5317 ? Status{error::UNIMPLEMENTED,
5318 "The input axis must be zero when params is a"
5319 " weight."}
5320 : Status::OK()},
5321 // Params with only batch dimension.
5322 TestParams{/*params_shape=*/{6},
5323 /*indices_shape=*/{2},
5324 /*indices=*/{1, 3},
5325 /*axis=*/0,
5326 /*expected_output_shape=*/{2},
5327 /*expected_output=*/{2, 4},
5328 /*params_is_tensor=*/true,
5329 trt_mode_ == TrtTestMode::kImplicitBatch // conversion_status
5330 ? Status{error::UNIMPLEMENTED,
5331 "TensorRT does not allow manipulation of the "
5332 "batch dimension, at my_gather"}
5333 : Status::OK(),
5334 Status::OK(), // runtime_status
5335 trt_mode_ == TrtTestMode::kImplicitBatch // add_index_status
5336 ? Status{error::INVALID_ARGUMENT,
5337 "Batch size doesn't match for tensor indices: "
5338 "Provided batch size does not match converter "
5339 "batch size: 2 vs 6"}
5340 : Status::OK()},
5341 // Vector indices, and output rank is rank(params).
5342 TestParams{
5343 /*params_shape=*/{1, 1, 2, 3},
5344 /*indices_shape=*/{1},
5345 /*indices=*/{0},
5346 /*axis=*/3,
5347 /*expected_output_shape=*/{1, 1, 2, 1},
5348 /*expected_output=*/{1, 4},
5349 /*params_is_tensor=*/true,
5350 },
5351 TestParams{
5352 /*params_shape=*/{1, 1, 2, 3},
5353 /*indices_shape=*/{1},
5354 /*indices=*/{1},
5355 /*axis=*/2,
5356 /*expected_output_shape=*/{1, 1, 1, 3},
5357 /*expected_output=*/{4, 5, 6},
5358 /*params_is_tensor=*/true,
5359 },
5360 // Indices with rank>1, and output rank is rank(params) + rank(indices) -
5361 // 1
5362 TestParams{
5363 /*params_shape=*/{1, 1, 2, 3},
5364 /*indices_shape=*/{1, 1},
5365 /*indices=*/{0},
5366 /*axis=*/3,
5367 /*expected_output_shape=*/{1, 1, 2, 1, 1},
5368 /*expected_output=*/{1, 4},
5369 /*params_is_tensor=*/true,
5370 },
5371 TestParams{
5372 /*params_shape=*/{1, 1, 2, 3},
5373 /*indices_shape=*/{1, 1},
5374 /*indices=*/{1},
5375 /*axis=*/3,
5376 /*expected_output_shape=*/{1, 1, 2, 1, 1},
5377 /*expected_output=*/{2, 5},
5378 /*params_is_tensor=*/true,
5379 },
5380 TestParams{
5381 /*params_shape=*/{1, 1, 2, 3},
5382 /*indices_shape=*/{1, 1},
5383 /*indices=*/{2},
5384 /*axis=*/-1,
5385 /*expected_output_shape=*/{1, 1, 2, 1, 1},
5386 /*expected_output=*/{3, 6},
5387 /*params_is_tensor=*/true,
5388 },
5389 TestParams{
5390 /*params_shape=*/{1, 1, 2, 3},
5391 /*indices_shape=*/{1, 3},
5392 /*indices=*/{2, 0, 1},
5393 /*axis=*/3,
5394 /*expected_output_shape=*/{1, 1, 2, 1, 3},
5395 /*expected_output=*/{3, 1, 2, 6, 4, 5},
5396 /*params_is_tensor=*/true,
5397 },
5398 TestParams{
5399 /*params_shape=*/{1, 3, 2},
5400 /*indices_shape=*/{1, 2, 2},
5401 /*indices=*/{0, 0, 1, 0},
5402 /*axis=*/2,
5403 /*expected_output_shape=*/{1, 3, 1, 2, 2},
5404 /*expected_output=*/{1, 1, 2, 1, 3, 3, 4, 3, 5, 5, 6, 5},
5405 /*params_is_tensor=*/true,
5406 },
5407 TestParams{
5408 /*params_shape=*/{1, 2, 3},
5409 /*indices_shape=*/{1},
5410 /*indices=*/{0},
5411 /*axis=*/0,
5412 /*expected_output_shape=*/{1, 2, 3},
5413 /*expected_output=*/{1, 2, 3, 4, 5, 6},
5414 /*params_is_tensor=*/false,
5415 },
5416 TestParams{
5417 /*params_shape=*/{3, 2},
5418 /*indices_shape=*/{1, 2},
5419 /*indices=*/{0, 1},
5420 /*axis=*/0,
5421 /*expected_output_shape=*/{1, 2, 2},
5422 /*expected_output=*/{1, 2, 3, 4},
5423 /*params_is_tensor=*/false,
5424 },
5425 TestParams{
5426 /*params_shape=*/{2, 3},
5427 /*indices_shape=*/{1, 1, 2},
5428 /*indices=*/{0, 1},
5429 /*axis=*/0,
5430 /*expected_output_shape=*/{1, 1, 2, 3},
5431 /*expected_output=*/{1, 2, 3, 4, 5, 6},
5432 /*params_is_tensor=*/false,
5433 },
5434 TestParams{
5435 /*params_shape=*/{3, 2},
5436 /*indices_shape=*/{2, 2},
5437 /*indices=*/{0, 2, 1, 0},
5438 /*axis=*/0,
5439 /*expected_output_shape=*/{2, 2, 2},
5440 /*expected_output=*/{1, 2, 5, 6, 3, 4, 1, 2},
5441 /*params_is_tensor=*/false,
5442 },
5443 };
5444
5445 for (auto p : test_params) {
5446 Reset();
5447 if (p.params_is_tensor) {
5448 AddTestTensor("params", p.params_shape, params_input);
5449 } else {
5450 AddTestWeights("params", p.params_shape, params_input, tf_type_);
5451 }
5452 AddTestTensor("indices", p.indices_shape, DT_INT32, p.indices, {},
5453 p.add_index_status);
5454 AddTestWeights<int32>("axis", {1}, {p.axis});
5455 TestOpConverter("my_gather", node_def, p.expected_output_shape, p.status,
5456 p.runtime_status, ElementsAreArray(p.expected_output));
5457 }
5458 }
5459
5460 template <typename OpType>
5461 NodeDef CreateReduceOp(DataType tf_type, bool keep_dims) {
5462 Scope s = Scope::NewRootScope();
5463 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
5464 auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
5465 typename OpType::Attrs op_attrs;
5466 op_attrs.keep_dims_ = keep_dims;
5467 auto op = OpType(s.WithOpName("my_reduce"), input, axis, op_attrs);
5468 return op.operation.node()->def();
5469 }
5470
5471 // Applies the reduction op on sub-sequences of the input:
5472 // output[i] = reduce(input[m * i : m * (i + 1)])
5473 std::vector<float> CalcReduce(string op_name, std::vector<float> input, int m,
5474 float (*op)(float, float), float init) {
5475 std::vector<float> output(input.size() / m);
5476 for (int i = 0; i < output.size(); i++) {
5477 auto begin = input.begin() + i * m;
5478 auto end = input.begin() + (i + 1) * m;
5479 output[i] = std::accumulate(begin, end, init, op);
5480 if (op_name == "Mean") {
5481 output[i] /= m;
5482 }
5483 }
5484 return output;
5485 }
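// For example, CalcReduce("Sum", {1, 2, 3, 4, 5, 6}, /*m=*/3,
// [](float x, float y) { return x + y; }, /*init=*/0) returns {6, 15}.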
5486 TEST_P(OpConverter_FP32_Test, ConvertReduce) {
5487 {
5488 // Input is weights, should fail.
5489 Reset();
5490 const NodeDef node_def = CreateReduceOp<ops::Sum>(tf_type_, false);
5491 AddTestWeights<float>("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
5492 AddTestWeights<int32>("axis", {1}, {1});
5493 RunValidationAndConversion(
5494 node_def, error::UNIMPLEMENTED,
5495 "The input \"input\" for Sum must be a tensor, at my_reduce");
5496 }
5497 {
5498 // Axis is weights, should fail.
5499 Reset();
5500 const NodeDef node_def = CreateReduceOp<ops::Sum>(tf_type_, false);
5501 AddTestTensor("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
5502 AddTestTensor("axis", {1}, DT_INT32, {1});
5503 RunValidationAndConversion(
5504 node_def, error::UNIMPLEMENTED,
5505 "The input \"axis\" for Sum must be a constant, at my_reduce");
5506 }
5507 using OpFunc = std::function<NodeDef(DataType, bool)>;
5508 using ValFunc = float (*)(float, float);
5509 struct ReduceTestDescriptor {
5510 string name;
5511 OpFunc get_node;
5512 ValFunc val_func;
5513 float init_val;
5514 };
5515 std::vector<ReduceTestDescriptor> op_test_info{
5516 {"Sum", CreateReduceOp<ops::Sum>, [](float x, float y) { return x + y; },
5517 0},
5518 {"Prod", CreateReduceOp<ops::Prod>,
5519 [](float x, float y) { return x * y; }, 1},
5520 {"Mean", CreateReduceOp<ops::Mean>,
5521 [](float x, float y) { return x + y; }, 0},
5522 {"Min", CreateReduceOp<ops::Min>,
5523 [](float x, float y) { return y < x ? y : x; }, 1000},
5524 {"Max", CreateReduceOp<ops::Max>,
5525 [](float x, float y) { return x < y ? y : x; }, -1000}};
5526
5527 std::vector<float> input_values{1, 2, 3, 4, 5, 6};
5528 struct TestParams {
5529 std::vector<int> input_dims;
5530 std::vector<float> input_values;
5531 // Helper array contains the same elements as input but permuted in a way
5532 // that the reduction can be calculated over contiguous elements using
5533 // CalcReduce
5534 std::vector<float> helper_array;
5535 std::vector<int> axis;
5536 int stride; // product of input_dims along axis
5537 Status conversion_status;
5538 };
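// Example: with input_dims {2, 3, 1} and axis {0}, the reduction combines
// the element pairs {1,4}, {2,5} and {3,6}; the helper array
// {1, 4, 2, 5, 3, 6} lists these pairs contiguously so CalcReduce with
// stride 2 computes the expected values.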
5539 std::vector<TestParams> params{
5540 // Out of range tests
5541 TestParams{{2, 3, 1}, input_values, input_values, {3}, 3},
5542 TestParams{{2, 3, 1}, input_values, input_values, {-4}, 3},
5543 // Ok tests
5544 TestParams{{2, 3, 1}, input_values, {1, 4, 2, 5, 3, 6}, {0}, 2},
5545 TestParams{{2, 3, 1}, input_values, input_values, {1}, 3},
5546 TestParams{{2, 3, 1}, input_values, input_values, {2}, 1},
5547 TestParams{{2, 3, 1}, input_values, input_values, {0, 1}, 6},
5548 // Ok tests with negative axis values
5549 TestParams{{2, 3, 1}, input_values, {1, 4, 2, 5, 3, 6}, {-3}, 2},
5550 TestParams{{2, 3, 1}, input_values, input_values, {-2}, 3},
5551 TestParams{{2, 3, 1}, input_values, input_values, {-1}, 1},
5552 TestParams{{2, 3, 1}, input_values, input_values, {-3, 1}, 6},
5553 };
5554
5555 for (bool keep_dims : {false, true}) {
5556 for (auto& op : op_test_info) {
5557 for (auto p : params) {
5558 SCOPED_TRACE(StrCat(op.name, keep_dims ? "keep_dims" : ""));
5559 Reset();
5560 NodeDef node_def = op.get_node(tf_type_, keep_dims);
5561
5562 AddTestTensor("input", p.input_dims, p.input_values);
5563 AddTestWeights<int32>("axis", {static_cast<int>(p.axis.size())},
5564 p.axis);
5565 std::vector<int> expected_output_dims(p.input_dims);
5566
5567 // Set expected output dim and conversion error messages
5568 for (int ax : p.axis) {
5569 int rank = p.input_dims.size();
5570 if (ax >= rank || ax < -rank) {
5571 p.conversion_status =
5572 errors::InvalidArgument("Axis value of ", ax,
5573 " is out of bounds, must be in "
5574 "range [",
5575 -rank, ", ", rank, "), at my_reduce");
5576 } else {
5577 int ax_positive = ax >= 0 ? ax : ax + rank;
5578 // Zero marks elements that we will remove later.
5579 expected_output_dims[ax_positive] = keep_dims ? 1 : 0;
5580 if (trt_mode_ == TrtTestMode::kImplicitBatch &&
5581 (ax == 0 || ax == -rank)) {
5582 p.conversion_status = errors::Unimplemented(
5583 "TensorRT does not allow manipulation of the batch "
5584 "dimension, at my_reduce");
5585 }
5586 }
5587 }
5588 expected_output_dims.erase(std::remove(expected_output_dims.begin(),
5589 expected_output_dims.end(), 0),
5590 expected_output_dims.end());
5591 VLOG(2) << "out dims "
5592 << absl::StrCat("[", absl::StrJoin(expected_output_dims, ","),
5593 "]");
5594 std::vector<float> expected_values = CalcReduce(
5595 op.name, p.helper_array, p.stride, op.val_func, op.init_val);
5596 TestOpConverter("my_reduce", node_def, expected_output_dims,
5597 p.conversion_status, Status::OK(),
5598 ArrayFloatNear(expected_values));
5599 }
5600 }
5601 }
5602 }
5603
5604 NodeDef CreateCastOp(DataType tf_type) {
5605 Scope s = Scope::NewRootScope();
5606 auto input = ops::Placeholder(s.WithOpName("input"), DT_HALF);
5607 return ops::Cast(s.WithOpName("my_unary"), input, DT_FLOAT)
5608 .operation.node()
5609 ->def();
5610 }
5611
5612 TEST_P(OpConverter_FP32_Test, ConvertUnary) {
5613 {
5614 // Input is weights, should fail.
5615 Reset();
5616 const NodeDef node_def = CreateUnaryOp<ops::Neg>(tf_type_);
5617 AddTestWeights<float>("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
5618 RunValidationAndConversion(
5619 node_def, error::UNIMPLEMENTED,
5620 "The input \"x\" for Neg must be a tensor, at my_unary");
5621 }
5622 using OpFunc = std::function<NodeDef(DataType)>;
5623 using ValFunc = float (*)(float);
5624 std::map<std::string, std::pair<OpFunc, ValFunc>> op_map;
5625 #define ADD_OP(name, op, compute) \
5626 op_map[name] = \
5627 std::make_pair(CreateUnaryOp<op>, static_cast<ValFunc>(compute))
5628 ADD_OP("Abs", ops::Abs, std::abs);
5629 ADD_OP("Acos", ops::Acos, std::acos);
5630 ADD_OP("Acosh", ops::Acosh, std::acosh);
5631 ADD_OP("Asin", ops::Asin, std::asin);
5632 ADD_OP("Asinh", ops::Asinh, std::asinh);
5633 ADD_OP("Atan", ops::Atan, std::atan);
5634 ADD_OP("Atanh", ops::Atanh, std::atanh);
5635 op_map["Cast"] = std::make_pair(CreateCastOp, [](float x) { return x; });
5636 ADD_OP("Ceil", ops::Ceil, std::ceil);
5637 ADD_OP("Cos", ops::Cos, std::cos);
5638 ADD_OP("Cosh", ops::Cosh, std::cosh);
5639 ADD_OP("Exp", ops::Exp, std::exp);
5640 ADD_OP("Floor", ops::Floor, std::floor);
5641 ADD_OP("Log", ops::Log, std::log);
5642 ADD_OP("Neg", ops::Neg, [](float x) { return -x; });
5643 ADD_OP("Reciprocal", ops::Reciprocal, [](float x) { return 1.0f / x; });
5644 ADD_OP("Rsqrt", ops::Rsqrt, [](float x) { return 1.0f / std::sqrt(x); });
5645 ADD_OP("Sin", ops::Sin, std::sin);
5646 ADD_OP("Sinh", ops::Sinh, std::sinh);
5647 ADD_OP("Sqrt", ops::Sqrt, std::sqrt);
5648 ADD_OP("Tan", ops::Tan, std::tan);
5649 #undef ADD_OP
5650 // Get list of ops to test.
5651 std::vector<string> ops_to_test;
5652 // Add all ops supported by ConvertUnary.
5653 auto* map = UnaryOperationMap();
5654 ops_to_test.reserve(map->size());
5655 for (auto& pair : *map) {
5656 ops_to_test.push_back(pair.first);
5657 }
5658 // Add other unary ops to test.
5659 ops_to_test.push_back("Rsqrt");
5660 // Prepare test parameters
5661 auto p = TestParamBase{
5662 {1, 1, 2, 3}, // input dims
5663 {}, // input partial dims
5664 {1, 1, 2, 3}, // expected output dims
5665 };
5666 for (const string& op_name : ops_to_test) {
5667 SCOPED_TRACE(op_name);
5668 Reset();
5669 if (!op_map.count(op_name)) {
5670 FAIL() << "Unary op test map does not contain op " << op_name;
5671 }
5672 NodeDef node_def = op_map[op_name].first(tf_type_);
5673
5674 // TODO(bixia): we assume this test is only instantiated for DT_FLOAT for
5675 // now. Need to find a better way to express input and output types.
5676 //
5677 // TODO(tfeher): improve tests by defining an expected output data type and
5678 // check that. Currently only the shape and values of the output are
5679 // checked.
5680 DataType input_tf_type = op_name == "Cast" ? DT_HALF : tf_type_;
5681
5682 std::vector<float> input_values{-0.9f, 0.6f, 0.0f, -3.5f, 100.0f, 2.9f};
5683 AddTestTensor("input", p.input_dims, input_tf_type, input_values);
5684 std::vector<float> output;
5685 std::transform(input_values.begin(), input_values.end(),
5686 std::back_inserter(output), op_map[op_name].second);
5687 TestOpConverter("my_unary", node_def, p.expected_output_dims, Status::OK(),
5688 p.runtime_status, ArrayFloatNear(output, 0.0001, true));
5689 }
5690 }
5691
5692 // Get the NodeDef for ConcatV2.
5693 // TODO(hinsu): Consider switching this to a static function.
5694 auto get_concat_nodedef = [](DataType dtype, int num_inputs) -> NodeDef {
5695 Scope s = Scope::NewRootScope();
5696 std::vector<Input> values;
5697 for (int i = 0; i < num_inputs; ++i) {
5698 const string input_name = StrCat("values_", i);
5699 values.push_back(ops::Placeholder(s.WithOpName(input_name), dtype));
5700 }
5701 auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
5702 auto concat = ops::Concat(s.WithOpName("my_concat"),
5703 absl::Span<const Input>(values), axis);
5704 return concat.operation.node()->def();
5705 };
5706
5707 #if IS_TRT_VERSION_GE(7, 0, 0, 0)
5708 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertConcat) {
5709 #else
5710 TEST_P(OpConverter_FP32_FP16_Test, ConvertConcat) {
5711 #endif
5712 {
5713 // Axis is a tensor, should fail.
5714 Reset();
5715 NodeDef node_def = get_concat_nodedef(tf_type_, 2);
5716 AddTestTensor("values_0", {1, 1, 2, 3});
5717 AddTestTensor("values_1", {1, 1, 2, 3});
5718 AddTestTensor("axis", {1});
5719 RunValidationAndConversion(
5720 node_def, error::UNIMPLEMENTED,
5721 "The input \"axis\" for ConcatV2 must be a constant, at my_concat");
5722 }
5723 {
5724 // Axis is out of bounds, should fail.
5725 Reset();
5726 NodeDef node_def = get_concat_nodedef(tf_type_, 2);
5727 AddTestTensor("values_0", {1, 1, 2, 3});
5728 AddTestTensor("values_1", {1, 1, 2, 3});
5729 AddTestWeights<int32>("axis", {1}, {4});
5730 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
5731 "Axis value of 4 is out of bounds, must be in "
5732 "range [-4, 4), at my_concat");
5733 }
5734 {
5735 // Inputs have inconsistent ranks, should fail.
5736 Reset();
5737 NodeDef node_def = get_concat_nodedef(tf_type_, 2);
5738 AddTestTensor("values_0", {1, 1, 2, 3});
5739 AddTestTensor("values_1", {1, 1, 6});
5740 AddTestWeights<int32>("axis", {1}, {1});
5741 RunValidationAndConversion(
5742 node_def, error::INVALID_ARGUMENT,
5743 "Received inputs with inconsistent rank, at my_concat");
5744 }
5745
5746 struct TestParams {
5747 std::vector<std::vector<int>> input_shapes;
5748 std::vector<std::vector<int>> input_values;
5749 int axis;
5750 std::vector<int> expected_output_dims;
5751 std::vector<int> expected_output;
5752 Status conversion_status;
5753 Status run_status;
5754 bool input_as_weight;
5755 };
5756
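  // Two 6-element inputs; InitTestVector's second argument is the start value,
  // so the second input holds the values 6..11.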
5757 const std::vector<std::vector<int>> common_input{InitTestVector<int>(6),
5758 InitTestVector<int>(6, 6)};
5759
5760 std::vector<TestParams> params = {
5761 {
5762 /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
5763 /*input_values=*/common_input,
5764 /*axis=*/1,
5765 /*expected_output_dims=*/{1, 2, 2, 3},
5766 /*expected_output=*/InitTestVector<int>(12),
5767 },
5768 {
5769 /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
5770 /*input_values=*/common_input,
5771 /*axis=*/2,
5772 /*expected_output_dims=*/{1, 1, 4, 3},
5773 /*expected_output=*/InitTestVector<int>(12),
5774 },
5775 {
5776 /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
5777 /*input_values=*/common_input,
5778 /*axis=*/3,
5779 /*expected_output_dims=*/{1, 1, 2, 6},
5780 /*expected_output=*/
5781 {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11},
5782 },
5783 {
5784 /*input_shapes=*/{{1, 1}, {1, 2}, {1, 3}, {1, 1}, {1, 1}, {1, 2}},
5785 /*input_values=*/
5786 {{1}, {2, 3}, {4, 5, 6}, {7}, {8}, {9, 10}},
5787 /*axis=*/1,
5788 /*expected_output_dims=*/{1, 10},
5789 /*expected_output=*/
5790 InitTestVector<int>(10, /*start_value=*/1),
5791 },
5792 {
5793 // An input is a weight
5794 /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
5795 /*input_values=*/common_input,
5796 /*axis=*/1,
5797 /*expected_output_dims=*/{1, 2, 2, 3},
5798 /*expected_output=*/InitTestVector<int>(12),
5799 /*conversion_status=*/
5800 errors::Unimplemented("The input \"values_1\" for ConcatV2 "
5801 "must be a tensor, at my_concat"),
5802 /*run_status=*/Status::OK(),
5803 /*input_as_weight=*/true,
5804 },
5805 {
5806 // Axis is batch dimension, should fail in implicit batch mode.
5807 /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
5808 /*input_values=*/common_input,
5809 /*axis=*/0,
5810 /*expected_output_dims=*/{2, 1, 2, 3},
5811 /*expected_output=*/InitTestVector<int>(12),
5812 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
5813 ? errors::Unimplemented(
5814 "TensorRT does not allow manipulation of the "
5815 "batch dimension, at my_concat")
5816 : Status::OK(),
5817 },
5818 {
5819 // Inconsistent input shape, runtime error in dynamic shape mode.
5820 /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 3, 2}},
5821 /*input_values=*/common_input,
5822 /*axis=*/1,
5823 /*expected_output_dims=*/{2, 1, 2, 3},
5824 /*expected_output=*/InitTestVector<int>(12),
5825          /*conversion_status=*/trt_mode_ != TrtTestMode::kDynamicShape
5826              ? errors::InvalidArgument(
5827                    "Received inputs with inconsistent shape, at my_concat")
5828              : Status::OK(),
5829          /*run_status=*/errors::InvalidArgument(""),
5830 }};
5831
5832 for (auto p : params) {
5833 Reset();
5834 const int num_inputs = p.input_shapes.size();
5835 EXPECT_EQ(num_inputs, p.input_values.size());
5836 NodeDef node_def = get_concat_nodedef(tf_type_, num_inputs);
5837 // Create inputs.
5838 for (int j = 0; j < num_inputs; ++j) {
5839 string name = StrCat("values_", j);
5840 if (j == 1 && p.input_as_weight) {
5841 AddTestWeights(name, p.input_shapes[j], p.input_values[j], tf_type_);
5842 } else {
5843 AddTestTensor(name, p.input_shapes[j], p.input_values[j]);
5844 }
5845 }
5846 AddTestWeights<int32>("axis", {1}, {p.axis});
5847
5848 TestOpConverter("my_concat", node_def, p.expected_output_dims,
5849 p.conversion_status, p.run_status,
5850 ElementsAreArray(p.expected_output));
5851 }
5852 }
5853
5854 // Get the NodeDef for Split.
5855 auto get_split_nodedef = [](DataType dtype, int num_split) -> NodeDef {
5856 Scope s = Scope::NewRootScope();
5857 auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
5858 auto value = ops::Placeholder(s.WithOpName("value"), dtype);
5859 auto split = ops::Split(s.WithOpName("my_split"), axis, value, num_split);
5860 return split.operation.node()->def();
5861 };
5862
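// Runs Split conversion for a set of valid parameterizations and checks each
// output tensor's dimensions and values against the expected slices.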
5863 template <DataType dtype>
5864 void TestConvertSplit(OpConverterTest* test) {
5865 typedef typename EnumToDataType<dtype>::Type CType;
5866
5867 struct TestParams {
5868 std::vector<int> input_shape;
5869 std::vector<CType> value;
5870 int axis;
5871 int num_split;
5872 std::vector<int> expected_output_dims;
5873 std::vector<std::vector<CType>> expected_outputs;
5874 };
5875
5876 const std::vector<CType> common_input = InitTestVector<CType>(6);
5877 std::vector<TestParams> ok_params = {
5878 // Identity (num_split = 1)
5879 {/*input_shape=*/{1, 2, 3}, /*value=*/common_input, /*axis=*/1,
5880 /*num_split=*/1, /*expected_output_dims=*/{1, 2, 3},
5881 /*expected_outputs=*/{InitTestVector<CType>(6)}},
5882 {/*input_shape=*/{1, 2, 3},
5883 /*value=*/common_input,
5884 /*axis=*/3,
5885 /*num_split=*/3,
5886 /*expected_output_dims=*/{1, 2, 1},
5887 /*expected_outputs=*/
5888 {{CType(0), CType(3)}, {CType(1), CType(4)}, {CType(2), CType(5)}}},
5889 {/*input_shape=*/{1, 6},
5890 /*value=*/common_input,
5891 /*axis=*/2,
5892 /*num_split=*/6,
5893 /*expected_output_dims=*/{1, 1},
5894 /*expected_outputs=*/
5895 {{CType(0)},
5896 {CType(1)},
5897 {CType(2)},
5898 {CType(3)},
5899 {CType(4)},
5900 {CType(5)}}},
5901 {/*input_shape=*/{1, 6},
5902 /*value=*/common_input,
5903 /*axis=*/-1,
5904 /*num_split=*/2,
5905 /*expected_output_dims=*/{1, 3},
5906 /*expected_outputs=*/
5907 {InitTestVector<CType>(3), InitTestVector<CType>(3, CType(3))}},
5908 };
5909
5910 for (int i = 0; i < ok_params.size(); ++i) {
5911 test->Reset();
5912 NodeDef node_def = get_split_nodedef(dtype, ok_params[i].num_split);
5913 // Create inputs.
5914 test->AddTestWeights<int32>("axis", {1}, {ok_params[i].axis});
5915 nvinfer1::DataType trt_type;
5916 TF_ASSERT_OK(TfTypeToTrtType(dtype, &trt_type));
5917 test->AddTestTensor("value", ok_params[i].input_shape, 1, trt_type);
5918 // Convert.
5919 test->RunValidationAndConversion(node_def);
5920
5921 // Get output tensors and verify output dims.
5922 EXPECT_EQ(ok_params[i].expected_outputs.size(), ok_params[i].num_split);
5923 std::vector<TRT_TensorOrWeights> outputs(ok_params[i].num_split);
5924 DataVec output_data;
5925 for (int j = 0; j < outputs.size(); ++j) {
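      // TF names the first output after the node itself; additional outputs
      // are referenced as "<node_name>:<index>".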
5926       const string name = j == 0 ? "my_split" : StrCat("my_split:", j);
5927 TF_EXPECT_OK(test->GetTensorOrWeights(name, &outputs[j]));
5928 EXPECT_TRUE(outputs[j].is_tensor());
5929 ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
5930 outputs[j].tensor()->getDimensions());
5931 // Create buffer to store output.
5932 output_data.push_back(
5933 {name, test->ConstructTensor<CType>(
5934 ok_params[i].expected_outputs[j].size())});
5935 }
5936
5937 // Verify output values are correct.
5938 const DataVec input_data{
5939 {"value", test->AsTensor<CType>(ok_params[i].value)}};
5940 TF_EXPECT_OK(test->BuildAndRun(input_data, &output_data));
5941 for (int j = 0; j < outputs.size(); ++j) {
5942 EXPECT_THAT(GetSpanForData<CType>(output_data[j]),
5943 ElementsAreArray(ok_params[i].expected_outputs[j]));
5944 }
5945 }
5946 }
5947
5948 TEST_F(OpConverterTest, ConvertSplit) {
5949 {
5950 // Axis is a tensor, should fail.
5951 Reset();
5952 NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
5953 AddTestTensor("axis", {1});
5954 AddTestTensor("value", {1, 2, 3});
5955 RunValidationAndConversion(
5956 node_def, error::UNIMPLEMENTED,
5957 "The input \"axis\" for Split must be a constant, at my_split");
5958 }
5959 {
5960 // Axis is out of bounds, should fail.
5961 Reset();
5962 NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
5963 AddTestWeights<int32>("axis", {1}, {4});
5964 AddTestTensor("value", {1, 2, 3});
5965 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
5966 "Axis value of 4 is out of bounds, must be in "
5967 "range [-4, 4), at my_split");
5968 }
5969 {
5970 // Axis is out of bounds (negative), should fail.
5971 Reset();
5972 NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
5973 AddTestWeights<int32>("axis", {1}, {-5});
5974 AddTestTensor("value", {1, 2, 3});
5975 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
5976 "Axis value of -5 is out of bounds, must be in "
5977 "range [-4, 4), at my_split");
5978 }
5979 {
5980 // Axis is batch dimension, should fail.
5981 Reset();
5982 NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
5983 AddTestWeights<int32>("axis", {1}, {0});
5984 AddTestTensor("value", {1, 2, 3});
5985 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
5986 "TensorRT does not allow manipulation of the "
5987 "batch dimension, at my_split");
5988 }
5989 {
5990 // Value is a weight, should fail.
5991 Reset();
5992 NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
5993 AddTestWeights<int32>("axis", {1}, {1});
5994 AddTestWeights<float>("value", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
5995 RunValidationAndConversion(
5996 node_def, error::UNIMPLEMENTED,
5997 "The input \"value\" for Split must be a tensor, at my_split");
5998 }
5999 {
6000     // Dim is not evenly divisible by num_split, should fail.
6001 Reset();
6002 NodeDef node_def = get_split_nodedef(DT_FLOAT, 2);
6003 AddTestWeights<int32>("axis", {1}, {3});
6004 AddTestTensor("value", {1, 2, 3});
6005 RunValidationAndConversion(
6006 node_def, error::INVALID_ARGUMENT,
6007 "Dimension 3 of size 3 is not evenly divisble by 2, at my_split");
6008 }
6009 {
6010 // num_split > dim size, should fail.
6011 Reset();
6012 NodeDef node_def = get_split_nodedef(DT_FLOAT, 4);
6013 AddTestWeights<int32>("axis", {1}, {3});
6014 AddTestTensor("value", {1, 2, 3});
6015 RunValidationAndConversion(
6016 node_def, error::INVALID_ARGUMENT,
6017 "Dimension 3 of size 3 is not evenly divisble by 4, at my_split");
6018 }
6019
6020 TestConvertSplit<DT_FLOAT>(this);
6021 TestConvertSplit<DT_HALF>(this);
6022 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
6023 TestConvertSplit<DT_INT32>(this);
6024 #endif
6025 }
6026
6027 // Get the NodeDef for Unpack (Unstack in TF API).
6028 auto get_unpack_nodedef = [](DataType dtype, int num, int axis) -> NodeDef {
6029 Scope s = Scope::NewRootScope();
6030 auto value = ops::Placeholder(s.WithOpName("value"), dtype);
6031 auto unstack_attrs = ops::Unstack::Axis(axis);
6032 auto unstack =
6033 ops::Unstack(s.WithOpName("my_unpack"), value, num, unstack_attrs);
6034 return unstack.operation.node()->def();
6035 };
6036
6037 template <DataType dtype>
6038 void TestConvertUnpack(OpConverterTest* test) {
6039 typedef typename EnumToDataType<dtype>::Type CType;
6040
6041 struct TestParams {
6042 std::vector<int> input_shape;
6043 std::vector<CType> value;
6044 int axis;
6045 int num;
6046 std::vector<int> expected_output_dims;
6047 std::vector<std::vector<CType>> expected_outputs;
6048 };
6049
6050 const std::vector<CType> common_input = InitTestVector<CType>(6);
6051 std::vector<TestParams> ok_params = {
6052 {/*input_shape=*/{1, 2, 3}, /*value=*/common_input, /*axis=*/1,
6053 /*num=*/1, /*expected_output_dims=*/{2, 3},
6054 /*expected_outputs=*/{InitTestVector<CType>(6)}},
6055 {/*input_shape=*/{1, 2, 3},
6056 /*value=*/common_input,
6057 /*axis=*/3,
6058 /*num=*/3,
6059 /*expected_output_dims=*/{1, 2},
6060 /*expected_outputs=*/
6061 {{CType(0), CType(3)}, {CType(1), CType(4)}, {CType(2), CType(5)}}},
6062 {/*input_shape=*/{6, 1},
6063 /*value=*/common_input,
6064 /*axis=*/-2,
6065 /*num=*/6,
6066 /*expected_output_dims=*/{1},
6067 /*expected_outputs=*/
6068 {{CType(0)},
6069 {CType(1)},
6070 {CType(2)},
6071 {CType(3)},
6072 {CType(4)},
6073 {CType(5)}}},
6074 {/*input_shape=*/{6},
6075 /*value=*/common_input,
6076 /*axis=*/1,
6077 /*num=*/6,
6078 /*expected_output_dims=*/{},
6079 /*expected_outputs=*/
6080 {{CType(0)},
6081 {CType(1)},
6082 {CType(2)},
6083 {CType(3)},
6084 {CType(4)},
6085 {CType(5)}}},
6086 };
6087
6088 for (int i = 0; i < ok_params.size(); ++i) {
6089 test->Reset();
6090 NodeDef node_def =
6091 get_unpack_nodedef(dtype, ok_params[i].num, ok_params[i].axis);
6092 // Create inputs.
6093 nvinfer1::DataType trt_type;
6094 TF_ASSERT_OK(TfTypeToTrtType(dtype, &trt_type));
6095 test->AddTestTensor("value", ok_params[i].input_shape, 1, trt_type);
6096 // Convert.
6097 test->RunValidationAndConversion(node_def);
6098
6099 // Get output tensors and verify output dims.
6100 EXPECT_EQ(ok_params[i].expected_outputs.size(), ok_params[i].num);
6101 std::vector<TRT_TensorOrWeights> outputs(ok_params[i].num);
6102 DataVec output_data;
6103 for (int j = 0; j < outputs.size(); ++j) {
6104 const string name = j == 0 ? "my_unpack" : StrCat("my_unpack:", j);
6105 TF_EXPECT_OK(test->GetTensorOrWeights(name, &outputs[j]));
6106 EXPECT_TRUE(outputs[j].is_tensor());
6107 ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
6108 outputs[j].tensor()->getDimensions());
6109 // Create buffer to store output.
6110 output_data.push_back(
6111 {name, test->ConstructTensor<CType>(
6112 ok_params[i].expected_outputs[j].size())});
6113 }
6114
6115 // Verify output values are correct.
6116 const DataVec input_data{
6117 {"value", test->AsTensor<CType>(ok_params[i].value)}};
6118 TF_EXPECT_OK(test->BuildAndRun(input_data, &output_data));
6119 for (int j = 0; j < outputs.size(); ++j) {
6120 EXPECT_THAT(GetSpanForData<CType>(output_data[j]),
6121 ElementsAreArray(ok_params[i].expected_outputs[j]));
6122 }
6123 }
6124 }
6125
6126 TEST_F(OpConverterTest, ConvertUnpack) {
6127 {
6128 // Value is weights, should fail.
6129 Reset();
6130 NodeDef node_def = get_unpack_nodedef(DT_FLOAT, /*num=*/3, /*axis=*/3);
6131 AddTestWeights<float>("value", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
6132 RunValidationAndConversion(
6133 node_def, error::UNIMPLEMENTED,
6134 "The input \"value\" for Unpack must be a tensor, at my_unpack");
6135 }
6136 {
6137 // Axis is out of bounds, should fail.
6138 Reset();
6139 NodeDef node_def = get_unpack_nodedef(DT_FLOAT, /*num=*/1, /*axis=*/4);
6140 AddTestTensor("value", {1, 2, 3});
6141 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
6142 "Axis value of 4 is out of bounds, must be in "
6143 "range [-4, 4), at my_unpack");
6144 }
6145 {
6146 // Axis is out of bounds (negative), should fail.
6147 Reset();
6148 NodeDef node_def = get_unpack_nodedef(DT_FLOAT, /*num=*/1, /*axis=*/-5);
6149 AddTestTensor("value", {1, 2, 3});
6150 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
6151 "Axis value of -5 is out of bounds, must be in "
6152 "range [-4, 4), at my_unpack");
6153 }
6154 {
6155 // Axis is batch dimension, should fail.
6156 Reset();
6157 NodeDef node_def = get_unpack_nodedef(DT_FLOAT, /*num=*/1, /*axis=*/0);
6158 AddTestTensor("value", {1, 2, 3});
6159 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6160 "TensorRT does not allow manipulation of the "
6161 "batch dimension, at my_unpack");
6162 }
6163 {
6164 // Dim size does not match num, should fail.
6165 Reset();
6166 NodeDef node_def = get_unpack_nodedef(DT_FLOAT, /*num=*/5, /*axis=*/2);
6167 AddTestTensor("value", {1, 6});
6168 RunValidationAndConversion(
6169 node_def, error::INVALID_ARGUMENT,
6170 "Dimension 2 has size 6 which is not equal to num of 5, at my_unpack");
6171 }
6172 {
6173 // Output would be TF scalar, should fail.
6174 Reset();
6175 NodeDef node_def = get_unpack_nodedef(DT_FLOAT, /*num=*/1, /*axis=*/0);
6176 AddTestTensor("value", {});
6177 RunValidationAndConversion(
6178 node_def, error::UNIMPLEMENTED,
6179 "Input \"value\" for Unpack must be rank 2 or greater, at my_unpack");
6180 }
6181
6182 TestConvertUnpack<DT_FLOAT>(this);
6183 TestConvertUnpack<DT_HALF>(this);
6184 #if IS_TRT_VERSION_GE(5, 1, 3, 1)
6185 TestConvertUnpack<DT_INT32>(this);
6186 #endif
6187 }
6188
6189 // Get the NodeDef for Pack.
6190 NodeDef GetPackNodeDef(DataType dtype, int num_inputs, int axis) {
6191 Scope s = Scope::NewRootScope();
6192 std::vector<Input> values;
6193 for (int i = 0; i < num_inputs; ++i) {
6194 const string input_name = StrCat("values_", i);
6195 values.push_back(ops::Placeholder(s.WithOpName(input_name), dtype));
6196 }
6197   // The Pack op is exposed as Stack in the TF APIs.
6198 auto pack =
6199 ops::Stack(s.WithOpName("my_pack"), absl::Span<const Input>(values),
6200 ops::Stack::Axis(axis));
6201 return pack.operation.node()->def();
6202 }
6203
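// Pack with INT32 inputs is only exercised on TensorRT 6+; older versions use
// the FP32/FP16-only test fixture.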
6204 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
6205 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertPack) {
6206 #else
6207 TEST_P(OpConverter_FP32_FP16_Test, ConvertPack) {
6208 #endif
6209 struct TestParams {
6210 std::vector<std::vector<int>> input_shapes;
6211 std::vector<std::vector<int>> partial_input_shapes;
6212 std::vector<std::vector<float>> input_values;
6213 int axis;
6214 std::vector<int> expected_output_dims;
6215 std::vector<float> expected_output;
6216 Status conversion_status;
6217 Status runtime_status;
6218 bool input_1_is_weight;
6219 };
6220
6221 const std::vector<std::vector<float>> common_input{
6222 InitTestVector<float>(6), InitTestVector<float>(6, /*start_value=*/6)};
6223 std::vector<TestParams> params = {
6224 // Second input is weight, should fail in implicit batch mode
6225 {/*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6226 /*partial_input_shapes=*/{{}, {}},
6227 /*input_values=*/common_input,
6228 /*axis=*/1,
6229 /*expected_output_dims=*/{1, 2, 2, 3},
6230 /*expected_output=*/InitTestVector<float>(12),
6231 trt_mode_ == TrtTestMode::kImplicitBatch
6232 ? Status{error::UNIMPLEMENTED,
6233 "The input \"values_1\" for Pack must be a tensor, at "
6234 "my_pack"}
6235 : Status::OK(),
6236        /*runtime_status=*/Status::OK(),
6237        /*input_1_is_weight=*/true},
6238 // Axis is out of bounds, should fail.
6239 {
6240 /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6241 /*partial_input_shapes=*/{{}, {}},
6242 /*input_values=*/common_input,
6243 /*axis=*/-5,
6244 /*expected_output_dims=*/{},
6245 /*expected_output=*/{},
6246 Status{error::INVALID_ARGUMENT,
6247 "Axis value of -5 is out of bounds, must be in"
6248 " range [-4, 4), at my_pack"},
6249 },
6250 // Axis is batch dimension, should fail in implicit batch mode.
6251 {/*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6252 /*partial_input_shapes=*/{{}, {}},
6253 /*input_values=*/common_input,
6254 /*axis=*/-4,
6255 /*expected_output_dims=*/{2, 1, 2, 3},
6256 /*expected_output=*/InitTestVector<float>(12),
6257 trt_mode_ == TrtTestMode::kImplicitBatch
6258 ? Status{error::UNIMPLEMENTED,
6259 "TensorRT does not allow manipulation of the batch "
6260 "dimension, at my_pack"}
6261 : Status::OK()},
6262 // Inconsistent rank, should fail.
6263 {
6264 /*input_shapes=*/{{1, 2, 3}, {1, 6}},
6265 /*partial_input_shapes=*/{{}, {}},
6266 /*input_values=*/common_input,
6267 /*axis=*/1,
6268 /*expected_output_dims=*/{},
6269 /*expected_output=*/{},
6270 Status{error::INVALID_ARGUMENT,
6271 "Received inputs with inconsistent rank, at my_pack"},
6272 },
6273 {
6274 /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6275 /*partial_input_shapes=*/{{}, {}},
6276 /*input_values=*/common_input,
6277 /*axis=*/1,
6278 /*expected_output_dims=*/{1, 2, 2, 3},
6279 /*expected_output=*/InitTestVector<float>(12),
6280 },
6281 {
6282 /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6283 /*partial_input_shapes=*/{{}, {}},
6284 /*input_values=*/common_input,
6285 /*axis=*/2,
6286 /*expected_output_dims=*/{1, 2, 2, 3},
6287 /*expected_output=*/
6288 {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11},
6289 },
6290 {
6291 /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6292 /*partial_input_shapes=*/{{}, {}},
6293 /*input_values=*/common_input,
6294 /*axis=*/3,
6295 /*expected_output_dims=*/{1, 2, 3, 2},
6296 /*expected_output=*/
6297 {0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11},
6298 },
6299 {
6300 /*input_shapes=*/{{1, 2, 3}},
6301 /*partial_input_shapes=*/{{}},
6302 /*input_values=*/{InitTestVector<float>(6)},
6303 /*axis=*/1,
6304 /*expected_output_dims=*/{1, 1, 2, 3},
6305 /*expected_output=*/InitTestVector<float>(6),
6306 },
6307 {
6308 /*input_shapes=*/{{1, 2, 3}},
6309 /*partial_input_shapes=*/{{}},
6310 /*input_values=*/{InitTestVector<float>(6)},
6311 /*axis=*/2,
6312 /*expected_output_dims=*/{1, 2, 1, 3},
6313 /*expected_output=*/InitTestVector<float>(6),
6314 },
6315 };
6316   // Inputs with inconsistent shapes should fail at conversion time (not checked in dynamic shape mode, see below).
6317 if (trt_mode_ != TrtTestMode::kDynamicShape) {
6318 params.push_back(TestParams{
6319 /*input_shapes=*/{{1, 2, 3}, {1, 3, 2}},
6320 /*partial_input_shapes=*/{{}, {}},
6321 /*input_values=*/common_input,
6322 /*axis=*/1,
6323 /*expected_output_dims=*/{},
6324 /*expected_output=*/InitTestVector<float>(12),
6325 Status{error::INVALID_ARGUMENT,
6326 "Received inputs with inconsistent shape, at my_pack"}});
6327 } else {
6328     // In dynamic shape mode inconsistent shapes cannot be caught at conversion
6329     // time, only at runtime. However, TensorRT does not raise a proper runtime
6330     // error; instead it aborts the program with the following message:
6331 // Assertion failed: t->start.d[i] + t->extent.d[i] <= r.dims.d[i]
6332 // ../builder/cudnnBuilderGraph.cpp:862
6333 // Aborting...
6334 // TODO(tfeher) Add dynamic shapes test once TRT handles shape error
6335 // decently
6336 }
6337 if (trt_mode_ == TrtTestMode::kDynamicShape) {
6338 // Test with mixed dynamic / static shape input tensors
6339 params.push_back(
6340 TestParams{/*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
6341 /*partial_input_shapes=*/{{-1, -1, -1}, {1, 2, 3}},
6342 /*input_values=*/common_input,
6343 /*axis=*/2,
6344 /*expected_output_dims=*/{1, 2, 2, 3},
6345 /*expected_output=*/
6346 {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11}});
6347 }
6348 for (auto p : params) {
6349 Reset();
6350 const int num_inputs = p.input_shapes.size();
6351 EXPECT_EQ(num_inputs, p.input_values.size());
6352
6353 NodeDef node_def = GetPackNodeDef(tf_type_, num_inputs, p.axis);
6354 // Create inputs.
6355 for (int j = 0; j < num_inputs; ++j) {
6356 if (j == 1 && p.input_1_is_weight) {
6357 AddTestWeights(StrCat("values_", j), p.input_shapes[j],
6358 p.input_values[j], tf_type_);
6359 } else {
6360 AddTestTensor(StrCat("values_", j), p.input_shapes[j], tf_type_,
6361 p.input_values[j], p.partial_input_shapes[j]);
6362 }
6363 }
6364 TestOpConverter("my_pack", node_def, p.expected_output_dims,
6365 p.conversion_status, p.runtime_status,
6366 ElementsAreArray(p.expected_output));
6367 }
6368 }
6369
6370 // Get the NodeDef for ArgMin or ArgMax.
6371 template <typename OpType>
6372 NodeDef GetArgMinMaxNodeDef(DataType input_dtype, DataType output_dtype) {
6373 Scope s = Scope::NewRootScope();
6374 auto input = ops::Placeholder(s.WithOpName("input"), input_dtype);
6375 auto dimension = ops::Placeholder(s.WithOpName("dimension"), DT_INT32);
6376 auto attrs = OpType::OutputType(output_dtype);
6377 auto arg = OpType(s.WithOpName("my_arg"), input, dimension, attrs);
6378 return arg.operation.node()->def();
6379 }
6380
6381 struct ArgMinMaxTestParams {
6382 std::vector<int> input_shape;
6383 std::vector<float> input_value;
6384 int axis;
6385 std::vector<int> expected_output_dims;
6386 std::vector<int> expected_argmax_output;
6387 std::vector<int> expected_argmin_output;
6388 Status status;
6389 };
6390
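// Runs a single ArgMin/ArgMax parameterization; the expected output is picked
// based on the op type baked into the NodeDef.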
6391 template <typename OpType>
6392 void TestConvertArgMinMax(ParameterizedOpConverterTestBase* test,
6393 DataType _tf_type, ArgMinMaxTestParams& p) {
6394 test->Reset();
6395
6396 NodeDef node_def = GetArgMinMaxNodeDef<OpType>(_tf_type,
6397 /*output_dtype=*/DT_INT32);
6398
6399 std::vector<int> expected_out;
6400 if (node_def.op() == "ArgMax") {
6401 expected_out = p.expected_argmax_output;
6402 } else if (node_def.op() == "ArgMin") {
6403 expected_out = p.expected_argmin_output;
6404 } else {
6405 ASSERT_TRUE(false);
6406 }
6407
6408 test->AddTestTensor("input", p.input_shape, _tf_type, p.input_value);
6409 test->AddTestWeights("dimension", {1}, {p.axis}, DT_INT32);
6410
6411 test->TestOpConverter("my_arg", node_def, p.expected_output_dims,
6412 /*expected_conversion_status=*/p.status,
6413 /*expected_runtime_status=*/Status::OK(),
6414 /*matcher=*/ElementsAreArray(expected_out), {DT_INT32});
6415 }
6416
6417 TEST_P(OpConverter_FP32_FP16_Test, ConvertArgMinMax) {
6418 {
6419 // Dimension is a tensor, should fail.
6420 Reset();
6421 NodeDef node_def =
6422 GetArgMinMaxNodeDef<ops::ArgMax>(tf_type_,
6423 /*output_dtype=*/DT_INT32);
6424 AddTestTensor("input", {1, 2, 3});
6425 AddTestTensor("dimension", {1});
6426 RunValidationAndConversion(
6427 node_def, error::UNIMPLEMENTED,
6428 "The input \"dimension\" for ArgMax must be a constant, at my_arg");
6429 }
6430 {
6431 // Output type is INT64, should fail.
6432 Reset();
6433 NodeDef node_def =
6434 GetArgMinMaxNodeDef<ops::ArgMax>(tf_type_,
6435 /*output_dtype=*/DT_INT64);
6436 AddTestTensor("input", {1, 2, 3});
6437 AddTestWeights("dimension", {1}, {3}, DT_INT32);
6438 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6439 "Output type int64 is not supported, at my_arg");
6440 }
6441
6442 const std::vector<float> common_input = InitTestVector<float>(6);
6443 std::vector<ArgMinMaxTestParams> params = {
6444 {/*input_shape=*/{2, 3},
6445 /*input_value=*/common_input,
6446 /*axis=*/0,
6447 /*expected_output_dims=*/{3},
6448 /*expected_argmax_output=*/{1, 1, 1},
6449 /*expected_argmin_output=*/{0, 0, 0},
6450 trt_mode_ == TrtTestMode::kImplicitBatch
6451 ? errors::Unimplemented("TensorRT does not allow manipulation of "
6452 "the batch dimension, at my_arg")
6453 : Status::OK()},
6454 {
6455 /*input_shape=*/{1, 6},
6456 /*input_value=*/common_input,
6457 /*axis=*/1,
6458 /*expected_output_dims=*/{1},
6459 /*expected_argmax_output=*/{5},
6460 /*expected_argmin_output=*/{0},
6461 },
6462 {
6463 /*input_shape=*/{1, 10},
6464 /*input_value=*/
6465 {-5.0f, 3.0f, 5.0f, 1.0f, 6.0f, -9.0f, 7.0f, 1.0f, 0.0f, -1.0f},
6466 /*axis=*/-1,
6467 /*expected_output_dims=*/{1},
6468 /*expected_argmax_output=*/{6},
6469 /*expected_argmin_output=*/{5},
6470 },
6471 {
6472 /*input_shape=*/{1, 2, 3},
6473 /*input_value=*/common_input,
6474 /*axis=*/2,
6475 /*expected_output_dims=*/{1, 2},
6476 /*expected_argmax_output=*/{2, 2},
6477 /*expected_argmin_output=*/{0, 0},
6478 },
6479 {
6480 /*input_shape=*/{1, 2, 3},
6481 /*input_value=*/common_input,
6482 /*axis=*/-2,
6483 /*expected_output_dims=*/{1, 3},
6484 /*expected_argmax_output=*/{1, 1, 1},
6485 /*expected_argmin_output=*/{0, 0, 0},
6486 },
6487 {
6488 /*input_shape=*/{1, 2, 1, 3},
6489 /*input_value=*/common_input,
6490 /*axis=*/3,
6491 /*expected_output_dims=*/{1, 2, 1},
6492 /*expected_argmax_output=*/{2, 2},
6493 /*expected_argmin_output=*/{0, 0},
6494 },
6495 {
6496 /*input_shape=*/{1, 2, 1, 3},
6497 /*input_value=*/common_input,
6498 /*axis=*/-3,
6499 /*expected_output_dims=*/{1, 1, 3},
6500 /*expected_argmax_output=*/{1, 1, 1},
6501 /*expected_argmin_output=*/{0, 0, 0},
6502 },
6503 {/*input_shape=*/{1, 2, 1, 1, 3},
6504 /*input_value=*/common_input,
6505 /*axis=*/4,
6506 /*expected_output_dims=*/{1, 2, 1, 1},
6507 /*expected_argmax_output=*/{2, 2},
6508 /*expected_argmin_output=*/{0, 0},
6509 #if !IS_TRT_VERSION_GE(7, 0, 0, 11)
6510 errors::Unimplemented("op is not able to support tensors with 4+"
6511 " dimensions (excluding batch size)")
6512 #else
6513 Status::OK()
6514 #endif
6515 },
6516 {/*input_shape=*/{1, 2, 1, 1, 3},
6517 /*input_value=*/common_input,
6518 /*axis=*/-4,
6519 /*expected_output_dims=*/{1, 1, 1, 3},
6520 /*expected_argmax_output=*/{1, 1, 1},
6521 /*expected_argmin_output=*/{0, 0, 0},
6522 #if !IS_TRT_VERSION_GE(7, 0, 0, 11)
6523 errors::Unimplemented("op is not able to support tensors with 4+"
6524 " dimensions (excluding batch size)")
6525 #else
6526 Status::OK()
6527 #endif
6528 },
6529 };
6530
6531 for (auto p : params) {
6532 TestConvertArgMinMax<ops::ArgMin>(this, tf_type_, p);
6533 TestConvertArgMinMax<ops::ArgMax>(this, tf_type_, p);
6534 }
6535 }
6536
6537 // Get the NodeDef for DepthToSpace or SpaceToDepth.
6538 template <typename OpType>
6539 NodeDef GetDepthSpaceShuffleNodeDef(DataType dtype, int block_size,
6540 string data_format) {
6541 Scope s = Scope::NewRootScope();
6542 auto input = ops::Placeholder(s.WithOpName("input"), dtype);
6543 auto attrs = OpType::DataFormat(data_format);
6544 auto shuffle = OpType(s.WithOpName("my_shuffle"), input, block_size, attrs);
6545 return shuffle.operation.node()->def();
6546 }
6547
6548 template <typename CType>
6549 struct DepthSpaceShuffleTestParams {
6550 std::vector<int> input_dims;
6551 std::vector<CType> input_value;
6552 int block_size;
6553 string data_format;
6554 std::vector<int> expected_output_dims;
6555 std::vector<CType> expected_output;
6556 };
6557
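// Converts and runs each DepthToSpace/SpaceToDepth parameterization, checking
// the output dimensions and values.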
6558 template <typename OpType, DataType dtype, typename CType>
6559 void TestConvertDepthSpaceShuffle(
6560 OpConverterTest* test,
6561 const std::vector<DepthSpaceShuffleTestParams<CType>>& params) {
6562 for (int i = 0; i < params.size(); ++i) {
6563 test->Reset();
6564
6565 NodeDef node_def = GetDepthSpaceShuffleNodeDef<OpType>(
6566 dtype, params[i].block_size, params[i].data_format);
6567 nvinfer1::DataType trt_type;
6568 TF_ASSERT_OK(TfTypeToTrtType(dtype, &trt_type));
6569 test->AddTestTensor("input", params[i].input_dims, 1, trt_type);
6570 test->RunValidationAndConversion(node_def);
6571
6572 TRT_TensorOrWeights output;
6573 TF_EXPECT_OK(test->GetTensorOrWeights("my_shuffle", &output));
6574 EXPECT_TRUE(output.is_tensor());
6575 ExpectTrtDimsEqualsArray(params[i].expected_output_dims,
6576 output.tensor()->getDimensions());
6577
6578 DataVec input_data{{"input", test->AsTensor<CType>(params[i].input_value)}};
6579 DataVec output_data{{"my_shuffle", test->ConstructTensor<CType>(
6580 params[i].expected_output.size())}};
6581 TF_EXPECT_OK(test->BuildAndRun(input_data, &output_data));
6582 EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
6583 ElementsAreArray(params[i].expected_output));
6584 }
6585 }
6586
6587 template <DataType dtype>
6588 void TestConvertDepthToSpace(OpConverterTest* test) {
6589 typedef typename EnumToDataType<dtype>::Type CType;
6590 const std::vector<CType> common_input = InitTestVector<CType>(16);
6591 std::vector<DepthSpaceShuffleTestParams<CType>> params = {
6592 {
6593 /*input_shape=*/{4, 2, 2},
6594 /*input_value=*/common_input,
6595 /*block_size=*/2,
6596 /*data_format=*/"NCHW",
6597 /*expected_output_dims=*/{1, 4, 4},
6598 /*expected_output=*/
6599 CastTestVector<int, CType>(
6600 {0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15}),
6601 },
6602 {
6603 /*input_shape=*/{2, 2, 4},
6604 /*input_value=*/common_input,
6605 /*block_size=*/2,
6606 /*data_format=*/"NHWC",
6607 /*expected_output_dims=*/{4, 4, 1},
6608 /*expected_output=*/
6609 CastTestVector<int, CType>(
6610 {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}),
6611 },
6612 {
6613 /*input_shape=*/{16, 1, 1},
6614 /*input_value=*/common_input,
6615 /*block_size=*/4,
6616 /*data_format=*/"NCHW",
6617 /*expected_output_dims=*/{1, 4, 4},
6618 /*expected_output=*/InitTestVector<CType>(16),
6619 },
6620 {
6621 /*input_shape=*/{2, 2, 8},
6622 /*input_value=*/InitTestVector<CType>(32),
6623 /*block_size=*/2,
6624 /*data_format=*/"NHWC",
6625 /*expected_output_dims=*/{4, 4, 2},
6626 /*expected_output=*/CastTestVector<int, CType>({0, 1, 2, 3, 8,
6627 9, 10, 11, 4, 5,
6628 6, 7, 12, 13, 14,
6629 15, 16, 17, 18, 19,
6630 24, 25, 26, 27, 20,
6631 21, 22, 23, 28, 29,
6632 30, 31}),
6633 },
6634 };
6635
6636 TestConvertDepthSpaceShuffle<ops::DepthToSpace, dtype, CType>(test, params);
6637 }
6638
6639 TEST_F(OpConverterTest, ConvertDepthToSpace) {
6640 {
6641 // Input is a weight, should fail.
6642 Reset();
6643 NodeDef node_def =
6644 GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(DT_FLOAT, 2, "NCHW");
6645 AddTestWeights<float>("input", {4, 1, 1}, {1, 2, 3, 4});
6646 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6647 "The input \"input\" for DepthToSpace must be a "
6648 "tensor, at my_shuffle");
6649 }
6650 {
6651 // Input rank != 4
6652 Reset();
6653 NodeDef node_def =
6654 GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(DT_FLOAT, 2, "NCHW");
6655 AddTestTensor("input", {16, 32});
6656 RunValidationAndConversion(
6657 node_def, error::INVALID_ARGUMENT,
6658 "The input to DepthToSpace must be rank 4, at my_shuffle");
6659 }
6660 {
6661 // Channels not divisible by block_size, should fail.
6662 Reset();
6663 NodeDef node_def =
6664 GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(DT_FLOAT, 3, "NCHW");
6665 AddTestTensor("input", {16, 32, 32});
6666 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
6667 "Number of channels must be divisible by "
6668 "block_size*block_size, at my_shuffle");
6669 }
6670 {
6671 // Unsupported format, should fail.
6672 Reset();
6673 NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(
6674 DT_FLOAT, 2, "NCHW_VECT_C");
6675 AddTestTensor("input", {16, 32, 32});
6676 RunValidationAndConversion(
6677 node_def, error::UNIMPLEMENTED,
6678 "Data format NCHW_VECT_C is not supported, at my_shuffle");
6679 }
6680
6681 TestConvertDepthToSpace<DT_FLOAT>(this);
6682 TestConvertDepthToSpace<DT_HALF>(this);
6683 TestConvertDepthToSpace<DT_INT32>(this);
6684 }
6685
6686 template <DataType dtype>
6687 void TestConvertSpaceToDepth(OpConverterTest* test) {
6688 typedef typename EnumToDataType<dtype>::Type CType;
6689 const std::vector<CType> common_input = InitTestVector<CType>(16);
6690 std::vector<DepthSpaceShuffleTestParams<CType>> params = {
6691 {
6692 /*input_shape=*/{1, 4, 4},
6693 /*input_value=*/common_input,
6694 /*block_size=*/2,
6695 /*data_format=*/"NCHW",
6696 /*expected_output_dims=*/{4, 2, 2},
6697 /*expected_output=*/
6698 CastTestVector<int, CType>(
6699 {0, 2, 8, 10, 1, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15}),
6700 },
6701 {
6702 /*input_shape=*/{4, 4, 1},
6703 /*input_value=*/common_input,
6704 /*block_size=*/2,
6705 /*data_format=*/"NHWC",
6706 /*expected_output_dims=*/{2, 2, 4},
6707 /*expected_output=*/
6708 CastTestVector<int, CType>(
6709 {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}),
6710 },
6711 {
6712 /*input_shape=*/{1, 4, 4},
6713 /*input_value=*/common_input,
6714 /*block_size=*/4,
6715 /*data_format=*/"NCHW",
6716 /*expected_output_dims=*/{16, 1, 1},
6717 /*expected_output=*/InitTestVector<CType>(16),
6718 },
6719 {
6720 /*input_shape=*/{4, 4, 2},
6721 /*input_value=*/InitTestVector<CType>(32),
6722 /*block_size=*/2,
6723 /*data_format=*/"NHWC",
6724 /*expected_output_dims=*/{2, 2, 8},
6725 /*expected_output=*/CastTestVector<int, CType>({0, 1, 2, 3, 8,
6726 9, 10, 11, 4, 5,
6727 6, 7, 12, 13, 14,
6728 15, 16, 17, 18, 19,
6729 24, 25, 26, 27, 20,
6730 21, 22, 23, 28, 29,
6731 30, 31}),
6732 },
6733 };
6734
6735 TestConvertDepthSpaceShuffle<ops::SpaceToDepth, dtype, CType>(test, params);
6736 }
6737
6738 TEST_F(OpConverterTest, ConvertSpaceToDepth) {
6739 {
6740 // Input is a weight, should fail.
6741 Reset();
6742 NodeDef node_def =
6743 GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(DT_FLOAT, 2, "NCHW");
6744 AddTestWeights<float>("input", {4, 1, 1}, {1, 2, 3, 4});
6745 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6746 "The input \"input\" for SpaceToDepth must be a "
6747 "tensor, at my_shuffle");
6748 }
6749 {
6750 // Input rank != 4
6751 Reset();
6752 NodeDef node_def =
6753 GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(DT_FLOAT, 2, "NCHW");
6754 AddTestTensor("input", {16, 32});
6755 RunValidationAndConversion(
6756 node_def, error::INVALID_ARGUMENT,
6757 "The input to SpaceToDepth must be rank 4, at my_shuffle");
6758 }
6759 {
6760     // Width not divisible by block_size, should fail.
6761 Reset();
6762 NodeDef node_def =
6763 GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(DT_FLOAT, 3, "NCHW");
6764 AddTestTensor("input", {16, 9, 32});
6765 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
6766 "Width and height must be divisible by "
6767 "block_size, at my_shuffle");
6768 }
6769 {
6770     // Height not divisible by block_size, should fail.
6771 Reset();
6772 NodeDef node_def =
6773 GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(DT_FLOAT, 3, "NCHW");
6774 AddTestTensor("input", {16, 32, 9});
6775 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
6776 "Width and height must be divisible by "
6777 "block_size, at my_shuffle");
6778 }
6779 {
6780 // Unsupported format, should fail.
6781 Reset();
6782 NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(
6783 DT_FLOAT, 2, "NCHW_VECT_C");
6784 AddTestTensor("input", {16, 32, 32});
6785 RunValidationAndConversion(
6786 node_def, error::UNIMPLEMENTED,
6787 "Data format NCHW_VECT_C is not supported, at my_shuffle");
6788 }
6789
6790 TestConvertSpaceToDepth<DT_FLOAT>(this);
6791 TestConvertSpaceToDepth<DT_HALF>(this);
6792 TestConvertSpaceToDepth<DT_INT32>(this);
6793 }
6794
6795 #if IS_TRT_VERSION_GE(5, 1, 2, 0)
6796 TEST_P(OpConverter_FP32_FP16_Test, ConvertClipByValue) {
6797 Scope s = Scope::NewRootScope();
6798 auto t = ops::Placeholder(s.WithOpName("t"), tf_type_);
6799 auto clip_value_min =
6800 ops::Placeholder(s.WithOpName("clip_value_min"), tf_type_);
6801 auto clip_value_max =
6802 ops::Placeholder(s.WithOpName("clip_value_max"), tf_type_);
6803 auto clip = ops::ClipByValue(s.WithOpName("my_clip"), t, clip_value_min,
6804 clip_value_max);
6805 const NodeDef& node_def = clip.operation.node()->def();
6806
6807 nvinfer1::DataType trt_type_;
6808 TF_ASSERT_OK(TfTypeToTrtType(tf_type_, &trt_type_));
6809
6810 {
6811 // Input is a weight, should fail.
6812 Reset();
6813 AddTestWeights("t", {1, 2, 3}, {1, 2, 3, 4, 5, 6}, tf_type_);
6814 AddTestWeights("clip_value_min", {1}, {1}, tf_type_);
6815 AddTestWeights("clip_value_max", {1}, {5}, tf_type_);
6816 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6817 "The input \"t\" for ClipByValue must be a "
6818 "tensor, at my_clip");
6819 }
6820 {
6821 // Clip min is a tensor, should fail.
6822 Reset();
6823 AddTestTensor("t", {1, 2, 3});
6824 AddTestTensor("clip_value_min", {1});
6825 AddTestWeights("clip_value_max", {1}, {1}, tf_type_);
6826 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6827 "The input \"clip_value_min\" for ClipByValue "
6828 "must be a constant, at my_clip");
6829 }
6830 {
6831 // Clip max is a tensor, should fail.
6832 Reset();
6833 AddTestTensor("t", {1, 2, 3});
6834 AddTestWeights("clip_value_min", {1}, {1}, tf_type_);
6835 AddTestTensor("clip_value_max", {1});
6836 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6837 "The input \"clip_value_max\" for ClipByValue "
6838 "must be a constant, at my_clip");
6839 }
6840
6841 struct TestParams {
6842 std::vector<int> dims;
6843 int clip_value_min;
6844 int clip_value_max;
6845 std::vector<float> expected_output;
6846 };
6847
6848 const std::vector<float> common_input = InitTestVector<float>(6);
6849
6850 std::vector<TestParams> params = {{
6851 /*dims=*/{6},
6852 /*clip_value_min=*/2,
6853 /*clip_value_max=*/4,
6854 /*expected_output=*/{2, 2, 2, 3, 4, 4},
6855 },
6856 {
6857 /*dims=*/{1, 6},
6858 /*clip_value_min=*/2,
6859 /*clip_value_max=*/4,
6860 /*expected_output=*/{2, 2, 2, 3, 4, 4},
6861 },
6862 {
6863 /*dims=*/{1, 2, 3},
6864 /*clip_value_min=*/2,
6865 /*clip_value_max=*/4,
6866 /*expected_output=*/{2, 2, 2, 3, 4, 4},
6867 },
6868 {
6869 /*dims=*/{1, 2, 3, 1},
6870 /*clip_value_min=*/2,
6871 /*clip_value_max=*/4,
6872 /*expected_output=*/{2, 2, 2, 3, 4, 4},
6873 },
6874 {
6875 /*dims=*/{1, 1, 3, 1, 2},
6876 /*clip_value_min=*/2,
6877 /*clip_value_max=*/4,
6878 /*expected_output=*/{2, 2, 2, 3, 4, 4},
6879 },
6880 {
6881 /*dims=*/{1, 1, 3, 1, 2, 1},
6882 /*clip_value_min=*/2,
6883 /*clip_value_max=*/4,
6884 /*expected_output=*/{2, 2, 2, 3, 4, 4},
6885 },
6886 {
6887 /*dims=*/{2, 1, 3},
6888 /*clip_value_min=*/-1,
6889 /*clip_value_max=*/8,
6890 /*expected_output=*/common_input,
6891 }};
6892
6893 for (auto p : params) {
6894 Reset();
6895
6896 AddTestTensor("t", p.dims, tf_type_, common_input);
6897 AddTestWeights("clip_value_min", {1}, {p.clip_value_min}, tf_type_);
6898 AddTestWeights("clip_value_max", {1}, {p.clip_value_max}, tf_type_);
6899
6900 TestOpConverter("my_clip", node_def, p.dims,
6901 /*expected_conversion_status=*/Status::OK(),
6902 /*expected_runtime_status=*/Status::OK(),
6903 /*matcher=*/ElementsAreArray(p.expected_output));
6904 }
6905 }
6906 #endif // IS_TRT_VERSION_GE(5, 1, 2, 0)
6907
6908 // Get the NodeDef for SquaredDifference.
6909 NodeDef GetSquaredDifferenceNodeDef(DataType dtype) {
6910 Scope s = Scope::NewRootScope();
6911 auto x = ops::Placeholder(s.WithOpName("x"), dtype);
6912 auto y = ops::Placeholder(s.WithOpName("y"), dtype);
6913 auto squared_diff =
6914 ops::SquaredDifference(s.WithOpName("my_squared_diff"), x, y);
6915 return squared_diff.operation.node()->def();
6916 }
6917
6918 TEST_P(OpConverter_FP32_FP16_Test, ConvertSquaredDifference) {
6919 {
6920 // Input is a weight, should fail.
6921 Reset();
6922 NodeDef node_def = GetSquaredDifferenceNodeDef(tf_type_);
6923 AddTestWeights<float>("x", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
6924 AddTestTensor("y", {1, 1, 2, 3});
6925 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6926 "The input \"x\" for SquaredDifference must be "
6927 "a tensor, at my_squared_diff");
6928 }
6929
6930 struct TestParams {
6931 std::vector<int> dims_x;
6932 std::vector<int> dims_y;
6933 std::vector<float> value_x;
6934 std::vector<float> value_y;
6935 std::vector<int> expected_output_dims;
6936 std::vector<float> expected_output;
6937 Status status;
6938 Status runtime_status;
6939 };
6940
6941 const std::vector<float> common_input = InitTestVector<float>(6);
6942 std::vector<TestParams> params = {
6943 {/*dims_x=*/{1, 2, 3},
6944 /*dims_y=*/{1, 7, 5},
6945 /*value_x=*/common_input,
6946 /*value_y=*/std::vector<float>(7 * 5, 0),
6947 /*expected_output_dims=*/{1, 1, 2, 3},
6948 /*expected_output=*/common_input,
6949 trt_mode_ == TrtTestMode::kDynamicShape
6950 ? Status::OK()
6951 : errors::InvalidArgument("Infeasible broadcast scheme"),
6952 errors::Internal(
6953 "Binding index out of range. This can happen if profile is not set, "
6954 "or the network is invalid for the current profile.")},
6955 {
6956 /*dims_x=*/{1, 1, 2, 3},
6957 /*dims_y=*/{1, 1, 2, 3},
6958 /*value_x=*/common_input,
6959 /*value_y=*/{0, -1, 3, 0, 10, -7},
6960 /*expected_output_dims=*/{1, 1, 2, 3},
6961 /*expected_output=*/{0, 4, 1, 9, 36, 144},
6962 },
6963 {
6964 /*dims_x=*/{1, 1, 2, 3},
6965 /*dims_y=*/{1, 1, 1, 3},
6966 /*value_x=*/common_input,
6967 /*value_y=*/{0, 1, 2},
6968 /*expected_output_dims=*/{1, 1, 2, 3},
6969 /*expected_output=*/{0, 0, 0, 9, 9, 9},
6970 },
6971 };
6972
6973 for (auto p : params) {
6974 Reset();
6975 NodeDef node_def = GetSquaredDifferenceNodeDef(tf_type_);
6976 AddTestTensor("x", p.dims_x, p.value_x);
6977 AddTestTensor("y", p.dims_y, p.value_y);
6978 TestOpConverter("my_squared_diff", node_def, p.expected_output_dims,
6979 p.status, p.runtime_status,
6980 ElementsAreArray(p.expected_output));
6981 }
6982 }
6983
6984 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
6985 template <typename OpType>
6986 NodeDef MakeResizeNodeDef(DataType dtype, bool align_corners) {
6987 Scope s = Scope::NewRootScope();
6988 auto input = ops::Placeholder(s.WithOpName("input"), dtype);
6989 auto size = ops::Placeholder(s.WithOpName("size"), DT_INT32);
6990 auto attrs = typename OpType::Attrs().AlignCorners(align_corners);
6991 auto resize = OpType(s.WithOpName("my_resize"), input, size, attrs);
6992 return resize.operation.node()->def();
6993 }
6994
6995 struct ResizeTestParams {
6996 std::vector<int> input_dims;
6997 std::vector<int> output_resize_dims;
6998 std::vector<float> input_value;
6999 bool align_corners;
7000 std::vector<int> expected_output_dims;
7001 std::vector<float> expected_nearest_output_values;
7002 std::vector<float> expected_bilinear_output_values;
7003 Status status;
7004 };
7005
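// Builds and runs one resize test case; the expected values are selected based
// on whether the node is ResizeBilinear or ResizeNearestNeighbor.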
7006 template <typename OpType>
7007 void TestConvertResize(ParameterizedOpConverterTestBase* test,
7008 ResizeTestParams& p) {
7009 test->Reset();
7010 // Create resize node.
7011 NodeDef node_def =
7012 MakeResizeNodeDef<OpType>(test->get_tf_type(), p.align_corners);
7013
7014 test->AddTestTensor("input", p.input_dims, test->get_tf_type(),
7015 p.input_value);
7016 // Create output size.
7017 test->AddTestWeights("size", {2}, p.output_resize_dims, DT_INT32);
7018
7019 std::vector<float> expected_out;
7020
7021 if (node_def.op() == "ResizeBilinear") {
7022 expected_out = p.expected_bilinear_output_values;
7023 } else if (node_def.op() == "ResizeNearestNeighbor") {
7024 expected_out = p.expected_nearest_output_values;
7025 } else {
7026 ASSERT_TRUE(false);
7027 }
7028
7029 test->TestOpConverter("my_resize", node_def, p.expected_output_dims,
7030 /*expected_conversion_status=*/p.status,
7031 /*expected_runtime_status=*/p.status,
7032 /*matcher=*/ElementsAreArray(expected_out),
7033 /*out_tf_types=*/{DT_FLOAT});
7034 }
7035
7036 TEST_P(OpConverter_FP32_FP16_Test, ConvertResize) {
7037 {
7038 // First input is weight, should fail.
7039 Reset();
7040 NodeDef node_def = MakeResizeNodeDef<ops::ResizeBilinear>(tf_type_,
7041 /*align_corners=*/
7042 true);
7043 AddTestWeights<float>("input", {1, 2}, {1, 2});
7044 AddTestWeights<int>("size", {1, 2}, {1, 2});
7045 RunValidationAndConversion(
7046 node_def, error::UNIMPLEMENTED,
7047 "The input \"input\" for ResizeBilinear must be a "
7048 "tensor, at my_resize");
7049 }
7050 {
7051 // Output dimension is a tensor, should fail.
7052 Reset();
7053 NodeDef node_def = MakeResizeNodeDef<ops::ResizeBilinear>(tf_type_,
7054 /*align_corners=*/
7055 true);
7056 AddTestTensor("input", {1, 2});
7057 AddTestTensor("size", {1, 2});
7058 RunValidationAndConversion(
7059 node_def, error::UNIMPLEMENTED,
7060 "The input \"size\" for ResizeBilinear must be a "
7061 "constant, at my_resize");
7062 }
7063
7064 const auto job_status =
7065 trt_mode_ == TrtTestMode::kDynamicShape
7066 ? errors::Unimplemented(
7067 "TensorRT IResizeLayer requires input with static "
7068 "shape")
7069 : Status::OK();
7070
7071 std::vector<ResizeTestParams> params{
7072 {/*input_dims=*/{1, 1, 2, 1}, // N, H, W, C
7073 /*output_resize_dims=*/{2, 3}, // H_out, W_out
7074        /*input_value=*/{2.0f, -1.0f},
7075 /*align_corners=*/false,
7076 /*expected_output_dims=*/{1, 2, 3, 1}, // N, H, W, C
7077 /*expected_nearest_output_values=*/
7078 {2.0f, 2.0f, -1.0f, 2.0f, 2.0f, -1.0f},
7079 /*expected_bilinear_output_values=*/
7080 {2.0f, 0.f, -1.0f, 2.0f, 0.f, -1.0f},
7081 /*status=*/job_status},
7082 {/*input_dims=*/{1, 1, 2, 1}, // N, H, W, C
7083 /*output_resize_dims=*/{2, 3}, // H_out, W_out
7084        /*input_value=*/{2.0f, -1.0f},
7085 /*align_corners=*/true,
7086 /*expected_output_dims=*/{1, 2, 3, 1}, // N, H, W, C
7087 /*expected_nearest_output_values=*/
7088 {2.0f, 2.0f, -1.0f, 2.0f, 2.0f, -1.0f},
7089 /*expected_bilinear_output_values=*/
7090 {2.0f, 0.5f, -1.0f, 2.0f, 0.5f, -1.0f},
7091 /*status=*/job_status}};
7092
7093 for (auto p : params) {
7094 TestConvertResize<ops::ResizeNearestNeighbor>(this, p);
7095
7096 // This use case is not supported as of TRT version 7.1
7097 #if IS_TRT_VERSION_GE(7, 1, 0, 0)
7098 if (!p.align_corners) {
7099 p.status = errors::InvalidArgument(
7100 "Cannot Convert Bilinear Resize when align_corners=False");
7101 }
7102 #endif
7103
7104 TestConvertResize<ops::ResizeBilinear>(this, p);
7105 }
7106 }
7107 #endif // IS_TRT_VERSION_GE(6, 0, 0, 0)
7108
7109 NodeDef MakePadNodeDef(std::string name, DataType dtype) {
7110 Scope s = Scope::NewRootScope();
7111 auto input = ops::Placeholder(s.WithOpName("input"), dtype);
7112 auto padding = ops::Placeholder(s.WithOpName("padding"), DT_INT32);
7113 auto pad = ops::Pad(s.WithOpName(name), input, padding);
7114 return pad.operation.node()->def();
7115 }
7116
7117 struct PadTestParams {
7118 std::vector<int> input_dims;
7119 std::vector<int> pad_dims;
7120 std::vector<int> pad_values;
7121 std::vector<float> input_values;
7122 std::vector<int> expected_output_dims;
7123 std::vector<float> expected_output_values;
7124 Status status;
7125 };
7126
7127 TEST_P(OpConverter_FP32_FP16_Test, ConvertPad) {
7128 {
7129 // First input is weight, should fail.
7130 Reset();
7131 NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
7132 AddTestWeights("input", {1, 2}, {1, 2}, tf_type_);
7133 AddTestWeights<int>("padding", {1, 2}, {1, 2});
7134 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
7135 "The input \"tensor\" for Pad must be a "
7136 "tensor");
7137 }
7138 {
7139 // padding is a tensor, should fail.
7140 Reset();
7141 NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
7142 AddTestTensor("input", {1, 2});
7143 AddTestTensor("padding", {1, 2});
7144 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
7145 "The input \"paddings\" for Pad must be a "
7146 "constant");
7147 }
7148 {
7149 // Make sure that ranges are inferred across a Pad.
7150 Reset();
7151 NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
7152 AddTestTensor("input", {1, 1, 2, 1});
7153 AddTestWeights<int>("padding", {4, 2}, {0, 0, 1, 0, 0, 1, 0, 0});
7154 TRT_TensorOrWeights input;
7155 TRT_TensorOrWeights output;
7156 RunValidationAndConversion(node_def);
7157 TF_EXPECT_OK(GetTensorOrWeights("input", &input));
7158 TF_EXPECT_OK(GetTensorOrWeights("my_pad", &output));
7159 converter_->ProvideQuantizationRange(input.tensor(), -5.0f, 5.0f);
7160 // Input range should be inferred across pad.
7161 PropagateQuantizationRanges();
7162 auto ranges = quantization_ranges();
7163 EXPECT_EQ(5.0f, ranges[input.tensor()]);
7164 EXPECT_EQ(5.0f, ranges[output.tensor()]);
7165 }
7166
7167 std::vector<PadTestParams> params{
7168 // 1 padding dim
7169 {
7170 /*input_dims=*/{1, 1, 3, 2}, // N, H, W, C
7171 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
7172 /*pad_values*/ {0, 0, 0, 0, 0, 1, 0, 0},
7173 /*input_values=*/{1, 2, 3, 4, 5, 6},
7174 /*expected_output_dims=*/{1, 1, 4, 2}, // N, H, W, C
7175 /*expected_output_values=*/
7176 {1, 2, 3, 4, 5, 6, 0, 0},
7177 },
7178 {
7179 /*input_dims=*/{1, 1, 3, 2}, // N, H, W, C
7180 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
7181 /*pad_values*/ {0, 0, 0, 0, 0, 0, 0, 1},
7182 /*input_values=*/{1, 2, 3, 4, 5, 6},
7183 /*expected_output_dims=*/{1, 1, 3, 3}, // N, H, W, C
7184 /*expected_output_values=*/
7185 {1, 2, 0, 3, 4, 0, 5, 6, 0},
7186 },
7187 {
7188 /*input_dims=*/{1, 1, 3, 2}, // N, H, W, C
7189 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
7190 /*pad_values*/ {0, 0, 1, 0, 0, 0, 0, 0},
7191 /*input_values=*/{1, 2, 3, 4, 5, 6},
7192 /*expected_output_dims=*/{1, 2, 3, 2}, // N, H, W, C
7193 /*expected_output_values=*/
7194 {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6},
7195 },
7196 // 2 padding dims
7197 {
7198 /*input_dims=*/{1, 1, 2, 1}, // N, H, W, C
7199 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
7200 /*pad_values*/ {0, 0, 1, 0, 0, 1, 0, 0},
7201 /*input_values=*/{2.0f, -1.0f},
7202 /*expected_output_dims=*/{1, 2, 3, 1}, // N, H, W, C
7203 /*expected_output_values=*/
7204 {0.0, 0.0, 0.0, 2.0f, -1.0f, 0.0},
7205 },
7206 PadTestParams{
7207 /*input_dims=*/{1, 1, 2, 2}, // N, H, W, C
7208 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
7209 /*pad_values*/ {0, 0, 1, 0, 0, 1, 0, 0},
7210 /*input_values=*/{2, -1, 3., 4},
7211 /*expected_output_dims=*/{1, 2, 3, 2}, // N, H, W, C
7212 /*expected_output_values=*/
7213 {0, 0, 0, 0, 0, 0, 2, -1, 3, 4, 0, 0},
7214 },
7215 PadTestParams{
7216 /*input_dims=*/{1, 1, 2, 1, 2}, // N, C, H, W, D
7217 /*pad_dims=*/{5, 2}, // #dims, {pad_before, pad_after}
7218 /*pad_values*/ {0, 0, 1, 0, 0, 1, 0, 0, 0, 0},
7219 /*input_values=*/{2, -1, 3., 4},
7220           /*expected_output_dims=*/{1, 2, 3, 1, 2},  // N, C, H, W, D
7221 /*expected_output_values=*/
7222 {0, 0, 0, 0, 0, 0, 2, -1, 3, 4, 0, 0},
7223 },
7224 PadTestParams{
7225 /*input_dims=*/{1, 1, 2, 1, 2}, // N, C, H, W, D
7226 /*pad_dims=*/{5, 2}, // #dims, {pad_before, pad_after}
7227 /*pad_values*/ {0, 0, 0, 1, 0, 0, 1, 1, 0, 0},
7228 /*input_values=*/{2, -1, 3., 4},
7229           /*expected_output_dims=*/{1, 2, 2, 3, 2},  // N, C, H, W, D
7230 /*expected_output_values=*/
7231 {0., 0., 2., -1., 0., 0., 0., 0., 3., 4., 0., 0.,
7232 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0},
7233 },
7234 PadTestParams{
7235 /*input_dims=*/{1, 1, 2, 1}, // N, H, W, C
7236 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
7237 /*pad_values*/ {1, 0, 0, 0, 0, 1, 0, 0},
7238 /*input_values=*/{2.0f, -1.0f},
7239 /*expected_output_dims=*/{2, 1, 3, 1}, // N, H, W, C
7240 /*expected_output_values=*/{0.0, 0.0, 0.0, 2.0f, -1.0f, 0.0},
7241 trt_mode_ == TrtTestMode::kImplicitBatch
7242 ? errors::InvalidArgument("Padding layer does not support "
7243 "padding on batch dimension")
7244 : Status::OK()},
7245 PadTestParams{
7246 /*input_dims=*/{1, 1, 2, 1}, // N, H, W, C
7247 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
7248 /*pad_values*/ {0, 0, 1, 0, 0, 1, 1, 1},
7249 /*input_values=*/{2.0f, -1.0f},
7250           /*expected_output_dims=*/{},
7251 /*expected_output_values=*/{},
7252 errors::InvalidArgument("Padding layer does not support padding on "
7253 "> 2")},
7254 PadTestParams{
7255 /*input_dims=*/{1, 2, 2}, // N, H, W
7256 /*pad_dims=*/{3, 2}, // #dims, {pad_before, pad_after}
7257 /*pad_values*/ {0, 0, 1, 0, 0, 1},
7258 /*input_values=*/{2, -1, 3., 4},
7259           /*expected_output_dims=*/{1, 3, 3},  // N, H, W
7260 /*expected_output_values=*/
7261 {0., 0., 0., 2., -1., 0., 3., 4., 0.},
7262 errors::InvalidArgument("Convertpad requires at least 4D input, at "
7263 "my_pad")}};
7264
7265 for (auto p : params) {
7266 Reset();
7267 // Create pad node.
7268 NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
7269 // Create input tensor.
7270 AddTestTensor("input", p.input_dims, p.input_values);
7271 // Create output size.
7272 AddTestWeights<int32>("padding", p.pad_dims, p.pad_values);
7273 TestOpConverter("my_pad", node_def, p.expected_output_dims, p.status,
7274 p.status, ElementsAreArray(p.expected_output_values));
7275 }
7276 }
7277 } // namespace convert
7278 } // namespace tensorrt
7279 } // namespace tensorflow
7280
7281 #endif // GOOGLE_CUDA && GOOGLE_TENSORRT
7282