/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <algorithm>
#include <array>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/container/inlined_vector.h"
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "tensorflow/compiler/tf2tensorrt/common/datavec.h"
#include "tensorflow/compiler/tf2tensorrt/common/utils.h"
#include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_engine_instance.pb.h"  // NOLINT
#include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h"
#include "tensorflow/core/common_runtime/device.h"
#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/resource_mgr.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/io/record_reader.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/platform/file_system.h"
#include "tensorflow/core/platform/path.h"
#include "tensorflow/core/platform/status.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/tstring.h"
#include "tensorflow/core/platform/types.h"

#if GOOGLE_CUDA && GOOGLE_TENSORRT

namespace tensorflow {
namespace tensorrt {

struct TestParam {
  nvinfer1::Dims dims;
  bool dynamic_shape;
  int n_inputs;
};

class TRTEngineResourceOpsTest
    : public OpsTestBase,
      public ::testing::WithParamInterface<TestParam> {
 public:
  TRTEngineResourceOpsTest() : param_(GetParam()) {}

 protected:
  void Reset() {
    for (auto& temp : tensors_) {
      delete temp;
    }
    for (auto& temp : managed_outputs_) {
      delete temp;
    }
    tensors_.clear();
    managed_outputs_.clear();
    inputs_.clear();
  }

  ITensorProxyPtr NetworkWith1Input(nvinfer1::INetworkDefinition* network,
                                    ITensorProxyPtr input) {
    // Add a unary layer.
    nvinfer1::IUnaryLayer* layer = network->addUnary(
        *input->trt_tensor(), nvinfer1::UnaryOperation::kEXP);
    EXPECT_NE(nullptr, layer);
    return layer->getOutput(0);
  }

  // Constructs a network with two inputs, where the second input is a shape
  // tensor. We take a slice of the first input with the size of the slice
  // specified by the second input, assuming the first input is a 2D tensor.
  // We then add the slice to itself to produce the output of the network.
  ITensorProxyPtr NetworkWith2Inputs(nvinfer1::INetworkDefinition* network,
                                     ITensorProxyPtr input) {
    nvinfer1::Dims dims2{1, {2}};
    ITensorProxyPtr input2 =
        network->addInput(absl::StrCat(IONamePrefixes::kInputPHName, 1).c_str(),
                          nvinfer1::DataType::kINT32, dims2);
    EXPECT_NE(nullptr, input2->trt_tensor());

    nvinfer1::Dims start{2, {0, 0}};
    nvinfer1::Dims stride{2, {1, 1}};
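    // The size argument passed to addSlice() below is only a placeholder:
    // binding input2 to slice input index 2 via setInput(2, ...) makes the
    // slice size come from the shape tensor at runtime.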
    auto slice_layer =
        network->addSlice(*input->trt_tensor(), start, stride, stride);
    EXPECT_NE(nullptr, slice_layer);

    slice_layer->setInput(2, *input2->trt_tensor());
    ITensorProxyPtr sliced_input = slice_layer->getOutput(0);
    EXPECT_NE(nullptr, sliced_input->trt_tensor());

    auto layer = network->addElementWise(*sliced_input->trt_tensor(),
                                         *sliced_input->trt_tensor(),
                                         nvinfer1::ElementWiseOperation::kSUM);
    EXPECT_NE(nullptr, layer);
    return layer->getOutput(0);
  }

  TrtUniquePtrType<nvinfer1::ICudaEngine> CreateTRTEngine() {
    TrtUniquePtrType<nvinfer1::IBuilder> builder(
        nvinfer1::createInferBuilder(logger_));
    TrtUniquePtrType<nvinfer1::INetworkDefinition> network(
        builder->createNetworkV2(
            1U << static_cast<int>(
                nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)));

    // Add the input.
    nvinfer1::Dims dims = this->param_.dims;
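    // In dynamic shape mode, mark every dimension as unknown (-1); the
    // concrete ranges come from the optimization profiles added below.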
    if (this->param_.dynamic_shape) {
      std::fill(dims.d, dims.d + dims.nbDims, -1);
    }
    const std::string in_name = absl::StrCat(IONamePrefixes::kInputPHName, 0);
    ITensorProxyPtr input =
        network->addInput(in_name.c_str(), nvinfer1::DataType::kFLOAT, dims);
    EXPECT_NE(nullptr, input->trt_tensor());
    // Mark the output.
    ITensorProxyPtr output =
        this->param_.n_inputs == 1
            ? this->NetworkWith1Input(network.get(), input)
            : this->NetworkWith2Inputs(network.get(), input);
    output->setName("output");
    network->markOutput(*output->trt_tensor());

    // Build the engine.
    TrtUniquePtrType<nvinfer1::IBuilderConfig> builder_config(
        builder->createBuilderConfig());
    builder_config->setMaxWorkspaceSize(1 << 10);
    builder->setMaxBatchSize(1);

    if (this->param_.dynamic_shape) {
      TrtShapeOptimizationProfile profile;
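      // Let the profile discover which network inputs are shape tensors
      // (input2 in the two-input network), since their values, not just
      // their shapes, must be recorded.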
      profile.SetShapeTensorMask(network.get());
      const int n_input = param_.n_inputs;
      // All inputs are regular tensor inputs (no resource inputs).
      std::vector<bool> input_mask(n_input, true);
      profile.SetInputMask(input_mask);
      // The loop below defines three optimization profiles for the network.
      for (int i = 1; i <= 3; i++) {
        std::vector<TensorShape> shape_vec(n_input);
        // Define a shape with all dimensions set to 3*i.
        std::vector<int> dimvec(this->param_.dims.nbDims, 3 * i);
        TensorShape shape;
        TF_CHECK_OK(
            TensorShapeUtils::MakeShape(dimvec.data(), dimvec.size(), &shape));

        const ITensorProxyPtr input = network->getInput(0);
        const char* name = input->getName();
        VLOG(2) << "Defining profile for input " << name;
        shape_vec[0] = shape;
        if (this->param_.n_inputs == 2) {
          // The shape of the shape tensor.
          TF_CHECK_OK(TensorShapeUtils::MakeShape(
              std::vector<int32>{param_.dims.nbDims}, &shape));
          shape_vec[1] = shape;
          // Values of the shape tensor.
          Tensor shape_tensor(DT_INT32, shape);
          // Define shape values {1, i}: 1 for the first dimension, i for the
          // second.
          std::vector<int32> vals{1, i};
          std::copy_n(vals.data(), vals.size(),
                      shape_tensor.flat<int32_t>().data());
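          // CollectShapeValues only looks at the order of the DataVec
          // entries, not their names; the first (execution tensor) entry
          // carries no shape values.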
          DataVec shape_values{{"one", {}}, {"two", shape_tensor}};
          TF_CHECK_OK(profile.CollectShapeValues(shape_values));
        } else {
          TF_CHECK_OK(profile.CollectShapeValues({{"one", {}}}));
        }
        profile.AddShape(shape_vec);
      }
      std::vector<PartialTensorShape> input_partial_shapes;
      TF_CHECK_OK(GetNetworkInputShapes(network.get(), &input_partial_shapes));
      profile.InitProfiles(input_partial_shapes,
                           ProfileStrategy::kImplicitBatchModeCompatible);
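      // InitProfiles turns the collected shapes into min/opt/max profile
      // ranges using the implicit-batch-compatible strategy.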
      // Add the profiles to the builder configuration.
      TF_CHECK_OK(profile.ConfigureBuilder(builder.get(), builder_config.get(),
                                           network.get()));
    }
    VLOG(2) << "ConfigureBuilder finished";
    TrtUniquePtrType<nvinfer1::ICudaEngine> engine(
        builder->buildEngineWithConfig(*network, *builder_config));
    VLOG(2) << "Engine constructed";
    EXPECT_NE(nullptr, engine);
    return engine;
  }

  Logger& logger_ = *Logger::GetLogger();
  TestParam param_;
};

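// The third parameterization exercises a shape tensor input, which requires
// the dynamic shape support available in TRT 7.1.3 and later.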
#if IS_TRT_VERSION_GE(7, 1, 3, 0)
constexpr std::array<TestParam, 3> TestParameters = {
    TestParam{nvinfer1::Dims{1, {1}}, false, 1},
    TestParam{nvinfer1::Dims{1, {1}}, true, 1},
    TestParam{nvinfer1::Dims{2, {3, 3}}, true, 2}};
#else
constexpr std::array<TestParam, 2> TestParameters = {
    TestParam{nvinfer1::Dims{1, {1}}, false, 1},
    TestParam{nvinfer1::Dims{1, {1}}, true, 1}};
#endif

INSTANTIATE_TEST_CASE_P(EngineResourceOpsTestInstantiation,
                        TRTEngineResourceOpsTest,
                        ::testing::ValuesIn(TestParameters));

TEST_P(TRTEngineResourceOpsTest, Basic) {
  // Create the GPU device.
  std::unique_ptr<Device> device(
      DeviceFactory::NewDevice("GPU", {}, "/job:worker/replica:0/task:0"));
  ResourceMgr* rm = device->resource_manager();
  SetDevice(DEVICE_GPU, std::move(device));

  // Create a resource handle.
  const string container(kTfTrtContainerName);
  const string resource_name = "myresource";
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "CreateTRTResourceHandle")
                   .Attr("resource_name", resource_name)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  TF_ASSERT_OK(RunOpKernel());
  ResourceHandle handle =
      context_->mutable_output(0)->scalar<ResourceHandle>()();

  // Check that the resource has not been created yet.
  TRTEngineCacheResource* resource = nullptr;
  EXPECT_TRUE(
      errors::IsNotFound(rm->Lookup(container, resource_name, &resource)));

  // Create the resource, using an empty file to initialize it.
  Reset();
  Env* env = Env::Default();
  const string filename = io::JoinPath(testing::TmpDir(), "trt_engine_file");
  {
    std::unique_ptr<WritableFile> file;
    TF_ASSERT_OK(env->NewWritableFile(filename, &file));
  }
  TF_ASSERT_OK(NodeDefBuilder("op", "InitializeTRTResource")
                   .Input(FakeInput(DT_RESOURCE))
                   .Input(FakeInput(DT_STRING))
                   .Attr("max_cached_engines_count", 1)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<ResourceHandle>(TensorShape({}), {handle});
  AddInputFromArray<tstring>(TensorShape({}), {filename});
  TF_ASSERT_OK(RunOpKernel());

  // Check that the resource is registered with the resource manager and that
  // its cache is empty.
  EXPECT_TRUE(rm->Lookup(container, resource_name, &resource).ok());
  EXPECT_EQ(0, resource->cache_.size());

  // Create an engine and add it to the cache of the resource.
  TrtUniquePtrType<nvinfer1::ICudaEngine> engine = CreateTRTEngine();
  ExecutionContext context = ExecutionContext::Create(engine.get());

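  // The cache is keyed by the vector of input shapes; the value owns the
  // engine together with its execution context.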
  std::vector<TensorShape> engine_input_shape(1);
  TF_ASSERT_OK(DimsAdapter(param_.dims).TensorShape(&(engine_input_shape[0])));
  if (param_.n_inputs > 1) {
    engine_input_shape.push_back(TensorShape({1, 1}));
  }
  resource->cache_.emplace(
      engine_input_shape,
      std::make_unique<EngineContext>(std::move(engine), std::move(context)));
  // Check that the resource has multiple references before it is unregistered
  // from the resource manager.
  EXPECT_FALSE(resource->RefCountIsOne());

  // Serialize the engine to a file and unregister the resource from the
  // resource manager.
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "SerializeTRTResource")
                   .Attr("delete_resource", true)
                   .Input(FakeInput(DT_STRING))
                   .Input(FakeInput(DT_STRING))
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<tstring>(TensorShape({}), {resource_name});
  AddInputFromArray<tstring>(TensorShape({}), {filename});
  TF_ASSERT_OK(RunOpKernel());
  // Check that the resource now has only one reference. Release that
  // reference to destroy the resource.
  EXPECT_TRUE(resource->RefCountIsOne());
  resource->Unref();

  // Check that unregistering the resource from the resource manager returns
  // an error, as the resource has already been unregistered.
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "DestroyResourceOp")
                   .Attr("ignore_lookup_error", false)
                   .Input(FakeInput(DT_RESOURCE))
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<ResourceHandle>(TensorShape({}), {handle});
  EXPECT_TRUE(errors::IsNotFound(RunOpKernel()));

  // Verify the file for the serialized engine.
  std::unique_ptr<RandomAccessFile> file;
  TF_ASSERT_OK(env->NewRandomAccessFile(filename, &file));
  auto reader = std::make_unique<io::RecordReader>(file.get());
  uint64 offset = 0;
  tstring record;
  TF_ASSERT_OK(reader->ReadRecord(&offset, &record));
  TRTEngineInstance engine_instance;
  EXPECT_TRUE(engine_instance.ParseFromString(record));
  EXPECT_EQ(param_.n_inputs, engine_instance.input_shapes_size());
  EXPECT_EQ(param_.dims.nbDims, engine_instance.input_shapes(0).dim_size());
  for (int i = 0; i < param_.dims.nbDims; i++) {
    EXPECT_EQ(param_.dims.d[i], engine_instance.input_shapes(0).dim(i).size());
  }
  EXPECT_TRUE(errors::IsOutOfRange(reader->ReadRecord(&offset, &record)));

  // Recreate the resource and use the file with the serialized engine to
  // initialize the resource.
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "InitializeTRTResource")
                   .Input(FakeInput(DT_RESOURCE))
                   .Input(FakeInput(DT_STRING))
                   .Attr("max_cached_engines_count", 1)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<ResourceHandle>(TensorShape({}), {handle});
  AddInputFromArray<tstring>(TensorShape({}), {filename});
  TF_ASSERT_OK(RunOpKernel());

  // Check that the resource is registered with the resource manager again and
  // that its cache is not empty.
  EXPECT_TRUE(rm->Lookup(container, resource_name, &resource).ok());
  EXPECT_EQ(1, resource->cache_.size());
  if (this->param_.dynamic_shape) {
    EXPECT_EQ(3, resource->profiles_.GetNumProfiles());
    EXPECT_EQ(3, resource->cache_.begin()->second->GetNumContexts());

    if (this->param_.n_inputs == 1) {
      // Check that the profiles are restored correctly.
      std::vector<TensorShape> shapes(1);
      // We create a shape vector that matches only profile 1.
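      // (Profiles were built for dims 3*i, i = 1..3, so shape {6} matches
      // the i = 2 profile, index 1.)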
      TF_CHECK_OK(
          TensorShapeUtils::MakeShape(std::vector<int32>{6}, &shapes[0]));
      EXPECT_EQ(1, resource->profiles_.GetProfileNumber(shapes));
    } else {
      // Check that the shape values are restored correctly.
      std::vector<TensorShape> shapes(2);
      // We create a shape vector that matches only profile 2.
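      // ({9, 9} with shape-tensor values {1, 3} matches the i = 3 profile,
      // index 2.)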
      TF_CHECK_OK(
          TensorShapeUtils::MakeShape(std::vector<int32>{9, 9}, &shapes[0]));
      TF_CHECK_OK(
          TensorShapeUtils::MakeShape(std::vector<int32>{2}, &shapes[1]));
      Tensor shape_tensor(DT_INT32, shapes[1]);
      std::vector<int32> vals{1, 3};
      std::copy_n(vals.data(), vals.size(),
                  shape_tensor.flat<int32_t>().data());
      // The DataVec names are not used by CollectShapeValues; only the order
      // matters.
      DataVec shape_values{{"one", {}}, {"two", shape_tensor}};
      TF_CHECK_OK(resource->profiles_.CollectShapeValues(shape_values));
      EXPECT_EQ(2, resource->profiles_.GetProfileNumber(shapes));
    }
  }
  // Check that the resource has multiple references before it is unregistered
  // from the resource manager.
  EXPECT_FALSE(resource->RefCountIsOne());

  // Unregister the resource from the resource manager twice; the second
  // attempt is expected to produce an error.
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "DestroyResourceOp")
                   .Attr("ignore_lookup_error", false)
                   .Input(FakeInput(DT_RESOURCE))
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<ResourceHandle>(TensorShape({}), {handle});
  TF_ASSERT_OK(RunOpKernel());
  EXPECT_TRUE(errors::IsNotFound(RunOpKernel()));

  // Check that the resource now has only one reference. Release that
  // reference to destroy the resource.
  EXPECT_TRUE(resource->RefCountIsOne());
  resource->Unref();
}

}  // namespace tensorrt
}  // namespace tensorflow

#endif  // GOOGLE_CUDA && GOOGLE_TENSORRT