/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/container/inlined_vector.h"
#include "absl/memory/memory.h"
#include "absl/strings/str_join.h"
#include "tensorflow/compiler/tf2tensorrt/common/datavec.h"
#include "tensorflow/compiler/tf2tensorrt/common/utils.h"
#include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_engine_instance.pb.h"  // NOLINT
#include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h"
#include "tensorflow/core/common_runtime/device.h"
#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/resource_mgr.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/io/record_reader.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/platform/file_system.h"
#include "tensorflow/core/platform/path.h"
#include "tensorflow/core/platform/status.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/tstring.h"
#include "tensorflow/core/platform/types.h"

#if GOOGLE_CUDA && GOOGLE_TENSORRT

namespace tensorflow {
namespace tensorrt {

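// Parameters for one test case: the dimensions of the engine input, whether
// the engine is built with dynamic (runtime-determined) input shapes, and
// the number of network inputs.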
struct TestParam {
  nvinfer1::Dims dims;
  bool dynamic_shape;
  int n_inputs;
};

class TRTEngineResourceOpsTest
    : public OpsTestBase,
      public ::testing::WithParamInterface<TestParam> {
 public:
  TRTEngineResourceOpsTest() : param_(GetParam()) {}

 protected:
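  // Deletes the tensors owned by the test fixture and clears the kernel
  // inputs so the next op under test can be built from a clean state.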
  void Reset() {
    for (auto& temp : tensors_) {
      delete temp;
    }
    for (auto& temp : managed_outputs_) {
      delete temp;
    }
    tensors_.clear();
    managed_outputs_.clear();
    inputs_.clear();
  }

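  // Constructs a single-input network whose output is exp(input).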
  ITensorProxyPtr NetworkWith1Input(nvinfer1::INetworkDefinition* network,
                                    ITensorProxyPtr input) {
    // Add a unary layer.
    nvinfer1::IUnaryLayer* layer =
        network->addUnary(*input->trt_tensor(), nvinfer1::UnaryOperation::kEXP);
    EXPECT_NE(nullptr, layer);
    return layer->getOutput(0);
  }

  // Constructs a network with two inputs, where the second input is a shape
  // tensor that specifies the size of a slice taken from the first input
  // (assumed to be a 2D tensor). The slice is added to itself to produce the
  // output of the network.
  ITensorProxyPtr NetworkWith2Inputs(nvinfer1::INetworkDefinition* network,
                                     ITensorProxyPtr input) {
    nvinfer1::Dims dims2{1, {2}};
    ITensorProxyPtr input2 =
        network->addInput(absl::StrCat(IONamePrefixes::kInputPHName, 1).c_str(),
                          nvinfer1::DataType::kINT32, dims2);
    EXPECT_NE(nullptr, input2->trt_tensor());

    nvinfer1::Dims start{2, {0, 0}};
    nvinfer1::Dims stride{2, {1, 1}};
    // The third argument (the slice size) is a placeholder here; it is
    // overridden below by binding the shape tensor as input 2 of the layer.
    auto slice_layer =
        network->addSlice(*input->trt_tensor(), start, stride, stride);
    EXPECT_NE(nullptr, slice_layer);

    slice_layer->setInput(2, *input2->trt_tensor());
    ITensorProxyPtr sliced_input = slice_layer->getOutput(0);
    EXPECT_NE(nullptr, sliced_input->trt_tensor());

    auto layer = network->addElementWise(*sliced_input->trt_tensor(),
                                         *sliced_input->trt_tensor(),
                                         nvinfer1::ElementWiseOperation::kSUM);
    EXPECT_NE(nullptr, layer);
    return layer->getOutput(0);
  }

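  // Builds a TensorRT engine for the network selected by param_: input
  // dimensions are taken from param_.dims (or set to -1 for dynamic shapes),
  // and three optimization profiles are defined in the dynamic-shape case.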
  TrtUniquePtrType<nvinfer1::ICudaEngine> CreateTRTEngine() {
    TrtUniquePtrType<nvinfer1::IBuilder> builder(
        nvinfer1::createInferBuilder(logger_));
    TrtUniquePtrType<nvinfer1::INetworkDefinition> network(
        builder->createNetworkV2(
            1U << static_cast<int>(
                nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)));

    // Add the input.
    nvinfer1::Dims dims = this->param_.dims;
    if (this->param_.dynamic_shape) {
      std::fill(dims.d, dims.d + dims.nbDims, -1);
    }
    const std::string in_name = StrCat(IONamePrefixes::kInputPHName, 0);
    ITensorProxyPtr input =
        network->addInput(in_name.c_str(), nvinfer1::DataType::kFLOAT, dims);
    EXPECT_NE(nullptr, input->trt_tensor());
    // Mark the output.
    ITensorProxyPtr output =
        this->param_.n_inputs == 1
            ? this->NetworkWith1Input(network.get(), input)
            : this->NetworkWith2Inputs(network.get(), input);
    output->setName("output");
    network->markOutput(*output->trt_tensor());

    // Build the engine.
    TrtUniquePtrType<nvinfer1::IBuilderConfig> builder_config(
        builder->createBuilderConfig());
    builder_config->setMaxWorkspaceSize(1 << 10);
    builder->setMaxBatchSize(1);

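    // For dynamic shapes, TensorRT needs optimization profiles that bound the
    // dimensions each input can take at runtime; the block below defines
    // three such profiles and registers them with the builder configuration.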
    if (this->param_.dynamic_shape) {
      TrtShapeOptimizationProfile profile;
      profile.SetShapeTensorMask(network.get());
      const int n_input = param_.n_inputs;
      // Set the input mask to true (the network has no resource inputs).
      std::vector<bool> input_mask(n_input, true);
      profile.SetInputMask(input_mask);
      // The for loop defines three optimization profiles for the network.
      for (int i = 1; i <= 3; i++) {
        std::vector<TensorShape> shape_vec(n_input);
        // Define a shape with all dimensions set to 3*i.
        std::vector<int> dimvec(this->param_.dims.nbDims, 3 * i);
        TensorShape shape;
        TF_CHECK_OK(
            TensorShapeUtils::MakeShape(dimvec.data(), dimvec.size(), &shape));

        const ITensorProxyPtr input = network->getInput(0);
        const char* name = input->getName();
        VLOG(2) << "Defining profile for input " << name;
        shape_vec[0] = shape;
        if (this->param_.n_inputs == 2) {
          // The shape of the shape tensor.
          TF_CHECK_OK(TensorShapeUtils::MakeShape(
              std::vector<int32>{param_.dims.nbDims}, &shape));
          shape_vec[1] = shape;
          // Values of the shape tensor.
          Tensor shape_tensor(DT_INT32, shape);
          // Define shape values {1, i}, where 1 is the value of the first
          // dimension and i is the value of the second.
          std::vector<int32> vals{1, i};
          std::copy_n(vals.data(), vals.size(),
                      shape_tensor.flat<int32_t>().data());
          DataVec shape_values{{"one", {}}, {"two", shape_tensor}};
          TF_CHECK_OK(profile.CollectShapeValues(shape_values));
        } else {
          TF_CHECK_OK(profile.CollectShapeValues({{"one", {}}}));
        }
        profile.AddShape(shape_vec);
      }
      std::vector<PartialTensorShape> input_partial_shapes;
      TF_CHECK_OK(GetNetworkInputShapes(network.get(), &input_partial_shapes));
      profile.InitProfiles(input_partial_shapes,
                           ProfileStrategy::kImplicitBatchModeCompatible);
      // Add the optimization profiles to the builder configuration.
      TF_CHECK_OK(profile.ConfigureBuilder(builder.get(), builder_config.get(),
                                           network.get()));
    }
    VLOG(2) << "ConfigureBuilder Finished";
    TrtUniquePtrType<nvinfer1::ICudaEngine> engine(
        builder->buildEngineWithConfig(*network, *builder_config));
    VLOG(2) << "Engine constructed";
    EXPECT_NE(nullptr, engine);
    return engine;
  }

  Logger& logger_ = *Logger::GetLogger();
  TestParam param_;
};

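// The third test case uses a second input that is a shape tensor; it is only
// enabled for TensorRT 7.1.3 and later.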
#if IS_TRT_VERSION_GE(7, 1, 3, 0)
constexpr std::array<TestParam, 3> TestParameters = {
    TestParam{nvinfer1::Dims{1, {1}}, false, 1},
    TestParam{nvinfer1::Dims{1, {1}}, true, 1},
    TestParam{nvinfer1::Dims{2, {3, 3}}, true, 2}};
#else
constexpr std::array<TestParam, 2> TestParameters = {
    TestParam{nvinfer1::Dims{1, {1}}, false, 1},
    TestParam{nvinfer1::Dims{1, {1}}, true, 1}};
#endif

INSTANTIATE_TEST_CASE_P(EngineResourceOpsTestInstantiation,
                        TRTEngineResourceOpsTest,
                        ::testing::ValuesIn(TestParameters));

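// End-to-end test of the TRT engine resource ops: create a resource handle,
// initialize the resource from an empty file, add a freshly built engine to
// its cache, serialize the engine to disk while deleting the resource, verify
// the serialized record, then restore the resource from the file and destroy
// it again.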
TEST_P(TRTEngineResourceOpsTest, Basic) {
  // Create the GPU device.
  std::unique_ptr<Device> device(
      DeviceFactory::NewDevice("GPU", {}, "/job:worker/replica:0/task:0"));
  ResourceMgr* rm = device->resource_manager();
  SetDevice(DEVICE_GPU, std::move(device));

  // Create a resource handle.
  const string container(kTfTrtContainerName);
  const string resource_name = "myresource";
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "CreateTRTResourceHandle")
                   .Attr("resource_name", resource_name)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  TF_ASSERT_OK(RunOpKernel());
  ResourceHandle handle =
      context_->mutable_output(0)->scalar<ResourceHandle>()();

  // Check that a resource hasn't been created yet.
  TRTEngineCacheResource* resource = nullptr;
  EXPECT_TRUE(
      errors::IsNotFound(rm->Lookup(container, resource_name, &resource)));

  // Create a resource and use an empty file to initialize the resource.
  Reset();
  Env* env = Env::Default();
  const string filename = io::JoinPath(testing::TmpDir(), "trt_engine_file");
  {
    std::unique_ptr<WritableFile> file;
    TF_ASSERT_OK(env->NewWritableFile(filename, &file));
  }
  TF_ASSERT_OK(NodeDefBuilder("op", "InitializeTRTResource")
                   .Input(FakeInput(DT_RESOURCE))
                   .Input(FakeInput(DT_STRING))
                   .Attr("max_cached_engines_count", 1)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<ResourceHandle>(TensorShape({}), {handle});
  AddInputFromArray<tstring>(TensorShape({}), {filename});
  TF_ASSERT_OK(RunOpKernel());

  // Check that the resource is registered with the resource manager and the
  // cache of the resource is empty.
  EXPECT_TRUE(rm->Lookup(container, resource_name, &resource).ok());
  EXPECT_EQ(0, resource->cache_.size());

  // Create an engine and add it to the cache of the resource.
  TrtUniquePtrType<nvinfer1::ICudaEngine> engine = CreateTRTEngine();
  ExecutionContext context = ExecutionContext::Create(engine.get());

  std::vector<TensorShape> engine_input_shape(1);
  TF_ASSERT_OK(DimsAdapter(param_.dims).TensorShape(&(engine_input_shape[0])));
  if (param_.n_inputs > 1) {
    engine_input_shape.push_back(TensorShape({1, 1}));
  }
  resource->cache_.emplace(
      engine_input_shape,
      std::make_unique<EngineContext>(std::move(engine), std::move(context)));
  // Check that the resource has multiple references before it is unregistered
  // from the resource manager.
  EXPECT_FALSE(resource->RefCountIsOne());

  // Serialize the engine to a file and unregister the resource from the
  // resource manager.
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "SerializeTRTResource")
                   .Attr("delete_resource", true)
                   .Input(FakeInput(DT_STRING))
                   .Input(FakeInput(DT_STRING))
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<tstring>(TensorShape({}), {resource_name});
  AddInputFromArray<tstring>(TensorShape({}), {filename});
  TF_ASSERT_OK(RunOpKernel());
  // Check that the resource now has only one reference. Detach the reference
  // to the resource to destroy the resource.
  EXPECT_TRUE(resource->RefCountIsOne());
  resource->Unref();

  // Check that unregistering the resource from the resource manager returns
  // an error as the resource has already been unregistered.
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "DestroyResourceOp")
                   .Attr("ignore_lookup_error", false)
                   .Input(FakeInput(DT_RESOURCE))
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<ResourceHandle>(TensorShape({}), {handle});
  EXPECT_TRUE(errors::IsNotFound(RunOpKernel()));

  // Verify the file for the serialized engine.
  std::unique_ptr<RandomAccessFile> file;
  TF_ASSERT_OK(env->NewRandomAccessFile(filename, &file));
  auto reader = std::make_unique<io::RecordReader>(file.get());
  uint64 offset = 0;
  tstring record;
  TF_ASSERT_OK(reader->ReadRecord(&offset, &record));
  TRTEngineInstance engine_instance;
  engine_instance.ParseFromString(record);
  EXPECT_EQ(param_.n_inputs, engine_instance.input_shapes_size());
  EXPECT_EQ(param_.dims.nbDims, engine_instance.input_shapes(0).dim_size());
  for (int i = 0; i < param_.dims.nbDims; i++) {
    EXPECT_EQ(param_.dims.d[i], engine_instance.input_shapes(0).dim(i).size());
  }
  EXPECT_TRUE(errors::IsOutOfRange(reader->ReadRecord(&offset, &record)));

  // Recreate the resource and use the file with the serialized engine to
  // initialize the resource.
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "InitializeTRTResource")
                   .Input(FakeInput(DT_RESOURCE))
                   .Input(FakeInput(DT_STRING))
                   .Attr("max_cached_engines_count", 1)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<ResourceHandle>(TensorShape({}), {handle});
  AddInputFromArray<tstring>(TensorShape({}), {filename});
  TF_ASSERT_OK(RunOpKernel());

  // Check that the resource is registered with the resource manager again and
  // the cache of the resource is not empty.
  EXPECT_TRUE(rm->Lookup(container, resource_name, &resource).ok());
  EXPECT_EQ(1, resource->cache_.size());
  if (this->param_.dynamic_shape) {
    EXPECT_EQ(3, resource->profiles_.GetNumProfiles());
    EXPECT_EQ(3, resource->cache_.begin()->second->GetNumContexts());

    if (this->param_.n_inputs == 1) {
      // Check that the profiles are restored correctly.
      std::vector<TensorShape> shapes(1);
      // We create a shape vector that matches only profile 1.
      TF_CHECK_OK(
          TensorShapeUtils::MakeShape(std::vector<int32>{6}, &shapes[0]));
      EXPECT_EQ(1, resource->profiles_.GetProfileNumber(shapes));
    } else {
      // Check that the shape values are restored correctly.
      std::vector<TensorShape> shapes(2);
      // We create a shape vector that matches only profile 2.
      TF_CHECK_OK(
          TensorShapeUtils::MakeShape(std::vector<int32>{9, 9}, &shapes[0]));
      TF_CHECK_OK(
          TensorShapeUtils::MakeShape(std::vector<int32>{2}, &shapes[1]));
      Tensor shape_tensor(DT_INT32, shapes[1]);
      std::vector<int32> vals{1, 3};
      std::copy_n(vals.data(), vals.size(),
                  shape_tensor.flat<int32_t>().data());
      // The DataVec names are not used by CollectShapeValues; only the order
      // matters.
      DataVec shape_values{{"one", {}}, {"two", shape_tensor}};
      TF_CHECK_OK(resource->profiles_.CollectShapeValues(shape_values));
      EXPECT_EQ(2, resource->profiles_.GetProfileNumber(shapes));
    }
  }
  // Check that the resource has multiple references before it is unregistered
  // from the resource manager.
  EXPECT_FALSE(resource->RefCountIsOne());

  // Unregister the resource from the resource manager two times; expect that
  // the second attempt produces an error.
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "DestroyResourceOp")
                   .Attr("ignore_lookup_error", false)
                   .Input(FakeInput(DT_RESOURCE))
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<ResourceHandle>(TensorShape({}), {handle});
  TF_ASSERT_OK(RunOpKernel());
  EXPECT_TRUE(errors::IsNotFound(RunOpKernel()));

  // Check that the resource now has only one reference. Detach the reference
  // to the resource to destroy the resource.
  EXPECT_TRUE(resource->RefCountIsOne());
  resource->Unref();
}

}  // namespace tensorrt
}  // namespace tensorflow

#endif  // GOOGLE_CUDA && GOOGLE_TENSORRT