/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/tf2tensorrt/convert/convert_graph.h"

#include <regex>  // NOLINT

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "tensorflow/cc/framework/ops.h"
#include "tensorflow/cc/framework/scope.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h"
#include "tensorflow/core/common_runtime/device_mgr.h"
#include "tensorflow/core/common_runtime/device_set.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/grappler/clusters/cluster.h"
#include "tensorflow/core/grappler/grappler_item.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/protobuf/config.pb.h"  // NOLINT
#include "tensorflow/core/public/session.h"

#if GOOGLE_CUDA && GOOGLE_TENSORRT

namespace tensorflow {
namespace tensorrt {
namespace convert {

// TODO(laigd): put this into some test utils file.
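// Checks that the given status has the expected error code and, if substr is
// non-null, that its error message contains substr.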
void ExpectStatus(Status status, error::Code code = error::OK,
                  const char* substr = nullptr) {
  EXPECT_EQ(code, status.code())
      << status << " vs expected error code \"" << error::Code_Name(code)
      << "\" and message \"" << substr << "\"";
  if (substr) {
    EXPECT_THAT(status.error_message(), ::testing::HasSubstr(substr)) << status;
  }
}

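// A bare-bones grappler::Cluster whose only meaningful state is the DeviceSet
// handed to it via SetDeviceSet(); the remaining virtual methods are no-ops.
// Used to exercise the cluster-dependent paths of GetDeviceAndAllocator().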
class FakeCluster : public grappler::Cluster {
 public:
  FakeCluster() : Cluster(0) {}

  void SetDeviceSet(const DeviceSet* device_set) { device_set_ = device_set; }

  const DeviceSet* GetDeviceSet() const override { return device_set_; }

  string type() const override { return ""; }
  Status Provision() override { return Status::OK(); }
  Status Initialize(const grappler::GrapplerItem& item) override {
    return Status::OK();
  }
  Status Run(const GraphDef& graph_def,
             const std::vector<std::pair<string, Tensor>>& feed,
             const std::vector<string>& fetch, RunMetadata* metadata) override {
    return Status::OK();
  }

 private:
  const DeviceSet* device_set_ = nullptr;
};

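// Checks that GetDeviceAndAllocator() returns the right GPU id and allocator,
// or {-1, nullptr} on failure, under various combinations of cluster, device
// set, and requested engine device.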
TEST(ConvertGraphTest, GetDeviceAndAllocator) {
  ConversionParams params;
  EngineInfo engine_info;
  {
    // params.cluster is not set, and no gpu device is available.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(-1, result.first);
    EXPECT_EQ(nullptr, result.second);
  }

  // Create a session with two (virtual) gpu devices.
  SessionOptions options;
  ConfigProto* config = &options.config;
  GPUOptions* gpu_options = config->mutable_gpu_options();
  auto virtual_devices =
      gpu_options->mutable_experimental()->add_virtual_devices();
  virtual_devices->add_memory_limit_mb(200);
  virtual_devices->add_memory_limit_mb(200);
  std::unique_ptr<Session> session(NewSession(options));

  {
    // params.cluster is not set, should find and return first gpu id and
    // corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  FakeCluster cluster;
  params.cluster = &cluster;
  {
    // params.cluster->GetDeviceSet() returns null, should find and return
    // first gpu id and corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  // Build the DeviceSet.
  DeviceSet device_set;
  const DeviceMgr* device_mgr = nullptr;
  TF_ASSERT_OK(session->LocalDeviceManager(&device_mgr));
  for (auto d : device_mgr->ListDevices()) {
    device_set.AddDevice(d);
  }
  cluster.SetDeviceSet(&device_set);
  {
    // engine_info.device is not set, should find and return first gpu id and
    // corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  engine_info.device = "/GPU:1";
  {
    // Set to use the second device. Both virtual devices are backed by the
    // same physical GPU, so the returned device id is still 0, while the
    // allocator should be the one created for the second virtual device.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_1_bfc", result.second->Name());
  }

  engine_info.device = "/GPU:3";
  {
    // Set to use nonexistent device.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(-1, result.first);
    EXPECT_EQ(nullptr, result.second);
  }
}

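// Fixture that infers static shapes for the graph built in the given scope
// and then runs ConvertAfterShapes() on it.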
class ConvertAfterShapesTest : public ::testing::Test {
 public:
  Status RunConvertAfterShape(Scope s, GraphDef* output_graph_def,
                              int maximum_batch_size = 1000) {
    // Create GraphProperties.
    grappler::GrapplerItem item;
    TF_EXPECT_OK(s.ToGraphDef(&item.graph));
    grappler::GraphProperties graph_properties(item);
    TF_EXPECT_OK(graph_properties.InferStatically(true));

    // Construct ConversionParams.
    const std::vector<string> output_names{"output"};
    ConversionParams params;
    params.output_names = &output_names;
    params.max_batch_size = maximum_batch_size;
    params.max_workspace_size_bytes = 8 << 20;
    params.output_graph_def = output_graph_def;
    params.minimum_segment_size = 1;
    params.grappler_item = &item;
    params.use_calibration = false;
    params.trt_logger_name = "DefaultLogger";

    return ConvertAfterShapes(params);
  }
};

TEST_F(ConvertAfterShapesTest, DirectlyConnectedEngines) {
  // Create the graph. There will be two TRTEngineOps after the conversion, and
  // the upstream TRTEngineOp will have two output connections from the same
  // node:port inside the op to the downstream TRTEngineOp. If the conversion
  // adds the downstream TRTEngineOp first, it will need to update the same
  // output connection twice when adding the upstream op. This test ensures the
  // conversion is correct in that situation.
  Scope s = Scope::NewRootScope();
  auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT,
                                ops::Placeholder::Shape({2, 1}));
  // We purposefully choose the names of the segment root nodes so that the
  // downstream segment is processed first; then, when the edge between the two
  // TRTEngineOps is updated, the conversion will try to add the same edge
  // multiple times.
  auto segment_root_1 = ops::Identity(s.WithOpName("segment_root_b"), input);
  auto add1 = ops::Add(s.WithOpName("add1"), segment_root_1, segment_root_1);
  // Add incompatible reshapes that change the batch dimension.
  auto incompatible =
      ops::Reshape(s.WithOpName("reshape1"), add1, Input({1, 2}));
  incompatible =
      ops::Reshape(s.WithOpName("reshape2"), incompatible, Input({2, 1}));

  auto add2 = ops::Add(s.WithOpName("add2"), incompatible, add1);
  auto segment_root_2 = ops::Identity(s.WithOpName("segment_root_a"), add1);
  auto add3 = ops::Add(s.WithOpName("add3"), add2, segment_root_2);
  ops::Identity(s.WithOpName("output"), add3);

  GraphDef output_graph_def;
  TF_EXPECT_OK(RunConvertAfterShape(s, &output_graph_def));

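  // Generated TRTEngineOp names may carry a per-graph sequence number; strip
  // it so the checks below only depend on the per-engine index.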
  auto remove_graph_sequence_number = [](std::string node_name) {
    const std::regex pattern("TRTEngineOp_[0-9]+_");
    return std::regex_replace(node_name, pattern, "TRTEngineOp_");
  };
  int num_trt_ops = 0;
  for (const NodeDef& node : output_graph_def.node()) {
    std::string node_name = node.name();
    if (node.op() != "TRTEngineOp") continue;
    node_name = remove_graph_sequence_number(node_name);
    if (node_name == "TRTEngineOp_1") {
      EXPECT_EQ(1, node.input_size());
      EXPECT_EQ("input", node.input(0));
      ++num_trt_ops;
    } else if (node_name == "TRTEngineOp_0") {
      EXPECT_EQ(2, node.input_size());
      EXPECT_EQ("TRTEngineOp_1", remove_graph_sequence_number(node.input(0)));
      EXPECT_EQ("reshape2", node.input(1));
      ++num_trt_ops;
    }
  }
  EXPECT_EQ(2, num_trt_ops);
}

}  // namespace convert
}  // namespace tensorrt
}  // namespace tensorflow

#endif  // GOOGLE_CUDA && GOOGLE_TENSORRT