/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
16 #include "tensorflow/compiler/tf2tensorrt/convert/convert_graph.h"
17 
18 #include <regex>  // NOLINT
19 
20 #include <gmock/gmock.h>
21 #include <gtest/gtest.h>
22 #include "tensorflow/cc/framework/ops.h"
23 #include "tensorflow/cc/framework/scope.h"
24 #include "tensorflow/cc/ops/standard_ops.h"
25 #include "tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h"
26 #include "tensorflow/core/common_runtime/device_mgr.h"
27 #include "tensorflow/core/common_runtime/device_set.h"
28 #include "tensorflow/core/framework/tensor_shape.h"
29 #include "tensorflow/core/grappler/clusters/cluster.h"
30 #include "tensorflow/core/grappler/grappler_item.h"
31 #include "tensorflow/core/lib/core/status.h"
32 #include "tensorflow/core/lib/core/status_test_util.h"
33 #include "tensorflow/core/platform/test.h"
34 #include "tensorflow/core/protobuf/config.pb.h"  // NOLINT
35 #include "tensorflow/core/public/session.h"
36 
37 #if GOOGLE_CUDA && GOOGLE_TENSORRT
38 
39 namespace tensorflow {
40 namespace tensorrt {
41 namespace convert {
42 
43 // TODO(laigd): put this into some test utils file.
ExpectStatus(Status status,error::Code code=error::OK,const char * substr=nullptr)44 void ExpectStatus(Status status, error::Code code = error::OK,
45                   const char* substr = nullptr) {
46   EXPECT_EQ(code, status.code())
47       << status << " vs expected error code \"" << error::Code_Name(code)
48       << "\" and message \"" << substr << "\"";
49   if (substr) {
50     EXPECT_THAT(status.error_message(), ::testing::HasSubstr(substr)) << status;
51   }
52 }
53 
54 class FakeCluster : public grappler::Cluster {
55  public:
FakeCluster()56   FakeCluster() : Cluster(0) {}
57 
SetDeviceSet(const DeviceSet * device_set)58   void SetDeviceSet(const DeviceSet* device_set) { device_set_ = device_set; }
59 
GetDeviceSet() const60   const DeviceSet* GetDeviceSet() const override { return device_set_; }
61 
type() const62   string type() const override { return ""; }
Provision()63   Status Provision() override { return Status::OK(); }
Initialize(const grappler::GrapplerItem & item)64   Status Initialize(const grappler::GrapplerItem& item) override {
65     return Status::OK();
66   }
Run(const GraphDef & graph_def,const std::vector<std::pair<string,Tensor>> & feed,const std::vector<string> & fetch,RunMetadata * metadata)67   Status Run(const GraphDef& graph_def,
68              const std::vector<std::pair<string, Tensor>>& feed,
69              const std::vector<string>& fetch, RunMetadata* metadata) override {
70     return Status::OK();
71   }
72 
73  private:
74   const DeviceSet* device_set_ = nullptr;
75 };
76 
TEST(ConvertGraphTest,GetDeviceAndAllocator)77 TEST(ConvertGraphTest, GetDeviceAndAllocator) {
78   ConversionParams params;
79   EngineInfo engine_info;
80   {
81     // params.cluster is not set, and no gpu device is available.
82     auto result = GetDeviceAndAllocator(params, engine_info);
83     EXPECT_EQ(-1, result.first);
84     EXPECT_EQ(nullptr, result.second);
85   }
86 
87   // Create a session with two (virtual) gpu device.
88   SessionOptions options;
89   ConfigProto* config = &options.config;
90   GPUOptions* gpu_options = config->mutable_gpu_options();
91   auto virtual_devices =
92       gpu_options->mutable_experimental()->add_virtual_devices();
93   virtual_devices->add_memory_limit_mb(200);
94   virtual_devices->add_memory_limit_mb(200);
95   std::unique_ptr<Session> session(NewSession(options));
96 
97   {
98     // params.cluster is not set, should find and return first gpu id and
99     // corresponding allocator.
100     auto result = GetDeviceAndAllocator(params, engine_info);
101     EXPECT_EQ(0, result.first);
102     EXPECT_NE(nullptr, result.second);
103     EXPECT_EQ("GPU_0_bfc", result.second->Name());
104   }
105 
106   FakeCluster cluster;
107   params.cluster = &cluster;
108   {
109     // params.cluster->GetDeviceSet() returns null, should find and return first
110     // gpu id and corresponding allocator.
111     auto result = GetDeviceAndAllocator(params, engine_info);
112     EXPECT_EQ(0, result.first);
113     EXPECT_NE(nullptr, result.second);
114     EXPECT_EQ("GPU_0_bfc", result.second->Name());
115   }
116 
117   // Build the DeviceSet.
118   DeviceSet device_set;
119   const DeviceMgr* device_mgr = nullptr;
120   TF_ASSERT_OK(session->LocalDeviceManager(&device_mgr));
121   for (auto d : device_mgr->ListDevices()) {
122     device_set.AddDevice(d);
123   }
124   cluster.SetDeviceSet(&device_set);
125   {
126     // engine_info.device is not set, should find and return first gpu id and
127     // corresponding allocator.
128     auto result = GetDeviceAndAllocator(params, engine_info);
129     EXPECT_EQ(0, result.first);
130     EXPECT_NE(nullptr, result.second);
131     EXPECT_EQ("GPU_0_bfc", result.second->Name());
132   }
133 
134   engine_info.device = "/GPU:1";
135   {
136     // Set to use second device.
137     auto result = GetDeviceAndAllocator(params, engine_info);
138     EXPECT_EQ(0, result.first);
139     EXPECT_NE(nullptr, result.second);
140     EXPECT_EQ("GPU_1_bfc", result.second->Name());
141   }
142 
143   engine_info.device = "/GPU:3";
144   {
145     // Set to use nonexistent device.
146     auto result = GetDeviceAndAllocator(params, engine_info);
147     EXPECT_EQ(-1, result.first);
148     EXPECT_EQ(nullptr, result.second);
149   }
150 }
151 
152 class ConvertAfterShapesTest : public ::testing::Test {
153  public:
RunConvertAfterShape(Scope s,GraphDef * output_graph_def,int maximum_batch_size=1000)154   Status RunConvertAfterShape(Scope s, GraphDef* output_graph_def,
155                               int maximum_batch_size = 1000) {
156     // Create GraphProperties.
157     grappler::GrapplerItem item;
158     TF_EXPECT_OK(s.ToGraphDef(&item.graph));
159     grappler::GraphProperties graph_properties(item);
160     TF_EXPECT_OK(graph_properties.InferStatically(true));
161 
162     // Construct ConversionParams.
163     const std::vector<string> output_names{"output"};
164     ConversionParams params;
165     params.output_names = &output_names;
166     params.max_batch_size = maximum_batch_size;
167     params.max_workspace_size_bytes = 8 << 20;
168     params.output_graph_def = output_graph_def;
169     params.minimum_segment_size = 1;
170     params.grappler_item = &item;
171     params.use_calibration = false;
172     params.trt_logger_name = "DefaultLogger";
173 
174     return ConvertAfterShapes(params);
175   }
176 };
177 
TEST_F(ConvertAfterShapesTest, DirectlyConnectedEngines) {
  // Create the graph. There will be two TRTEngineOps after the conversion,
  // and the upstream TRTEngineOp will have two output connections from the
  // same node:port inside the op to the downstream TRTEngineOp. Then, if it
  // adds the downstream TRTEngineOp first, when adding the upstream op it'll
  // need to update the same output connection twice. This test ensures the
  // correctness of the conversion under such condition.
  Scope s = Scope::NewRootScope();
  auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT,
                                ops::Placeholder::Shape({2, 1}));
  // We purposefully choose the name of the root node of each segment, so
  // it'll process the segment in the downstream first, then, when it tries
  // to update the edge between the two TRTEngineOps, it'll try to add the
  // same edge multiple times.
  auto root_b = ops::Identity(s.WithOpName("segment_root_b"), input);
  auto add1 = ops::Add(s.WithOpName("add1"), root_b, root_b);
  // Incompatible reshapes that change the batch dimension split the graph
  // into two segments.
  auto bridge = ops::Reshape(s.WithOpName("reshape1"), add1, Input({1, 2}));
  bridge = ops::Reshape(s.WithOpName("reshape2"), bridge, Input({2, 1}));
  auto add2 = ops::Add(s.WithOpName("add2"), bridge, add1);
  auto root_a = ops::Identity(s.WithOpName("segment_root_a"), add1);
  auto add3 = ops::Add(s.WithOpName("add3"), add2, root_a);
  ops::Identity(s.WithOpName("output"), add3);

  GraphDef output_graph_def;
  TF_EXPECT_OK(RunConvertAfterShape(s, &output_graph_def));

  // Strips the per-graph sequence number the converter inserts, e.g.
  // "TRTEngineOp_3_1" -> "TRTEngineOp_1", so the checks below are stable.
  const auto strip_sequence_number = [](std::string node_name) {
    const std::regex pattern("TRTEngineOp_[0-9]+_");
    return std::regex_replace(node_name, pattern, "TRTEngineOp_");
  };

  int num_trt_ops = 0;
  for (const NodeDef& node : output_graph_def.node()) {
    if (node.op() != "TRTEngineOp") continue;
    const std::string node_name = strip_sequence_number(node.name());
    if (node_name == "TRTEngineOp_1") {
      // Upstream engine: fed directly by the placeholder.
      EXPECT_EQ(1, node.input_size());
      EXPECT_EQ("input", node.input(0));
      ++num_trt_ops;
    } else if (node_name == "TRTEngineOp_0") {
      // Downstream engine: fed by the upstream engine and the reshape chain.
      EXPECT_EQ(2, node.input_size());
      EXPECT_EQ("TRTEngineOp_1", strip_sequence_number(node.input(0)));
      EXPECT_EQ("reshape2", node.input(1));
      ++num_trt_ops;
    }
  }
  EXPECT_EQ(2, num_trt_ops);
}
230 
231 }  // namespace convert
232 }  // namespace tensorrt
233 }  // namespace tensorflow
234 
235 #endif  // GOOGLE_CUDA && GOOGLE_TENSORRT
236