• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/jit/clone_constants_for_better_clustering.h"
17 
18 #include "tensorflow/cc/framework/ops.h"
19 #include "tensorflow/cc/ops/array_ops.h"
20 #include "tensorflow/cc/ops/const_op.h"
21 #include "tensorflow/cc/ops/math_ops.h"
22 #include "tensorflow/compiler/jit/node_matchers.h"
23 #include "tensorflow/core/lib/core/status_test_util.h"
24 #include "tensorflow/core/platform/test.h"
25 #include "tensorflow/core/public/session_options.h"
26 
27 namespace tensorflow {
28 namespace {
29 using ::tensorflow::testing::FindNodeByName;
30 
CloneConstantsForBetterClustering(const Scope & s,std::unique_ptr<Graph> * result)31 Status CloneConstantsForBetterClustering(const Scope& s,
32                                          std::unique_ptr<Graph>* result) {
33   auto graph = std::make_unique<Graph>(OpRegistry::Global());
34   SessionOptions session_options;
35   session_options.config.mutable_graph_options()
36       ->mutable_optimizer_options()
37       ->set_global_jit_level(OptimizerOptions::ON_2);
38   GraphOptimizationPassOptions options;
39   options.graph = &graph;
40   options.session_options = &session_options;
41 
42   // Scope::ToGraph seems to drop assigned devices, probably because it goes
43   // through a GraphDef.  So explicitly maintain the device assignment.
44   // std::unordered_map<string, string> assigned_device_names;
45   // for (Node* n : s.graph()->nodes()) {
46   //   assigned_device_names[n->name()] = n->assigned_device_name();
47   // }
48   GraphConstructorOptions opts;
49   opts.expect_device_spec = true;
50   TF_RETURN_IF_ERROR(s.ToGraph(graph.get(), opts));
51   // for (Node* n : graph->nodes()) {
52   //   n->set_assigned_device_name(assigned_device_names[n->name()]);
53   // }
54 
55   CloneConstantsForBetterClusteringPass rewriter;
56   TF_RETURN_IF_ERROR(rewriter.Run(options));
57   *result = std::move(graph);
58   return OkStatus();
59 }
60 
// Fully qualified device names that the tests below pass to both
// Scope::WithAssignedDevice and Scope::WithDevice.  Declared as constexpr
// arrays so the names themselves cannot be re-pointed at another string.
constexpr char kCPU[] = "/job:localhost/replica:0/task:0/device:CPU:0";
constexpr char kGPU[] = "/job:localhost/replica:0/task:0/device:GPU:0";
63 
// A four-element int constant "perm" is created on the CPU scope and used as
// the permutation input (input 1) of two Transpose nodes on the GPU scope.
// After the pass runs, the two Transposes must read their permutation from
// different nodes.
TEST(CloneConstantsForBetterClusteringTest, HostConstantPlacedOnCpu) {
  Scope root = Scope::NewRootScope().ExitOnError();
  Scope on_gpu = root.WithAssignedDevice(kGPU).WithDevice(kGPU);
  Scope on_cpu = root.WithAssignedDevice(kCPU).WithDevice(kCPU);

  Output in0 = ops::Placeholder(on_gpu.WithOpName("in0"), DT_FLOAT);
  Output in1 = ops::Placeholder(on_gpu.WithOpName("in1"), DT_FLOAT);

  Output perm = ops::Const(on_cpu.WithOpName("perm"), {3, 1, 2, 0});

  {
    Output tr0 = ops::Transpose(on_gpu.WithOpName("tr0"), in0, perm);
    Output tr1 = ops::Transpose(on_gpu.WithOpName("tr1"), in1, perm);
  }

  std::unique_ptr<Graph> result;
  TF_ASSERT_OK(CloneConstantsForBetterClustering(root, &result));

  // Look the nodes up first so that a missing node fails the test cleanly
  // instead of dereferencing a null pointer.
  const Node* tr0_node = FindNodeByName(result.get(), "tr0");
  ASSERT_NE(tr0_node, nullptr);
  const Node* tr1_node = FindNodeByName(result.get(), "tr1");
  ASSERT_NE(tr1_node, nullptr);

  OutputTensor tr0_perm;
  TF_ASSERT_OK(tr0_node->input_tensor(1, &tr0_perm));

  OutputTensor tr1_perm;
  TF_ASSERT_OK(tr1_node->input_tensor(1, &tr1_perm));

  EXPECT_NE(tr0_perm.node, tr1_perm.node);
}
90 
// Same setup as the CPU variant, except that the "perm" constant is itself
// created on the GPU scope.  The two Transpose nodes must still end up reading
// their permutation (input 1) from different nodes after the pass.
TEST(CloneConstantsForBetterClusteringTest, HostConstantPlacedOnGpu) {
  Scope root = Scope::NewRootScope().ExitOnError();
  Scope on_gpu = root.WithAssignedDevice(kGPU).WithDevice(kGPU);

  Output in0 = ops::Placeholder(on_gpu.WithOpName("in0"), DT_FLOAT);
  Output in1 = ops::Placeholder(on_gpu.WithOpName("in1"), DT_FLOAT);

  Output perm = ops::Const(on_gpu.WithOpName("perm"), {3, 1, 2, 0});

  {
    Output tr0 = ops::Transpose(on_gpu.WithOpName("tr0"), in0, perm);
    Output tr1 = ops::Transpose(on_gpu.WithOpName("tr1"), in1, perm);
  }

  std::unique_ptr<Graph> result;
  TF_ASSERT_OK(CloneConstantsForBetterClustering(root, &result));

  // Look the nodes up first so that a missing node fails the test cleanly
  // instead of dereferencing a null pointer.
  const Node* tr0_node = FindNodeByName(result.get(), "tr0");
  ASSERT_NE(tr0_node, nullptr);
  const Node* tr1_node = FindNodeByName(result.get(), "tr1");
  ASSERT_NE(tr1_node, nullptr);

  OutputTensor tr0_perm;
  TF_ASSERT_OK(tr0_node->input_tensor(1, &tr0_perm));

  OutputTensor tr1_perm;
  TF_ASSERT_OK(tr1_node->input_tensor(1, &tr1_perm));

  EXPECT_NE(tr0_perm.node, tr1_perm.node);
}
116 
// A float constant "perm" feeds two Cast-to-int32 nodes, whose outputs are the
// permutation inputs of two Transposes.  The constant is only consumed by the
// Casts, and the test requires that both Casts still read input 0 from the
// same node after the pass, i.e. the constant was not duplicated.
TEST(CloneConstantsForBetterClusteringTest, DontCloneNonHostConstants) {
  Scope root = Scope::NewRootScope().ExitOnError();
  Scope on_gpu = root.WithAssignedDevice(kGPU).WithDevice(kGPU);

  Output in0 = ops::Placeholder(on_gpu.WithOpName("in0"), DT_FLOAT);
  Output in1 = ops::Placeholder(on_gpu.WithOpName("in1"), DT_FLOAT);

  Output perm_f32 = ops::Const(on_gpu.WithOpName("perm"), {3.0, 1.0, 2.0, 0.0});
  Output perm_int0 =
      ops::Cast(on_gpu.WithOpName("perm_cast_0"), perm_f32, DT_INT32);
  Output perm_int1 =
      ops::Cast(on_gpu.WithOpName("perm_cast_1"), perm_f32, DT_INT32);

  {
    Output tr0 = ops::Transpose(on_gpu.WithOpName("tr0"), in0, perm_int0);
    Output tr1 = ops::Transpose(on_gpu.WithOpName("tr1"), in1, perm_int1);
  }

  std::unique_ptr<Graph> result;
  TF_ASSERT_OK(CloneConstantsForBetterClustering(root, &result));

  // Look the nodes up first so that a missing node fails the test cleanly
  // instead of dereferencing a null pointer.
  const Node* cast0_node = FindNodeByName(result.get(), "perm_cast_0");
  ASSERT_NE(cast0_node, nullptr);
  const Node* cast1_node = FindNodeByName(result.get(), "perm_cast_1");
  ASSERT_NE(cast1_node, nullptr);

  OutputTensor cast0_input;
  TF_ASSERT_OK(cast0_node->input_tensor(0, &cast0_input));

  OutputTensor cast1_input;
  TF_ASSERT_OK(cast1_node->input_tensor(0, &cast1_input));

  EXPECT_EQ(cast0_input.node, cast1_input.node);
}
148 
// An 18-element int constant "perm" on the CPU scope is the permutation input
// (input 1) of two GPU Transposes.  The test requires that both Transposes
// still share the same producer node after the pass, i.e. a constant of this
// size is left alone.
TEST(CloneConstantsForBetterClusteringTest, DontCloneLargeConstants) {
  Scope root = Scope::NewRootScope().ExitOnError();
  Scope on_gpu = root.WithAssignedDevice(kGPU).WithDevice(kGPU);
  Scope on_cpu = root.WithAssignedDevice(kCPU).WithDevice(kCPU);

  Output in0 = ops::Placeholder(on_gpu.WithOpName("in0"), DT_FLOAT);
  Output in1 = ops::Placeholder(on_gpu.WithOpName("in1"), DT_FLOAT);

  Output perm = ops::Const(
      on_cpu.WithOpName("perm"),
      {17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});

  {
    Output tr0 = ops::Transpose(on_gpu.WithOpName("tr0"), in0, perm);
    Output tr1 = ops::Transpose(on_gpu.WithOpName("tr1"), in1, perm);
  }

  std::unique_ptr<Graph> result;
  TF_ASSERT_OK(CloneConstantsForBetterClustering(root, &result));

  // Look the nodes up first so that a missing node fails the test cleanly
  // instead of dereferencing a null pointer.
  const Node* tr0_node = FindNodeByName(result.get(), "tr0");
  ASSERT_NE(tr0_node, nullptr);
  const Node* tr1_node = FindNodeByName(result.get(), "tr1");
  ASSERT_NE(tr1_node, nullptr);

  OutputTensor tr0_perm;
  TF_ASSERT_OK(tr0_node->input_tensor(1, &tr0_perm));

  OutputTensor tr1_perm;
  TF_ASSERT_OK(tr1_node->input_tensor(1, &tr1_perm));

  EXPECT_EQ(tr0_perm.node, tr1_perm.node);
}
177 
// The small CPU constant "perm" is the permutation input of two GPU
// Transposes and is additionally passed twice to an InplaceAdd node.  The test
// requires that both Transposes still read input 1 from the same node after
// the pass, i.e. the constant is not duplicated in this configuration.
TEST(CloneConstantsForBetterClusteringTest, InplaceOps) {
  Scope root = Scope::NewRootScope().ExitOnError();
  Scope on_gpu = root.WithAssignedDevice(kGPU).WithDevice(kGPU);
  Scope on_cpu = root.WithAssignedDevice(kCPU).WithDevice(kCPU);

  Output in0 = ops::Placeholder(on_gpu.WithOpName("in0"), DT_FLOAT);
  Output in1 = ops::Placeholder(on_gpu.WithOpName("in1"), DT_FLOAT);

  Output perm = ops::Const(on_cpu.WithOpName("perm"), {3, 1, 2, 0});

  {
    Output tr0 = ops::Transpose(on_gpu.WithOpName("tr0"), in0, perm);
    Output tr1 = ops::Transpose(on_gpu.WithOpName("tr1"), in1, perm);
  }

  // Give the in-place op its own name.  It used to request "tr0", the name
  // already taken by the first Transpose, which left the "tr0" lookup below
  // dependent on how Scope disambiguates repeated op names.
  Output in_place_add =
      ops::InplaceAdd(on_cpu.WithOpName("in_place_add"), perm,
                      ops::Placeholder(on_cpu.WithOpName("i"), DT_INT32), perm);

  std::unique_ptr<Graph> result;
  TF_ASSERT_OK(CloneConstantsForBetterClustering(root, &result));

  // Look the nodes up first so that a missing node fails the test cleanly
  // instead of dereferencing a null pointer.
  const Node* tr0_node = FindNodeByName(result.get(), "tr0");
  ASSERT_NE(tr0_node, nullptr);
  const Node* tr1_node = FindNodeByName(result.get(), "tr1");
  ASSERT_NE(tr1_node, nullptr);

  OutputTensor tr0_perm;
  TF_ASSERT_OK(tr0_node->input_tensor(1, &tr0_perm));

  OutputTensor tr1_perm;
  TF_ASSERT_OK(tr1_node->input_tensor(1, &tr1_perm));

  EXPECT_EQ(tr0_perm.node, tr1_perm.node);
}
208 }  // namespace
209 }  // namespace tensorflow
210