1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/compiler/jit/clone_constants_for_better_clustering.h"
17
18 #include "tensorflow/cc/framework/ops.h"
19 #include "tensorflow/cc/ops/array_ops.h"
20 #include "tensorflow/cc/ops/const_op.h"
21 #include "tensorflow/cc/ops/math_ops.h"
22 #include "tensorflow/compiler/jit/node_matchers.h"
23 #include "tensorflow/core/lib/core/status_test_util.h"
24 #include "tensorflow/core/platform/test.h"
25 #include "tensorflow/core/public/session_options.h"
26
27 namespace tensorflow {
28 namespace {
29 using ::tensorflow::testing::FindNodeByName;
30
CloneConstantsForBetterClustering(const Scope & s,std::unique_ptr<Graph> * result)31 Status CloneConstantsForBetterClustering(const Scope& s,
32 std::unique_ptr<Graph>* result) {
33 auto graph = std::make_unique<Graph>(OpRegistry::Global());
34 SessionOptions session_options;
35 session_options.config.mutable_graph_options()
36 ->mutable_optimizer_options()
37 ->set_global_jit_level(OptimizerOptions::ON_2);
38 GraphOptimizationPassOptions options;
39 options.graph = &graph;
40 options.session_options = &session_options;
41
42 // Scope::ToGraph seems to drop assigned devices, probably because it goes
43 // through a GraphDef. So explicitly maintain the device assignment.
44 // std::unordered_map<string, string> assigned_device_names;
45 // for (Node* n : s.graph()->nodes()) {
46 // assigned_device_names[n->name()] = n->assigned_device_name();
47 // }
48 GraphConstructorOptions opts;
49 opts.expect_device_spec = true;
50 TF_RETURN_IF_ERROR(s.ToGraph(graph.get(), opts));
51 // for (Node* n : graph->nodes()) {
52 // n->set_assigned_device_name(assigned_device_names[n->name()]);
53 // }
54
55 CloneConstantsForBetterClusteringPass rewriter;
56 TF_RETURN_IF_ERROR(rewriter.Run(options));
57 *result = std::move(graph);
58 return OkStatus();
59 }
60
// Fully specified device names used by the tests below, both as the requested
// and as the assigned device of the nodes they build. Declared as constexpr
// arrays so the names are true compile-time constants that cannot be
// re-pointed at run time.
constexpr char kCPU[] = "/job:localhost/replica:0/task:0/device:CPU:0";
constexpr char kGPU[] = "/job:localhost/replica:0/task:0/device:GPU:0";
63
// A four-element integer constant placed on the CPU is shared as the
// permutation input of two Transpose nodes placed on the GPU. After the pass
// the two Transpose nodes are expected to read their permutation from
// different nodes, i.e. the constant has been cloned.
TEST(CloneConstantsForBetterClusteringTest, HostConstantPlacedOnCpu) {
  Scope root = Scope::NewRootScope().ExitOnError();
  Scope on_gpu = root.WithAssignedDevice(kGPU).WithDevice(kGPU);
  Scope on_cpu = root.WithAssignedDevice(kCPU).WithDevice(kCPU);

  Output in0 = ops::Placeholder(on_gpu.WithOpName("in0"), DT_FLOAT);
  Output in1 = ops::Placeholder(on_gpu.WithOpName("in1"), DT_FLOAT);

  // Single constant feeding both transposes.
  Output perm = ops::Const(on_cpu.WithOpName("perm"), {3, 1, 2, 0});

  {
    Output tr0 = ops::Transpose(on_gpu.WithOpName("tr0"), in0, perm);
    Output tr1 = ops::Transpose(on_gpu.WithOpName("tr1"), in1, perm);
  }

  std::unique_ptr<Graph> result;
  TF_ASSERT_OK(CloneConstantsForBetterClustering(root, &result));

  // Guard the lookups: a missing node should fail this test cleanly instead
  // of dereferencing a null pointer and aborting the whole test binary.
  Node* tr0_node = FindNodeByName(result.get(), "tr0");
  ASSERT_NE(tr0_node, nullptr);
  Node* tr1_node = FindNodeByName(result.get(), "tr1");
  ASSERT_NE(tr1_node, nullptr);

  // Input 1 of each Transpose is its permutation operand.
  OutputTensor tr0_perm;
  TF_ASSERT_OK(tr0_node->input_tensor(1, &tr0_perm));

  OutputTensor tr1_perm;
  TF_ASSERT_OK(tr1_node->input_tensor(1, &tr1_perm));

  EXPECT_NE(tr0_perm.node, tr1_perm.node);
}
90
// Same setup as the CPU-placed case, except the shared four-element integer
// constant is itself placed on the GPU. The two Transpose nodes are still
// expected to end up with distinct permutation inputs after the pass.
TEST(CloneConstantsForBetterClusteringTest, HostConstantPlacedOnGpu) {
  Scope root = Scope::NewRootScope().ExitOnError();
  Scope on_gpu = root.WithAssignedDevice(kGPU).WithDevice(kGPU);

  Output in0 = ops::Placeholder(on_gpu.WithOpName("in0"), DT_FLOAT);
  Output in1 = ops::Placeholder(on_gpu.WithOpName("in1"), DT_FLOAT);

  // Single constant feeding both transposes, on the same device as they are.
  Output perm = ops::Const(on_gpu.WithOpName("perm"), {3, 1, 2, 0});

  {
    Output tr0 = ops::Transpose(on_gpu.WithOpName("tr0"), in0, perm);
    Output tr1 = ops::Transpose(on_gpu.WithOpName("tr1"), in1, perm);
  }

  std::unique_ptr<Graph> result;
  TF_ASSERT_OK(CloneConstantsForBetterClustering(root, &result));

  // Guard the lookups: a missing node should fail this test cleanly instead
  // of dereferencing a null pointer and aborting the whole test binary.
  Node* tr0_node = FindNodeByName(result.get(), "tr0");
  ASSERT_NE(tr0_node, nullptr);
  Node* tr1_node = FindNodeByName(result.get(), "tr1");
  ASSERT_NE(tr1_node, nullptr);

  // Input 1 of each Transpose is its permutation operand.
  OutputTensor tr0_perm;
  TF_ASSERT_OK(tr0_node->input_tensor(1, &tr0_perm));

  OutputTensor tr1_perm;
  TF_ASSERT_OK(tr1_node->input_tensor(1, &tr1_perm));

  EXPECT_NE(tr0_perm.node, tr1_perm.node);
}
116
// A floating-point constant on the GPU feeds two Cast-to-int32 nodes, whose
// outputs are the permutation inputs of two Transpose nodes. The constant is
// not consumed directly by the transposes, and after the pass both Cast nodes
// are expected to still read from the very same constant node (no cloning).
TEST(CloneConstantsForBetterClusteringTest, DontCloneNonHostConstants) {
  Scope root = Scope::NewRootScope().ExitOnError();
  Scope on_gpu = root.WithAssignedDevice(kGPU).WithDevice(kGPU);

  Output in0 = ops::Placeholder(on_gpu.WithOpName("in0"), DT_FLOAT);
  Output in1 = ops::Placeholder(on_gpu.WithOpName("in1"), DT_FLOAT);

  Output perm_fp = ops::Const(on_gpu.WithOpName("perm"), {3.0, 1.0, 2.0, 0.0});
  Output perm_int0 =
      ops::Cast(on_gpu.WithOpName("perm_cast_0"), perm_fp, DT_INT32);
  Output perm_int1 =
      ops::Cast(on_gpu.WithOpName("perm_cast_1"), perm_fp, DT_INT32);

  {
    Output tr0 = ops::Transpose(on_gpu.WithOpName("tr0"), in0, perm_int0);
    Output tr1 = ops::Transpose(on_gpu.WithOpName("tr1"), in1, perm_int1);
  }

  std::unique_ptr<Graph> result;
  TF_ASSERT_OK(CloneConstantsForBetterClustering(root, &result));

  // Guard the lookups: a missing node should fail this test cleanly instead
  // of dereferencing a null pointer and aborting the whole test binary.
  Node* cast0_node = FindNodeByName(result.get(), "perm_cast_0");
  ASSERT_NE(cast0_node, nullptr);
  Node* cast1_node = FindNodeByName(result.get(), "perm_cast_1");
  ASSERT_NE(cast1_node, nullptr);

  // Input 0 of each Cast is the floating-point constant being converted.
  OutputTensor cast0_input;
  TF_ASSERT_OK(cast0_node->input_tensor(0, &cast0_input));

  OutputTensor cast1_input;
  TF_ASSERT_OK(cast1_node->input_tensor(0, &cast1_input));

  EXPECT_EQ(cast0_input.node, cast1_input.node);
}
148
// An 18-element integer constant on the CPU is shared as the permutation
// input of two GPU Transpose nodes. After the pass both transposes are
// expected to still share the same constant node.
// NOTE(review): presumably 18 elements exceeds the pass's size limit for
// cloning -- confirm the threshold against the pass implementation.
TEST(CloneConstantsForBetterClusteringTest, DontCloneLargeConstants) {
  Scope root = Scope::NewRootScope().ExitOnError();
  Scope on_gpu = root.WithAssignedDevice(kGPU).WithDevice(kGPU);
  Scope on_cpu = root.WithAssignedDevice(kCPU).WithDevice(kCPU);

  Output in0 = ops::Placeholder(on_gpu.WithOpName("in0"), DT_FLOAT);
  Output in1 = ops::Placeholder(on_gpu.WithOpName("in1"), DT_FLOAT);

  Output perm = ops::Const(
      on_cpu.WithOpName("perm"),
      {17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});

  {
    Output tr0 = ops::Transpose(on_gpu.WithOpName("tr0"), in0, perm);
    Output tr1 = ops::Transpose(on_gpu.WithOpName("tr1"), in1, perm);
  }

  std::unique_ptr<Graph> result;
  TF_ASSERT_OK(CloneConstantsForBetterClustering(root, &result));

  // Guard the lookups: a missing node should fail this test cleanly instead
  // of dereferencing a null pointer and aborting the whole test binary.
  Node* tr0_node = FindNodeByName(result.get(), "tr0");
  ASSERT_NE(tr0_node, nullptr);
  Node* tr1_node = FindNodeByName(result.get(), "tr1");
  ASSERT_NE(tr1_node, nullptr);

  // Input 1 of each Transpose is its permutation operand.
  OutputTensor tr0_perm;
  TF_ASSERT_OK(tr0_node->input_tensor(1, &tr0_perm));

  OutputTensor tr1_perm;
  TF_ASSERT_OK(tr1_node->input_tensor(1, &tr1_perm));

  EXPECT_EQ(tr0_perm.node, tr1_perm.node);
}
177
// A four-element integer constant on the CPU is shared by two GPU Transpose
// nodes and is additionally consumed (as both the target and the update
// operand) by an InplaceAdd node. After the pass both transposes are expected
// to still share the same constant node.
// NOTE(review): presumably the pass refuses to clone a constant that an
// in-place op may modify -- confirm against the pass implementation.
TEST(CloneConstantsForBetterClusteringTest, InplaceOps) {
  Scope root = Scope::NewRootScope().ExitOnError();
  Scope on_gpu = root.WithAssignedDevice(kGPU).WithDevice(kGPU);
  Scope on_cpu = root.WithAssignedDevice(kCPU).WithDevice(kCPU);

  Output in0 = ops::Placeholder(on_gpu.WithOpName("in0"), DT_FLOAT);
  Output in1 = ops::Placeholder(on_gpu.WithOpName("in1"), DT_FLOAT);

  Output perm = ops::Const(on_cpu.WithOpName("perm"), {3, 1, 2, 0});

  {
    Output tr0 = ops::Transpose(on_gpu.WithOpName("tr0"), in0, perm);
    Output tr1 = ops::Transpose(on_gpu.WithOpName("tr1"), in1, perm);
  }

  // Give the in-place op its own name. It previously reused "tr0", which is
  // already taken by the first Transpose and made the "tr0" lookup below
  // ambiguous to the reader.
  Output in_place_add =
      ops::InplaceAdd(on_cpu.WithOpName("in_place_add"), perm,
                      ops::Placeholder(on_cpu.WithOpName("i"), DT_INT32), perm);

  std::unique_ptr<Graph> result;
  TF_ASSERT_OK(CloneConstantsForBetterClustering(root, &result));

  // Guard the lookups: a missing node should fail this test cleanly instead
  // of dereferencing a null pointer and aborting the whole test binary.
  Node* tr0_node = FindNodeByName(result.get(), "tr0");
  ASSERT_NE(tr0_node, nullptr);
  Node* tr1_node = FindNodeByName(result.get(), "tr1");
  ASSERT_NE(tr1_node, nullptr);

  // Input 1 of each Transpose is its permutation operand.
  OutputTensor tr0_perm;
  TF_ASSERT_OK(tr0_node->input_tensor(1, &tr0_perm));

  OutputTensor tr1_perm;
  TF_ASSERT_OK(tr1_node->input_tensor(1, &tr1_perm));

  EXPECT_EQ(tr0_perm.node, tr1_perm.node);
}
208 } // namespace
209 } // namespace tensorflow
210