1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
17 #include "tensorflow/core/framework/allocator.h"
18 #include "tensorflow/core/framework/fake_input.h"
19 #include "tensorflow/core/framework/node_def_builder.h"
20 #include "tensorflow/core/framework/op_kernel.h"
21 #include "tensorflow/core/framework/tensor.h"
22 #include "tensorflow/core/framework/tensor_testutil.h"
23 #include "tensorflow/core/framework/types.h"
24 #include "tensorflow/core/graph/graph.h"
25 #include "tensorflow/core/graph/node_builder.h"
26 #include "tensorflow/core/kernels/ops_testutil.h"
27 #include "tensorflow/core/lib/core/status_test_util.h"
28 #include "tensorflow/core/lib/strings/str_util.h"
29 #include "tensorflow/core/platform/test.h"
30 #include "tensorflow/core/platform/test_benchmark.h"
31
32 namespace tensorflow {
33
34 namespace {
35
ExpectHasSubstr(StringPiece s,StringPiece expected)36 static void ExpectHasSubstr(StringPiece s, StringPiece expected) {
37 EXPECT_TRUE(absl::StrContains(s, expected))
38 << "'" << s << "' does not contain '" << expected << "'";
39 }
40
41 class SparseDenseCDivTest : public OpsTestBase {
42 protected:
43 template <typename T>
MakeOp()44 void MakeOp() {
45 DataType value_type = tensorflow::DataTypeToEnum<T>::value;
46 TF_ASSERT_OK(NodeDefBuilder("cdiv", "SparseDenseCwiseDiv")
47 .Input(FakeInput(DT_INT64))
48 .Input(FakeInput(value_type))
49 .Input(FakeInput(DT_INT64))
50 .Input(FakeInput(value_type))
51 .Attr("T", value_type)
52 .Finalize(node_def()));
53 TF_ASSERT_OK(InitOp());
54 }
55 };
56
57 class SparseDenseCMulTest : public OpsTestBase {
58 protected:
59 template <typename T>
MakeOp()60 void MakeOp() {
61 DataType value_type = tensorflow::DataTypeToEnum<T>::value;
62 TF_ASSERT_OK(NodeDefBuilder("cmul", "SparseDenseCwiseMul")
63 .Input(FakeInput(DT_INT64))
64 .Input(FakeInput(value_type))
65 .Input(FakeInput(DT_INT64))
66 .Input(FakeInput(value_type))
67 .Attr("T", value_type)
68 .Finalize(node_def()));
69 TF_ASSERT_OK(InitOp());
70 }
71 };
72
// The sparse operand (shape [1]) has fewer dimensions than the dense operand
// (shape [2, 1]); the kernel is expected to reject this with an error that
// mentions "broadcasts dense to sparse only".
TEST_F(SparseDenseCDivTest, DoNotBroadcastSparse_FewerDims) {
  MakeOp<float>();

  // Sparse side: one non-zero value, 1618, at index [0] of a shape-[1] tensor.
  AddInputFromArray<int64>(TensorShape({1, 1}), {0});       // sparse indices
  AddInputFromArray<float>(TensorShape({1}), {1618});       // sparse values
  AddInputFromArray<int64>(TensorShape({1}), {1});          // sparse shape
  // Dense side: shape [2, 1].
  AddInputFromArray<float>(TensorShape({2, 1}), {17, 19});  // dense operand

  const auto status = RunOpKernel();
  ExpectHasSubstr(status.ToString(), "broadcasts dense to sparse only");
}
83
// Both operands are rank 2, but the sparse operand (shape [1, 1]) would have
// to be broadcast up to the dense operand's shape [2, 1]; the kernel is
// expected to reject this with an error that mentions
// "broadcasts dense to sparse only".
TEST_F(SparseDenseCDivTest, DoNotBroadcastSparse_SameDims) {
  MakeOp<float>();

  // Sparse side: one non-zero value, 1618, at index [0, 0] of a [1, 1] tensor.
  AddInputFromArray<int64>(TensorShape({1, 2}), {0, 0});    // sparse indices
  AddInputFromArray<float>(TensorShape({1}), {1618});       // sparse values
  AddInputFromArray<int64>(TensorShape({2}), {1, 1});       // sparse shape
  // Dense side: shape [2, 1].
  AddInputFromArray<float>(TensorShape({2, 1}), {17, 19});  // dense operand

  const auto status = RunOpKernel();
  ExpectHasSubstr(status.ToString(), "broadcasts dense to sparse only");
}
94
// Sparse and dense operands share the shape [3, 2], so no broadcasting is
// involved.
TEST_F(SparseDenseCDivTest, SameShape) {
  MakeOp<float>();

  // Sparse operand:
  //   [  1]
  //   [2  ]
  //   [3 4]
  // Dense operand: same shape, every element equal to 1.
  std::initializer_list<int64> index_values{0, 1, 1, 0, 2, 0, 2, 1};
  std::initializer_list<int64> shape_values{3, 2};
  const gtl::ArraySlice<int64> sp_indices(index_values);
  const gtl::ArraySlice<int64> sp_shape(shape_values);
  const TensorShape sp_indices_shape({4, 2});

  Tensor dense(DT_FLOAT, TensorShape(sp_shape));
  auto dense_values = dense.flat<float>();
  dense_values.setConstant(1.);

  AddInputFromArray<int64>(sp_indices_shape, sp_indices);
  AddInputFromArray<float>(TensorShape({4}), {1, 2, 3, 4});
  AddInputFromArray<int64>(TensorShape({2}), sp_shape);
  AddInputFromArray<float>(TensorShape(sp_shape), dense_values);

  TF_ASSERT_OK(RunOpKernel());

  // Dividing by 1 must leave the four sparse values unchanged.
  Tensor expected(allocator(), DT_FLOAT, TensorShape({4}));
  test::FillValues<float>(&expected, {1, 2, 3, 4});
  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
}
122
// The dense operand has the same rank as the sparse operand but a size-1
// trailing dimension: dense shape [3, 1] against sparse shape [3, 2].
TEST_F(SparseDenseCDivTest, BroadcastDenseSameDims) {
  MakeOp<float>();

  // Sparse operand (shape [3, 2]):
  //   [  1]
  //   [2  ]
  //   [3 4]
  // Dense operand: shape [3, 1], every element equal to 1.
  std::initializer_list<int64> index_values{0, 1, 1, 0, 2, 0, 2, 1};
  std::initializer_list<int64> shape_values{3, 2};
  const gtl::ArraySlice<int64> sp_indices(index_values);
  const gtl::ArraySlice<int64> sp_shape(shape_values);
  const TensorShape sp_indices_shape({4, 2});

  Tensor dense(DT_FLOAT, TensorShape({3, 1}));
  auto dense_values = dense.flat<float>();
  dense_values.setConstant(1.);

  AddInputFromArray<int64>(sp_indices_shape, sp_indices);
  AddInputFromArray<float>(TensorShape({4}), {1, 2, 3, 4});
  AddInputFromArray<int64>(TensorShape({2}), sp_shape);
  AddInputFromArray<float>(TensorShape({3, 1}), dense_values);

  TF_ASSERT_OK(RunOpKernel());

  // Dividing by 1 must leave the four sparse values unchanged.
  Tensor expected(allocator(), DT_FLOAT, TensorShape({4}));
  test::FillValues<float>(&expected, {1, 2, 3, 4});
  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
}
150
// The dense operand has lower rank than the sparse operand: dense shape [2]
// against sparse shape [3, 2].
TEST_F(SparseDenseCDivTest, BroadcastDenseFewerDims) {
  MakeOp<float>();

  // Sparse operand (shape [3, 2]):
  //   [  1]
  //   [2  ]
  //   [3 4]
  // Dense operand: shape [2], every element equal to 1.
  std::initializer_list<int64> index_values{0, 1, 1, 0, 2, 0, 2, 1};
  std::initializer_list<int64> shape_values{3, 2};
  const gtl::ArraySlice<int64> sp_indices(index_values);
  const gtl::ArraySlice<int64> sp_shape(shape_values);
  const TensorShape sp_indices_shape({4, 2});

  Tensor dense(DT_FLOAT, TensorShape({2}));
  auto dense_values = dense.flat<float>();
  dense_values.setConstant(1.);

  AddInputFromArray<int64>(sp_indices_shape, sp_indices);
  AddInputFromArray<float>(TensorShape({4}), {1, 2, 3, 4});
  AddInputFromArray<int64>(TensorShape({2}), sp_shape);
  AddInputFromArray<float>(TensorShape({2}), dense_values);

  TF_ASSERT_OK(RunOpKernel());

  // Dividing by 1 must leave the four sparse values unchanged.
  Tensor expected(allocator(), DT_FLOAT, TensorShape({4}));
  test::FillValues<float>(&expected, {1, 2, 3, 4});
  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
}
177
// Multiplies a [3, 2] sparse operand by a shape-[2] dense operand.
TEST_F(SparseDenseCMulTest, BroadcastDense) {
  MakeOp<float>();

  // Sparse operand (shape [3, 2]):      Dense operand (shape [2]):
  //   [  1]
  //   [2  ]                               [0.5 0]
  //   [3 4]
  //
  // Expected product, where ? marks entries that stay implicitly zero:
  //   [?   0]
  //   [1   ?]
  //   [1.5 0]
  std::initializer_list<int64> index_values{0, 1, 1, 0, 2, 0, 2, 1};
  std::initializer_list<int64> shape_values{3, 2};
  const gtl::ArraySlice<int64> sp_indices(index_values);
  const gtl::ArraySlice<int64> sp_shape(shape_values);
  const TensorShape sp_indices_shape({4, 2});

  Tensor dense(DT_FLOAT, TensorShape({2}));
  auto dense_values = dense.flat<float>();
  dense_values(0) = 0.5;
  dense_values(1) = 0;

  AddInputFromArray<int64>(sp_indices_shape, sp_indices);
  AddInputFromArray<float>(TensorShape({4}), {1, 2, 3, 4});
  AddInputFromArray<int64>(TensorShape({2}), sp_shape);
  AddInputFromArray<float>(TensorShape({2}), dense_values);

  TF_ASSERT_OK(RunOpKernel());

  // One output value per sparse entry, in the order the indices were given:
  // 1*0, 2*0.5, 3*0.5, 4*0.
  Tensor expected(allocator(), DT_FLOAT, TensorShape({4}));
  test::FillValues<float>(&expected, {0, 1, 1.5, 0});
  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
}
210
211 // Benchmarking code follows.
212
SparseMatCMulDenseMat(Graph * g,Node * sp_indices,Node * sp_vals,Node * sp_shape,Node * dense)213 static Graph* SparseMatCMulDenseMat(Graph* g, Node* sp_indices, Node* sp_vals,
214 Node* sp_shape, Node* dense) {
215 Node* ret;
216 TF_CHECK_OK(
217 NodeBuilder(g->NewName("SparseDenseCwiseMul"), "SparseDenseCwiseMul")
218 .Input(sp_indices)
219 .Input(sp_vals)
220 .Input(sp_shape)
221 .Input(dense)
222 .Finalize(g, &ret));
223 return g;
224 }
225
MakeTensor(Graph * g,int B,int M,int N)226 static Node* MakeTensor(Graph* g, int B, int M, int N) {
227 Tensor data(DT_FLOAT, TensorShape({B, M, N}));
228 data.flat<float>().setRandom();
229 return test::graph::Constant(g, data);
230 }
231
232 struct ST {
233 Node* indices;
234 Node* vals;
235 Node* shape;
236 };
237
MakeSparseTensor(Graph * g,int B,int M,int N,int nnz_inner)238 static ST MakeSparseTensor(Graph* g, int B, int M, int N, int nnz_inner) {
239 const int total_nnz = B * M * nnz_inner;
240 const int kNumDims = 3;
241
242 Tensor indices(DT_INT64, TensorShape({total_nnz, kNumDims}));
243 Tensor vals(DT_FLOAT, TensorShape({total_nnz}));
244 Tensor shape(DT_INT64, TensorShape({kNumDims}));
245 vals.flat<float>().setRandom();
246 test::FillValues(&shape, gtl::ArraySlice<int64>({B, M, N}));
247 auto indices_mat = indices.matrix<int64>();
248
249 int nnz_cnt = 0;
250 std::unordered_set<int> picked;
251 std::random_device rd;
252 std::mt19937 gen(rd());
253 std::uniform_int_distribution<> dist(0, N - 1);
254
255 for (int i = 0; i < B; ++i) {
256 for (int j = 0; j < M; ++j) {
257 for (int k = 0; k < nnz_inner; ++k) {
258 indices_mat(nnz_cnt, 0) = i;
259 indices_mat(nnz_cnt, 1) = j;
260
261 int inner = dist(gen);
262 while (picked.count(inner) == 1) {
263 inner = dist(gen);
264 }
265 picked.insert(inner);
266 indices_mat(nnz_cnt, 2) = inner;
267
268 ++nnz_cnt;
269 }
270 }
271 }
272
273 return ST{test::graph::Constant(g, indices), test::graph::Constant(g, vals),
274 test::graph::Constant(g, shape)};
275 }
276
277 // [8, 4, N{nnz}] cmul [8, 4, N]
278 #define BM_SparseMatCMulDenseMatArgs(N, NNZ_INNER) \
279 static void BM_SparseMatCMulDenseMat_##N##_##NNZ_INNER( \
280 ::testing::benchmark::State& state) { \
281 Graph* g = new Graph(OpRegistry::Global()); \
282 Node* dense = MakeTensor(g, 8, 4, N); \
283 ST sp = MakeSparseTensor(g, 8, 4, N, NNZ_INNER); \
284 \
285 test::Benchmark( \
286 "cpu", SparseMatCMulDenseMat(g, sp.indices, sp.vals, sp.shape, dense), \
287 /*old_benchmark_api*/ false) \
288 .Run(state); \
289 state.SetItemsProcessed( \
290 static_cast<int64>(state.iterations() * 8 * 4 * N * 2)); \
291 } \
292 BENCHMARK(BM_SparseMatCMulDenseMat_##N##_##NNZ_INNER)
293
294 BM_SparseMatCMulDenseMatArgs(1048576, 1);
295 BM_SparseMatCMulDenseMatArgs(1048576, 8);
296 BM_SparseMatCMulDenseMatArgs(1048576, 32);
297 BM_SparseMatCMulDenseMatArgs(262144, 1);
298 BM_SparseMatCMulDenseMatArgs(262144, 8);
299 BM_SparseMatCMulDenseMatArgs(262144, 32);
300
301 } // namespace
302
303 } // namespace tensorflow
304