/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#define EIGEN_USE_THREADS

#include <functional>
#include <memory>
#include <vector>

#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"

namespace tensorflow {

using test::graph::Constant;

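// Each test below follows the same round-trip pattern: quantize float inputs
// with FloatTensorToQuantized, run the QuantizedConcat kernel, dequantize the
// result with QuantizedTensorToFloat, and compare it against the
// concatenation of the original float tensors.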
class QuantizedConcatTest : public OpsTestBase {
 protected:
  QuantizedConcatTest() {}

  void TestSmall8Bit(float first_min, float first_max, float second_min,
                     float second_max);
  void TestSmall32Bit(float first_min, float first_max, float second_min,
                      float second_max);
  void TestSecondDim8Bit(float first_min, float first_max, float second_min,
                         float second_max);
};

TEST_F(QuantizedConcatTest, Small8Bit) {
  TestSmall8Bit(0.0f, 255.0f, 0.0f, 25.0f);
}

TEST_F(QuantizedConcatTest, Small8BitSameRange) {
  // Both inputs use the same quantized range, so the implementation can fall
  // back to a plain memcpy instead of requantizing.
  TestSmall8Bit(0.0f, 255.0f, 0.0f, 255.0f);
}

void QuantizedConcatTest::TestSmall8Bit(float first_min, float first_max,
                                        float second_min, float second_max) {
  TF_ASSERT_OK(NodeDefBuilder("quantized_concat_op", "QuantizedConcat")
                   .Input(FakeInput(DT_INT32))
                   .Input(FakeInput(2, DT_QUINT8))
                   .Input(FakeInput(2, DT_FLOAT))
                   .Input(FakeInput(2, DT_FLOAT))
                   .Attr("N", 2)
                   .Attr("T", DataTypeToEnum<quint8>::v())
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  const int first_batch = 2;
  const int first_height = 2;
  const int first_width = 3;
  Tensor first_float(DT_FLOAT, {first_batch, first_height, first_width});
  test::FillValues<float>(&first_float,
                          {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
  Tensor first_quantized =
      FloatTensorToQuantized<quint8>(first_float, first_min, first_max);

  const int second_batch = 2;
  const int second_height = 2;
  const int second_width = 3;
  Tensor second_float(DT_FLOAT, {second_batch, second_height, second_width});
  test::FillValues<float>(&second_float,
                          {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24});
  Tensor second_quantized =
      FloatTensorToQuantized<quint8>(second_float, second_min, second_max);

  const int expected_batch = first_batch + second_batch;
  Tensor expected_float(DT_FLOAT, {expected_batch, first_height, first_width});
  test::FillValues<float>(&expected_float,
                          {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                           13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24});

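  // Input order for QuantizedConcat: the concat axis, the N quantized
  // tensors, then the N input minimums followed by the N input maximums.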
  AddInputFromArray<int32>(TensorShape({}), {0});
  AddInputFromArray<quint8>(first_quantized.shape(),
                            first_quantized.flat<quint8>());
  AddInputFromArray<quint8>(second_quantized.shape(),
                            second_quantized.flat<quint8>());
  AddInputFromArray<float>(TensorShape({}), {first_min});
  AddInputFromArray<float>(TensorShape({}), {second_min});
  AddInputFromArray<float>(TensorShape({}), {first_max});
  AddInputFromArray<float>(TensorShape({}), {second_max});
  TF_ASSERT_OK(RunOpKernel());
  const Tensor& output_quantized = *GetOutput(0);
  const float output_min = GetOutput(1)->flat<float>()(0);
  const float output_max = GetOutput(2)->flat<float>()(0);
  Tensor output_float =
      QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max);
  test::ExpectTensorNear<float>(expected_float, output_float, 0.2);
}

TEST_F(QuantizedConcatTest, Small32Bit) {
  TestSmall32Bit(0.0f, 1200.0f, 0.0f, 2400.0f);
}

TEST_F(QuantizedConcatTest, Small32BitSameRange) {
  TestSmall32Bit(-2400.0f, 2400.0f, -2400.0f, 2400.0f);
}

TEST_F(QuantizedConcatTest, Small32BitOneDimSameRangeAsOutput) {
  TestSmall32Bit(-2400.0f, 2400.0f, -1200.0f, 2400.0f);
}

void QuantizedConcatTest::TestSmall32Bit(float first_min, float first_max,
                                         float second_min, float second_max) {
  TF_ASSERT_OK(NodeDefBuilder("quantized_concat_op", "QuantizedConcat")
                   .Input(FakeInput(DT_INT32))
                   .Input(FakeInput(2, DT_QINT32))
                   .Input(FakeInput(2, DT_FLOAT))
                   .Input(FakeInput(2, DT_FLOAT))
                   .Attr("N", 2)
                   .Attr("T", DataTypeToEnum<qint32>::v())
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  const int first_batch = 2;
  const int first_height = 2;
  const int first_width = 3;
  Tensor first_float(DT_FLOAT, {first_batch, first_height, first_width});
  test::FillValues<float>(&first_float, {100, 200, 300, 400, 500, 600, 700,
                                         800, 900, 1000, 1100, 1200});
  Tensor first_quantized =
      FloatTensorToQuantized<qint32>(first_float, first_min, first_max);

  const int second_batch = 2;
  const int second_height = 2;
  const int second_width = 3;
  Tensor second_float(DT_FLOAT, {second_batch, second_height, second_width});
  test::FillValues<float>(&second_float, {1300, 1400, 1500, 1600, 1700, 1800,
                                          1900, 2000, 2100, 2200, 2300, 2400});
  Tensor second_quantized =
      FloatTensorToQuantized<qint32>(second_float, second_min, second_max);

  const int expected_batch = first_batch + second_batch;
  Tensor expected_float(DT_FLOAT, {expected_batch, first_height, first_width});
  test::FillValues<float>(
      &expected_float,
      {100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200,
       1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000, 2100, 2200, 2300, 2400});

  AddInputFromArray<int32>(TensorShape({}), {0});
  AddInputFromArray<qint32>(first_quantized.shape(),
                            first_quantized.flat<qint32>());
  AddInputFromArray<qint32>(second_quantized.shape(),
                            second_quantized.flat<qint32>());
  AddInputFromArray<float>(TensorShape({}), {first_min});
  AddInputFromArray<float>(TensorShape({}), {second_min});
  AddInputFromArray<float>(TensorShape({}), {first_max});
  AddInputFromArray<float>(TensorShape({}), {second_max});
  TF_ASSERT_OK(RunOpKernel());
  const Tensor& output_quantized = *GetOutput(0);
  const float output_min = GetOutput(1)->flat<float>()(0);
  const float output_max = GetOutput(2)->flat<float>()(0);
  Tensor output_float =
      QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max);
  test::ExpectTensorNear<float>(expected_float, output_float, 0.2);
}

TEST_F(QuantizedConcatTest, SecondDim8Bit) {
  TestSecondDim8Bit(-10.0f, 150.0f, 0.0f, 200.0f);
}

TEST_F(QuantizedConcatTest, SecondDim8BitSameRange) {
  TestSecondDim8Bit(-10.0f, 150.0f, -10.0f, 150.0f);
}

void QuantizedConcatTest::TestSecondDim8Bit(float first_min, float first_max,
                                            float second_min,
                                            float second_max) {
  TF_ASSERT_OK(NodeDefBuilder("quantized_concat_op", "QuantizedConcat")
                   .Input(FakeInput(DT_INT32))
                   .Input(FakeInput(2, DT_QUINT8))
                   .Input(FakeInput(2, DT_FLOAT))
                   .Input(FakeInput(2, DT_FLOAT))
                   .Attr("N", 2)
                   .Attr("T", DataTypeToEnum<quint8>::v())
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  const int first_batch = 2;
  const int first_height = 2;
  const int first_width = 3;
  Tensor first_float(DT_FLOAT, {first_batch, first_height, first_width});
  test::FillValues<float>(&first_float,
                          {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
  Tensor first_quantized =
      FloatTensorToQuantized<quint8>(first_float, first_min, first_max);

  const int second_batch = 2;
  const int second_height = 2;
  const int second_width = 3;
  Tensor second_float(DT_FLOAT, {second_batch, second_height, second_width});
  test::FillValues<float>(&second_float,
                          {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24});
  Tensor second_quantized =
      FloatTensorToQuantized<quint8>(second_float, second_min, second_max);

  const int expected_height = first_height + second_height;
  Tensor expected_float(DT_FLOAT, {first_batch, expected_height, first_width});
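  // Concatenating along dimension 1 stacks the inputs within each batch
  // entry, so each batch's rows from the second tensor follow that batch's
  // rows from the first.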
  test::FillValues<float>(&expected_float,
                          {1, 2, 3, 4, 5, 6, 13, 14, 15, 16, 17, 18,
                           7, 8, 9, 10, 11, 12, 19, 20, 21, 22, 23, 24});

  AddInputFromArray<int32>(TensorShape({}), {1});
  AddInputFromArray<quint8>(first_quantized.shape(),
                            first_quantized.flat<quint8>());
  AddInputFromArray<quint8>(second_quantized.shape(),
                            second_quantized.flat<quint8>());
  AddInputFromArray<float>(TensorShape({}), {first_min});
  AddInputFromArray<float>(TensorShape({}), {second_min});
  AddInputFromArray<float>(TensorShape({}), {first_max});
  AddInputFromArray<float>(TensorShape({}), {second_max});
  TF_ASSERT_OK(RunOpKernel());
  const Tensor& output_quantized = *GetOutput(0);
  const float output_min = GetOutput(1)->flat<float>()(0);
  const float output_max = GetOutput(2)->flat<float>()(0);
  Tensor output_float =
      QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max);
  test::ExpectTensorNear<float>(expected_float, output_float, 1.0);
}

// For the benchmark, we set up two 2-dimensional tensors, each kDim1 x 'dim2'
// in size, and concat them together along "concat_dimension".
// If <same_limits> is true, then both inputs have the same quantized range;
// otherwise, they are set to different values.
template <typename T>
static void ConcatHelper(::testing::benchmark::State& state,
                         int concat_dimension, bool same_limits, int dim2) {
  Graph* g = new Graph(OpRegistry::Global());

  DataType dt = DataTypeToEnum<T>::v();
  const int kDim1 = 100;
  TensorShape shape({kDim1, dim2});

  Tensor concat_dim = test::AsScalar<int32>(concat_dimension);
  Tensor in0(dt, shape);
  in0.flat<T>().setRandom();
  Tensor in1(dt, shape);
  in1.flat<T>().setRandom();

  Tensor mins0 = test::AsScalar<float>(-1.0);
  Tensor maxes0 = test::AsScalar<float>(1.0);
  Tensor mins1 = test::AsScalar<float>(same_limits ? -1.0 : -255.0);
  Tensor maxes1 = test::AsScalar<float>(same_limits ? 1.0 : 255.0);

  Node* node;
  TF_CHECK_OK(NodeBuilder(g->NewName("n"), "QuantizedConcat")
                  .Input(Constant(g, concat_dim))
                  .Input({Constant(g, in0), Constant(g, in1)})
                  .Input({Constant(g, mins0), Constant(g, mins1)})
                  .Input({Constant(g, maxes0), Constant(g, maxes1)})
                  .Attr("N", 2)
                  .Attr("T", dt)
                  .Finalize(g, &node));

  test::Benchmark("cpu", g, /*old_benchmark_api=*/false).Run(state);
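  // Throughput counts only the bytes read from the two kDim1 x dim2 input
  // tensors; the output write is not included.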
  state.SetBytesProcessed(static_cast<int64>(state.iterations()) *
                          ((kDim1 * dim2) + (kDim1 * dim2)) * sizeof(T));
}

static void BM_QConcatDim0SameLimitQInt32(::testing::benchmark::State& state) {
  const int dim2 = state.range(0);

  ConcatHelper<qint32>(state, 0 /* concat_dimension */, true /* same_limits */,
                       dim2);
}

static void BM_QConcatDim1SameLimitQInt32(::testing::benchmark::State& state) {
  const int dim2 = state.range(0);

  ConcatHelper<qint32>(state, 1 /* concat_dimension */, true /* same_limits */,
                       dim2);
}

static void BM_QConcatDim0DifferLimitQInt32(
    ::testing::benchmark::State& state) {
  const int dim2 = state.range(0);

  ConcatHelper<qint32>(state, 0 /* concat_dimension */,
                       false /* same_limits */, dim2);
}

static void BM_QConcatDim1DifferLimitQInt32(
    ::testing::benchmark::State& state) {
  const int dim2 = state.range(0);

  ConcatHelper<qint32>(state, 1 /* concat_dimension */,
                       false /* same_limits */, dim2);
}

BENCHMARK(BM_QConcatDim0SameLimitQInt32)
    ->UseRealTime()
    ->Arg(1000)
    ->Arg(20000)
    ->Arg(100000);
BENCHMARK(BM_QConcatDim1SameLimitQInt32)
    ->UseRealTime()
    ->Arg(1000)
    ->Arg(20000)
    ->Arg(100000);
BENCHMARK(BM_QConcatDim0DifferLimitQInt32)
    ->UseRealTime()
    ->Arg(1000)
    ->Arg(20000)
    ->Arg(100000);
BENCHMARK(BM_QConcatDim1DifferLimitQInt32)
    ->UseRealTime()
    ->Arg(1000)
    ->Arg(20000)
    ->Arg(100000);

// These benchmarks are named QUint8, so instantiate the helper with quint8
// (the original bodies reused qint32 by mistake).
static void BM_QConcatDim0SameLimitQUint8(::testing::benchmark::State& state) {
  const int dim2 = state.range(0);

  ConcatHelper<quint8>(state, 0 /* concat_dimension */, true /* same_limits */,
                       dim2);
}

static void BM_QConcatDim1SameLimitQUint8(::testing::benchmark::State& state) {
  const int dim2 = state.range(0);

  ConcatHelper<quint8>(state, 1 /* concat_dimension */, true /* same_limits */,
                       dim2);
}

static void BM_QConcatDim0DifferLimitQUint8(
    ::testing::benchmark::State& state) {
  const int dim2 = state.range(0);

  ConcatHelper<quint8>(state, 0 /* concat_dimension */,
                       false /* same_limits */, dim2);
}

static void BM_QConcatDim1DifferLimitQUint8(
    ::testing::benchmark::State& state) {
  const int dim2 = state.range(0);

  ConcatHelper<quint8>(state, 1 /* concat_dimension */,
                       false /* same_limits */, dim2);
}


BENCHMARK(BM_QConcatDim0SameLimitQUint8)
    ->UseRealTime()
    ->Arg(1000)
    ->Arg(20000)
    ->Arg(100000);
BENCHMARK(BM_QConcatDim1SameLimitQUint8)
    ->UseRealTime()
    ->Arg(1000)
    ->Arg(20000)
    ->Arg(100000);
BENCHMARK(BM_QConcatDim0DifferLimitQUint8)
    ->UseRealTime()
    ->Arg(1000)
    ->Arg(20000)
    ->Arg(100000);
BENCHMARK(BM_QConcatDim1DifferLimitQUint8)
    ->UseRealTime()
    ->Arg(1000)
    ->Arg(20000)
    ->Arg(100000);

}  // namespace tensorflow