/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cstdint>
#include <functional>
#include <limits>
#include <memory>
#include <random>
#include <type_traits>
#include <vector>

#include "tensorflow/cc/ops/array_ops.h"
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test_benchmark.h"
35 
36 namespace tensorflow {
37 namespace {
38 
39 class DequantizeOpTest : public OpsTestBase {
40  protected:
41   template <typename T>
ComputeDequantizeMinCombinedUsingEigen(const Tensor & input,float min_range,float max_range,Tensor * output)42   void ComputeDequantizeMinCombinedUsingEigen(const Tensor& input,
43                                               float min_range, float max_range,
44                                               Tensor* output) {
45     float half_range =
46         !std::is_signed<T>::value
47             ? 0.0f
48             : (static_cast<float>(std::numeric_limits<T>::max()) -
49                std::numeric_limits<T>::min() + 1) /
50                   2.0f;
51     const float scale_factor =
52         (max_range - min_range) /
53         (static_cast<float>(std::numeric_limits<T>::max()) -
54          std::numeric_limits<T>::min());
55     output->flat<float>() =
56         ((input.flat<T>().template cast<int>().template cast<float>() +
57           half_range) *
58          scale_factor) +
59         min_range;
60   }
61 
62   // Compares dequantize min vs the same using eigen. This tests that a change
63   // to not use eigen gives equivalent results to using eigen.
64   template <typename T>
RunDequantizeMinCombinedTest(float min_range,float max_range,const string & op_name)65   void RunDequantizeMinCombinedTest(float min_range, float max_range,
66                                     const string& op_name) {
67     TF_ASSERT_OK(NodeDefBuilder("dequantize_op", op_name)
68                      .Input(FakeInput(DataTypeToEnum<T>::v()))
69                      .Input(FakeInput(DT_FLOAT))
70                      .Input(FakeInput(DT_FLOAT))
71                      .Attr("T", DataTypeToEnum<T>::v())
72                      .Attr("mode", "MIN_COMBINED")
73                      .Finalize(node_def()));
74     TF_ASSERT_OK(InitOp());
75 
76     std::vector<T> input;
77     for (int64_t i = std::numeric_limits<T>::min();
78          i < std::numeric_limits<T>::max(); ++i) {
79       input.push_back(static_cast<T>(i));
80     }
81     TensorShape shape({static_cast<int64>(input.size())});
82     AddInputFromArray<T>(shape, input);
83     AddInputFromArray<float>(TensorShape({}), {min_range});
84     AddInputFromArray<float>(TensorShape({}), {max_range});
85     TF_ASSERT_OK(RunOpKernel());
86     Tensor expected(allocator(), DT_FLOAT, shape);
87     ComputeDequantizeMinCombinedUsingEigen<T>(GetInput(0), min_range, max_range,
88                                               &expected);
89     test::ExpectTensorEqual<float>(expected, *GetOutput(0));
90   }
91 
92   // Compares dequantize min vs the same using eigen. This tests that a change
93   // to not use eigen gives equivalent results to using eigen.
94   template <typename T>
RunDequantizeBfloat16MinCombinedTest(float min_range,float max_range)95   void RunDequantizeBfloat16MinCombinedTest(float min_range, float max_range) {
96     TF_ASSERT_OK(NodeDefBuilder("dequantize_op_bfloat16", "Dequantize")
97                      .Input(FakeInput(DataTypeToEnum<T>::v()))
98                      .Input(FakeInput(DT_FLOAT))
99                      .Input(FakeInput(DT_FLOAT))
100                      .Attr("T", DataTypeToEnum<T>::v())
101                      .Attr("mode", "MIN_COMBINED")
102                      .Attr("dtype", DT_BFLOAT16)
103                      .Finalize(node_def()));
104     TF_ASSERT_OK(InitOp());
105 
106     std::vector<T> input;
107     for (int64_t i = std::numeric_limits<T>::min();
108          i < std::numeric_limits<T>::max(); ++i) {
109       input.push_back(static_cast<T>(i));
110     }
111     TensorShape shape({static_cast<int64>(input.size())});
112     AddInputFromArray<T>(shape, input);
113     AddInputFromArray<float>(TensorShape({}), {min_range});
114     AddInputFromArray<float>(TensorShape({}), {max_range});
115     TF_ASSERT_OK(RunOpKernel());
116 
117     Tensor expected_float32(allocator(), DT_FLOAT, shape);
118     ComputeDequantizeMinCombinedUsingEigen<T>(GetInput(0), min_range, max_range,
119                                               &expected_float32);
120     Tensor expected(allocator(), DT_BFLOAT16, shape);
121     expected.flat<bfloat16>() = expected_float32.flat<float>().cast<bfloat16>();
122 
123     test::ExpectTensorEqual<bfloat16>(expected, *GetOutput(0));
124   }
125 
126   // Creates a tensor with the specified dims, using values chosen from data,
127   // multiplied by (1 + index) along the axis dimension.
128   template <typename T>
ScalePerSliceAlongAxis(std::vector<int64> dims,int axis,const std::vector<T> & data)129   std::vector<T> ScalePerSliceAlongAxis(std::vector<int64> dims, int axis,
130                                         const std::vector<T>& data) {
131     uint32 seed = 123;
132     std::minstd_rand rng(seed);
133     int64_t out_size = 1;
134     for (int dim : dims) {
135       out_size *= dim;
136     }
137     int minor_size = 1;
138     for (int i = axis + 1; i < dims.size(); ++i) {
139       minor_size *= dims[i];
140     }
141     std::vector<T> out(out_size);
142     int num_slices = (axis == -1) ? 1 : dims[axis];
143     for (int out_idx = 0; out_idx < out_size; ++out_idx) {
144       int in_idx = rng() % data.size();
145       T multiplier = ((out_idx / minor_size) % num_slices) + 1;
146       out[out_idx] = data[in_idx] * multiplier;
147     }
148     return out;
149   }
150 
151   template <typename T>
RunDequantizeScaledTest(float min_range,float max_range,int axis,const std::vector<T> & values,const std::vector<float> & expected)152   void RunDequantizeScaledTest(float min_range, float max_range, int axis,
153                                const std::vector<T>& values,
154                                const std::vector<float>& expected) {
155     const std::vector<int64> dims = {2, 3, 4, 5};
156     int num_slices = (axis == -1) ? 1 : dims[axis];
157     TF_ASSERT_OK(NodeDefBuilder("dequantize_op", "Dequantize")
158                      .Input(FakeInput(DataTypeToEnum<T>::v()))
159                      .Input(FakeInput(DT_FLOAT))
160                      .Input(FakeInput(DT_FLOAT))
161                      .Attr("T", DataTypeToEnum<T>::v())
162                      .Attr("mode", "SCALED")
163                      .Attr("axis", axis)
164                      .Finalize(node_def()));
165     TF_ASSERT_OK(InitOp());
166 
167     AddInputFromArray<T>(TensorShape(dims),
168                          ScalePerSliceAlongAxis(dims, -1, values));
169     std::vector<float> min_ranges(num_slices), max_ranges(num_slices);
170     for (int slice_idx = 0; slice_idx < num_slices; ++slice_idx) {
171       min_ranges[slice_idx] = (slice_idx + 1) * min_range;
172       max_ranges[slice_idx] = (slice_idx + 1) * max_range;
173     }
174     AddInputFromArray<float>(TensorShape({num_slices}), min_ranges);
175     AddInputFromArray<float>(TensorShape({num_slices}), max_ranges);
176     TF_ASSERT_OK(RunOpKernel());
177 
178     Tensor expected_tensor(allocator(), DT_FLOAT, TensorShape(dims));
179     test::FillValues<float>(&expected_tensor,
180                             ScalePerSliceAlongAxis(dims, axis, expected));
181     test::ExpectClose(expected_tensor, *GetOutput(0));
182   }
183 };
184 
// Value-parameterized fixture (parameter type int). NOTE(review): no TEST_P
// or INSTANTIATE_TEST_SUITE_P for it is visible in this file -- presumably
// used by tests elsewhere, or left over; confirm before removing.
struct ParameterizedDequantizeOpTest
    : public OpsTestBase,
      public ::testing::WithParamInterface<int> {};
188 
// MIN_COMBINED dequantization over each supported quantized type, checked
// against the Eigen reference implementation in the fixture.
TEST_F(DequantizeOpTest, DequantizeMinCombinedQuint8) {
  RunDequantizeMinCombinedTest<quint8>(0, 255.0f, "Dequantize");
}
TEST_F(DequantizeOpTest, DequantizeMinCombinedQint8) {
  RunDequantizeMinCombinedTest<qint8>(0, 255.0f, "Dequantize");
}
TEST_F(DequantizeOpTest, DequantizeMinCombinedQint16) {
  RunDequantizeMinCombinedTest<qint16>(0, 255.0f, "Dequantize");
}
TEST_F(DequantizeOpTest, DequantizeMinCombinedQuint16) {
  RunDequantizeMinCombinedTest<quint16>(0, 255.0f, "Dequantize");
}
201 
// Same MIN_COMBINED coverage as above, but with a bfloat16 output dtype.
TEST_F(DequantizeOpTest, DequantizeBfloat16MinCombinedQuint8) {
  RunDequantizeBfloat16MinCombinedTest<quint8>(0, 255.0f);
}
TEST_F(DequantizeOpTest, DequantizeBfloat16MinCombinedQint8) {
  RunDequantizeBfloat16MinCombinedTest<qint8>(0, 255.0f);
}
TEST_F(DequantizeOpTest, DequantizeBfloat16MinCombinedQint16) {
  RunDequantizeBfloat16MinCombinedTest<qint16>(0, 255.0f);
}
TEST_F(DequantizeOpTest, DequantizeBfloat16MinCombinedQuint16) {
  RunDequantizeBfloat16MinCombinedTest<quint16>(0, 255.0f);
}
214 
// SCALED-mode dequantization for unsigned quint8 against hand-computed
// expectations (axis = -1, i.e. a single quantization range).
TEST_F(DequantizeOpTest, DequantizeScaledQuint8Zero) {
  RunDequantizeScaledTest<quint8>(-255.0f, 127.0f, -1, {0}, {0.0});
}
// For unsigned types the negative part of the range must not affect scaling.
TEST_F(DequantizeOpTest, DequantizeScaledQuint8CheckIgnoresNegative) {
  RunDequantizeScaledTest<quint8>(-512.0f, 255.0f, -1, {255}, {255.0});
}
TEST_F(DequantizeOpTest, DequantizeScaledQuint8ScaleDown) {
  RunDequantizeScaledTest<quint8>(-1.0f, 2.0f, -1, {255}, {2.0});
}
TEST_F(DequantizeOpTest, DequantizeScaledQuint8ScaleUp) {
  RunDequantizeScaledTest<quint8>(200.0f, 400.0f, -1, {255}, {400.0});
}
227 
// SCALED-mode dequantization for signed qint8, including per-axis variants
// (axis 1 and axis 3 of the fixture's fixed {2, 3, 4, 5} shape).
TEST_F(DequantizeOpTest, DequantizeScaledQint8Zero) {
  RunDequantizeScaledTest<qint8>(-255.0f, 127.0f, -1, {0}, {0.0});
}
TEST_F(DequantizeOpTest, DequantizeScaledQint8ScaleIdentity) {
  RunDequantizeScaledTest<qint8>(-10.0f, 127.0f, -1, {-127}, {-127.0});
}
TEST_F(DequantizeOpTest, DequantizeScaledQint8ScaleDown) {
  RunDequantizeScaledTest<qint8>(-2.0f, 1.0f, -1, {-128}, {-2.0});
}
// 300/127 * 42 = 99.2126... -- scale factor derived from the larger of
// |min|, |max| relative to the type's range.
TEST_F(DequantizeOpTest, DequantizeScaledQint8ScaleUp) {
  RunDequantizeScaledTest<qint8>(-1.0f, 300.0f, -1, {42}, {99.212601});
}
TEST_F(DequantizeOpTest, DequantizeScaledQint8Axis1) {
  RunDequantizeScaledTest<qint8>(-12.8f, 12.7f, 1, {-20, -10, 0, 1, 10, 20},
                                 {-2.0, -1.0, 0.0, 0.1, 1.0, 2.0});
}
TEST_F(DequantizeOpTest, DequantizeScaledQint8Axis3) {
  RunDequantizeScaledTest<qint8>(-12.8f, 12.7f, 3, {-20, -10, 0, 1, 10, 20},
                                 {-2.0, -1.0, 0.0, 0.1, 1.0, 2.0});
}
248 
249 template <typename T>
BM_DequantizeMinCombinedCpu(::testing::benchmark::State & state)250 static void BM_DequantizeMinCombinedCpu(::testing::benchmark::State& state) {
251   auto root = Scope::NewRootScope().ExitOnError();
252   const int64_t num_values = 1500 * 250;
253   std::vector<T> inputs;
254 
255   inputs.reserve(num_values);
256   for (int i = 0; i < num_values; ++i) inputs.push_back(i);
257 
258   ops::Dequantize(root, test::AsTensor<T>(inputs), test::AsScalar<float>(-1.5f),
259                   test::AsScalar<float>(20.5f),
260                   ops::Dequantize::Attrs().Mode("MIN_COMBINED"));
261   TF_CHECK_OK(root.status());
262   Graph* g = new Graph(OpRegistry::Global());
263   TF_CHECK_OK(root.ToGraph(g));
264 
265   test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);
266   state.SetBytesProcessed(state.iterations() * num_values *
267                           (sizeof(float) + sizeof(T)));
268   state.SetItemsProcessed(state.iterations());
269 }
270 
// Non-template entry points: BENCHMARK() cannot register a function template
// directly, so each supported T gets a thin wrapper.
void BM_DequantizeMinCombinedCpuQuint16(::testing::benchmark::State& state) {
  BM_DequantizeMinCombinedCpu<quint16>(state);
}

void BM_DequantizeMinCombinedCpuQint16(::testing::benchmark::State& state) {
  BM_DequantizeMinCombinedCpu<qint16>(state);
}

void BM_DequantizeMinCombinedCpuQuint8(::testing::benchmark::State& state) {
  BM_DequantizeMinCombinedCpu<quint8>(state);
}

void BM_DequantizeMinCombinedCpuQint8(::testing::benchmark::State& state) {
  BM_DequantizeMinCombinedCpu<qint8>(state);
}

BENCHMARK(BM_DequantizeMinCombinedCpuQuint16);
BENCHMARK(BM_DequantizeMinCombinedCpuQint16);
BENCHMARK(BM_DequantizeMinCombinedCpuQuint8);
BENCHMARK(BM_DequantizeMinCombinedCpuQint8);
291 
292 template <typename T>
BM_DequantizeBfloat16MinCombinedCpu(::testing::benchmark::State & state)293 static void BM_DequantizeBfloat16MinCombinedCpu(
294     ::testing::benchmark::State& state) {
295   auto root = Scope::NewRootScope().ExitOnError();
296   const int64_t num_values = 1500 * 250;
297   std::vector<T> inputs;
298 
299   inputs.reserve(num_values);
300   for (int i = 0; i < num_values; ++i) inputs.push_back(i);
301 
302   ops::Dequantize(root, test::AsTensor<T>(inputs), test::AsScalar<float>(-1.5f),
303                   test::AsScalar<float>(20.5f),
304                   ops::Dequantize::Attrs().Dtype(DT_BFLOAT16));
305   TF_CHECK_OK(root.status());
306   Graph* g = new Graph(OpRegistry::Global());
307   TF_CHECK_OK(root.ToGraph(g));
308 
309   test::Benchmark("cpu", g, /*old_benchmark_api=*/false).Run(state);
310   state.SetBytesProcessed(state.iterations() * num_values *
311                           (sizeof(bfloat16) + sizeof(T)));
312   state.SetItemsProcessed(state.iterations());
313 }
314 
// Non-template entry points for the bfloat16 benchmark (BENCHMARK() cannot
// register a function template directly).
void BM_DequantizeBfloat16MinCombinedCpuQuint16(
    ::testing::benchmark::State& state) {
  BM_DequantizeBfloat16MinCombinedCpu<quint16>(state);
}

void BM_DequantizeBfloat16MinCombinedCpuQint16(
    ::testing::benchmark::State& state) {
  BM_DequantizeBfloat16MinCombinedCpu<qint16>(state);
}

void BM_DequantizeBfloat16MinCombinedCpuQuint8(
    ::testing::benchmark::State& state) {
  BM_DequantizeBfloat16MinCombinedCpu<quint8>(state);
}

void BM_DequantizeBfloat16MinCombinedCpuQint8(
    ::testing::benchmark::State& state) {
  BM_DequantizeBfloat16MinCombinedCpu<qint8>(state);
}

BENCHMARK(BM_DequantizeBfloat16MinCombinedCpuQuint16);
BENCHMARK(BM_DequantizeBfloat16MinCombinedCpuQint16);
BENCHMARK(BM_DequantizeBfloat16MinCombinedCpuQuint8);
BENCHMARK(BM_DequantizeBfloat16MinCombinedCpuQint8);
339 
340 }  // namespace
341 }  // namespace tensorflow
342