• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>
7 #include <cfloat>
8 #include <cmath>
9 #include <functional>
10 #include <random>
11 #include <vector>
12 
13 #include <cpuinfo.h>
14 
15 #include <benchmark/benchmark.h>
16 #include "bench/utils.h"
17 #include <xnnpack/AlignedAllocator.h>
18 #include <xnnpack/common.h>
19 #include <xnnpack/math-stubs.h>
20 
21 
22 class Rounding : public benchmark::Fixture {
23  public:
Rounding()24   inline Rounding()
25   {
26     cpuinfo_initialize();
27     const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
28     const size_t l1d_reserve = 1024;
29     n_ = (l1d_size - l1d_reserve) / (2 * sizeof(float));
30     n_ = n_ / 16 * 16;
31   }
32 
SetUp(const benchmark::State &)33   virtual void SetUp(const benchmark::State&) override
34   {
35     std::random_device random_device;
36     auto rng = std::mt19937(random_device());
37     auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
38 
39     input_.resize(n());
40     std::generate(input_.begin(), input_.end(), std::ref(f32rng));
41     output_.resize(n());
42     std::fill(output_.begin(), output_.end(), 0xA5);
43   }
44 
TearDown(benchmark::State & state)45   virtual void TearDown(benchmark::State& state) override
46   {
47     state.SetItemsProcessed(uint64_t(state.iterations()) * n());
48     state.SetBytesProcessed(uint64_t(state.iterations()) * n() * 2 * sizeof(float));
49     input_.clear();
50     output_.clear();
51   }
52 
input() const53   inline const float* input() const
54   {
55     return input_.data();
56   }
57 
output()58   inline float* output()
59   {
60     return output_.data();
61   }
62 
n() const63   inline size_t n() const
64   {
65     return n_;
66   }
67 
68  protected:
69   std::vector<float, AlignedAllocator<float, 64>> input_;
70   std::vector<float, AlignedAllocator<float, 64>> output_;
71   size_t n_;
72 };
73 
74 class RoundingToNearestEven : public Rounding { };
75 class RoundingDown : public Rounding { };
76 class RoundingUp : public Rounding { };
77 class RoundingTowardsZero : public Rounding { };
78 
// Times repeated calls to xnn_math_f32_roundne__scalar_addsub over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingToNearestEven, scalar_addsub)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__scalar_addsub(size_bytes, src, dst);
  }
}
85 
// Times repeated calls to xnn_math_f32_roundne__scalar_nearbyint over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingToNearestEven, scalar_nearbyint)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__scalar_nearbyint(size_bytes, src, dst);
  }
}
92 
// Times repeated calls to xnn_math_f32_roundne__scalar_rint over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingToNearestEven, scalar_rint)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__scalar_rint(size_bytes, src, dst);
  }
}
99 
// Times repeated calls to xnn_math_f32_roundd__scalar_addsub over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingDown, scalar_addsub)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__scalar_addsub(size_bytes, src, dst);
  }
}
106 
// Times repeated calls to xnn_math_f32_roundd__scalar_cvt over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingDown, scalar_cvt)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__scalar_cvt(size_bytes, src, dst);
  }
}
113 
// Times repeated calls to xnn_math_f32_roundd__scalar_floor over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingDown, scalar_floor)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__scalar_floor(size_bytes, src, dst);
  }
}
120 
// Times repeated calls to xnn_math_f32_roundu__scalar_addsub over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingUp, scalar_addsub)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__scalar_addsub(size_bytes, src, dst);
  }
}
127 
// Times repeated calls to xnn_math_f32_roundu__scalar_cvt over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingUp, scalar_cvt)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__scalar_cvt(size_bytes, src, dst);
  }
}
134 
// Times repeated calls to xnn_math_f32_roundu__scalar_ceil over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingUp, scalar_ceil)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__scalar_ceil(size_bytes, src, dst);
  }
}
141 
// Times repeated calls to xnn_math_f32_roundz__scalar_addsub over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingTowardsZero, scalar_addsub)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__scalar_addsub(size_bytes, src, dst);
  }
}
148 
// Times repeated calls to xnn_math_f32_roundz__scalar_cvt over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingTowardsZero, scalar_cvt)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__scalar_cvt(size_bytes, src, dst);
  }
}
155 
// Times repeated calls to xnn_math_f32_roundz__scalar_trunc over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingTowardsZero, scalar_trunc)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__scalar_trunc(size_bytes, src, dst);
  }
}
162 
163 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Times repeated calls to xnn_math_f32_roundne__wasmsimd_addsub over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingToNearestEven, wasmsimd_addsub)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__wasmsimd_addsub(size_bytes, src, dst);
  }
}
170 
// Times repeated calls to xnn_math_f32_roundne__wasmsimd_native over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingToNearestEven, wasmsimd_native)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__wasmsimd_native(size_bytes, src, dst);
  }
}
177 
// Times repeated calls to xnn_math_f32_roundd__wasmsimd_addsub over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingDown, wasmsimd_addsub)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__wasmsimd_addsub(size_bytes, src, dst);
  }
}
184 
// Times repeated calls to xnn_math_f32_roundd__wasmsimd_cvt over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingDown, wasmsimd_cvt)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__wasmsimd_cvt(size_bytes, src, dst);
  }
}
191 
// Times repeated calls to xnn_math_f32_roundd__wasmsimd_native over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingDown, wasmsimd_native)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__wasmsimd_native(size_bytes, src, dst);
  }
}
198 
// Times repeated calls to xnn_math_f32_roundu__wasmsimd_addsub over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingUp, wasmsimd_addsub)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__wasmsimd_addsub(size_bytes, src, dst);
  }
}
205 
// Times repeated calls to xnn_math_f32_roundu__wasmsimd_cvt over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingUp, wasmsimd_cvt)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__wasmsimd_cvt(size_bytes, src, dst);
  }
}
212 
// Times repeated calls to xnn_math_f32_roundu__wasmsimd_native over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingUp, wasmsimd_native)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__wasmsimd_native(size_bytes, src, dst);
  }
}
219 
// Times repeated calls to xnn_math_f32_roundz__wasmsimd_addsub over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingTowardsZero, wasmsimd_addsub)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__wasmsimd_addsub(size_bytes, src, dst);
  }
}
226 
// Times repeated calls to xnn_math_f32_roundz__wasmsimd_cvt over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingTowardsZero, wasmsimd_cvt)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__wasmsimd_cvt(size_bytes, src, dst);
  }
}
233 
// Times repeated calls to xnn_math_f32_roundz__wasmsimd_native over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingTowardsZero, wasmsimd_native)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__wasmsimd_native(size_bytes, src, dst);
  }
}
240 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
241 
242 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Times repeated calls to xnn_math_f32_roundne__neon_addsub over the
// fixture's input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report NEON,
// which the neon kernel presumably requires (relevant on 32-bit ARM).
BENCHMARK_F(RoundingToNearestEven, neon_addsub)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon()) {
    state.SkipWithError("no NEON extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundne__neon_addsub(
        n() * sizeof(float), input(), output());
  }
}
249 
// Times repeated calls to xnn_math_f32_roundne__neonv8 over the fixture's
// input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report
// NEONv8, which the neonv8 kernel presumably requires (relevant on 32-bit ARM).
BENCHMARK_F(RoundingToNearestEven, neonv8)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_v8()) {
    state.SkipWithError("no NEON-V8 extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundne__neonv8(
        n() * sizeof(float), input(), output());
  }
}
256 
// Times repeated calls to xnn_math_f32_roundd__neon_addsub over the
// fixture's input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report NEON,
// which the neon kernel presumably requires (relevant on 32-bit ARM).
BENCHMARK_F(RoundingDown, neon_addsub)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon()) {
    state.SkipWithError("no NEON extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundd__neon_addsub(
        n() * sizeof(float), input(), output());
  }
}
263 
// Times repeated calls to xnn_math_f32_roundd__neon_cvt over the fixture's
// input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report NEON,
// which the neon kernel presumably requires (relevant on 32-bit ARM).
BENCHMARK_F(RoundingDown, neon_cvt)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon()) {
    state.SkipWithError("no NEON extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundd__neon_cvt(
        n() * sizeof(float), input(), output());
  }
}
270 
// Times repeated calls to xnn_math_f32_roundd__neonv8 over the fixture's
// input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report
// NEONv8, which the neonv8 kernel presumably requires (relevant on 32-bit ARM).
BENCHMARK_F(RoundingDown, neonv8)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_v8()) {
    state.SkipWithError("no NEON-V8 extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundd__neonv8(
        n() * sizeof(float), input(), output());
  }
}
277 
// Times repeated calls to xnn_math_f32_roundu__neon_addsub over the
// fixture's input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report NEON,
// which the neon kernel presumably requires (relevant on 32-bit ARM).
BENCHMARK_F(RoundingUp, neon_addsub)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon()) {
    state.SkipWithError("no NEON extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundu__neon_addsub(
        n() * sizeof(float), input(), output());
  }
}
284 
// Times repeated calls to xnn_math_f32_roundu__neon_cvt over the fixture's
// input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report NEON,
// which the neon kernel presumably requires (relevant on 32-bit ARM).
BENCHMARK_F(RoundingUp, neon_cvt)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon()) {
    state.SkipWithError("no NEON extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundu__neon_cvt(
        n() * sizeof(float), input(), output());
  }
}
291 
// Times repeated calls to xnn_math_f32_roundu__neonv8 over the fixture's
// input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report
// NEONv8, which the neonv8 kernel presumably requires (relevant on 32-bit ARM).
BENCHMARK_F(RoundingUp, neonv8)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_v8()) {
    state.SkipWithError("no NEON-V8 extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundu__neonv8(
        n() * sizeof(float), input(), output());
  }
}
298 
// Times repeated calls to xnn_math_f32_roundz__neon_addsub over the
// fixture's input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report NEON,
// which the neon kernel presumably requires (relevant on 32-bit ARM).
BENCHMARK_F(RoundingTowardsZero, neon_addsub)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon()) {
    state.SkipWithError("no NEON extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundz__neon_addsub(
        n() * sizeof(float), input(), output());
  }
}
305 
// Times repeated calls to xnn_math_f32_roundz__neon_cvt over the fixture's
// input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report NEON,
// which the neon kernel presumably requires (relevant on 32-bit ARM).
BENCHMARK_F(RoundingTowardsZero, neon_cvt)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon()) {
    state.SkipWithError("no NEON extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundz__neon_cvt(
        n() * sizeof(float), input(), output());
  }
}
312 
// Times repeated calls to xnn_math_f32_roundz__neonv8 over the fixture's
// input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report
// NEONv8, which the neonv8 kernel presumably requires (relevant on 32-bit ARM).
BENCHMARK_F(RoundingTowardsZero, neonv8)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_neon_v8()) {
    state.SkipWithError("no NEON-V8 extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundz__neonv8(
        n() * sizeof(float), input(), output());
  }
}
319 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
320 
321 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Times repeated calls to xnn_math_f32_roundne__sse_addsub over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingToNearestEven, sse_addsub)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__sse_addsub(size_bytes, src, dst);
  }
}
328 
// Times repeated calls to xnn_math_f32_roundne__sse2_cvt over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingToNearestEven, sse2_cvt)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundne__sse2_cvt(size_bytes, src, dst);
  }
}
335 
// Times repeated calls to xnn_math_f32_roundne__sse41 over the fixture's
// input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report
// SSE4.1, which the sse41 kernel presumably requires.
BENCHMARK_F(RoundingToNearestEven, sse4)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_x86_sse4_1()) {
    state.SkipWithError("no SSE4.1 extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundne__sse41(
        n() * sizeof(float), input(), output());
  }
}
342 
// Times repeated calls to xnn_math_f32_roundd__sse_addsub over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingDown, sse_addsub)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__sse_addsub(size_bytes, src, dst);
  }
}
349 
// Times repeated calls to xnn_math_f32_roundd__sse2_cvt over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingDown, sse2_cvt)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundd__sse2_cvt(size_bytes, src, dst);
  }
}
356 
// Times repeated calls to xnn_math_f32_roundd__sse41 over the fixture's
// input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report
// SSE4.1, which the sse41 kernel presumably requires.
BENCHMARK_F(RoundingDown, sse4)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_x86_sse4_1()) {
    state.SkipWithError("no SSE4.1 extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundd__sse41(
        n() * sizeof(float), input(), output());
  }
}
363 
// Times repeated calls to xnn_math_f32_roundu__sse_addsub over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingUp, sse_addsub)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__sse_addsub(size_bytes, src, dst);
  }
}
370 
// Times repeated calls to xnn_math_f32_roundu__sse2_cvt over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingUp, sse2_cvt)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundu__sse2_cvt(size_bytes, src, dst);
  }
}
377 
// Times repeated calls to xnn_math_f32_roundu__sse41 over the fixture's
// input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report
// SSE4.1, which the sse41 kernel presumably requires.
BENCHMARK_F(RoundingUp, sse4)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_x86_sse4_1()) {
    state.SkipWithError("no SSE4.1 extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundu__sse41(
        n() * sizeof(float), input(), output());
  }
}
384 
// Times repeated calls to xnn_math_f32_roundz__sse_addsub over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingTowardsZero, sse_addsub)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__sse_addsub(size_bytes, src, dst);
  }
}
391 
// Times repeated calls to xnn_math_f32_roundz__sse2_cvt over the
// fixture's input/output buffers (size passed in bytes).
BENCHMARK_F(RoundingTowardsZero, sse2_cvt)(benchmark::State& state) {
  const size_t size_bytes = n() * sizeof(float);
  const float* const src = input();
  float* const dst = output();
  for (auto _ : state) {
    xnn_math_f32_roundz__sse2_cvt(size_bytes, src, dst);
  }
}
398 
// Times repeated calls to xnn_math_f32_roundz__sse41 over the fixture's
// input/output buffers (size passed in bytes).
// The benchmark is skipped with an error when cpuinfo does not report
// SSE4.1, which the sse41 kernel presumably requires.
BENCHMARK_F(RoundingTowardsZero, sse4)(benchmark::State& state) {
  if (!cpuinfo_initialize() || !cpuinfo_has_x86_sse4_1()) {
    state.SkipWithError("no SSE4.1 extension");
    return;
  }
  for (auto _ : state) {
    xnn_math_f32_roundz__sse41(
        n() * sizeof(float), input(), output());
  }
}
405 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
406 
407 
408 #ifndef XNNPACK_BENCHMARK_NO_MAIN
409 BENCHMARK_MAIN();
410 #endif
411