• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>
7 #include <cmath>
8 #include <functional>
9 #include <random>
10 #include <vector>
11 
12 #include <benchmark/benchmark.h>
13 #include <fp16/fp16.h>
14 #include "bench/utils.h"
15 
16 #include <xnnpack.h>
17 #include <xnnpack/aligned-allocator.h>
18 #include <xnnpack/common.h>
19 #include <xnnpack/microfnptr.h>
20 #include <xnnpack/microparams-init.h>
21 #include <xnnpack/vcvt.h>
22 
23 
f16_f32_vcvt(benchmark::State & state,xnn_f16_f32_vcvt_ukernel_function cvt,xnn_init_f16_f32_cvt_params_fn init_params=nullptr,benchmark::utils::IsaCheckFunction isa_check=nullptr)24 static void f16_f32_vcvt(
25   benchmark::State& state,
26   xnn_f16_f32_vcvt_ukernel_function cvt,
27   xnn_init_f16_f32_cvt_params_fn init_params = nullptr,
28   benchmark::utils::IsaCheckFunction isa_check = nullptr)
29 {
30   if (isa_check && !isa_check(state)) {
31     return;
32   }
33 
34   const size_t num_elements = state.range(0);
35 
36   std::random_device random_device;
37   auto rng = std::mt19937(random_device());
38   auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
39   auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
40 
41   std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> x(num_elements + XNN_EXTRA_BYTES / sizeof(uint16_t));
42   std::vector<float, AlignedAllocator<float, 64>> y(num_elements);
43   std::generate(x.begin(), x.end(), std::ref(f16rng));
44   std::fill(y.begin(), y.end(), std::nanf(""));
45 
46   xnn_f16_f32_cvt_params params;
47   if (init_params != nullptr) {
48     init_params(&params);
49   }
50   for (auto _ : state) {
51     cvt(num_elements * sizeof(uint16_t), x.data(), y.data(), &params);
52   }
53 
54   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
55   if (cpu_frequency != 0) {
56     state.counters["cpufreq"] = cpu_frequency;
57   }
58 
59   const size_t elements_per_iteration = num_elements;
60   state.counters["elements"] =
61     benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
62 
63   const size_t bytes_per_iteration = num_elements * (sizeof(uint16_t) + sizeof(float));
64   state.counters["bytes"] =
65     benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
66 }
67 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // neonfp16 kernels: no params initializer, gated by CheckNEONFP16.
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neonfp16_x8,
      xnn_f16_f32_vcvt_ukernel__neonfp16_x8,
      /*init_params=*/nullptr,
      /*isa_check=*/benchmark::utils::CheckNEONFP16)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neonfp16_x16,
      xnn_f16_f32_vcvt_ukernel__neonfp16_x16,
      /*init_params=*/nullptr,
      /*isa_check=*/benchmark::utils::CheckNEONFP16)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // neon_int16 kernels: NEON params initializer, gated by CheckNEON.
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int16_x8,
      xnn_f16_f32_vcvt_ukernel__neon_int16_x8,
      /*init_params=*/xnn_init_f16_f32_cvt_neon_params,
      /*isa_check=*/benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int16_x16,
      xnn_f16_f32_vcvt_ukernel__neon_int16_x16,
      /*init_params=*/xnn_init_f16_f32_cvt_neon_params,
      /*isa_check=*/benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int16_x24,
      xnn_f16_f32_vcvt_ukernel__neon_int16_x24,
      /*init_params=*/xnn_init_f16_f32_cvt_neon_params,
      /*isa_check=*/benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int16_x32,
      xnn_f16_f32_vcvt_ukernel__neon_int16_x32,
      /*init_params=*/xnn_init_f16_f32_cvt_neon_params,
      /*isa_check=*/benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // neon_int32 kernels: NEON params initializer, gated by CheckNEON.
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int32_x8,
      xnn_f16_f32_vcvt_ukernel__neon_int32_x8,
      /*init_params=*/xnn_init_f16_f32_cvt_neon_params,
      /*isa_check=*/benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int32_x16,
      xnn_f16_f32_vcvt_ukernel__neon_int32_x16,
      /*init_params=*/xnn_init_f16_f32_cvt_neon_params,
      /*isa_check=*/benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int32_x24,
      xnn_f16_f32_vcvt_ukernel__neon_int32_x24,
      /*init_params=*/xnn_init_f16_f32_cvt_neon_params,
      /*isa_check=*/benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int32_x32,
      xnn_f16_f32_vcvt_ukernel__neon_int32_x32,
      /*init_params=*/xnn_init_f16_f32_cvt_neon_params,
      /*isa_check=*/benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
132 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // avx512skx kernels: no params initializer, gated by CheckAVX512SKX.
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx512skx_x16,
      xnn_f16_f32_vcvt_ukernel__avx512skx_x16,
      /*init_params=*/nullptr,
      /*isa_check=*/benchmark::utils::CheckAVX512SKX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx512skx_x32,
      xnn_f16_f32_vcvt_ukernel__avx512skx_x32,
      /*init_params=*/nullptr,
      /*isa_check=*/benchmark::utils::CheckAVX512SKX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // f16c kernels: no params initializer, gated by CheckF16C.
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, f16c_x8,
      xnn_f16_f32_vcvt_ukernel__f16c_x8,
      /*init_params=*/nullptr,
      /*isa_check=*/benchmark::utils::CheckF16C)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, f16c_x16,
      xnn_f16_f32_vcvt_ukernel__f16c_x16,
      /*init_params=*/nullptr,
      /*isa_check=*/benchmark::utils::CheckF16C)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // avx_int16 kernels: sse_int16 params initializer, gated by CheckAVX.
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int16_x8,
      xnn_f16_f32_vcvt_ukernel__avx_int16_x8,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int16_params,
      /*isa_check=*/benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int16_x16,
      xnn_f16_f32_vcvt_ukernel__avx_int16_x16,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int16_params,
      /*isa_check=*/benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int16_x24,
      xnn_f16_f32_vcvt_ukernel__avx_int16_x24,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int16_params,
      /*isa_check=*/benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int16_x32,
      xnn_f16_f32_vcvt_ukernel__avx_int16_x32,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int16_params,
      /*isa_check=*/benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // avx_int32 kernels: sse_int32 params initializer, gated by CheckAVX.
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int32_x8,
      xnn_f16_f32_vcvt_ukernel__avx_int32_x8,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int32_params,
      /*isa_check=*/benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int32_x16,
      xnn_f16_f32_vcvt_ukernel__avx_int32_x16,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int32_params,
      /*isa_check=*/benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int32_x24,
      xnn_f16_f32_vcvt_ukernel__avx_int32_x24,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int32_params,
      /*isa_check=*/benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int32_x32,
      xnn_f16_f32_vcvt_ukernel__avx_int32_x32,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int32_params,
      /*isa_check=*/benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // sse41_int16 kernels: sse_int16 params initializer, gated by CheckSSE41.
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int16_x8,
      xnn_f16_f32_vcvt_ukernel__sse41_int16_x8,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int16_params,
      /*isa_check=*/benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int16_x16,
      xnn_f16_f32_vcvt_ukernel__sse41_int16_x16,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int16_params,
      /*isa_check=*/benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int16_x24,
      xnn_f16_f32_vcvt_ukernel__sse41_int16_x24,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int16_params,
      /*isa_check=*/benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int16_x32,
      xnn_f16_f32_vcvt_ukernel__sse41_int16_x32,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int16_params,
      /*isa_check=*/benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // sse41_int32 kernels: sse_int32 params initializer, gated by CheckSSE41.
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int32_x8,
      xnn_f16_f32_vcvt_ukernel__sse41_int32_x8,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int32_params,
      /*isa_check=*/benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int32_x16,
      xnn_f16_f32_vcvt_ukernel__sse41_int32_x16,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int32_params,
      /*isa_check=*/benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int32_x24,
      xnn_f16_f32_vcvt_ukernel__sse41_int32_x24,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int32_params,
      /*isa_check=*/benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int32_x32,
      xnn_f16_f32_vcvt_ukernel__sse41_int32_x32,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int32_params,
      /*isa_check=*/benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // sse2_int16 kernels: sse_int16 params initializer, no ISA check passed.
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse2_int16_x8,
      xnn_f16_f32_vcvt_ukernel__sse2_int16_x8,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int16_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse2_int16_x16,
      xnn_f16_f32_vcvt_ukernel__sse2_int16_x16,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int16_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse2_int16_x24,
      xnn_f16_f32_vcvt_ukernel__sse2_int16_x24,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int16_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse2_int16_x32,
      xnn_f16_f32_vcvt_ukernel__sse2_int16_x32,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int16_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // sse2_int32 kernels: sse_int32 params initializer, no ISA check passed.
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse2_int32_x8,
      xnn_f16_f32_vcvt_ukernel__sse2_int32_x8,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int32_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse2_int32_x16,
      xnn_f16_f32_vcvt_ukernel__sse2_int32_x16,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int32_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse2_int32_x24,
      xnn_f16_f32_vcvt_ukernel__sse2_int32_x24,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int32_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse2_int32_x32,
      xnn_f16_f32_vcvt_ukernel__sse2_int32_x32,
      /*init_params=*/xnn_init_f16_f32_cvt_sse_int32_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
302 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // wasmsimd_int16 kernels: wasmsimd_int16 params initializer, no ISA check passed.
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, wasmsimd_int16_x8,
      xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x8,
      /*init_params=*/xnn_init_f16_f32_cvt_wasmsimd_int16_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, wasmsimd_int16_x16,
      xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x16,
      /*init_params=*/xnn_init_f16_f32_cvt_wasmsimd_int16_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, wasmsimd_int16_x24,
      xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x24,
      /*init_params=*/xnn_init_f16_f32_cvt_wasmsimd_int16_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, wasmsimd_int16_x32,
      xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x32,
      /*init_params=*/xnn_init_f16_f32_cvt_wasmsimd_int16_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // wasmsimd_int32 kernels: wasmsimd_int32 params initializer, no ISA check passed.
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, wasmsimd_int32_x8,
      xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8,
      /*init_params=*/xnn_init_f16_f32_cvt_wasmsimd_int32_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, wasmsimd_int32_x16,
      xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16,
      /*init_params=*/xnn_init_f16_f32_cvt_wasmsimd_int32_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, wasmsimd_int32_x24,
      xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24,
      /*init_params=*/xnn_init_f16_f32_cvt_wasmsimd_int32_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, wasmsimd_int32_x32,
      xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32,
      /*init_params=*/xnn_init_f16_f32_cvt_wasmsimd_int32_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
346 
347 BENCHMARK_CAPTURE(f16_f32_vcvt, scalar_x1,
348                   xnn_f16_f32_vcvt_ukernel__scalar_x1,
349                   xnn_init_f16_f32_cvt_scalar_params)
350   ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
351   ->UseRealTime();
352 BENCHMARK_CAPTURE(f16_f32_vcvt, scalar_x2,
353                   xnn_f16_f32_vcvt_ukernel__scalar_x2,
354                   xnn_init_f16_f32_cvt_scalar_params)
355   ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
356   ->UseRealTime();
357 BENCHMARK_CAPTURE(f16_f32_vcvt, scalar_x3,
358                   xnn_f16_f32_vcvt_ukernel__scalar_x3,
359                   xnn_init_f16_f32_cvt_scalar_params)
360   ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
361   ->UseRealTime();
362 BENCHMARK_CAPTURE(f16_f32_vcvt, scalar_x4,
363                   xnn_f16_f32_vcvt_ukernel__scalar_x4,
364                   xnn_init_f16_f32_cvt_scalar_params)
365   ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
366   ->UseRealTime();
367 
368 #ifndef XNNPACK_BENCHMARK_NO_MAIN
369 BENCHMARK_MAIN();
370 #endif
371