• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>
7 #include <cmath>
8 #include <functional>
9 #include <random>
10 #include <vector>
11 
12 #include <benchmark/benchmark.h>
13 #include "bench/utils.h"
14 
15 #include <xnnpack/AlignedAllocator.h>
16 #include <xnnpack/common.h>
17 #include <xnnpack/vunary.h>
18 #include <xnnpack/params.h>
19 #include <xnnpack/params-init.h>
20 
21 
f32_vlrelu(benchmark::State & state,xnn_f32_vlrelu_ukernel_function vlrelu,xnn_init_f32_lrelu_params_fn init_params,benchmark::utils::IsaCheckFunction isa_check=nullptr)22 static void f32_vlrelu(
23   benchmark::State& state,
24   xnn_f32_vlrelu_ukernel_function vlrelu,
25   xnn_init_f32_lrelu_params_fn init_params,
26   benchmark::utils::IsaCheckFunction isa_check = nullptr)
27 {
28   if (isa_check && !isa_check(state)) {
29     return;
30   }
31 
32   const size_t elements = state.range(0);
33   std::vector<float, AlignedAllocator<float, 64>> input(elements);
34   std::vector<float, AlignedAllocator<float, 64>> output(elements);
35 
36   std::random_device random_device;
37   auto rng = std::mt19937(random_device());
38   auto f32rng = std::bind(std::uniform_real_distribution<float>(-5.0f, 5.0f), std::ref(rng));
39   std::generate(input.begin(), input.end(), std::ref(f32rng));
40   std::fill(output.begin(), output.end(), std::nanf(""));
41 
42   union xnn_f32_lrelu_params params;
43   init_params(&params, 0.01f);
44   for (auto _ : state) {
45     vlrelu(elements * sizeof(float), input.data(), output.data(), &params);
46   }
47 
48   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
49   if (cpu_frequency != 0) {
50     state.counters["cpufreq"] = cpu_frequency;
51   }
52 
53   const size_t elements_per_iteration = elements;
54   state.counters["elements"] =
55     benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
56 
57   const size_t bytes_per_iteration = 2 * elements * sizeof(float);
58   state.counters["bytes"] =
59     benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
60 }
61 
// NEON kernels. The guard must admit both 32-bit and 64-bit ARM builds; the
// previous condition tested XNN_ARCH_ARM64 twice, which left these benchmarks
// unregistered on 32-bit ARM. benchmark::utils::CheckNEON is passed as the
// isa_check argument of f32_vlrelu.
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  BENCHMARK_CAPTURE(f32_vlrelu, neon_x4,
                    xnn_f32_vlrelu_ukernel__neon_x4,
                    xnn_init_f32_lrelu_scalar_params,
                    benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vlrelu, neon_x8,
                    xnn_f32_vlrelu_ukernel__neon_x8,
                    xnn_init_f32_lrelu_scalar_params,
                    benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
76 
// x86 / x86-64 kernels, registered in the order SSE, SSE2, SSE4.1, AVX, AVX512F.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64

// SSE: registered without an isa_check argument.
BENCHMARK_CAPTURE(
    f32_vlrelu, sse_x4,
    xnn_f32_vlrelu_ukernel__sse_x4, xnn_init_f32_lrelu_sse_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(
    f32_vlrelu, sse_x8,
    xnn_f32_vlrelu_ukernel__sse_x8, xnn_init_f32_lrelu_sse_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();

// SSE2: registered without an isa_check argument.
BENCHMARK_CAPTURE(
    f32_vlrelu, sse2_x4,
    xnn_f32_vlrelu_ukernel__sse2_x4, xnn_init_f32_lrelu_sse_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(
    f32_vlrelu, sse2_x8,
    xnn_f32_vlrelu_ukernel__sse2_x8, xnn_init_f32_lrelu_sse_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();

// SSE4.1: gated by benchmark::utils::CheckSSE41.
BENCHMARK_CAPTURE(
    f32_vlrelu, sse41_x4,
    xnn_f32_vlrelu_ukernel__sse41_x4, xnn_init_f32_lrelu_sse_params,
    benchmark::utils::CheckSSE41)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(
    f32_vlrelu, sse41_x8,
    xnn_f32_vlrelu_ukernel__sse41_x8, xnn_init_f32_lrelu_sse_params,
    benchmark::utils::CheckSSE41)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();

// AVX: gated by benchmark::utils::CheckAVX.
BENCHMARK_CAPTURE(
    f32_vlrelu, avx_x8,
    xnn_f32_vlrelu_ukernel__avx_x8, xnn_init_f32_lrelu_avx_params,
    benchmark::utils::CheckAVX)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(
    f32_vlrelu, avx_x16,
    xnn_f32_vlrelu_ukernel__avx_x16, xnn_init_f32_lrelu_avx_params,
    benchmark::utils::CheckAVX)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();

// AVX512F: gated by benchmark::utils::CheckAVX512F; uses the scalar params
// initializer.
BENCHMARK_CAPTURE(
    f32_vlrelu, avx512f_x16,
    xnn_f32_vlrelu_ukernel__avx512f_x16, xnn_init_f32_lrelu_scalar_params,
    benchmark::utils::CheckAVX512F)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(
    f32_vlrelu, avx512f_x32,
    xnn_f32_vlrelu_ukernel__avx512f_x32, xnn_init_f32_lrelu_scalar_params,
    benchmark::utils::CheckAVX512F)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();

#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
139 
// WebAssembly SIMD kernels (bitselect and minmax variants); none of them is
// given an isa_check argument.
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

// bitselect variants
BENCHMARK_CAPTURE(
    f32_vlrelu, wasmsimd_bitselect_x4,
    xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x4, xnn_init_f32_lrelu_wasmsimd_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(
    f32_vlrelu, wasmsimd_bitselect_x8,
    xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8, xnn_init_f32_lrelu_wasmsimd_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();

// minmax variants
BENCHMARK_CAPTURE(
    f32_vlrelu, wasmsimd_minmax_x4,
    xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x4, xnn_init_f32_lrelu_wasmsimd_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(
    f32_vlrelu, wasmsimd_minmax_x8,
    xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8, xnn_init_f32_lrelu_wasmsimd_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();

#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
163 
// WebAssembly (non-SIMD) kernels; registered for every WAsm build flavour
// listed in the guard and initialized with the scalar params initializer.
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

BENCHMARK_CAPTURE(
    f32_vlrelu, wasm_x1,
    xnn_f32_vlrelu_ukernel__wasm_x1, xnn_init_f32_lrelu_scalar_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(
    f32_vlrelu, wasm_x2,
    xnn_f32_vlrelu_ukernel__wasm_x2, xnn_init_f32_lrelu_scalar_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK_CAPTURE(
    f32_vlrelu, wasm_x4,
    xnn_f32_vlrelu_ukernel__wasm_x4, xnn_init_f32_lrelu_scalar_params)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();

#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
181 
182 BENCHMARK_CAPTURE(f32_vlrelu, scalar_x1,
183                   xnn_f32_vlrelu_ukernel__scalar_x1,
184                   xnn_init_f32_lrelu_scalar_params)
185   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
186   ->UseRealTime();
187 BENCHMARK_CAPTURE(f32_vlrelu, scalar_x2,
188                   xnn_f32_vlrelu_ukernel__scalar_x2,
189                   xnn_init_f32_lrelu_scalar_params)
190   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
191   ->UseRealTime();
192 BENCHMARK_CAPTURE(f32_vlrelu, scalar_x4,
193                   xnn_f32_vlrelu_ukernel__scalar_x4,
194                   xnn_init_f32_lrelu_scalar_params)
195   ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
196   ->UseRealTime();
197 
198 #ifndef XNNPACK_BENCHMARK_NO_MAIN
199 BENCHMARK_MAIN();
200 #endif
201