1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
#include <algorithm>
#include <cmath>
#include <cstring>
#include <functional>
#include <random>
#include <vector>

#include <xnnpack.h>

#include <benchmark/benchmark.h>

#include "bench/end2end.h"
#include "bench/utils.h"
#include "models/models.h"
#include <xnnpack/dwconv.h>
#include <xnnpack/params.h>
#include <xnnpack/params-init.h>
22
23
DWConvEnd2EndBenchmark(benchmark::State & state,models::ExecutionPlanFactory model_factory,xnn_f32_dwconv_minmax_unipass_ukernel_function dwconv,xnn_init_f32_minmax_params_fn init_params,uint8_t channel_tile,uint8_t primary_tile,benchmark::utils::IsaCheckFunction isa_check=nullptr)24 static void DWConvEnd2EndBenchmark(
25 benchmark::State& state,
26 models::ExecutionPlanFactory model_factory,
27 xnn_f32_dwconv_minmax_unipass_ukernel_function dwconv,
28 xnn_init_f32_minmax_params_fn init_params,
29 uint8_t channel_tile, uint8_t primary_tile,
30 benchmark::utils::IsaCheckFunction isa_check = nullptr)
31 {
32 if (isa_check && !isa_check(state)) {
33 return;
34 }
35 if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
36 state.SkipWithError("failed to initialize XNNPACK");
37 return;
38 }
39
40 // Override microkernels chosen in xnn_initialize
41 for (size_t i = 0; i < XNN_MAX_F32_DWCONV_UKERNELS; i++) {
42 // Replace only the microkernel the matching kernel size.
43 if (xnn_params.f32.dwconv[i].primary_tile == primary_tile) {
44 // Note: do not directly assign to xnn_params.f32.dwconv[i] because it breaks older gcc.
45 xnn_params.f32.dwconv[i].minmax.unipass = xnn_dwconv_unipass_ukernel_function(dwconv);
46 xnn_params.f32.dwconv[i].channel_tile = channel_tile;
47 xnn_params.f32.dwconv[i].primary_tile = primary_tile;
48 xnn_params.f32.dwconv[i].incremental_tile = 0;
49 xnn_params.f32.dwconv[i].init.f32 = init_params;
50 break;
51 }
52 }
53
54 auto execution_plan = model_factory(nullptr);
55 if (execution_plan.empty()) {
56 state.SkipWithError("failed to create a model");
57 return;
58 }
59
60 for (auto _ : state) {
61 for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
62 xnn_status status = xnn_run_operator(op.get(), nullptr);
63 if (status != xnn_status_success) {
64 state.SkipWithError("failed to run a model");
65 return;
66 }
67 }
68 }
69
70 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
71 if (cpu_frequency != 0) {
72 state.counters["cpufreq"] = cpu_frequency;
73 }
74 }
75
76 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
f32_dwconv_up4x9__aarch64_neonfma(benchmark::State & state,models::ExecutionPlanFactory model)77 static void f32_dwconv_up4x9__aarch64_neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
78 DWConvEnd2EndBenchmark(state, model,
79 xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma,
80 xnn_init_f32_minmax_scalar_params,
81 4 /* channel tile */, 9 /* primary tile */);
82 }
83
f32_dwconv_up4x9__aarch64_neonfma_cortex_a55(benchmark::State & state,models::ExecutionPlanFactory model)84 static void f32_dwconv_up4x9__aarch64_neonfma_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
85 DWConvEnd2EndBenchmark(state, model,
86 xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma_cortex_a55,
87 xnn_init_f32_minmax_scalar_params,
88 4 /* channel tile */, 9 /* primary tile */);
89 }
90
// Register the two AArch64 assembly DWCONV wrappers through BENCHMARK_FP32_END2END
// (macro presumably supplied by bench/end2end.h - confirm).
91 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__aarch64_neonfma);
92 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__aarch64_neonfma_cortex_a55);
93 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
94
95 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
f32_dwconv_up4x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)96 static void f32_dwconv_up4x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
97 DWConvEnd2EndBenchmark(state, model,
98 xnn_f32_dwconv_minmax_ukernel_up4x9__neon,
99 xnn_init_f32_minmax_scalar_params,
100 4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
101 }
102
f32_dwconv_up4x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)103 static void f32_dwconv_up4x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
104 DWConvEnd2EndBenchmark(state, model,
105 xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2,
106 xnn_init_f32_minmax_scalar_params,
107 4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
108 }
109
f32_dwconv_up8x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)110 static void f32_dwconv_up8x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
111 DWConvEnd2EndBenchmark(state, model,
112 xnn_f32_dwconv_minmax_ukernel_up8x9__neon,
113 xnn_init_f32_minmax_scalar_params,
114 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
115 }
116
f32_dwconv_up8x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)117 static void f32_dwconv_up8x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
118 DWConvEnd2EndBenchmark(state, model,
119 xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2,
120 xnn_init_f32_minmax_scalar_params,
121 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
122 }
123
f32_dwconv_up16x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)124 static void f32_dwconv_up16x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
125 DWConvEnd2EndBenchmark(state, model,
126 xnn_f32_dwconv_minmax_ukernel_up16x9__neon,
127 xnn_init_f32_minmax_scalar_params,
128 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
129 }
130
f32_dwconv_up16x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)131 static void f32_dwconv_up16x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
132 DWConvEnd2EndBenchmark(state, model,
133 xnn_f32_dwconv_minmax_ukernel_up16x9__neon_acc2,
134 xnn_init_f32_minmax_scalar_params,
135 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
136 }
137
f32_dwconv_up4x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)138 static void f32_dwconv_up4x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
139 DWConvEnd2EndBenchmark(state, model,
140 xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma,
141 xnn_init_f32_minmax_scalar_params,
142 4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
143 }
144
f32_dwconv_up4x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)145 static void f32_dwconv_up4x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
146 DWConvEnd2EndBenchmark(state, model,
147 xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2,
148 xnn_init_f32_minmax_scalar_params,
149 4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
150 }
151
f32_dwconv_up8x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)152 static void f32_dwconv_up8x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
153 DWConvEnd2EndBenchmark(state, model,
154 xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma,
155 xnn_init_f32_minmax_scalar_params,
156 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
157 }
158
f32_dwconv_up8x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)159 static void f32_dwconv_up8x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
160 DWConvEnd2EndBenchmark(state, model,
161 xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2,
162 xnn_init_f32_minmax_scalar_params,
163 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
164 }
165
f32_dwconv_up16x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)166 static void f32_dwconv_up16x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
167 DWConvEnd2EndBenchmark(state, model,
168 xnn_f32_dwconv_minmax_ukernel_up16x9__neonfma,
169 xnn_init_f32_minmax_scalar_params,
170 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
171 }
172
f32_dwconv_up16x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)173 static void f32_dwconv_up16x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
174 DWConvEnd2EndBenchmark(state, model,
175 xnn_f32_dwconv_minmax_ukernel_up16x9__neonfma_acc2,
176 xnn_init_f32_minmax_scalar_params,
177 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
178 }
179
// Register the ARM wrappers through BENCHMARK_FP32_END2END: the NEON-FMA
// variants are listed first, followed by the plain NEON variants.
180 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neonfma);
181 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neonfma_acc2);
182 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neonfma);
183 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neonfma_acc2);
184 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neonfma);
185 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neonfma_acc2);
186
187 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neon);
188 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neon_acc2);
189 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neon);
190 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neon_acc2);
191 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neon);
192 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neon_acc2);
193 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
194
195
196 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
f32_dwconv_up4x9__sse(benchmark::State & state,models::ExecutionPlanFactory model)197 static void f32_dwconv_up4x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
198 DWConvEnd2EndBenchmark(state, model,
199 xnn_f32_dwconv_minmax_ukernel_up4x9__sse,
200 xnn_init_f32_minmax_sse_params,
201 4 /* channel tile */, 9 /* primary tile */);
202 }
f32_dwconv_up4x9__sse_acc2(benchmark::State & state,models::ExecutionPlanFactory model)203 static void f32_dwconv_up4x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
204 DWConvEnd2EndBenchmark(state, model,
205 xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2,
206 xnn_init_f32_minmax_sse_params,
207 4 /* channel tile */, 9 /* primary tile */);
208 }
f32_dwconv_up8x9__sse(benchmark::State & state,models::ExecutionPlanFactory model)209 static void f32_dwconv_up8x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
210 DWConvEnd2EndBenchmark(state, model,
211 xnn_f32_dwconv_minmax_ukernel_up8x9__sse,
212 xnn_init_f32_minmax_sse_params,
213 8 /* channel tile */, 9 /* primary tile */);
214 }
f32_dwconv_up8x9__sse_acc2(benchmark::State & state,models::ExecutionPlanFactory model)215 static void f32_dwconv_up8x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
216 DWConvEnd2EndBenchmark(state, model,
217 xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2,
218 xnn_init_f32_minmax_sse_params,
219 8 /* channel tile */, 9 /* primary tile */);
220 }
221
f32_dwconv_up8x9__avx(benchmark::State & state,models::ExecutionPlanFactory model)222 static void f32_dwconv_up8x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) {
223 DWConvEnd2EndBenchmark(state, model,
224 xnn_f32_dwconv_minmax_ukernel_up8x9__avx,
225 xnn_init_f32_minmax_avx_params,
226 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
227 }
f32_dwconv_up8x9__avx_acc2(benchmark::State & state,models::ExecutionPlanFactory model)228 static void f32_dwconv_up8x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
229 DWConvEnd2EndBenchmark(state, model,
230 xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2,
231 xnn_init_f32_minmax_avx_params,
232 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
233 }
f32_dwconv_up16x9__avx(benchmark::State & state,models::ExecutionPlanFactory model)234 static void f32_dwconv_up16x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) {
235 DWConvEnd2EndBenchmark(state, model,
236 xnn_f32_dwconv_minmax_ukernel_up16x9__avx,
237 xnn_init_f32_minmax_avx_params,
238 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
239 }
f32_dwconv_up16x9__avx_acc2(benchmark::State & state,models::ExecutionPlanFactory model)240 static void f32_dwconv_up16x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
241 DWConvEnd2EndBenchmark(state, model,
242 xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2,
243 xnn_init_f32_minmax_avx_params,
244 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
245 }
246
f32_dwconv_up8x9__fma3(benchmark::State & state,models::ExecutionPlanFactory model)247 static void f32_dwconv_up8x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) {
248 DWConvEnd2EndBenchmark(state, model,
249 xnn_f32_dwconv_minmax_ukernel_up8x9__fma3,
250 xnn_init_f32_minmax_avx_params,
251 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
252 }
f32_dwconv_up8x9__fma3_acc2(benchmark::State & state,models::ExecutionPlanFactory model)253 static void f32_dwconv_up8x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
254 DWConvEnd2EndBenchmark(state, model,
255 xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2,
256 xnn_init_f32_minmax_avx_params,
257 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
258 }
f32_dwconv_up16x9__fma3(benchmark::State & state,models::ExecutionPlanFactory model)259 static void f32_dwconv_up16x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) {
260 DWConvEnd2EndBenchmark(state, model,
261 xnn_f32_dwconv_minmax_ukernel_up16x9__fma3,
262 xnn_init_f32_minmax_avx_params,
263 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
264 }
f32_dwconv_up16x9__fma3_acc2(benchmark::State & state,models::ExecutionPlanFactory model)265 static void f32_dwconv_up16x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
266 DWConvEnd2EndBenchmark(state, model,
267 xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2,
268 xnn_init_f32_minmax_avx_params,
269 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
270 }
271
f32_dwconv_up16x9__avx512f(benchmark::State & state,models::ExecutionPlanFactory model)272 static void f32_dwconv_up16x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) {
273 DWConvEnd2EndBenchmark(state, model,
274 xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f,
275 xnn_init_f32_minmax_scalar_params,
276 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
277 }
f32_dwconv_up16x9__avx512f_acc2(benchmark::State & state,models::ExecutionPlanFactory model)278 static void f32_dwconv_up16x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
279 DWConvEnd2EndBenchmark(state, model,
280 xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2,
281 xnn_init_f32_minmax_scalar_params,
282 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
283 }
f32_dwconv_up32x9__avx512f(benchmark::State & state,models::ExecutionPlanFactory model)284 static void f32_dwconv_up32x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) {
285 DWConvEnd2EndBenchmark(state, model,
286 xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f,
287 xnn_init_f32_minmax_scalar_params,
288 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
289 }
f32_dwconv_up32x9__avx512f_acc2(benchmark::State & state,models::ExecutionPlanFactory model)290 static void f32_dwconv_up32x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
291 DWConvEnd2EndBenchmark(state, model,
292 xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2,
293 xnn_init_f32_minmax_scalar_params,
294 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
295 }
296
// Register the x86 wrappers through BENCHMARK_FP32_END2END, grouped by
// instruction set in the order AVX512F, FMA3, AVX, SSE.
297 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx512f);
298 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx512f_acc2);
299 BENCHMARK_FP32_END2END(f32_dwconv_up32x9__avx512f);
300 BENCHMARK_FP32_END2END(f32_dwconv_up32x9__avx512f_acc2);
301
302 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__fma3);
303 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__fma3_acc2);
304 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__fma3);
305 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__fma3_acc2);
306
307 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__avx);
308 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__avx_acc2);
309 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx);
310 BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx_acc2);
311
312 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__sse);
313 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__sse_acc2);
314 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__sse);
315 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__sse_acc2);
316 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
317
318 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
f32_dwconv_up4x9__wasmsimd_arm(benchmark::State & state,models::ExecutionPlanFactory model)319 static void f32_dwconv_up4x9__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
320 DWConvEnd2EndBenchmark(state, model,
321 xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm,
322 xnn_init_f32_minmax_scalar_params,
323 4 /* channel tile */, 9 /* primary tile */);
324 }
325
f32_dwconv_up4x9__wasmsimd_arm_acc2(benchmark::State & state,models::ExecutionPlanFactory model)326 static void f32_dwconv_up4x9__wasmsimd_arm_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
327 DWConvEnd2EndBenchmark(state, model,
328 xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2,
329 xnn_init_f32_minmax_scalar_params,
330 4 /* channel tile */, 9 /* primary tile */);
331 }
332
f32_dwconv_up8x9__wasmsimd_arm(benchmark::State & state,models::ExecutionPlanFactory model)333 static void f32_dwconv_up8x9__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
334 DWConvEnd2EndBenchmark(state, model,
335 xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm,
336 xnn_init_f32_minmax_scalar_params,
337 8 /* channel tile */, 9 /* primary tile */);
338 }
339
f32_dwconv_up8x9__wasmsimd_arm_acc2(benchmark::State & state,models::ExecutionPlanFactory model)340 static void f32_dwconv_up8x9__wasmsimd_arm_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
341 DWConvEnd2EndBenchmark(state, model,
342 xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2,
343 xnn_init_f32_minmax_scalar_params,
344 8 /* channel tile */, 9 /* primary tile */);
345 }
346
f32_dwconv_up4x9__wasmsimd_x86(benchmark::State & state,models::ExecutionPlanFactory model)347 static void f32_dwconv_up4x9__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
348 DWConvEnd2EndBenchmark(state, model,
349 xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86,
350 xnn_init_f32_minmax_scalar_params,
351 4 /* channel tile */, 9 /* primary tile */);
352 }
353
f32_dwconv_up4x9__wasmsimd_x86_acc2(benchmark::State & state,models::ExecutionPlanFactory model)354 static void f32_dwconv_up4x9__wasmsimd_x86_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
355 DWConvEnd2EndBenchmark(state, model,
356 xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2,
357 xnn_init_f32_minmax_scalar_params,
358 4 /* channel tile */, 9 /* primary tile */);
359 }
360
f32_dwconv_up8x9__wasmsimd_x86(benchmark::State & state,models::ExecutionPlanFactory model)361 static void f32_dwconv_up8x9__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
362 DWConvEnd2EndBenchmark(state, model,
363 xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86,
364 xnn_init_f32_minmax_scalar_params,
365 8 /* channel tile */, 9 /* primary tile */);
366 }
367
f32_dwconv_up8x9__wasmsimd_x86_acc2(benchmark::State & state,models::ExecutionPlanFactory model)368 static void f32_dwconv_up8x9__wasmsimd_x86_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
369 DWConvEnd2EndBenchmark(state, model,
370 xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2,
371 xnn_init_f32_minmax_scalar_params,
372 8 /* channel tile */, 9 /* primary tile */);
373 }
374
// Register the WebAssembly SIMD wrappers through BENCHMARK_FP32_END2END:
// the "_arm" variants first, then the "_x86" variants.
375 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_arm);
376 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_arm_acc2);
377 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_arm);
378 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_arm_acc2);
379
380 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_x86);
381 BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_x86_acc2);
382 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_x86);
383 BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_x86_acc2);
384 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
385
f32_dwconv_up1x9__scalar(benchmark::State & state,models::ExecutionPlanFactory model)386 static void f32_dwconv_up1x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
387 DWConvEnd2EndBenchmark(state, model,
388 xnn_f32_dwconv_minmax_ukernel_up1x9__scalar,
389 xnn_init_f32_minmax_scalar_params,
390 1 /* channel tile */, 9 /* primary tile */);
391 }
392
f32_dwconv_up1x9__scalar_acc2(benchmark::State & state,models::ExecutionPlanFactory model)393 static void f32_dwconv_up1x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
394 DWConvEnd2EndBenchmark(state, model,
395 xnn_f32_dwconv_minmax_ukernel_up1x9__scalar_acc2,
396 xnn_init_f32_minmax_scalar_params,
397 1 /* channel tile */, 9 /* primary tile */);
398 }
399
f32_dwconv_up2x9__scalar(benchmark::State & state,models::ExecutionPlanFactory model)400 static void f32_dwconv_up2x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
401 DWConvEnd2EndBenchmark(state, model,
402 xnn_f32_dwconv_minmax_ukernel_up2x9__scalar,
403 xnn_init_f32_minmax_scalar_params,
404 2 /* channel tile */, 9 /* primary tile */);
405 }
406
f32_dwconv_up2x9__scalar_acc2(benchmark::State & state,models::ExecutionPlanFactory model)407 static void f32_dwconv_up2x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
408 DWConvEnd2EndBenchmark(state, model,
409 xnn_f32_dwconv_minmax_ukernel_up2x9__scalar_acc2,
410 xnn_init_f32_minmax_scalar_params,
411 2 /* channel tile */, 9 /* primary tile */);
412 }
413
// Register the scalar wrappers through BENCHMARK_FP32_END2END; these sit
// outside every architecture #if, so they are registered on all targets.
414 BENCHMARK_FP32_END2END(f32_dwconv_up1x9__scalar);
415 BENCHMARK_FP32_END2END(f32_dwconv_up1x9__scalar_acc2);
416 BENCHMARK_FP32_END2END(f32_dwconv_up2x9__scalar);
417 BENCHMARK_FP32_END2END(f32_dwconv_up2x9__scalar_acc2);
418
// Expand BENCHMARK_MAIN() unless the build defines XNNPACK_BENCHMARK_NO_MAIN
// (presumably set when another translation unit provides main - confirm).
419 #ifndef XNNPACK_BENCHMARK_NO_MAIN
420 BENCHMARK_MAIN();
421 #endif
422