• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>
7 #include <cmath>
8 #include <functional>
9 #include <random>
10 #include <vector>
11 
12 #include <xnnpack.h>
13 
14 #include <benchmark/benchmark.h>
15 
16 #include "bench/end2end.h"
17 #include "bench/utils.h"
18 #include "models/models.h"
19 #include <xnnpack/dwconv.h>
20 #include <xnnpack/params.h>
21 #include <xnnpack/params-init.h>
22 
23 
DWConvEnd2EndBenchmark(benchmark::State & state,models::ExecutionPlanFactory model_factory,xnn_f32_dwconv_minmax_unipass_ukernel_function dwconv,xnn_init_f32_minmax_params_fn init_params,uint8_t channel_tile,uint8_t primary_tile,benchmark::utils::IsaCheckFunction isa_check=nullptr)24 static void DWConvEnd2EndBenchmark(
25   benchmark::State& state,
26   models::ExecutionPlanFactory model_factory,
27   xnn_f32_dwconv_minmax_unipass_ukernel_function dwconv,
28   xnn_init_f32_minmax_params_fn init_params,
29   uint8_t channel_tile, uint8_t primary_tile,
30   benchmark::utils::IsaCheckFunction isa_check = nullptr)
31 {
32   if (isa_check && !isa_check(state)) {
33     return;
34   }
35   if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
36     state.SkipWithError("failed to initialize XNNPACK");
37     return;
38   }
39 
40   // Override microkernels chosen in xnn_initialize
41   for (size_t i = 0; i < XNN_MAX_F32_DWCONV_UKERNELS; i++) {
42     // Replace only the microkernel the matching kernel size.
43     if (xnn_params.f32.dwconv[i].primary_tile == primary_tile) {
44       // Note: do not directly assign to xnn_params.f32.dwconv[i] because it breaks older gcc.
45       xnn_params.f32.dwconv[i].minmax.unipass = xnn_dwconv_unipass_ukernel_function(dwconv);
46       xnn_params.f32.dwconv[i].channel_tile = channel_tile;
47       xnn_params.f32.dwconv[i].primary_tile = primary_tile;
48       xnn_params.f32.dwconv[i].incremental_tile = 0;
49       xnn_params.f32.dwconv[i].init.f32 = init_params;
50       break;
51     }
52   }
53 
54   auto execution_plan = model_factory(nullptr);
55   if (execution_plan.empty()) {
56     state.SkipWithError("failed to create a model");
57     return;
58   }
59 
60   for (auto _ : state) {
61     for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
62       xnn_status status = xnn_run_operator(op.get(), nullptr);
63       if (status != xnn_status_success) {
64         state.SkipWithError("failed to run a model");
65         return;
66       }
67     }
68   }
69 
70   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
71   if (cpu_frequency != 0) {
72     state.counters["cpufreq"] = cpu_frequency;
73   }
74 }
75 
76 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
f32_dwconv_up4x9__aarch64_neonfma(benchmark::State & state,models::ExecutionPlanFactory model)77   static void f32_dwconv_up4x9__aarch64_neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
78     DWConvEnd2EndBenchmark(state, model,
79       xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma,
80       xnn_init_f32_minmax_scalar_params,
81       4 /* channel tile */, 9 /* primary tile */);
82   }
83 
f32_dwconv_up4x9__aarch64_neonfma_cortex_a55(benchmark::State & state,models::ExecutionPlanFactory model)84   static void f32_dwconv_up4x9__aarch64_neonfma_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
85     DWConvEnd2EndBenchmark(state, model,
86       xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma_cortex_a55,
87       xnn_init_f32_minmax_scalar_params,
88       4 /* channel tile */, 9 /* primary tile */);
89   }
90 
91   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__aarch64_neonfma);
92   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__aarch64_neonfma_cortex_a55);
93 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
94 
95 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
f32_dwconv_up4x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)96   static void f32_dwconv_up4x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
97     DWConvEnd2EndBenchmark(state, model,
98       xnn_f32_dwconv_minmax_ukernel_up4x9__neon,
99       xnn_init_f32_minmax_scalar_params,
100       4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
101   }
102 
f32_dwconv_up4x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)103   static void f32_dwconv_up4x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
104     DWConvEnd2EndBenchmark(state, model,
105       xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2,
106       xnn_init_f32_minmax_scalar_params,
107       4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
108   }
109 
f32_dwconv_up8x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)110   static void f32_dwconv_up8x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
111     DWConvEnd2EndBenchmark(state, model,
112       xnn_f32_dwconv_minmax_ukernel_up8x9__neon,
113       xnn_init_f32_minmax_scalar_params,
114       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
115   }
116 
f32_dwconv_up8x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)117   static void f32_dwconv_up8x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
118     DWConvEnd2EndBenchmark(state, model,
119       xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2,
120       xnn_init_f32_minmax_scalar_params,
121       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
122   }
123 
f32_dwconv_up16x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)124   static void f32_dwconv_up16x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
125     DWConvEnd2EndBenchmark(state, model,
126       xnn_f32_dwconv_minmax_ukernel_up16x9__neon,
127       xnn_init_f32_minmax_scalar_params,
128       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
129   }
130 
f32_dwconv_up16x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)131   static void f32_dwconv_up16x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
132     DWConvEnd2EndBenchmark(state, model,
133       xnn_f32_dwconv_minmax_ukernel_up16x9__neon_acc2,
134       xnn_init_f32_minmax_scalar_params,
135       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
136   }
137 
f32_dwconv_up4x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)138   static void f32_dwconv_up4x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
139     DWConvEnd2EndBenchmark(state, model,
140       xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma,
141       xnn_init_f32_minmax_scalar_params,
142       4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
143   }
144 
f32_dwconv_up4x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)145   static void f32_dwconv_up4x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
146     DWConvEnd2EndBenchmark(state, model,
147       xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2,
148       xnn_init_f32_minmax_scalar_params,
149       4 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
150   }
151 
f32_dwconv_up8x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)152   static void f32_dwconv_up8x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
153     DWConvEnd2EndBenchmark(state, model,
154       xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma,
155       xnn_init_f32_minmax_scalar_params,
156       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
157   }
158 
f32_dwconv_up8x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)159   static void f32_dwconv_up8x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
160     DWConvEnd2EndBenchmark(state, model,
161       xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2,
162       xnn_init_f32_minmax_scalar_params,
163       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
164   }
165 
f32_dwconv_up16x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)166   static void f32_dwconv_up16x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
167     DWConvEnd2EndBenchmark(state, model,
168       xnn_f32_dwconv_minmax_ukernel_up16x9__neonfma,
169       xnn_init_f32_minmax_scalar_params,
170       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
171   }
172 
f32_dwconv_up16x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)173   static void f32_dwconv_up16x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
174     DWConvEnd2EndBenchmark(state, model,
175       xnn_f32_dwconv_minmax_ukernel_up16x9__neonfma_acc2,
176       xnn_init_f32_minmax_scalar_params,
177       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEONFMA);
178   }
179 
180   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neonfma);
181   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neonfma_acc2);
182   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neonfma);
183   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neonfma_acc2);
184   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neonfma);
185   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neonfma_acc2);
186 
187   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neon);
188   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neon_acc2);
189   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neon);
190   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neon_acc2);
191   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neon);
192   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__neon_acc2);
193 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
194 
195 
196 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
f32_dwconv_up4x9__sse(benchmark::State & state,models::ExecutionPlanFactory model)197   static void f32_dwconv_up4x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
198     DWConvEnd2EndBenchmark(state, model,
199       xnn_f32_dwconv_minmax_ukernel_up4x9__sse,
200       xnn_init_f32_minmax_sse_params,
201       4 /* channel tile */, 9 /* primary tile */);
202   }
f32_dwconv_up4x9__sse_acc2(benchmark::State & state,models::ExecutionPlanFactory model)203   static void f32_dwconv_up4x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
204     DWConvEnd2EndBenchmark(state, model,
205       xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2,
206       xnn_init_f32_minmax_sse_params,
207       4 /* channel tile */, 9 /* primary tile */);
208   }
f32_dwconv_up8x9__sse(benchmark::State & state,models::ExecutionPlanFactory model)209   static void f32_dwconv_up8x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
210     DWConvEnd2EndBenchmark(state, model,
211       xnn_f32_dwconv_minmax_ukernel_up8x9__sse,
212       xnn_init_f32_minmax_sse_params,
213       8 /* channel tile */, 9 /* primary tile */);
214   }
f32_dwconv_up8x9__sse_acc2(benchmark::State & state,models::ExecutionPlanFactory model)215   static void f32_dwconv_up8x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
216     DWConvEnd2EndBenchmark(state, model,
217       xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2,
218       xnn_init_f32_minmax_sse_params,
219       8 /* channel tile */, 9 /* primary tile */);
220   }
221 
f32_dwconv_up8x9__avx(benchmark::State & state,models::ExecutionPlanFactory model)222   static void f32_dwconv_up8x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) {
223     DWConvEnd2EndBenchmark(state, model,
224       xnn_f32_dwconv_minmax_ukernel_up8x9__avx,
225       xnn_init_f32_minmax_avx_params,
226       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
227   }
f32_dwconv_up8x9__avx_acc2(benchmark::State & state,models::ExecutionPlanFactory model)228   static void f32_dwconv_up8x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
229     DWConvEnd2EndBenchmark(state, model,
230       xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2,
231       xnn_init_f32_minmax_avx_params,
232       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
233   }
f32_dwconv_up16x9__avx(benchmark::State & state,models::ExecutionPlanFactory model)234   static void f32_dwconv_up16x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) {
235     DWConvEnd2EndBenchmark(state, model,
236       xnn_f32_dwconv_minmax_ukernel_up16x9__avx,
237       xnn_init_f32_minmax_avx_params,
238       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
239   }
f32_dwconv_up16x9__avx_acc2(benchmark::State & state,models::ExecutionPlanFactory model)240   static void f32_dwconv_up16x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
241     DWConvEnd2EndBenchmark(state, model,
242       xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2,
243       xnn_init_f32_minmax_avx_params,
244       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
245   }
246 
f32_dwconv_up8x9__fma3(benchmark::State & state,models::ExecutionPlanFactory model)247   static void f32_dwconv_up8x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) {
248     DWConvEnd2EndBenchmark(state, model,
249       xnn_f32_dwconv_minmax_ukernel_up8x9__fma3,
250       xnn_init_f32_minmax_avx_params,
251       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
252   }
f32_dwconv_up8x9__fma3_acc2(benchmark::State & state,models::ExecutionPlanFactory model)253   static void f32_dwconv_up8x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
254     DWConvEnd2EndBenchmark(state, model,
255       xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2,
256       xnn_init_f32_minmax_avx_params,
257       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
258   }
f32_dwconv_up16x9__fma3(benchmark::State & state,models::ExecutionPlanFactory model)259   static void f32_dwconv_up16x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) {
260     DWConvEnd2EndBenchmark(state, model,
261       xnn_f32_dwconv_minmax_ukernel_up16x9__fma3,
262       xnn_init_f32_minmax_avx_params,
263       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
264   }
f32_dwconv_up16x9__fma3_acc2(benchmark::State & state,models::ExecutionPlanFactory model)265   static void f32_dwconv_up16x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
266     DWConvEnd2EndBenchmark(state, model,
267       xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2,
268       xnn_init_f32_minmax_avx_params,
269       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckFMA3);
270   }
271 
f32_dwconv_up16x9__avx512f(benchmark::State & state,models::ExecutionPlanFactory model)272   static void f32_dwconv_up16x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) {
273     DWConvEnd2EndBenchmark(state, model,
274       xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f,
275       xnn_init_f32_minmax_scalar_params,
276       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
277   }
f32_dwconv_up16x9__avx512f_acc2(benchmark::State & state,models::ExecutionPlanFactory model)278   static void f32_dwconv_up16x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
279     DWConvEnd2EndBenchmark(state, model,
280       xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2,
281       xnn_init_f32_minmax_scalar_params,
282       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
283   }
f32_dwconv_up32x9__avx512f(benchmark::State & state,models::ExecutionPlanFactory model)284   static void f32_dwconv_up32x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) {
285     DWConvEnd2EndBenchmark(state, model,
286       xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f,
287       xnn_init_f32_minmax_scalar_params,
288       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
289   }
f32_dwconv_up32x9__avx512f_acc2(benchmark::State & state,models::ExecutionPlanFactory model)290   static void f32_dwconv_up32x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
291     DWConvEnd2EndBenchmark(state, model,
292       xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2,
293       xnn_init_f32_minmax_scalar_params,
294       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512F);
295   }
296 
297   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx512f);
298   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx512f_acc2);
299   BENCHMARK_FP32_END2END(f32_dwconv_up32x9__avx512f);
300   BENCHMARK_FP32_END2END(f32_dwconv_up32x9__avx512f_acc2);
301 
302   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__fma3);
303   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__fma3_acc2);
304   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__fma3);
305   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__fma3_acc2);
306 
307   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__avx);
308   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__avx_acc2);
309   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx);
310   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx_acc2);
311 
312   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__sse);
313   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__sse_acc2);
314   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__sse);
315   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__sse_acc2);
316 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
317 
318 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
f32_dwconv_up4x9__wasmsimd_arm(benchmark::State & state,models::ExecutionPlanFactory model)319   static void f32_dwconv_up4x9__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
320     DWConvEnd2EndBenchmark(state, model,
321       xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm,
322       xnn_init_f32_minmax_scalar_params,
323       4 /* channel tile */, 9 /* primary tile */);
324   }
325 
f32_dwconv_up4x9__wasmsimd_arm_acc2(benchmark::State & state,models::ExecutionPlanFactory model)326   static void f32_dwconv_up4x9__wasmsimd_arm_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
327     DWConvEnd2EndBenchmark(state, model,
328       xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2,
329       xnn_init_f32_minmax_scalar_params,
330       4 /* channel tile */, 9 /* primary tile */);
331   }
332 
f32_dwconv_up8x9__wasmsimd_arm(benchmark::State & state,models::ExecutionPlanFactory model)333   static void f32_dwconv_up8x9__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
334     DWConvEnd2EndBenchmark(state, model,
335       xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm,
336       xnn_init_f32_minmax_scalar_params,
337       8 /* channel tile */, 9 /* primary tile */);
338   }
339 
f32_dwconv_up8x9__wasmsimd_arm_acc2(benchmark::State & state,models::ExecutionPlanFactory model)340   static void f32_dwconv_up8x9__wasmsimd_arm_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
341     DWConvEnd2EndBenchmark(state, model,
342       xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2,
343       xnn_init_f32_minmax_scalar_params,
344       8 /* channel tile */, 9 /* primary tile */);
345   }
346 
f32_dwconv_up4x9__wasmsimd_x86(benchmark::State & state,models::ExecutionPlanFactory model)347   static void f32_dwconv_up4x9__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
348     DWConvEnd2EndBenchmark(state, model,
349       xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86,
350       xnn_init_f32_minmax_scalar_params,
351       4 /* channel tile */, 9 /* primary tile */);
352   }
353 
f32_dwconv_up4x9__wasmsimd_x86_acc2(benchmark::State & state,models::ExecutionPlanFactory model)354   static void f32_dwconv_up4x9__wasmsimd_x86_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
355     DWConvEnd2EndBenchmark(state, model,
356       xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2,
357       xnn_init_f32_minmax_scalar_params,
358       4 /* channel tile */, 9 /* primary tile */);
359   }
360 
f32_dwconv_up8x9__wasmsimd_x86(benchmark::State & state,models::ExecutionPlanFactory model)361   static void f32_dwconv_up8x9__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
362     DWConvEnd2EndBenchmark(state, model,
363       xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86,
364       xnn_init_f32_minmax_scalar_params,
365       8 /* channel tile */, 9 /* primary tile */);
366   }
367 
f32_dwconv_up8x9__wasmsimd_x86_acc2(benchmark::State & state,models::ExecutionPlanFactory model)368   static void f32_dwconv_up8x9__wasmsimd_x86_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
369     DWConvEnd2EndBenchmark(state, model,
370       xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2,
371       xnn_init_f32_minmax_scalar_params,
372       8 /* channel tile */, 9 /* primary tile */);
373   }
374 
375   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_arm);
376   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_arm_acc2);
377   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_arm);
378   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_arm_acc2);
379 
380   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_x86);
381   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_x86_acc2);
382   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_x86);
383   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_x86_acc2);
384 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
385 
f32_dwconv_up1x9__scalar(benchmark::State & state,models::ExecutionPlanFactory model)386 static void f32_dwconv_up1x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
387   DWConvEnd2EndBenchmark(state, model,
388     xnn_f32_dwconv_minmax_ukernel_up1x9__scalar,
389       xnn_init_f32_minmax_scalar_params,
390       1 /* channel tile */, 9 /* primary tile */);
391 }
392 
f32_dwconv_up1x9__scalar_acc2(benchmark::State & state,models::ExecutionPlanFactory model)393 static void f32_dwconv_up1x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
394   DWConvEnd2EndBenchmark(state, model,
395     xnn_f32_dwconv_minmax_ukernel_up1x9__scalar_acc2,
396       xnn_init_f32_minmax_scalar_params,
397       1 /* channel tile */, 9 /* primary tile */);
398 }
399 
f32_dwconv_up2x9__scalar(benchmark::State & state,models::ExecutionPlanFactory model)400 static void f32_dwconv_up2x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
401   DWConvEnd2EndBenchmark(state, model,
402     xnn_f32_dwconv_minmax_ukernel_up2x9__scalar,
403       xnn_init_f32_minmax_scalar_params,
404       2 /* channel tile */, 9 /* primary tile */);
405 }
406 
f32_dwconv_up2x9__scalar_acc2(benchmark::State & state,models::ExecutionPlanFactory model)407 static void f32_dwconv_up2x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
408   DWConvEnd2EndBenchmark(state, model,
409     xnn_f32_dwconv_minmax_ukernel_up2x9__scalar_acc2,
410       xnn_init_f32_minmax_scalar_params,
411       2 /* channel tile */, 9 /* primary tile */);
412 }
413 
414 BENCHMARK_FP32_END2END(f32_dwconv_up1x9__scalar);
415 BENCHMARK_FP32_END2END(f32_dwconv_up1x9__scalar_acc2);
416 BENCHMARK_FP32_END2END(f32_dwconv_up2x9__scalar);
417 BENCHMARK_FP32_END2END(f32_dwconv_up2x9__scalar_acc2);
418 
419 #ifndef XNNPACK_BENCHMARK_NO_MAIN
420 BENCHMARK_MAIN();
421 #endif
422