• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>
7 #include <cmath>
8 #include <functional>
9 #include <random>
10 #include <vector>
11 
12 #include <xnnpack.h>
13 
14 #include <benchmark/benchmark.h>
15 
16 #include "bench/end2end.h"
17 #include "bench/utils.h"
18 #include "models/models.h"
19 #include <xnnpack/dwconv.h>
20 #include <xnnpack/params.h>
21 #include <xnnpack/params-init.h>
22 
23 
DWConvEnd2EndBenchmark(benchmark::State & state,models::ExecutionPlanFactory model_factory,xnn_qu8_dwconv_minmax_unipass_ukernel_function dwconv,xnn_init_qu8_conv_minmax_params_fn init_params,uint8_t channel_tile,uint8_t primary_tile,benchmark::utils::IsaCheckFunction isa_check=nullptr)24 static void DWConvEnd2EndBenchmark(
25   benchmark::State& state,
26   models::ExecutionPlanFactory model_factory,
27   xnn_qu8_dwconv_minmax_unipass_ukernel_function dwconv,
28   xnn_init_qu8_conv_minmax_params_fn init_params,
29   uint8_t channel_tile, uint8_t primary_tile,
30   benchmark::utils::IsaCheckFunction isa_check = nullptr)
31 {
32   if (isa_check && !isa_check(state)) {
33     return;
34   }
35   if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
36     state.SkipWithError("failed to initialize XNNPACK");
37     return;
38   }
39 
40   // Override microkernels chosen in xnn_initialize
41   for (size_t i = 0; i < XNN_MAX_QU8_DWCONV_UKERNELS; i++) {
42     // Replace only the microkernel the matching kernel size.
43     if (xnn_params.qu8.dwconv[i].primary_tile == primary_tile) {
44       // Note: do not directly assign to xnn_params.qu8.dwconv[i] because it breaks older gcc.
45       xnn_params.qu8.dwconv[i].minmax.unipass = xnn_dwconv_unipass_ukernel_function(dwconv);
46       xnn_params.qu8.dwconv[i].channel_tile = channel_tile;
47       xnn_params.qu8.dwconv[i].primary_tile = primary_tile;
48       xnn_params.qu8.dwconv[i].incremental_tile = 0;
49       xnn_params.qu8.dwconv[i].init.qu8 = init_params;
50       break;
51     }
52   }
53 
54   auto execution_plan = model_factory(nullptr);
55   if (execution_plan.empty()) {
56     state.SkipWithError("failed to create a model");
57     return;
58   }
59 
60   for (auto _ : state) {
61     for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
62       xnn_status status = xnn_run_operator(op.get(), nullptr);
63       if (status != xnn_status_success) {
64         state.SkipWithError("failed to run a model");
65         return;
66       }
67     }
68   }
69 
70   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
71   if (cpu_frequency != 0) {
72     state.counters["cpufreq"] = cpu_frequency;
73   }
74 }
75 
76 
77 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
qu8_dwconv_up8x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)78   static void qu8_dwconv_up8x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
79     DWConvEnd2EndBenchmark(state, model,
80       xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
81       xnn_init_qu8_conv_minmax_rndnu_neon_params,
82       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
83   }
qu8_dwconv_up16x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)84   static void qu8_dwconv_up16x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
85     DWConvEnd2EndBenchmark(state, model,
86       xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8,
87       xnn_init_qu8_conv_minmax_rndnu_neon_params,
88       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
89   }
qu8_dwconv_up24x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)90   static void qu8_dwconv_up24x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
91     DWConvEnd2EndBenchmark(state, model,
92       xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
93       xnn_init_qu8_conv_minmax_rndnu_neon_params,
94       24 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
95   }
qu8_dwconv_up32x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)96   static void qu8_dwconv_up32x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
97     DWConvEnd2EndBenchmark(state, model,
98       xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
99       xnn_init_qu8_conv_minmax_rndnu_neon_params,
100       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
101   }
qu8_dwconv_up8x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)102   static void qu8_dwconv_up8x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
103     DWConvEnd2EndBenchmark(state, model,
104       xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
105       xnn_init_qu8_conv_minmax_rndnu_neon_params,
106       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
107   }
qu8_dwconv_up16x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)108   static void qu8_dwconv_up16x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
109     DWConvEnd2EndBenchmark(state, model,
110       xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
111       xnn_init_qu8_conv_minmax_rndnu_neon_params,
112       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
113   }
qu8_dwconv_up24x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)114   static void qu8_dwconv_up24x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
115     DWConvEnd2EndBenchmark(state, model,
116       xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
117       xnn_init_qu8_conv_minmax_rndnu_neon_params,
118       24 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
119   }
qu8_dwconv_up32x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)120   static void qu8_dwconv_up32x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
121     DWConvEnd2EndBenchmark(state, model,
122       xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
123       xnn_init_qu8_conv_minmax_rndnu_neon_params,
124       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
125   }
126 
// Register the NEON MUL8 and MUL16 wrappers defined above.
// NOTE(review): BENCHMARK_QU8_END2END is not defined in this file; presumably
// it comes from bench/end2end.h - confirm what it expands to.
BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__neon_mul8);
BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__neon_mul8);
BENCHMARK_QU8_END2END(qu8_dwconv_up24x9__neon_mul8);
BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__neon_mul8);
BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__neon_mul16);
BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__neon_mul16);
BENCHMARK_QU8_END2END(qu8_dwconv_up24x9__neon_mul16);
BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__neon_mul16);
135 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
136 
137 
138 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
qu8_dwconv_up16x9__avx512skx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)139   static void qu8_dwconv_up16x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
140     DWConvEnd2EndBenchmark(state, model,
141       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
142       xnn_init_qu8_conv_minmax_fp32_avx512_params,
143       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
144   }
qu8_dwconv_up32x9__avx512skx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)145   static void qu8_dwconv_up32x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
146     DWConvEnd2EndBenchmark(state, model,
147       xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
148       xnn_init_qu8_conv_minmax_fp32_avx512_params,
149       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
150   }
qu8_dwconv_up8x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)151   static void qu8_dwconv_up8x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
152     DWConvEnd2EndBenchmark(state, model,
153       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
154       xnn_init_qu8_conv_minmax_fp32_avx2_params,
155       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
156   }
qu8_dwconv_up16x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)157   static void qu8_dwconv_up16x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
158     DWConvEnd2EndBenchmark(state, model,
159       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
160       xnn_init_qu8_conv_minmax_fp32_avx2_params,
161       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
162   }
qu8_dwconv_up32x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)163   static void qu8_dwconv_up32x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
164     DWConvEnd2EndBenchmark(state, model,
165       xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
166       xnn_init_qu8_conv_minmax_fp32_avx2_params,
167       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
168   }
qu8_dwconv_up8x9__avx_mul16(benchmark::State & state,models::ExecutionPlanFactory model)169   static void qu8_dwconv_up8x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
170     DWConvEnd2EndBenchmark(state, model,
171       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
172       xnn_init_qu8_conv_minmax_fp32_sse2_params,
173       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
174   }
qu8_dwconv_up16x9__avx_mul16(benchmark::State & state,models::ExecutionPlanFactory model)175   static void qu8_dwconv_up16x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
176     DWConvEnd2EndBenchmark(state, model,
177       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
178       xnn_init_qu8_conv_minmax_fp32_sse2_params,
179       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
180   }
qu8_dwconv_up8x9__avx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)181   static void qu8_dwconv_up8x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
182     DWConvEnd2EndBenchmark(state, model,
183       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
184       xnn_init_qu8_conv_minmax_fp32_sse2_params,
185       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
186   }
qu8_dwconv_up16x9__avx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)187   static void qu8_dwconv_up16x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
188     DWConvEnd2EndBenchmark(state, model,
189       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
190       xnn_init_qu8_conv_minmax_fp32_sse2_params,
191       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
192   }
qu8_dwconv_up8x9__sse41_mul16(benchmark::State & state,models::ExecutionPlanFactory model)193   static void qu8_dwconv_up8x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
194     DWConvEnd2EndBenchmark(state, model,
195       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
196       xnn_init_qu8_conv_minmax_fp32_sse2_params,
197       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
198   }
qu8_dwconv_up16x9__sse41_mul16(benchmark::State & state,models::ExecutionPlanFactory model)199   static void qu8_dwconv_up16x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
200     DWConvEnd2EndBenchmark(state, model,
201       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
202       xnn_init_qu8_conv_minmax_fp32_sse2_params,
203       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
204   }
qu8_dwconv_up8x9__sse41_mul32(benchmark::State & state,models::ExecutionPlanFactory model)205   static void qu8_dwconv_up8x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
206     DWConvEnd2EndBenchmark(state, model,
207       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
208       xnn_init_qu8_conv_minmax_fp32_sse2_params,
209       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
210   }
qu8_dwconv_up16x9__sse41_mul32(benchmark::State & state,models::ExecutionPlanFactory model)211   static void qu8_dwconv_up16x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
212     DWConvEnd2EndBenchmark(state, model,
213       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
214       xnn_init_qu8_conv_minmax_fp32_sse2_params,
215       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
216   }
qu8_dwconv_up8x9__sse2_mul16(benchmark::State & state,models::ExecutionPlanFactory model)217   static void qu8_dwconv_up8x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
218     DWConvEnd2EndBenchmark(state, model,
219       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
220       xnn_init_qu8_conv_minmax_fp32_sse2_params,
221       8 /* channel tile */, 9 /* primary tile */);
222   }
qu8_dwconv_up16x9__sse2_mul16(benchmark::State & state,models::ExecutionPlanFactory model)223   static void qu8_dwconv_up16x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
224     DWConvEnd2EndBenchmark(state, model,
225       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
226       xnn_init_qu8_conv_minmax_fp32_sse2_params,
227       16 /* channel tile */, 9 /* primary tile */);
228   }
229 
// Register the x86 wrappers defined above, grouped by instruction set:
// AVX512SKX, AVX2, AVX, SSE4.1, then SSE2.
// NOTE(review): BENCHMARK_QU8_END2END is not defined in this file; presumably
// it comes from bench/end2end.h - confirm what it expands to.
BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx512skx_mul32);
BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__avx512skx_mul32);

BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx2_mul32);
BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx2_mul32);
BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__avx2_mul32);

BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx_mul16);
BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx_mul16);
BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx_mul32);
BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx_mul32);

BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse41_mul16);
BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse41_mul16);
BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse41_mul32);
BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse41_mul32);

BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse2_mul16);
BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse2_mul16);
249 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
250 
251 
252 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
qu8_dwconv_up8x9__wasmsimd_mul16(benchmark::State & state,models::ExecutionPlanFactory model)253   static void qu8_dwconv_up8x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
254     DWConvEnd2EndBenchmark(state, model,
255       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
256       xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
257       8 /* channel tile */, 9 /* primary tile */);
258   }
qu8_dwconv_up16x9__wasmsimd_mul16(benchmark::State & state,models::ExecutionPlanFactory model)259   static void qu8_dwconv_up16x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
260     DWConvEnd2EndBenchmark(state, model,
261       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
262       xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
263       16 /* channel tile */, 9 /* primary tile */);
264   }
265 
// Register the WAsm SIMD wrappers defined above.
// NOTE(review): BENCHMARK_QU8_END2END is not defined in this file; presumably
// it comes from bench/end2end.h - confirm what it expands to.
BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__wasmsimd_mul16);
BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__wasmsimd_mul16);
268 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
269 
270 
271 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
qu8_dwconv_up1x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)272   static void qu8_dwconv_up1x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
273     DWConvEnd2EndBenchmark(state, model,
274       xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
275       xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
276       1 /* channel tile */, 9 /* primary tile */);
277   }
qu8_dwconv_up2x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)278   static void qu8_dwconv_up2x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
279     DWConvEnd2EndBenchmark(state, model,
280       xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
281       xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
282       2 /* channel tile */, 9 /* primary tile */);
283   }
qu8_dwconv_up4x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)284   static void qu8_dwconv_up4x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
285     DWConvEnd2EndBenchmark(state, model,
286       xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
287       xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
288       4 /* channel tile */, 9 /* primary tile */);
289   }
290 
// Register the WAsm fmagic wrappers defined above.
// NOTE(review): BENCHMARK_QU8_END2END is not defined in this file; presumably
// it comes from bench/end2end.h - confirm what it expands to.
BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__wasm_fmagic);
BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__wasm_fmagic);
BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__wasm_fmagic);
294 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
295 
296 
qu8_dwconv_up1x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)297 static void qu8_dwconv_up1x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
298   DWConvEnd2EndBenchmark(state, model,
299     xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
300     xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
301     1 /* channel tile */, 9 /* primary tile */);
302 }
qu8_dwconv_up2x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)303 static void qu8_dwconv_up2x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
304   DWConvEnd2EndBenchmark(state, model,
305     xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
306     xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
307     2 /* channel tile */, 9 /* primary tile */);
308 }
qu8_dwconv_up4x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)309 static void qu8_dwconv_up4x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
310   DWConvEnd2EndBenchmark(state, model,
311     xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
312     xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
313     4 /* channel tile */, 9 /* primary tile */);
314 }
315 
qu8_dwconv_up1x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)316 static void qu8_dwconv_up1x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
317   DWConvEnd2EndBenchmark(state, model,
318     xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
319     xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
320     1 /* channel tile */, 9 /* primary tile */);
321 }
qu8_dwconv_up2x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)322 static void qu8_dwconv_up2x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
323   DWConvEnd2EndBenchmark(state, model,
324     xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
325     xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
326     2 /* channel tile */, 9 /* primary tile */);
327 }
qu8_dwconv_up4x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)328 static void qu8_dwconv_up4x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
329   DWConvEnd2EndBenchmark(state, model,
330     xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
331     xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
332     4 /* channel tile */, 9 /* primary tile */);
333 }
334 
qu8_dwconv_up1x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)335 static void qu8_dwconv_up1x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
336   DWConvEnd2EndBenchmark(state, model,
337     xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
338     xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
339     1 /* channel tile */, 9 /* primary tile */);
340 }
qu8_dwconv_up2x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)341 static void qu8_dwconv_up2x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
342   DWConvEnd2EndBenchmark(state, model,
343     xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
344     xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
345     2 /* channel tile */, 9 /* primary tile */);
346 }
qu8_dwconv_up4x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)347 static void qu8_dwconv_up4x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
348   DWConvEnd2EndBenchmark(state, model,
349     xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
350     xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
351     4 /* channel tile */, 9 /* primary tile */);
352 }
353 
// Register the scalar wrappers defined above (fmagic, imagic, lrintf); these
// sit outside any architecture #if, so they are registered unconditionally.
// NOTE(review): BENCHMARK_QU8_END2END is not defined in this file; presumably
// it comes from bench/end2end.h - confirm what it expands to.
BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_fmagic);
BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_fmagic);
BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_fmagic);

BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_imagic);
BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_imagic);
BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_imagic);

BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_lrintf);
BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_lrintf);
BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_lrintf);
365 
366 
// Emit the benchmark program entry point unless the build defines
// XNNPACK_BENCHMARK_NO_MAIN.
#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif
370