1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <algorithm>
7 #include <cmath>
8 #include <functional>
9 #include <random>
10 #include <vector>
11
12 #include <xnnpack.h>
13
14 #include <benchmark/benchmark.h>
15
16 #include "bench/end2end.h"
17 #include "bench/utils.h"
18 #include "models/models.h"
19 #include <xnnpack/dwconv.h>
20 #include <xnnpack/params.h>
21 #include <xnnpack/params-init.h>
22
23
DWConvEnd2EndBenchmark(benchmark::State & state,models::ExecutionPlanFactory model_factory,xnn_qu8_dwconv_minmax_unipass_ukernel_function dwconv,xnn_init_qu8_conv_minmax_params_fn init_params,uint8_t channel_tile,uint8_t primary_tile,benchmark::utils::IsaCheckFunction isa_check=nullptr)24 static void DWConvEnd2EndBenchmark(
25 benchmark::State& state,
26 models::ExecutionPlanFactory model_factory,
27 xnn_qu8_dwconv_minmax_unipass_ukernel_function dwconv,
28 xnn_init_qu8_conv_minmax_params_fn init_params,
29 uint8_t channel_tile, uint8_t primary_tile,
30 benchmark::utils::IsaCheckFunction isa_check = nullptr)
31 {
32 if (isa_check && !isa_check(state)) {
33 return;
34 }
35 if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
36 state.SkipWithError("failed to initialize XNNPACK");
37 return;
38 }
39
40 // Override microkernels chosen in xnn_initialize
41 for (size_t i = 0; i < XNN_MAX_QU8_DWCONV_UKERNELS; i++) {
42 // Replace only the microkernel the matching kernel size.
43 if (xnn_params.qu8.dwconv[i].primary_tile == primary_tile) {
44 // Note: do not directly assign to xnn_params.qu8.dwconv[i] because it breaks older gcc.
45 xnn_params.qu8.dwconv[i].minmax.unipass = xnn_dwconv_unipass_ukernel_function(dwconv);
46 xnn_params.qu8.dwconv[i].channel_tile = channel_tile;
47 xnn_params.qu8.dwconv[i].primary_tile = primary_tile;
48 xnn_params.qu8.dwconv[i].incremental_tile = 0;
49 xnn_params.qu8.dwconv[i].init.qu8 = init_params;
50 break;
51 }
52 }
53
54 auto execution_plan = model_factory(nullptr);
55 if (execution_plan.empty()) {
56 state.SkipWithError("failed to create a model");
57 return;
58 }
59
60 for (auto _ : state) {
61 for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
62 xnn_status status = xnn_run_operator(op.get(), nullptr);
63 if (status != xnn_status_success) {
64 state.SkipWithError("failed to run a model");
65 return;
66 }
67 }
68 }
69
70 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
71 if (cpu_frequency != 0) {
72 state.counters["cpufreq"] = cpu_frequency;
73 }
74 }
75
76
77 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
qu8_dwconv_up8x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)78 static void qu8_dwconv_up8x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
79 DWConvEnd2EndBenchmark(state, model,
80 xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
81 xnn_init_qu8_conv_minmax_rndnu_neon_params,
82 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
83 }
qu8_dwconv_up16x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)84 static void qu8_dwconv_up16x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
85 DWConvEnd2EndBenchmark(state, model,
86 xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8,
87 xnn_init_qu8_conv_minmax_rndnu_neon_params,
88 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
89 }
qu8_dwconv_up24x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)90 static void qu8_dwconv_up24x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
91 DWConvEnd2EndBenchmark(state, model,
92 xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
93 xnn_init_qu8_conv_minmax_rndnu_neon_params,
94 24 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
95 }
qu8_dwconv_up32x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)96 static void qu8_dwconv_up32x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
97 DWConvEnd2EndBenchmark(state, model,
98 xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
99 xnn_init_qu8_conv_minmax_rndnu_neon_params,
100 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
101 }
qu8_dwconv_up8x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)102 static void qu8_dwconv_up8x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
103 DWConvEnd2EndBenchmark(state, model,
104 xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
105 xnn_init_qu8_conv_minmax_rndnu_neon_params,
106 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
107 }
qu8_dwconv_up16x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)108 static void qu8_dwconv_up16x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
109 DWConvEnd2EndBenchmark(state, model,
110 xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
111 xnn_init_qu8_conv_minmax_rndnu_neon_params,
112 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
113 }
qu8_dwconv_up24x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)114 static void qu8_dwconv_up24x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
115 DWConvEnd2EndBenchmark(state, model,
116 xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
117 xnn_init_qu8_conv_minmax_rndnu_neon_params,
118 24 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
119 }
qu8_dwconv_up32x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)120 static void qu8_dwconv_up32x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
121 DWConvEnd2EndBenchmark(state, model,
122 xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
123 xnn_init_qu8_conv_minmax_rndnu_neon_params,
124 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
125 }
126
127 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__neon_mul8);
128 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__neon_mul8);
129 BENCHMARK_QU8_END2END(qu8_dwconv_up24x9__neon_mul8);
130 BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__neon_mul8);
131 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__neon_mul16);
132 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__neon_mul16);
133 BENCHMARK_QU8_END2END(qu8_dwconv_up24x9__neon_mul16);
134 BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__neon_mul16);
135 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
136
137
138 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
qu8_dwconv_up16x9__avx512skx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)139 static void qu8_dwconv_up16x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
140 DWConvEnd2EndBenchmark(state, model,
141 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
142 xnn_init_qu8_conv_minmax_fp32_avx512_params,
143 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
144 }
qu8_dwconv_up32x9__avx512skx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)145 static void qu8_dwconv_up32x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
146 DWConvEnd2EndBenchmark(state, model,
147 xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
148 xnn_init_qu8_conv_minmax_fp32_avx512_params,
149 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
150 }
qu8_dwconv_up8x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)151 static void qu8_dwconv_up8x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
152 DWConvEnd2EndBenchmark(state, model,
153 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
154 xnn_init_qu8_conv_minmax_fp32_avx2_params,
155 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
156 }
qu8_dwconv_up16x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)157 static void qu8_dwconv_up16x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
158 DWConvEnd2EndBenchmark(state, model,
159 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
160 xnn_init_qu8_conv_minmax_fp32_avx2_params,
161 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
162 }
qu8_dwconv_up32x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)163 static void qu8_dwconv_up32x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
164 DWConvEnd2EndBenchmark(state, model,
165 xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
166 xnn_init_qu8_conv_minmax_fp32_avx2_params,
167 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
168 }
qu8_dwconv_up8x9__avx_mul16(benchmark::State & state,models::ExecutionPlanFactory model)169 static void qu8_dwconv_up8x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
170 DWConvEnd2EndBenchmark(state, model,
171 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
172 xnn_init_qu8_conv_minmax_fp32_sse2_params,
173 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
174 }
qu8_dwconv_up16x9__avx_mul16(benchmark::State & state,models::ExecutionPlanFactory model)175 static void qu8_dwconv_up16x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
176 DWConvEnd2EndBenchmark(state, model,
177 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
178 xnn_init_qu8_conv_minmax_fp32_sse2_params,
179 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
180 }
qu8_dwconv_up8x9__avx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)181 static void qu8_dwconv_up8x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
182 DWConvEnd2EndBenchmark(state, model,
183 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
184 xnn_init_qu8_conv_minmax_fp32_sse2_params,
185 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
186 }
qu8_dwconv_up16x9__avx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)187 static void qu8_dwconv_up16x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
188 DWConvEnd2EndBenchmark(state, model,
189 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
190 xnn_init_qu8_conv_minmax_fp32_sse2_params,
191 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
192 }
qu8_dwconv_up8x9__sse41_mul16(benchmark::State & state,models::ExecutionPlanFactory model)193 static void qu8_dwconv_up8x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
194 DWConvEnd2EndBenchmark(state, model,
195 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
196 xnn_init_qu8_conv_minmax_fp32_sse2_params,
197 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
198 }
qu8_dwconv_up16x9__sse41_mul16(benchmark::State & state,models::ExecutionPlanFactory model)199 static void qu8_dwconv_up16x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
200 DWConvEnd2EndBenchmark(state, model,
201 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
202 xnn_init_qu8_conv_minmax_fp32_sse2_params,
203 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
204 }
qu8_dwconv_up8x9__sse41_mul32(benchmark::State & state,models::ExecutionPlanFactory model)205 static void qu8_dwconv_up8x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
206 DWConvEnd2EndBenchmark(state, model,
207 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
208 xnn_init_qu8_conv_minmax_fp32_sse2_params,
209 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
210 }
qu8_dwconv_up16x9__sse41_mul32(benchmark::State & state,models::ExecutionPlanFactory model)211 static void qu8_dwconv_up16x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
212 DWConvEnd2EndBenchmark(state, model,
213 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
214 xnn_init_qu8_conv_minmax_fp32_sse2_params,
215 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
216 }
qu8_dwconv_up8x9__sse2_mul16(benchmark::State & state,models::ExecutionPlanFactory model)217 static void qu8_dwconv_up8x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
218 DWConvEnd2EndBenchmark(state, model,
219 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
220 xnn_init_qu8_conv_minmax_fp32_sse2_params,
221 8 /* channel tile */, 9 /* primary tile */);
222 }
qu8_dwconv_up16x9__sse2_mul16(benchmark::State & state,models::ExecutionPlanFactory model)223 static void qu8_dwconv_up16x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
224 DWConvEnd2EndBenchmark(state, model,
225 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
226 xnn_init_qu8_conv_minmax_fp32_sse2_params,
227 16 /* channel tile */, 9 /* primary tile */);
228 }
229
230 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx512skx_mul32);
231 BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__avx512skx_mul32);
232
233 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx2_mul32);
234 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx2_mul32);
235 BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__avx2_mul32);
236
237 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx_mul16);
238 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx_mul16);
239 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx_mul32);
240 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx_mul32);
241
242 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse41_mul16);
243 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse41_mul16);
244 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse41_mul32);
245 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse41_mul32);
246
247 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse2_mul16);
248 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse2_mul16);
249 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
250
251
252 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
qu8_dwconv_up8x9__wasmsimd_mul16(benchmark::State & state,models::ExecutionPlanFactory model)253 static void qu8_dwconv_up8x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
254 DWConvEnd2EndBenchmark(state, model,
255 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
256 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
257 8 /* channel tile */, 9 /* primary tile */);
258 }
qu8_dwconv_up16x9__wasmsimd_mul16(benchmark::State & state,models::ExecutionPlanFactory model)259 static void qu8_dwconv_up16x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
260 DWConvEnd2EndBenchmark(state, model,
261 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
262 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
263 16 /* channel tile */, 9 /* primary tile */);
264 }
265
266 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__wasmsimd_mul16);
267 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__wasmsimd_mul16);
268 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
269
270
271 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
qu8_dwconv_up1x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)272 static void qu8_dwconv_up1x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
273 DWConvEnd2EndBenchmark(state, model,
274 xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
275 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
276 1 /* channel tile */, 9 /* primary tile */);
277 }
qu8_dwconv_up2x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)278 static void qu8_dwconv_up2x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
279 DWConvEnd2EndBenchmark(state, model,
280 xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
281 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
282 2 /* channel tile */, 9 /* primary tile */);
283 }
qu8_dwconv_up4x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)284 static void qu8_dwconv_up4x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
285 DWConvEnd2EndBenchmark(state, model,
286 xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
287 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
288 4 /* channel tile */, 9 /* primary tile */);
289 }
290
291 BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__wasm_fmagic);
292 BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__wasm_fmagic);
293 BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__wasm_fmagic);
294 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
295
296
qu8_dwconv_up1x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)297 static void qu8_dwconv_up1x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
298 DWConvEnd2EndBenchmark(state, model,
299 xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
300 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
301 1 /* channel tile */, 9 /* primary tile */);
302 }
qu8_dwconv_up2x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)303 static void qu8_dwconv_up2x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
304 DWConvEnd2EndBenchmark(state, model,
305 xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
306 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
307 2 /* channel tile */, 9 /* primary tile */);
308 }
qu8_dwconv_up4x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)309 static void qu8_dwconv_up4x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
310 DWConvEnd2EndBenchmark(state, model,
311 xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
312 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
313 4 /* channel tile */, 9 /* primary tile */);
314 }
315
qu8_dwconv_up1x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)316 static void qu8_dwconv_up1x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
317 DWConvEnd2EndBenchmark(state, model,
318 xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
319 xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
320 1 /* channel tile */, 9 /* primary tile */);
321 }
qu8_dwconv_up2x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)322 static void qu8_dwconv_up2x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
323 DWConvEnd2EndBenchmark(state, model,
324 xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
325 xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
326 2 /* channel tile */, 9 /* primary tile */);
327 }
qu8_dwconv_up4x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)328 static void qu8_dwconv_up4x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
329 DWConvEnd2EndBenchmark(state, model,
330 xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
331 xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
332 4 /* channel tile */, 9 /* primary tile */);
333 }
334
qu8_dwconv_up1x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)335 static void qu8_dwconv_up1x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
336 DWConvEnd2EndBenchmark(state, model,
337 xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
338 xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
339 1 /* channel tile */, 9 /* primary tile */);
340 }
qu8_dwconv_up2x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)341 static void qu8_dwconv_up2x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
342 DWConvEnd2EndBenchmark(state, model,
343 xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
344 xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
345 2 /* channel tile */, 9 /* primary tile */);
346 }
qu8_dwconv_up4x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)347 static void qu8_dwconv_up4x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
348 DWConvEnd2EndBenchmark(state, model,
349 xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
350 xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
351 4 /* channel tile */, 9 /* primary tile */);
352 }
353
354 BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_fmagic);
355 BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_fmagic);
356 BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_fmagic);
357
358 BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_imagic);
359 BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_imagic);
360 BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_imagic);
361
362 BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_lrintf);
363 BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_lrintf);
364 BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_lrintf);
365
366
367 #ifndef XNNPACK_BENCHMARK_NO_MAIN
368 BENCHMARK_MAIN();
369 #endif
370