Lines Matching full:benchmark
18 #include <benchmark/benchmark.h>
31 static void GEMMBenchmark(benchmark::State& state, in GEMMBenchmark()
34 benchmark::utils::IsaCheckFunction isa_check = nullptr) in GEMMBenchmark()
48 const size_t nc_stride = benchmark::utils::RoundUp(nc, nr); in GEMMBenchmark()
49 const size_t kc_stride = benchmark::utils::RoundUp(kc, kr); in GEMMBenchmark()
67 …benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(), w_size + c_elements *… in GEMMBenchmark()
85 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(int8_t)); in GEMMBenchmark()
103 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); in GEMMBenchmark()
108 state.counters["OPS"] = benchmark::Counter( in GEMMBenchmark()
109 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in GEMMBenchmark()
112 static void GEMMBenchmark(benchmark::State& state, in GEMMBenchmark()
115 benchmark::utils::IsaCheckFunction isa_check = nullptr) in GEMMBenchmark()
129 const size_t nc_stride = benchmark::utils::RoundUp(nc, nr); in GEMMBenchmark()
130 const size_t kc_stride = benchmark::utils::RoundUp(kc, kr); in GEMMBenchmark()
148 …benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(), w_size + c_elements *… in GEMMBenchmark()
166 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(int8_t)); in GEMMBenchmark()
184 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); in GEMMBenchmark()
189 state.counters["OPS"] = benchmark::Counter( in GEMMBenchmark()
190 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in GEMMBenchmark()
194 static void RuyBenchmark(benchmark::State& state, size_t threads) in RuyBenchmark()
206 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(), in RuyBenchmark()
218 // Note: context must be static to avoid the cost of re-creating it for each benchmark. in RuyBenchmark()
238 …// Thus, on the first benchmark, we compute GEMM for 500 milliseconds (to be safe) without recordi… in RuyBenchmark()
260 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(int8_t)); in RuyBenchmark()
271 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); in RuyBenchmark()
276 state.counters["OPS"] = benchmark::Counter( in RuyBenchmark()
277 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in RuyBenchmark()
280 static void ruy_st(benchmark::State& state, const char* net) in ruy_st()
287 static void qs8_gemm_1x8__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_1x8__neon_mlal_lane()
288 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, 1, 8, 1, 1, benchmark::utils… in qs8_gemm_1x8__neon_mlal_lane()
290 static void qs8_gemm_2x8__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_2x8__neon_mlal_lane()
291 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, 2, 8, 1, 1, benchmark::utils… in qs8_gemm_2x8__neon_mlal_lane()
293 static void qs8_gemm_3x8__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_3x8__neon_mlal_lane()
294 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, 3, 8, 1, 1, benchmark::utils… in qs8_gemm_3x8__neon_mlal_lane()
296 static void qs8_gemm_4x8__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_4x8__neon_mlal_lane()
297 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, 4, 8, 1, 1, benchmark::utils… in qs8_gemm_4x8__neon_mlal_lane()
299 static void qs8_gemm_1x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_1x16__neon_mlal_lane()
300 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, 1, 16, 1, 1, benchmark::uti… in qs8_gemm_1x16__neon_mlal_lane()
302 static void qs8_gemm_2x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_2x16__neon_mlal_lane()
303 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, 2, 16, 1, 1, benchmark::uti… in qs8_gemm_2x16__neon_mlal_lane()
305 static void qs8_gemm_3x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_3x16__neon_mlal_lane()
306 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, 3, 16, 1, 1, benchmark::uti… in qs8_gemm_3x16__neon_mlal_lane()
308 static void qs8_gemm_4x16__neon_mlal_lane(benchmark::State& state, const char* net) { in qs8_gemm_4x16__neon_mlal_lane()
309 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, 4, 16, 1, 1, benchmark::uti… in qs8_gemm_4x16__neon_mlal_lane()
311 static void qs8_gemm_1x8__neon_mull_addw_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x8__neon_mull_addw_dup()
312 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, 1, 8, 1, 1, benchmark::u… in qs8_gemm_1x8__neon_mull_addw_dup()
314 static void qs8_gemm_2x8__neon_mull_addw_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x8__neon_mull_addw_dup()
315 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, 2, 8, 1, 1, benchmark::u… in qs8_gemm_2x8__neon_mull_addw_dup()
317 static void qs8_gemm_3x8__neon_mull_addw_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x8__neon_mull_addw_dup()
318 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, 3, 8, 1, 1, benchmark::u… in qs8_gemm_3x8__neon_mull_addw_dup()
320 static void qs8_gemm_4x8__neon_mull_addw_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x8__neon_mull_addw_dup()
321 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, 4, 8, 1, 1, benchmark::u… in qs8_gemm_4x8__neon_mull_addw_dup()
323 static void qs8_gemm_1x16__neon_mull_addw_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x16__neon_mull_addw_dup()
324 …ate, xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, 1, 16, 1, 1, benchmark::utils::CheckNEO… in qs8_gemm_1x16__neon_mull_addw_dup()
326 static void qs8_gemm_2x16__neon_mull_addw_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x16__neon_mull_addw_dup()
327 …ate, xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, 2, 16, 1, 1, benchmark::utils::CheckNEO… in qs8_gemm_2x16__neon_mull_addw_dup()
329 static void qs8_gemm_3x16__neon_mull_addw_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x16__neon_mull_addw_dup()
330 …ate, xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, 3, 16, 1, 1, benchmark::utils::CheckNEO… in qs8_gemm_3x16__neon_mull_addw_dup()
332 static void qs8_gemm_4x16__neon_mull_addw_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x16__neon_mull_addw_dup()
333 …ate, xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, 4, 16, 1, 1, benchmark::utils::CheckNEO… in qs8_gemm_4x16__neon_mull_addw_dup()
335 static void qs8_gemm_1x8c2__neon_mull_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x8c2__neon_mull_padal_dup()
336 …te, xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, 1, 8, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_1x8c2__neon_mull_padal_dup()
338 static void qs8_gemm_2x8c2__neon_mull_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x8c2__neon_mull_padal_dup()
339 …te, xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, 2, 8, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_2x8c2__neon_mull_padal_dup()
341 static void qs8_gemm_3x8c2__neon_mull_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x8c2__neon_mull_padal_dup()
342 …te, xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, 3, 8, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_3x8c2__neon_mull_padal_dup()
344 static void qs8_gemm_4x8c2__neon_mull_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x8c2__neon_mull_padal_dup()
345 …te, xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, 4, 8, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_4x8c2__neon_mull_padal_dup()
347 static void qs8_gemm_1x16c2__neon_mull_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x16c2__neon_mull_padal_dup()
348 …, xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, 1, 16, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_1x16c2__neon_mull_padal_dup()
350 static void qs8_gemm_2x16c2__neon_mull_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x16c2__neon_mull_padal_dup()
351 …, xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, 2, 16, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_2x16c2__neon_mull_padal_dup()
353 static void qs8_gemm_3x16c2__neon_mull_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x16c2__neon_mull_padal_dup()
354 …, xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, 3, 16, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_3x16c2__neon_mull_padal_dup()
356 static void qs8_gemm_4x16c2__neon_mull_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x16c2__neon_mull_padal_dup()
357 …, xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, 4, 16, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_4x16c2__neon_mull_padal_dup()
359 static void qs8_gemm_1x8c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x8c2__neon_mlal_padal_dup()
360 …te, xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, 1, 8, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_1x8c2__neon_mlal_padal_dup()
362 static void qs8_gemm_2x8c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x8c2__neon_mlal_padal_dup()
363 …te, xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, 2, 8, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_2x8c2__neon_mlal_padal_dup()
365 static void qs8_gemm_3x8c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x8c2__neon_mlal_padal_dup()
366 …te, xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, 3, 8, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_3x8c2__neon_mlal_padal_dup()
368 static void qs8_gemm_4x8c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x8c2__neon_mlal_padal_dup()
369 …te, xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, 4, 8, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_4x8c2__neon_mlal_padal_dup()
371 static void qs8_gemm_1x16c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_1x16c2__neon_mlal_padal_dup()
372 …, xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, 1, 16, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_1x16c2__neon_mlal_padal_dup()
374 static void qs8_gemm_2x16c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_2x16c2__neon_mlal_padal_dup()
375 …, xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, 2, 16, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_2x16c2__neon_mlal_padal_dup()
377 static void qs8_gemm_3x16c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_3x16c2__neon_mlal_padal_dup()
378 …, xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, 3, 16, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_3x16c2__neon_mlal_padal_dup()
380 static void qs8_gemm_4x16c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) { in qs8_gemm_4x16c2__neon_mlal_padal_dup()
381 …, xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, 4, 16, 2, 1, benchmark::utils::CheckNEO… in qs8_gemm_4x16c2__neon_mlal_padal_dup()
383 static void qs8_gemm_1x8c8__neon_mull_padal(benchmark::State& state, const char* net) { in qs8_gemm_1x8c8__neon_mull_padal()
384 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, 1, 8, 8, 1, benchmark::ut… in qs8_gemm_1x8c8__neon_mull_padal()
386 static void qs8_gemm_2x8c8__neon_mull_padal(benchmark::State& state, const char* net) { in qs8_gemm_2x8c8__neon_mull_padal()
387 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, 2, 8, 8, 1, benchmark::ut… in qs8_gemm_2x8c8__neon_mull_padal()
389 static void qs8_gemm_3x8c8__neon_mull_padal(benchmark::State& state, const char* net) { in qs8_gemm_3x8c8__neon_mull_padal()
390 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, 3, 8, 8, 1, benchmark::ut… in qs8_gemm_3x8c8__neon_mull_padal()
392 static void qs8_gemm_4x8c8__neon_mull_padal(benchmark::State& state, const char* net) { in qs8_gemm_4x8c8__neon_mull_padal()
393 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, 4, 8, 8, 1, benchmark::ut… in qs8_gemm_4x8c8__neon_mull_padal()
395 static void qs8_gemm_1x16c8__neon_mull_padal(benchmark::State& state, const char* net) { in qs8_gemm_1x16c8__neon_mull_padal()
396 …tate, xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, 1, 16, 8, 1, benchmark::utils::CheckNEO… in qs8_gemm_1x16c8__neon_mull_padal()
398 static void qs8_gemm_2x16c8__neon_mull_padal(benchmark::State& state, const char* net) { in qs8_gemm_2x16c8__neon_mull_padal()
399 …tate, xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, 2, 16, 8, 1, benchmark::utils::CheckNEO… in qs8_gemm_2x16c8__neon_mull_padal()
401 static void qs8_gemm_3x16c8__neon_mull_padal(benchmark::State& state, const char* net) { in qs8_gemm_3x16c8__neon_mull_padal()
402 …tate, xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, 3, 16, 8, 1, benchmark::utils::CheckNEO… in qs8_gemm_3x16c8__neon_mull_padal()
404 static void qs8_gemm_4x16c8__neon_mull_padal(benchmark::State& state, const char* net) { in qs8_gemm_4x16c8__neon_mull_padal()
405 …tate, xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, 4, 16, 8, 1, benchmark::utils::CheckNEO… in qs8_gemm_4x16c8__neon_mull_padal()
407 static void qs8_gemm_1x8c8__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_1x8c8__neon_mlal_padal()
408 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, 1, 8, 8, 1, benchmark::ut… in qs8_gemm_1x8c8__neon_mlal_padal()
410 static void qs8_gemm_2x8c8__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_2x8c8__neon_mlal_padal()
411 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, 2, 8, 8, 1, benchmark::ut… in qs8_gemm_2x8c8__neon_mlal_padal()
413 static void qs8_gemm_3x8c8__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_3x8c8__neon_mlal_padal()
414 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, 3, 8, 8, 1, benchmark::ut… in qs8_gemm_3x8c8__neon_mlal_padal()
416 static void qs8_gemm_4x8c8__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_4x8c8__neon_mlal_padal()
417 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, 4, 8, 8, 1, benchmark::ut… in qs8_gemm_4x8c8__neon_mlal_padal()
419 static void qs8_gemm_1x16c8__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_1x16c8__neon_mlal_padal()
420 …tate, xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, 1, 16, 8, 1, benchmark::utils::CheckNEO… in qs8_gemm_1x16c8__neon_mlal_padal()
422 static void qs8_gemm_2x16c8__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_2x16c8__neon_mlal_padal()
423 …tate, xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, 2, 16, 8, 1, benchmark::utils::CheckNEO… in qs8_gemm_2x16c8__neon_mlal_padal()
425 static void qs8_gemm_3x16c8__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_3x16c8__neon_mlal_padal()
426 …tate, xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, 3, 16, 8, 1, benchmark::utils::CheckNEO… in qs8_gemm_3x16c8__neon_mlal_padal()
428 static void qs8_gemm_4x16c8__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_4x16c8__neon_mlal_padal()
429 …tate, xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, 4, 16, 8, 1, benchmark::utils::CheckNEO… in qs8_gemm_4x16c8__neon_mlal_padal()
431 static void qs8_gemm_1x8c16__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_1x8c16__neon_mlal_padal()
432 …tate, xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, 1, 8, 16, 1, benchmark::utils::CheckNEO… in qs8_gemm_1x8c16__neon_mlal_padal()
434 static void qs8_gemm_2x8c16__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_2x8c16__neon_mlal_padal()
435 …tate, xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, 2, 8, 16, 1, benchmark::utils::CheckNEO… in qs8_gemm_2x8c16__neon_mlal_padal()
437 static void qs8_gemm_3x8c16__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_3x8c16__neon_mlal_padal()
438 …tate, xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, 3, 8, 16, 1, benchmark::utils::CheckNEO… in qs8_gemm_3x8c16__neon_mlal_padal()
440 static void qs8_gemm_4x8c16__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_4x8c16__neon_mlal_padal()
441 …tate, xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, 4, 8, 16, 1, benchmark::utils::CheckNEO… in qs8_gemm_4x8c16__neon_mlal_padal()
443 static void qs8_gemm_1x16c16__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_1x16c16__neon_mlal_padal()
444 …te, xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, 1, 16, 16, 1, benchmark::utils::CheckNEO… in qs8_gemm_1x16c16__neon_mlal_padal()
446 static void qs8_gemm_2x16c16__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_2x16c16__neon_mlal_padal()
447 …te, xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, 2, 16, 16, 1, benchmark::utils::CheckNEO… in qs8_gemm_2x16c16__neon_mlal_padal()
449 static void qs8_gemm_3x16c16__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_3x16c16__neon_mlal_padal()
450 …te, xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, 3, 16, 16, 1, benchmark::utils::CheckNEO… in qs8_gemm_3x16c16__neon_mlal_padal()
452 static void qs8_gemm_4x16c16__neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_4x16c16__neon_mlal_padal()
453 …te, xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, 4, 16, 16, 1, benchmark::utils::CheckNEO… in qs8_gemm_4x16c16__neon_mlal_padal()
455 static void qs8_gemm_1x8c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_1x8c4__neondot()
456 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, 1, 8, 4, 1, benchmark::utils::Che… in qs8_gemm_1x8c4__neondot()
458 static void qs8_gemm_4x8c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_4x8c4__neondot()
459 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, 4, 8, 4, 1, benchmark::utils::Che… in qs8_gemm_4x8c4__neondot()
461 static void qs8_gemm_6x8c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_6x8c4__neondot()
462 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, 6, 8, 4, 1, benchmark::utils::Che… in qs8_gemm_6x8c4__neondot()
464 static void qs8_gemm_8x8c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_8x8c4__neondot()
465 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, 8, 8, 4, 1, benchmark::utils::Che… in qs8_gemm_8x8c4__neondot()
467 static void qs8_gemm_1x16c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_1x16c4__neondot()
468 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, 1, 16, 4, 1, benchmark::utils::C… in qs8_gemm_1x16c4__neondot()
470 static void qs8_gemm_4x16c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_4x16c4__neondot()
471 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, 4, 16, 4, 1, benchmark::utils::C… in qs8_gemm_4x16c4__neondot()
473 static void qs8_gemm_6x16c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_6x16c4__neondot()
474 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, 6, 16, 4, 1, benchmark::utils::C… in qs8_gemm_6x16c4__neondot()
476 static void qs8_gemm_8x16c4__neondot(benchmark::State& state, const char* net) { in qs8_gemm_8x16c4__neondot()
477 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, 8, 16, 4, 1, benchmark::utils::C… in qs8_gemm_8x16c4__neondot()
547 …static void qs8_gemm_4x16c4__aarch64_neondot_cortex_a55(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
548 …s8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, 4, 16, 4, 1, benchmark::utils::CheckNEO… in BENCHMARK_GEMM()
550 static void qs8_gemm_1x16c4__aarch64_neondot_ld32(benchmark::State& state, const char* net) { in qs8_gemm_1x16c4__aarch64_neondot_ld32()
551 … xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, 1, 16, 4, 1, benchmark::utils::CheckNEO… in qs8_gemm_1x16c4__aarch64_neondot_ld32()
553 static void qs8_gemm_1x16c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) { in qs8_gemm_1x16c4__aarch64_neondot_ld64()
554 … xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, 1, 16, 4, 1, benchmark::utils::CheckNEO… in qs8_gemm_1x16c4__aarch64_neondot_ld64()
556 static void qs8_gemm_4x16c4__aarch64_neondot_ld32(benchmark::State& state, const char* net) { in qs8_gemm_4x16c4__aarch64_neondot_ld32()
557 … xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, 4, 16, 4, 1, benchmark::utils::CheckNEO… in qs8_gemm_4x16c4__aarch64_neondot_ld32()
559 static void qs8_gemm_4x16c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x16c4__aarch64_neondot_ld64()
560 … xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, 4, 16, 4, 1, benchmark::utils::CheckNEO… in qs8_gemm_4x16c4__aarch64_neondot_ld64()
562 static void qs8_gemm_2x8c8__aarch64_neon_mull_padal(benchmark::State& state, const char* net) { in qs8_gemm_2x8c8__aarch64_neon_mull_padal()
563 …xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, 2, 8, 8, 1, benchmark::utils::CheckNEO… in qs8_gemm_2x8c8__aarch64_neon_mull_padal()
565 static void qs8_gemm_2x8c8__aarch64_neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_2x8c8__aarch64_neon_mlal_padal()
566 …xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, 2, 8, 8, 1, benchmark::utils::CheckNEO… in qs8_gemm_2x8c8__aarch64_neon_mlal_padal()
568 static void qs8_gemm_2x8c16__aarch64_neon_mlal_padal(benchmark::State& state, const char* net) { in qs8_gemm_2x8c16__aarch64_neon_mlal_padal()
569 …n_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, 2, 8, 16, 1, benchmark::utils::CheckNEO… in qs8_gemm_2x8c16__aarch64_neon_mlal_padal()
583 static void qs8_gemm_4x4c2__sse2_ld64(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
586 static void qs8_gemm_4x4c2__ssse3_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__ssse3_ld64()
587 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, 4, 4, 2, 1, benchmark::utils::… in qs8_gemm_4x4c2__ssse3_ld64()
589 static void qs8_gemm_4x4c2__sse41_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__sse41_ld64()
590 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, 4, 4, 2, 1, benchmark::utils::… in qs8_gemm_4x4c2__sse41_ld64()
592 static void qs8_gemm_4x4c2__xop_ld64(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__xop_ld64()
593 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, 4, 4, 2, 1, benchmark::utils::Ch… in qs8_gemm_4x4c2__xop_ld64()
596 static void qs8_gemm_4x4c2__sse2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__sse2_ld128()
599 static void qs8_gemm_4x4c2__ssse3_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__ssse3_ld128()
600 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, 4, 4, 2, 1, benchmark::utils:… in qs8_gemm_4x4c2__ssse3_ld128()
602 static void qs8_gemm_4x4c2__sse41_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__sse41_ld128()
603 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, 4, 4, 2, 1, benchmark::utils:… in qs8_gemm_4x4c2__sse41_ld128()
605 static void qs8_gemm_4x4c2__xop_ld128(benchmark::State& state, const char* net) { in qs8_gemm_4x4c2__xop_ld128()
606 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, 4, 4, 2, 1, benchmark::utils::C… in qs8_gemm_4x4c2__xop_ld128()
609 static void qs8_gemm_xw_4x4c2__sse2(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c2__sse2()
612 static void qs8_gemm_xw_4x4c2__ssse3(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c2__ssse3()
613 …GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, 4, 4, 2, 1, benchmark::utils::Ch… in qs8_gemm_xw_4x4c2__ssse3()
615 static void qs8_gemm_xw_4x4c2__sse41(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c2__sse41()
616 …GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, 4, 4, 2, 1, benchmark::utils::Ch… in qs8_gemm_xw_4x4c2__sse41()
618 static void qs8_gemm_xw_4x4c2__xop(benchmark::State& state, const char* net) { in qs8_gemm_xw_4x4c2__xop()
619 …GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, 4, 4, 2, 1, benchmark::utils::Chec… in qs8_gemm_xw_4x4c2__xop()
622 static void qs8_gemm_2x4c8__sse2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__sse2_ld64()
625 static void qs8_gemm_2x4c8__ssse3_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__ssse3_ld64()
626 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, 2, 4, 8, 1, benchmark::utils::… in qs8_gemm_2x4c8__ssse3_ld64()
628 static void qs8_gemm_2x4c8__sse41_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__sse41_ld64()
629 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, 2, 4, 8, 1, benchmark::utils::… in qs8_gemm_2x4c8__sse41_ld64()
631 static void qs8_gemm_2x4c8__xop_ld64(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__xop_ld64()
632 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, 2, 4, 8, 1, benchmark::utils::Ch… in qs8_gemm_2x4c8__xop_ld64()
635 static void qs8_gemm_2x4c8__sse2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__sse2_ld128()
638 static void qs8_gemm_2x4c8__ssse3_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__ssse3_ld128()
639 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, 2, 4, 8, 1, benchmark::utils:… in qs8_gemm_2x4c8__ssse3_ld128()
641 static void qs8_gemm_2x4c8__sse41_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__sse41_ld128()
642 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, 2, 4, 8, 1, benchmark::utils:… in qs8_gemm_2x4c8__sse41_ld128()
644 static void qs8_gemm_2x4c8__xop_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__xop_ld128()
645 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, 2, 4, 8, 1, benchmark::utils::C… in qs8_gemm_2x4c8__xop_ld128()
648 static void qs8_gemm_xw_2x4c8__sse2(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c8__sse2()
651 static void qs8_gemm_xw_2x4c8__ssse3(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c8__ssse3()
652 …GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, 2, 4, 8, 1, benchmark::utils::Ch… in qs8_gemm_xw_2x4c8__ssse3()
654 static void qs8_gemm_xw_2x4c8__sse41(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c8__sse41()
655 …GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, 2, 4, 8, 1, benchmark::utils::Ch… in qs8_gemm_xw_2x4c8__sse41()
657 static void qs8_gemm_xw_2x4c8__xop(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c8__xop()
658 …GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, 2, 4, 8, 1, benchmark::utils::Chec… in qs8_gemm_xw_2x4c8__xop()
661 static void qs8_gemm_3x4c8__sse2_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__sse2_ld64()
664 static void qs8_gemm_3x4c8__ssse3_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__ssse3_ld64()
665 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, 3, 4, 8, 1, benchmark::utils::… in qs8_gemm_3x4c8__ssse3_ld64()
667 static void qs8_gemm_3x4c8__sse41_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__sse41_ld64()
668 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, 3, 4, 8, 1, benchmark::utils::… in qs8_gemm_3x4c8__sse41_ld64()
670 static void qs8_gemm_3x4c8__xop_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__xop_ld64()
671 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, 3, 4, 8, 1, benchmark::utils::Ch… in qs8_gemm_3x4c8__xop_ld64()
674 static void qs8_gemm_3x4c8__sse2_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__sse2_ld128()
677 static void qs8_gemm_3x4c8__ssse3_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__ssse3_ld128()
678 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, 3, 4, 8, 1, benchmark::utils:… in qs8_gemm_3x4c8__ssse3_ld128()
680 static void qs8_gemm_3x4c8__sse41_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__sse41_ld128()
681 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, 3, 4, 8, 1, benchmark::utils:… in qs8_gemm_3x4c8__sse41_ld128()
683 static void qs8_gemm_3x4c8__xop_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__xop_ld128()
684 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, 3, 4, 8, 1, benchmark::utils::C… in qs8_gemm_3x4c8__xop_ld128()
687 static void qs8_gemm_xw_3x4c8__sse2(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c8__sse2()
690 static void qs8_gemm_xw_3x4c8__ssse3(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c8__ssse3()
691 …GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, 3, 4, 8, 1, benchmark::utils::Ch… in qs8_gemm_xw_3x4c8__ssse3()
693 static void qs8_gemm_xw_3x4c8__sse41(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c8__sse41()
694 …GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, 3, 4, 8, 1, benchmark::utils::Ch… in qs8_gemm_xw_3x4c8__sse41()
696 static void qs8_gemm_xw_3x4c8__xop(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c8__xop()
697 …GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, 3, 4, 8, 1, benchmark::utils::Chec… in qs8_gemm_xw_3x4c8__xop()
700 static void qs8_gemm_2x8c8__avx2(benchmark::State& state, const char* net) { in qs8_gemm_2x8c8__avx2()
701 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, 2, 8, 8, 1, benchmark::utils::CheckA… in qs8_gemm_2x8c8__avx2()
703 static void qs8_gemm_3x8c8__avx2(benchmark::State& state, const char* net) { in qs8_gemm_3x8c8__avx2()
704 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, 3, 8, 8, 1, benchmark::utils::CheckA… in qs8_gemm_3x8c8__avx2()
707 static void qs8_gemm_2x16c8__avx512skx(benchmark::State& state, const char* net) { in qs8_gemm_2x16c8__avx512skx()
708 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, 2, 16, 8, 1, benchmark::utils:… in qs8_gemm_2x16c8__avx512skx()
710 static void qs8_gemm_3x16c8__avx512skx(benchmark::State& state, const char* net) { in qs8_gemm_3x16c8__avx512skx()
711 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, 3, 16, 8, 1, benchmark::utils:… in qs8_gemm_3x16c8__avx512skx()
713 static void qs8_gemm_4x16c8__avx512skx(benchmark::State& state, const char* net) { in qs8_gemm_4x16c8__avx512skx()
714 …GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, 4, 16, 8, 1, benchmark::utils:… in qs8_gemm_4x16c8__avx512skx()
717 static void qs8_gemm_xw_2x8c8__avx2(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x8c8__avx2()
718 …GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, 2, 8, 8, 1, benchmark::utils::Che… in qs8_gemm_xw_2x8c8__avx2()
720 static void qs8_gemm_xw_3x8c8__avx2(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x8c8__avx2()
721 …GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, 3, 8, 8, 1, benchmark::utils::Che… in qs8_gemm_xw_3x8c8__avx2()
775 static void qs8_gemm_2x4c8__wasmsimd_ld64(benchmark::State& state, const char* net) { in BENCHMARK_GEMM()
778 static void qs8_gemm_3x4c8__wasmsimd_ld64(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__wasmsimd_ld64()
782 static void qs8_gemm_2x4c8__wasmsimd_ld128(benchmark::State& state, const char* net) { in qs8_gemm_2x4c8__wasmsimd_ld128()
785 static void qs8_gemm_3x4c8__wasmsimd_ld128(benchmark::State& state, const char* net) { in qs8_gemm_3x4c8__wasmsimd_ld128()
789 static void qs8_gemm_xw_2x4c8__wasmsimd(benchmark::State& state, const char* net) { in qs8_gemm_xw_2x4c8__wasmsimd()
792 static void qs8_gemm_xw_3x4c8__wasmsimd(benchmark::State& state, const char* net) { in qs8_gemm_xw_3x4c8__wasmsimd()