
Lines Matching full:state

34 static void GEMMBenchmark(benchmark::State& state,  in GEMMBenchmark()  argument
39 if (isa_check && !isa_check(state)) { in GEMMBenchmark()
43 const size_t mc = state.range(0); in GEMMBenchmark()
44 const size_t nc = state.range(1); in GEMMBenchmark()
45 const size_t kc = state.range(2); in GEMMBenchmark()
77 for (auto _ : state) { in GEMMBenchmark()
78 // Use circular buffers (exceeding cache size) and prefetch to control cache state: in GEMMBenchmark()
82 state.PauseTiming(); in GEMMBenchmark()
85 state.ResumeTiming(); in GEMMBenchmark()
100 state.counters["cpufreq"] = cpu_frequency; in GEMMBenchmark()
103 state.counters["FLOPS"] = benchmark::Counter( in GEMMBenchmark()
104 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in GEMMBenchmark()
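
The matches above outline the shape of the shared GEMM harness: the problem size comes from state.range(0..2), the kernel runs inside the timed for (auto _ : state) loop with PauseTiming()/ResumeTiming() bracketing the circular-buffer rotation, and the harness finishes by recording a "cpufreq" gauge plus a "FLOPS" rate counter of 2 * mc * nc * kc operations per iteration. The sketch below is a minimal reconstruction of that shape, assuming a naive triple-loop GEMM in place of the XNNPACK micro-kernel and plain vectors in place of the circular buffers; only the benchmark::State usage mirrors the matched lines, and the cpufreq counter is omitted because it needs a platform-specific frequency helper.

```cpp
// Minimal sketch of the harness shape above; naive_gemm, the buffer setup,
// and the sizes are illustrative assumptions, not XNNPACK's implementation.
#include <cstdint>
#include <vector>
#include <benchmark/benchmark.h>

static void naive_gemm(size_t mc, size_t nc, size_t kc,
                       const float* a, const float* b, float* c) {
  for (size_t m = 0; m < mc; m++) {
    for (size_t n = 0; n < nc; n++) {
      float acc = 0.0f;
      for (size_t k = 0; k < kc; k++) {
        acc += a[m * kc + k] * b[k * nc + n];
      }
      c[m * nc + n] = acc;
    }
  }
}

static void GEMMBenchmarkSketch(benchmark::State& state) {
  // M, N, K come from the registered argument ranges (cf. lines 43-45 above).
  const size_t mc = state.range(0);
  const size_t nc = state.range(1);
  const size_t kc = state.range(2);

  std::vector<float> a(mc * kc, 1.0f);
  std::vector<float> b(kc * nc, 1.0f);
  std::vector<float> c(mc * nc);

  for (auto _ : state) {
    naive_gemm(mc, nc, kc, a.data(), b.data(), c.data());
    // The real harness pauses timing here while it advances through circular
    // buffers larger than the last-level cache, so every iteration starts
    // from a controlled cache state.
    state.PauseTiming();
    benchmark::DoNotOptimize(c.data());
    state.ResumeTiming();
  }

  // One multiply plus one add per (m, n, k) triple: 2 * mc * nc * kc FLOPs
  // per iteration. kIsRate divides the total by elapsed time -> FLOP/s.
  state.counters["FLOPS"] = benchmark::Counter(
      uint64_t(state.iterations()) * 2 * mc * nc * kc,
      benchmark::Counter::kIsRate);
}

BENCHMARK(GEMMBenchmarkSketch)->ArgNames({"M", "N", "K"})->Args({192, 256, 256});
// Link against benchmark_main (or add BENCHMARK_MAIN();) to run this sketch.
```
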
107 static void PPMM1PBenchmark(benchmark::State& state, in PPMM1PBenchmark() argument
113 if (isa_check && !isa_check(state)) { in PPMM1PBenchmark()
117 const size_t mc = state.range(0); in PPMM1PBenchmark()
118 const size_t nc = state.range(1); in PPMM1PBenchmark()
119 const size_t kc = state.range(2); in PPMM1PBenchmark()
152 for (auto _ : state) { in PPMM1PBenchmark()
153 // Use circular buffers (exceeding cache size) and prefetch to control cache state: in PPMM1PBenchmark()
157 state.PauseTiming(); in PPMM1PBenchmark()
160 state.ResumeTiming(); in PPMM1PBenchmark()
176 state.counters["cpufreq"] = cpu_frequency; in PPMM1PBenchmark()
179 state.counters["FLOPS"] = benchmark::Counter( in PPMM1PBenchmark()
180 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in PPMM1PBenchmark()
183 static void PPMM2PBenchmark(benchmark::State& state, in PPMM2PBenchmark() argument
189 if (isa_check && !isa_check(state)) { in PPMM2PBenchmark()
193 const size_t mc = state.range(0); in PPMM2PBenchmark()
194 const size_t nc = state.range(1); in PPMM2PBenchmark()
195 const size_t kc = state.range(2); in PPMM2PBenchmark()
229 for (auto _ : state) { in PPMM2PBenchmark()
230 // Use circular buffers (exceeding cache size) and prefetch to control cache state: in PPMM2PBenchmark()
234 state.PauseTiming(); in PPMM2PBenchmark()
237 state.ResumeTiming(); in PPMM2PBenchmark()
256 state.counters["cpufreq"] = cpu_frequency; in PPMM2PBenchmark()
259 state.counters["FLOPS"] = benchmark::Counter( in PPMM2PBenchmark()
260 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in PPMM2PBenchmark()
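
All three harnesses (GEMMBenchmark, PPMM1PBenchmark, PPMM2PBenchmark) close with the same pair of counters: the cpufreq gauge and the FLOPS rate built from iterations * 2 * mc * nc * kc. A short worked example of what that rate counter reports, using illustrative sizes:

```cpp
// Worked example of the FLOPS counter above (illustrative sizes).
// For mc = nc = kc = 256, one iteration performs 2 * 256^3 = 33,554,432
// floating-point operations. With kIsRate, Google Benchmark divides the
// accumulated total by elapsed wall time, so 1,000 iterations completing in
// 0.5 s report 1000 * 33,554,432 / 0.5 ≈ 6.7e10, i.e. roughly 67 GFLOP/s.
#include <cstdint>
#include <benchmark/benchmark.h>

inline benchmark::Counter FlopsCounter(uint64_t iterations,
                                       size_t mc, size_t nc, size_t kc) {
  return benchmark::Counter(
      double(iterations) * 2 * mc * nc * kc, benchmark::Counter::kIsRate);
}
```
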
264 static void RuyBenchmark(benchmark::State& state, uint32_t threads) in RuyBenchmark() argument
270 const size_t mc = state.range(0); in RuyBenchmark()
271 const size_t nc = state.range(1); in RuyBenchmark()
272 const size_t kc = state.range(2); in RuyBenchmark()
318 for (auto _ : state) { in RuyBenchmark()
319 // Use circular buffers (exceeding cache size) and prefetch to control cache state: in RuyBenchmark()
324 state.PauseTiming(); in RuyBenchmark()
327 state.ResumeTiming(); in RuyBenchmark()
338 state.counters["cpufreq"] = cpu_frequency; in RuyBenchmark()
341 state.counters["FLOPS"] = benchmark::Counter( in RuyBenchmark()
342 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in RuyBenchmark()
345 static void ruy_st(benchmark::State& state, const char* net) in ruy_st() argument
347 RuyBenchmark(state, 1); in ruy_st()
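
ruy_st above is a thin wrapper with the (benchmark::State&, const char* net) signature, which is exactly the shape BENCHMARK_CAPTURE expects; it only pins the thread count before forwarding to RuyBenchmark. A hedged sketch of that wrapper-plus-registration pattern follows; MatMulHarness, matmul_st, the mobilenet label, and the sizes are placeholders, not the file's actual names or macros.

```cpp
// Sketch of the wrapper/registration pattern suggested by ruy_st: the
// two-argument signature matches BENCHMARK_CAPTURE, the wrapper pins extra
// parameters (here a thread count), and Args() supplies M/N/K.
#include <cstddef>
#include <cstdint>
#include <benchmark/benchmark.h>

static void MatMulHarness(benchmark::State& state, uint32_t threads) {
  const size_t mc = state.range(0);
  const size_t nc = state.range(1);
  const size_t kc = state.range(2);
  for (auto _ : state) {
    // ... run an mc x nc x kc matrix multiplication on `threads` threads ...
    benchmark::DoNotOptimize(mc + nc + kc + threads);
  }
}

// Single-threaded wrapper, analogous to ruy_st forwarding RuyBenchmark(state, 1).
static void matmul_st(benchmark::State& state, const char* net) {
  (void)net;  // the captured network name only labels the benchmark instance
  MatMulHarness(state, /*threads=*/1);
}

// One registration per network/shape of interest; label and sizes are
// placeholders.
BENCHMARK_CAPTURE(matmul_st, mobilenet_v2, "mobilenet_v2")
    ->ArgNames({"M", "N", "K"})->Args({96, 1024, 1024});
```
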
353 static void f32_gemm_1x8__aarch64_neonfma_ld64(benchmark::State& state, const char* net) { in f32_gemm_1x8__aarch64_neonfma_ld64() argument
354 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, 1, 8, 1, 1); in f32_gemm_1x8__aarch64_neonfma_ld64()
356 static void f32_gemm_1x12__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) { in f32_gemm_1x12__aarch64_neonfma_cortex_a53() argument
357 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, 1, 12, 1, 1); in f32_gemm_1x12__aarch64_neonfma_cortex_a53()
359 static void f32_gemm_1x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) { in f32_gemm_1x8__aarch64_neonfma_cortex_a53() argument
360 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, 1, 8, 1, 1); in f32_gemm_1x8__aarch64_neonfma_cortex_a53()
362 static void f32_gemm_1x8__aarch64_neonfma_cortex_a57(benchmark::State& state, const char* net) { in f32_gemm_1x8__aarch64_neonfma_cortex_a57() argument
363 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a57, 1, 8, 1, 1); in f32_gemm_1x8__aarch64_neonfma_cortex_a57()
365 static void f32_gemm_1x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) { in f32_gemm_1x8__aarch64_neonfma_cortex_a75() argument
366 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, 1, 8, 1, 1); in f32_gemm_1x8__aarch64_neonfma_cortex_a75()
368 static void f32_gemm_4x12__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) { in f32_gemm_4x12__aarch64_neonfma_cortex_a53() argument
369 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, 4, 12, 1, 1); in f32_gemm_4x12__aarch64_neonfma_cortex_a53()
371 static void f32_gemm_4x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch64_neonfma_cortex_a53() argument
372 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, 4, 8, 1, 1); in f32_gemm_4x8__aarch64_neonfma_cortex_a53()
374 static void f32_gemm_4x8__aarch64_neonfma_cortex_a55(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch64_neonfma_cortex_a55() argument
375 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, 4, 8, 1, 1); in f32_gemm_4x8__aarch64_neonfma_cortex_a55()
377 static void f32_gemm_4x8__aarch64_neonfma_cortex_a57(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch64_neonfma_cortex_a57() argument
378 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a57, 4, 8, 1, 1); in f32_gemm_4x8__aarch64_neonfma_cortex_a57()
380 static void f32_gemm_4x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch64_neonfma_cortex_a75() argument
381 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, 4, 8, 1, 1); in f32_gemm_4x8__aarch64_neonfma_cortex_a75()
383 static void f32_gemm_4x8__aarch64_neonfma_ld64(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch64_neonfma_ld64() argument
384 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, 4, 8, 1, 1); in f32_gemm_4x8__aarch64_neonfma_ld64()
386 static void f32_gemm_4x8__aarch64_neonfma_ld128(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch64_neonfma_ld128() argument
387 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, 4, 8, 1, 1); in f32_gemm_4x8__aarch64_neonfma_ld128()
389 static void f32_gemm_5x8__aarch64_neonfma_cortex_a57(benchmark::State& state, const char* net) { in f32_gemm_5x8__aarch64_neonfma_cortex_a57() argument
390 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a57, 5, 8, 1, 1); in f32_gemm_5x8__aarch64_neonfma_cortex_a57()
392 static void f32_gemm_5x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) { in f32_gemm_5x8__aarch64_neonfma_cortex_a75() argument
393 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, 5, 8, 1, 1); in f32_gemm_5x8__aarch64_neonfma_cortex_a75()
395 static void f32_gemm_6x8__aarch64_neonfma_ld64(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_ld64() argument
396 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, 6, 8, 1, 1); in f32_gemm_6x8__aarch64_neonfma_ld64()
398 static void f32_gemm_6x8__aarch64_neonfma_ld128(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_ld128() argument
399 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, 6, 8, 1, 1); in f32_gemm_6x8__aarch64_neonfma_ld128()
401 static void f32_gemm_6x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_cortex_a53() argument
402 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, 6, 8, 1, 1); in f32_gemm_6x8__aarch64_neonfma_cortex_a53()
404 static void f32_gemm_6x8__aarch64_neonfma_cortex_a55(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_cortex_a55() argument
405 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, 6, 8, 1, 1); in f32_gemm_6x8__aarch64_neonfma_cortex_a55()
407 static void f32_gemm_6x8__aarch64_neonfma_cortex_a73(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_cortex_a73() argument
408 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, 6, 8, 1, 1); in f32_gemm_6x8__aarch64_neonfma_cortex_a73()
410 static void f32_gemm_6x8__aarch64_neonfma_cortex_a57(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_cortex_a57() argument
411 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a57, 6, 8, 1, 1); in f32_gemm_6x8__aarch64_neonfma_cortex_a57()
413 static void f32_gemm_6x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_cortex_a75() argument
414 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, 6, 8, 1, 1); in f32_gemm_6x8__aarch64_neonfma_cortex_a75()
416 static void f32_gemm_1x8__neonfma_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_1x8__neonfma_lane_ld64() argument
417 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64, 1, 8, 1, 1); in f32_gemm_1x8__neonfma_lane_ld64()
419 static void f32_gemm_4x8__neonfma_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_4x8__neonfma_lane_ld64() argument
420 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64, 4, 8, 1, 1); in f32_gemm_4x8__neonfma_lane_ld64()
422 static void f32_gemm_4x8__neonfma_lane_ld128(benchmark::State& state, const char* net) { in f32_gemm_4x8__neonfma_lane_ld128() argument
423 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, 4, 8, 1, 1); in f32_gemm_4x8__neonfma_lane_ld128()
425 static void f32_gemm_5x8__neonfma_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_5x8__neonfma_lane_ld64() argument
426 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64, 5, 8, 1, 1); in f32_gemm_5x8__neonfma_lane_ld64()
428 static void f32_gemm_6x8__neonfma_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_6x8__neonfma_lane_ld64() argument
429 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld64, 6, 8, 1, 1); in f32_gemm_6x8__neonfma_lane_ld64()
431 static void f32_gemm_6x8__neonfma_lane_ld128(benchmark::State& state, const char* net) { in f32_gemm_6x8__neonfma_lane_ld128() argument
432 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128, 6, 8, 1, 1); in f32_gemm_6x8__neonfma_lane_ld128()
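
Each static f32_gemm_* function above binds one micro-kernel and its tile shape (mr, nr, kr, sr) into the shared GEMMBenchmark harness through a function pointer. A hedged sketch of that indirection follows; the ukernel signature, the reference kernel, and the buffer handling are simplified placeholders rather than XNNPACK's real xnn_f32_gemm_minmax_ukernel_function type.

```cpp
// Simplified placeholder for a GEMM micro-kernel signature (the real XNNPACK
// type also carries byte strides and min/max clamping parameters).
#include <cstddef>
#include <vector>
#include <benchmark/benchmark.h>

using gemm_ukernel_fn = void (*)(size_t mr, size_t nr, size_t kc,
                                 const float* a, const float* w, float* c);

static void reference_ukernel(size_t mr, size_t nr, size_t kc,
                              const float* a, const float* w, float* c) {
  for (size_t m = 0; m < mr; m++) {
    for (size_t n = 0; n < nr; n++) {
      float acc = 0.0f;
      for (size_t k = 0; k < kc; k++) {
        acc += a[m * kc + k] * w[n * kc + k];
      }
      c[m * nr + n] = acc;
    }
  }
}

// Shared harness parameterized on the kernel and its tile shape, mirroring
// GEMMBenchmark(state, ukernel, mr, nr, kr, sr). kr/sr shape the packed
// weights in the real harness; this sketch accepts but ignores them.
static void UkernelBenchmarkSketch(benchmark::State& state,
                                   gemm_ukernel_fn gemm,
                                   size_t mr, size_t nr, size_t kr, size_t sr) {
  const size_t kc = state.range(0);
  std::vector<float> a(mr * kc, 1.0f), w(nr * kc, 1.0f), c(mr * nr);
  for (auto _ : state) {
    gemm(mr, nr, kc, a.data(), w.data(), c.data());
    benchmark::DoNotOptimize(c.data());
  }
  (void)kr; (void)sr;
}

// Thin wrapper in the same style as the f32_gemm_*__* functions above.
static void gemm_4x8_reference(benchmark::State& state, const char* net) {
  (void)net;
  UkernelBenchmarkSketch(state, reference_ukernel,
                         /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1);
}
BENCHMARK_CAPTURE(gemm_4x8_reference, example, "example")->Arg(256);
```
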
464 static void f32_gemm_4x4__aarch32_vfp_ld64(benchmark::State& state, const char* net) { in f32_gemm_4x4__aarch32_vfp_ld64() argument
465 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x4__aarch32_vfp_ld64, 4, 4, 1, 1, benchmark::uti… in f32_gemm_4x4__aarch32_vfp_ld64()
467 static void f32_gemm_4x8__aarch32_neon_ld64(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch32_neon_ld64() argument
468 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_ld64, 4, 8, 1, 1, benchmark::ut… in f32_gemm_4x8__aarch32_neon_ld64()
470 static void f32_gemm_4x8__aarch32_neon_cortex_a7(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch32_neon_cortex_a7() argument
471 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, 4, 8, 1, 1, benchmar… in f32_gemm_4x8__aarch32_neon_cortex_a7()
473 static void f32_gemm_4x8__aarch32_neon_cortex_a53(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch32_neon_cortex_a53() argument
474 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, 4, 8, 1, 1, benchma… in f32_gemm_4x8__aarch32_neon_cortex_a53()
476 static void f32_gemm_4x8__aarch32_neon_cortex_a55(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch32_neon_cortex_a55() argument
477 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, 4, 8, 1, 1, benchma… in f32_gemm_4x8__aarch32_neon_cortex_a55()
479 static void f32_gemm_4x8__aarch32_neon_cortex_a75(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch32_neon_cortex_a75() argument
480 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, 4, 8, 1, 1, benchma… in f32_gemm_4x8__aarch32_neon_cortex_a75()
482 static void f32_gemm_4x8__aarch32_neon_pld_cortex_a75(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch32_neon_pld_cortex_a75() argument
483 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_pld_cortex_a75, 4, 8, 1, 1, ben… in f32_gemm_4x8__aarch32_neon_pld_cortex_a75()
496 static void f32_gemm_1x8__neon_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_1x8__neon_lane_ld64() argument
497 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, 1, 8, 1, 1, benchmark::utils… in f32_gemm_1x8__neon_lane_ld64()
499 static void f32_gemm_4x8__neon_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_4x8__neon_lane_ld64() argument
500 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, 4, 8, 1, 1, benchmark::utils… in f32_gemm_4x8__neon_lane_ld64()
502 static void f32_gemm_4x8__neon_lane_ld128(benchmark::State& state, const char* net) { in f32_gemm_4x8__neon_lane_ld128() argument
503 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, 4, 8, 1, 1, benchmark::util… in f32_gemm_4x8__neon_lane_ld128()
505 static void f32_gemm_5x8__neon_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_5x8__neon_lane_ld64() argument
506 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, 5, 8, 1, 1, benchmark::utils… in f32_gemm_5x8__neon_lane_ld64()
508 static void f32_gemm_6x8__neon_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_6x8__neon_lane_ld64() argument
509 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld64, 6, 8, 1, 1, benchmark::utils… in f32_gemm_6x8__neon_lane_ld64()
511 static void f32_gemm_6x8__neon_lane_ld128(benchmark::State& state, const char* net) { in f32_gemm_6x8__neon_lane_ld128() argument
512 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, 6, 8, 1, 1, benchmark::util… in f32_gemm_6x8__neon_lane_ld128()
514 static void f32_gemm_1x8__neonfma_dup_ld64(benchmark::State& state, const char* net) { in f32_gemm_1x8__neonfma_dup_ld64() argument
515 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64, 1, 8, 1, 1, benchmark::uti… in f32_gemm_1x8__neonfma_dup_ld64()
517 static void f32_gemm_4x8__neonfma_dup_ld64(benchmark::State& state, const char* net) { in f32_gemm_4x8__neonfma_dup_ld64() argument
518 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, 4, 8, 1, 1, benchmark::uti… in f32_gemm_4x8__neonfma_dup_ld64()
520 static void f32_gemm_4x8__neonfma_dup_ld128(benchmark::State& state, const char* net) { in f32_gemm_4x8__neonfma_dup_ld128() argument
521 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld128, 4, 8, 1, 1, benchmark::ut… in f32_gemm_4x8__neonfma_dup_ld128()
523 static void f32_gemm_6x8__neonfma_dup_ld64(benchmark::State& state, const char* net) { in f32_gemm_6x8__neonfma_dup_ld64() argument
524 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld64, 6, 8, 1, 1, benchmark::uti… in f32_gemm_6x8__neonfma_dup_ld64()
526 static void f32_gemm_6x8__neonfma_dup_ld128(benchmark::State& state, const char* net) { in f32_gemm_6x8__neonfma_dup_ld128() argument
527 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld128, 6, 8, 1, 1, benchmark::ut… in f32_gemm_6x8__neonfma_dup_ld128()
529 static void f32_gemm_1x8s4__neon(benchmark::State& state, const char* net) { in f32_gemm_1x8s4__neon() argument
530 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8s4__neon, 1, 8, 1, 4, benchmark::utils::CheckN… in f32_gemm_1x8s4__neon()
532 static void f32_gemm_1x8s4__neonfma(benchmark::State& state, const char* net) { in f32_gemm_1x8s4__neonfma() argument
533 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, 1, 8, 1, 4, benchmark::utils::Che… in f32_gemm_1x8s4__neonfma()
535 static void f32_gemm_4x8s4__neon(benchmark::State& state, const char* net) { in f32_gemm_4x8s4__neon() argument
536 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__neon, 4, 8, 1, 4, benchmark::utils::CheckN… in f32_gemm_4x8s4__neon()
538 static void f32_gemm_4x8s4__neonfma(benchmark::State& state, const char* net) { in f32_gemm_4x8s4__neonfma() argument
539 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__neonfma, 4, 8, 1, 4, benchmark::utils::Che… in f32_gemm_4x8s4__neonfma()
541 static void f32_gemm_6x8s4__neon(benchmark::State& state, const char* net) { in f32_gemm_6x8s4__neon() argument
542 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__neon, 6, 8, 1, 4, benchmark::utils::CheckN… in f32_gemm_6x8s4__neon()
544 static void f32_gemm_6x8s4__neonfma(benchmark::State& state, const char* net) { in f32_gemm_6x8s4__neonfma() argument
545 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, 6, 8, 1, 4, benchmark::utils::Che… in f32_gemm_6x8s4__neonfma()
547 static void f32_gemm_8x8s4__neon(benchmark::State& state, const char* net) { in f32_gemm_8x8s4__neon() argument
548 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_8x8s4__neon, 8, 8, 1, 4, benchmark::utils::CheckN… in f32_gemm_8x8s4__neon()
550 static void f32_gemm_8x8s4__neonfma(benchmark::State& state, const char* net) { in f32_gemm_8x8s4__neonfma() argument
551 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, 8, 8, 1, 4, benchmark::utils::Che… in f32_gemm_8x8s4__neonfma()
553 static void f32_ppmm_4x8_unipass__neonfma(benchmark::State& state, const char* net) { in f32_ppmm_4x8_unipass__neonfma() argument
554 …PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_x32_packx_ukernel_4x__neon_st… in f32_ppmm_4x8_unipass__neonfma()
556 static void f32_ppmm_4x8_twopass__neonfma(benchmark::State& state, const char* net) { in f32_ppmm_4x8_twopass__neonfma() argument
557 …PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_x32_packx_ukernel_4x__neon_st… in f32_ppmm_4x8_twopass__neonfma()
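
Many of the wrappers above pass a truncated benchmark::utils::Check… argument; those are the isa_check callables tested at the top of each harness ("if (isa_check && !isa_check(state))"), which skip the benchmark when the host lacks the required extension instead of running a NEON/NEONFMA (or, below, AVX/AVX512F) kernel it cannot execute. A hedged sketch of that gate follows; the compile-time __FMA__ test is a stand-in for the runtime CPU-feature query the real helpers perform.

```cpp
// Sketch of the isa_check gate, assuming a build-time feature test in place
// of a runtime CPU query. state.SkipWithError marks the benchmark as skipped
// in the report rather than executing an unsupported kernel.
#include <functional>
#include <benchmark/benchmark.h>

using IsaCheck = std::function<bool(benchmark::State&)>;

static bool CheckFMA3Sketch(benchmark::State& state) {
#if defined(__FMA__)
  return true;
#else
  state.SkipWithError("FMA3 not available on this build/target");
  return false;
#endif
}

static void GatedBenchmarkSketch(benchmark::State& state,
                                 const IsaCheck& isa_check) {
  if (isa_check && !isa_check(state)) {
    return;  // already marked as skipped above
  }
  for (auto _ : state) {
    // ... run the ISA-specific kernel ...
  }
}

// Usage from a wrapper: GatedBenchmarkSketch(state, CheckFMA3Sketch);
```
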
584 static void f32_gemm_1x8__sse_load1(benchmark::State& state, const char* net) { in f32_gemm_1x8__sse_load1() argument
585 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__sse_load1, 1, 8, 1, 1); in f32_gemm_1x8__sse_load1()
587 static void f32_gemm_3x8__sse_load1(benchmark::State& state, const char* net) { in f32_gemm_3x8__sse_load1() argument
588 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, 3, 8, 1, 1); in f32_gemm_3x8__sse_load1()
590 static void f32_gemm_4x8__sse_load1(benchmark::State& state, const char* net) { in f32_gemm_4x8__sse_load1() argument
591 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, 4, 8, 1, 1); in f32_gemm_4x8__sse_load1()
593 static void f32_gemm_5x8__sse_load1(benchmark::State& state, const char* net) { in f32_gemm_5x8__sse_load1() argument
594 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, 5, 8, 1, 1); in f32_gemm_5x8__sse_load1()
597 static void f32_gemm_1x8__sse_dup(benchmark::State& state, const char* net) { in f32_gemm_1x8__sse_dup() argument
598 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__sse_dup, 1, 8, 1, 1); in f32_gemm_1x8__sse_dup()
600 static void f32_gemm_3x8__sse_dup(benchmark::State& state, const char* net) { in f32_gemm_3x8__sse_dup() argument
601 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, 3, 8, 1, 1); in f32_gemm_3x8__sse_dup()
603 static void f32_gemm_4x8__sse_dup(benchmark::State& state, const char* net) { in f32_gemm_4x8__sse_dup() argument
604 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__sse_dup, 4, 8, 1, 1); in f32_gemm_4x8__sse_dup()
606 static void f32_gemm_5x8__sse_dup(benchmark::State& state, const char* net) { in f32_gemm_5x8__sse_dup() argument
607 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, 5, 8, 1, 1); in f32_gemm_5x8__sse_dup()
610 static void f32_gemm_1x8s4__sse(benchmark::State& state, const char* net) { in f32_gemm_1x8s4__sse() argument
611 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8s4__sse, 1, 8, 1, 4); in f32_gemm_1x8s4__sse()
613 static void f32_gemm_3x8s4__sse(benchmark::State& state, const char* net) { in f32_gemm_3x8s4__sse() argument
614 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8s4__sse, 3, 8, 1, 4); in f32_gemm_3x8s4__sse()
616 static void f32_gemm_4x8s4__sse(benchmark::State& state, const char* net) { in f32_gemm_4x8s4__sse() argument
617 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__sse, 4, 8, 1, 4); in f32_gemm_4x8s4__sse()
619 static void f32_gemm_5x8s4__sse(benchmark::State& state, const char* net) { in f32_gemm_5x8s4__sse() argument
620 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8s4__sse, 5, 8, 1, 4); in f32_gemm_5x8s4__sse()
623 static void f32_gemm_1x8__sse2_dup(benchmark::State& state, const char* net) { in f32_gemm_1x8__sse2_dup() argument
624 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, 1, 8, 1, 1); in f32_gemm_1x8__sse2_dup()
626 static void f32_gemm_3x8__sse2_dup(benchmark::State& state, const char* net) { in f32_gemm_3x8__sse2_dup() argument
627 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup, 3, 8, 1, 1); in f32_gemm_3x8__sse2_dup()
629 static void f32_gemm_4x8__sse2_dup(benchmark::State& state, const char* net) { in f32_gemm_4x8__sse2_dup() argument
630 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup, 4, 8, 1, 1); in f32_gemm_4x8__sse2_dup()
632 static void f32_gemm_5x8__sse2_dup(benchmark::State& state, const char* net) { in f32_gemm_5x8__sse2_dup() argument
633 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, 5, 8, 1, 1); in f32_gemm_5x8__sse2_dup()
636 static void f32_ppmm_4x8_unipass__sse(benchmark::State& state, const char* net) { in f32_ppmm_4x8_unipass__sse() argument
637 … PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_x32_packx_ukernel_4x__sse, 4, 8); in f32_ppmm_4x8_unipass__sse()
639 static void f32_ppmm_4x8_twopass__sse(benchmark::State& state, const char* net) { in f32_ppmm_4x8_twopass__sse() argument
640 … PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_x32_packx_ukernel_4x__sse, 4, 8); in f32_ppmm_4x8_twopass__sse()
643 static void f32_gemm_1x8__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_1x8__avx_broadcast() argument
644 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, 1, 8, 1, 1, benchmark::utils:… in f32_gemm_1x8__avx_broadcast()
646 static void f32_gemm_4x8__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_4x8__avx_broadcast() argument
647 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast, 4, 8, 1, 1, benchmark::utils:… in f32_gemm_4x8__avx_broadcast()
649 static void f32_gemm_5x8__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_5x8__avx_broadcast() argument
650 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, 5, 8, 1, 1, benchmark::utils:… in f32_gemm_5x8__avx_broadcast()
652 static void f32_gemm_6x8__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_6x8__avx_broadcast() argument
653 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, 6, 8, 1, 1, benchmark::utils:… in f32_gemm_6x8__avx_broadcast()
655 static void f32_gemm_7x8__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_7x8__avx_broadcast() argument
656 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast, 7, 8, 1, 1, benchmark::utils:… in f32_gemm_7x8__avx_broadcast()
658 static void f32_gemm_1x16__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_1x16__avx_broadcast() argument
659 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast, 1, 16, 1, 1, benchmark::util… in f32_gemm_1x16__avx_broadcast()
661 static void f32_gemm_3x16__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_3x16__avx_broadcast() argument
662 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, 3, 16, 1, 1, benchmark::util… in f32_gemm_3x16__avx_broadcast()
664 static void f32_gemm_4x16__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_4x16__avx_broadcast() argument
665 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast, 4, 16, 1, 1, benchmark::util… in f32_gemm_4x16__avx_broadcast()
667 static void f32_gemm_5x16__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_5x16__avx_broadcast() argument
668 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast, 5, 16, 1, 1, benchmark::util… in f32_gemm_5x16__avx_broadcast()
671 static void f32_gemm_1x8__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_1x8__fma3_broadcast() argument
672 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, 1, 8, 1, 1, benchmark::utils… in f32_gemm_1x8__fma3_broadcast()
674 static void f32_gemm_4x8__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_4x8__fma3_broadcast() argument
675 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, 4, 8, 1, 1, benchmark::utils… in f32_gemm_4x8__fma3_broadcast()
677 static void f32_gemm_5x8__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_5x8__fma3_broadcast() argument
678 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, 5, 8, 1, 1, benchmark::utils… in f32_gemm_5x8__fma3_broadcast()
680 static void f32_gemm_6x8__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_6x8__fma3_broadcast() argument
681 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast, 6, 8, 1, 1, benchmark::utils… in f32_gemm_6x8__fma3_broadcast()
683 static void f32_gemm_7x8__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_7x8__fma3_broadcast() argument
684 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast, 7, 8, 1, 1, benchmark::utils… in f32_gemm_7x8__fma3_broadcast()
686 static void f32_gemm_8x8__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_8x8__fma3_broadcast() argument
687 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast, 8, 8, 1, 1, benchmark::utils… in f32_gemm_8x8__fma3_broadcast()
689 static void f32_gemm_1x16__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_1x16__fma3_broadcast() argument
690 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, 1, 16, 1, 1, benchmark::uti… in f32_gemm_1x16__fma3_broadcast()
692 static void f32_gemm_3x16__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_3x16__fma3_broadcast() argument
693 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, 3, 16, 1, 1, benchmark::uti… in f32_gemm_3x16__fma3_broadcast()
695 static void f32_gemm_4x16__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_4x16__fma3_broadcast() argument
696 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast, 4, 16, 1, 1, benchmark::uti… in f32_gemm_4x16__fma3_broadcast()
698 static void f32_gemm_5x16__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_5x16__fma3_broadcast() argument
699 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast, 5, 16, 1, 1, benchmark::uti… in f32_gemm_5x16__fma3_broadcast()
702 static void f32_gemm_1x16s4__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_1x16s4__fma3_broadcast() argument
703 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast, 1, 16, 1, 4, benchmark::u… in f32_gemm_1x16s4__fma3_broadcast()
705 static void f32_gemm_3x16s4__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_3x16s4__fma3_broadcast() argument
706 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast, 3, 16, 1, 4, benchmark::u… in f32_gemm_3x16s4__fma3_broadcast()
708 static void f32_gemm_4x16s4__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_4x16s4__fma3_broadcast() argument
709 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast, 4, 16, 1, 4, benchmark::u… in f32_gemm_4x16s4__fma3_broadcast()
711 static void f32_gemm_5x16s4__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_5x16s4__fma3_broadcast() argument
712 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast, 5, 16, 1, 4, benchmark::u… in f32_gemm_5x16s4__fma3_broadcast()
715 static void f32_gemm_1x16__avx512f_broadcast(benchmark::State& state, const char* net) { in f32_gemm_1x16__avx512f_broadcast() argument
716 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, 1, 16, 1, 1, benchmark::… in f32_gemm_1x16__avx512f_broadcast()
718 static void f32_gemm_4x16__avx512f_broadcast(benchmark::State& state, const char* net) { in f32_gemm_4x16__avx512f_broadcast() argument
719 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast, 4, 16, 1, 1, benchmark::… in f32_gemm_4x16__avx512f_broadcast()
721 static void f32_gemm_5x16__avx512f_broadcast(benchmark::State& state, const char* net) { in f32_gemm_5x16__avx512f_broadcast() argument
722 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast, 5, 16, 1, 1, benchmark::… in f32_gemm_5x16__avx512f_broadcast()
724 static void f32_gemm_6x16__avx512f_broadcast(benchmark::State& state, const char* net) { in f32_gemm_6x16__avx512f_broadcast() argument
725 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, 6, 16, 1, 1, benchmark::… in f32_gemm_6x16__avx512f_broadcast()
727 static void f32_gemm_7x16__avx512f_broadcast(benchmark::State& state, const char* net) { in f32_gemm_7x16__avx512f_broadcast() argument
728 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, 7, 16, 1, 1, benchmark::… in f32_gemm_7x16__avx512f_broadcast()
730 static void f32_gemm_8x16__avx512f_broadcast(benchmark::State& state, const char* net) { in f32_gemm_8x16__avx512f_broadcast() argument
731 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, 8, 16, 1, 1, benchmark::… in f32_gemm_8x16__avx512f_broadcast()
792 static void f32_gemm_3x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_3x8__wasmsimd_arm_loadsplat() argument
793 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, 3, 8, 1, 1); in f32_gemm_3x8__wasmsimd_arm_loadsplat()
796 static void f32_gemm_4x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_4x8__wasmsimd_arm_loadsplat() argument
797 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, 4, 8, 1, 1); in f32_gemm_4x8__wasmsimd_arm_loadsplat()
800 static void f32_gemm_5x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_5x8__wasmsimd_arm_loadsplat() argument
801 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, 5, 8, 1, 1); in f32_gemm_5x8__wasmsimd_arm_loadsplat()
804 static void f32_gemm_6x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_6x8__wasmsimd_arm_loadsplat() argument
805 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, 6, 8, 1, 1); in f32_gemm_6x8__wasmsimd_arm_loadsplat()
808 static void f32_gemm_3x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_3x8__wasmsimd_x86_loadsplat() argument
809 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, 3, 8, 1, 1); in f32_gemm_3x8__wasmsimd_x86_loadsplat()
812 static void f32_gemm_4x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_4x8__wasmsimd_x86_loadsplat() argument
813 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, 4, 8, 1, 1); in f32_gemm_4x8__wasmsimd_x86_loadsplat()
816 static void f32_gemm_5x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_5x8__wasmsimd_x86_loadsplat() argument
817 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, 5, 8, 1, 1); in f32_gemm_5x8__wasmsimd_x86_loadsplat()
820 static void f32_gemm_6x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_6x8__wasmsimd_x86_loadsplat() argument
821 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, 6, 8, 1, 1); in f32_gemm_6x8__wasmsimd_x86_loadsplat()
824 static void f32_gemm_3x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) { in f32_gemm_3x8__wasmsimd_arm_splat() argument
825 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat, 3, 8, 1, 1); in f32_gemm_3x8__wasmsimd_arm_splat()
828 static void f32_gemm_4x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) { in f32_gemm_4x8__wasmsimd_arm_splat() argument
829 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_splat, 4, 8, 1, 1); in f32_gemm_4x8__wasmsimd_arm_splat()
832 static void f32_gemm_5x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) { in f32_gemm_5x8__wasmsimd_arm_splat() argument
833 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat, 5, 8, 1, 1); in f32_gemm_5x8__wasmsimd_arm_splat()
836 static void f32_gemm_6x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) { in f32_gemm_6x8__wasmsimd_arm_splat() argument
837 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, 6, 8, 1, 1); in f32_gemm_6x8__wasmsimd_arm_splat()
840 static void f32_gemm_3x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) { in f32_gemm_3x8__wasmsimd_x86_splat() argument
841 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat, 3, 8, 1, 1); in f32_gemm_3x8__wasmsimd_x86_splat()
844 static void f32_gemm_4x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) { in f32_gemm_4x8__wasmsimd_x86_splat() argument
845 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_splat, 4, 8, 1, 1); in f32_gemm_4x8__wasmsimd_x86_splat()
848 static void f32_gemm_5x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) { in f32_gemm_5x8__wasmsimd_x86_splat() argument
849 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat, 5, 8, 1, 1); in f32_gemm_5x8__wasmsimd_x86_splat()
852 static void f32_gemm_6x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) { in f32_gemm_6x8__wasmsimd_x86_splat() argument
853 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, 6, 8, 1, 1); in f32_gemm_6x8__wasmsimd_x86_splat()
856 static void f32_gemm_3x8s4__wasmsimd_arm(benchmark::State& state, const char* net) { in f32_gemm_3x8s4__wasmsimd_arm() argument
857 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, 3, 8, 1, 4); in f32_gemm_3x8s4__wasmsimd_arm()
860 static void f32_gemm_4x8s4__wasmsimd_arm(benchmark::State& state, const char* net) { in f32_gemm_4x8s4__wasmsimd_arm() argument
861 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_arm, 4, 8, 1, 4); in f32_gemm_4x8s4__wasmsimd_arm()
864 static void f32_gemm_5x8s4__wasmsimd_arm(benchmark::State& state, const char* net) { in f32_gemm_5x8s4__wasmsimd_arm() argument
865 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm, 5, 8, 1, 4); in f32_gemm_5x8s4__wasmsimd_arm()
868 static void f32_gemm_6x8s4__wasmsimd_arm(benchmark::State& state, const char* net) { in f32_gemm_6x8s4__wasmsimd_arm() argument
869 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, 6, 8, 1, 4); in f32_gemm_6x8s4__wasmsimd_arm()
872 static void f32_gemm_3x8s4__wasmsimd_x86(benchmark::State& state, const char* net) { in f32_gemm_3x8s4__wasmsimd_x86() argument
873 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, 3, 8, 1, 4); in f32_gemm_3x8s4__wasmsimd_x86()
876 static void f32_gemm_4x8s4__wasmsimd_x86(benchmark::State& state, const char* net) { in f32_gemm_4x8s4__wasmsimd_x86() argument
877 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_x86, 4, 8, 1, 4); in f32_gemm_4x8s4__wasmsimd_x86()
880 static void f32_gemm_5x8s4__wasmsimd_x86(benchmark::State& state, const char* net) { in f32_gemm_5x8s4__wasmsimd_x86() argument
881 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86, 5, 8, 1, 4); in f32_gemm_5x8s4__wasmsimd_x86()
884 static void f32_gemm_6x8s4__wasmsimd_x86(benchmark::State& state, const char* net) { in f32_gemm_6x8s4__wasmsimd_x86() argument
885 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, 6, 8, 1, 4); in f32_gemm_6x8s4__wasmsimd_x86()
888 static void f32_ppmm_4x8_unipass__wasmsimd_arm_splat(benchmark::State& state, const char* net) { in f32_ppmm_4x8_unipass__wasmsimd_arm_splat() argument
889 …PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_x32_packx_ukernel_… in f32_ppmm_4x8_unipass__wasmsimd_arm_splat()
891 static void f32_ppmm_4x8_unipass__wasmsimd_x86_splat(benchmark::State& state, const char* net) { in f32_ppmm_4x8_unipass__wasmsimd_x86_splat() argument
892 …PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_x32_packx_ukernel_… in f32_ppmm_4x8_unipass__wasmsimd_x86_splat()
895 static void f32_ppmm_4x8_twopass__wasmsimd_arm_splat(benchmark::State& state, const char* net) { in f32_ppmm_4x8_twopass__wasmsimd_arm_splat() argument
896 …PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_x32_packx_ukernel_… in f32_ppmm_4x8_twopass__wasmsimd_arm_splat()
898 static void f32_ppmm_4x8_twopass__wasmsimd_x86_splat(benchmark::State& state, const char* net) { in f32_ppmm_4x8_twopass__wasmsimd_x86_splat() argument
899 …PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_x32_packx_ukernel_… in f32_ppmm_4x8_twopass__wasmsimd_x86_splat()
932 static void f32_gemm_1x4__scalar(benchmark::State& state, const char* net) { in f32_gemm_1x4__scalar() argument
933 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x4__scalar, 1, 4, 1, 1); in f32_gemm_1x4__scalar()
936 static void f32_gemm_2x4__scalar(benchmark::State& state, const char* net) { in f32_gemm_2x4__scalar() argument
937 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_2x4__scalar, 2, 4, 1, 1); in f32_gemm_2x4__scalar()
940 static void f32_gemm_4x4__scalar(benchmark::State& state, const char* net) { in f32_gemm_4x4__scalar() argument
941 GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x4__scalar, 4, 4, 1, 1); in f32_gemm_4x4__scalar()
944 static void f32_ppmm_2x4_unipass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_2x4_unipass__scalar() argument
945 …PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_x32_packx_ukernel_2x__scalar, … in f32_ppmm_2x4_unipass__scalar()
948 static void f32_ppmm_4x2_unipass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_4x2_unipass__scalar() argument
949 …PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_x32_packx_ukernel_4x__scalar, … in f32_ppmm_4x2_unipass__scalar()
952 static void f32_ppmm_4x4_unipass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_4x4_unipass__scalar() argument
953 …PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_x32_packx_ukernel_4x__scalar, … in f32_ppmm_4x4_unipass__scalar()
956 static void f32_ppmm_3x3_unipass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_3x3_unipass__scalar() argument
957 …PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_x32_packx_ukernel_3x__scalar, … in f32_ppmm_3x3_unipass__scalar()
960 static void f32_ppmm_2x4_twopass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_2x4_twopass__scalar() argument
961 …PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_x32_packx_ukernel_2x__scalar, … in f32_ppmm_2x4_twopass__scalar()
964 static void f32_ppmm_4x2_twopass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_4x2_twopass__scalar() argument
965 …PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_x32_packx_ukernel_4x__scalar, … in f32_ppmm_4x2_twopass__scalar()
968 static void f32_ppmm_4x4_twopass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_4x4_twopass__scalar() argument
969 …PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_x32_packx_ukernel_4x__scalar, … in f32_ppmm_4x4_twopass__scalar()
972 static void f32_ppmm_3x3_twopass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_3x3_twopass__scalar() argument
973 …PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_x32_packx_ukernel_3x__scalar, … in f32_ppmm_3x3_twopass__scalar()
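
Benchmark files of this shape normally end by handing control to Google Benchmark's main. A hedged sketch of that closing boilerplate and a typical invocation; the binary name and filter pattern are illustrative, and the real file may wrap the main behind a project-specific build option.

```cpp
#include <benchmark/benchmark.h>

BENCHMARK_MAIN();

// Example invocation using Google Benchmark's standard flags; the "FLOPS"
// and "cpufreq" counters set by the harnesses appear as extra output columns
// when tabular counters are enabled.
//
//   ./f32_gemm_bench --benchmark_filter='f32_gemm_6x8__aarch64' \
//                    --benchmark_counters_tabular=true
```
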