Lines Matching full:benchmark

18 #include <benchmark/benchmark.h>
34 static void GEMMBenchmark(benchmark::State& state, in GEMMBenchmark()
37 benchmark::utils::IsaCheckFunction isa_check = nullptr) in GEMMBenchmark()
47 const size_t nc_stride = benchmark::utils::RoundUp(nc, nr); in GEMMBenchmark()
48 const size_t kc_stride = benchmark::utils::RoundUp(kc, kr); in GEMMBenchmark()
64 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(), in GEMMBenchmark()
83 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float)); in GEMMBenchmark()
98 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); in GEMMBenchmark()
103 state.counters["FLOPS"] = benchmark::Counter( in GEMMBenchmark()
104 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in GEMMBenchmark()
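
The matched lines above outline the shape of GEMMBenchmark: it rounds nc and kc up to the microkernel's nr/kr tiling, sizes its buffers against the last-level cache, prefetches A into L1, reads the current CPU frequency, and reports a FLOPS counter of 2*mc*nc*kc per iteration as a rate. Below is a minimal, self-contained sketch of the same harness shape; the naive loop nest and the problem sizes are assumptions standing in for the xnn_f32_gemm_minmax microkernel, not code taken from this listing.

#include <benchmark/benchmark.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

// Naive reference GEMM used only to make the sketch runnable; the real file
// dispatches to an xnn_f32_gemm_minmax microkernel instead.
static void naive_gemm_benchmark(benchmark::State& state) {
  const size_t mc = state.range(0);
  const size_t nc = state.range(1);
  const size_t kc = state.range(2);

  std::mt19937 rng(42);
  std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);

  std::vector<float> a(mc * kc), b(kc * nc), c(mc * nc);
  std::generate(a.begin(), a.end(), [&] { return f32dist(rng); });
  std::generate(b.begin(), b.end(), [&] { return f32dist(rng); });

  for (auto _ : state) {
    for (size_t m = 0; m < mc; m++) {
      for (size_t n = 0; n < nc; n++) {
        float acc = 0.0f;
        for (size_t k = 0; k < kc; k++) {
          acc += a[m * kc + k] * b[k * nc + n];
        }
        c[m * nc + n] = acc;
      }
    }
    benchmark::DoNotOptimize(c.data());
  }

  // Same accounting as the matched lines: 2*M*N*K flops per iteration,
  // reported as a rate by Google Benchmark.
  state.counters["FLOPS"] = benchmark::Counter(
      uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate);
}
BENCHMARK(naive_gemm_benchmark)->Args({256, 256, 256});
BENCHMARK_MAIN();
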
107 static void PPMM1PBenchmark(benchmark::State& state, in PPMM1PBenchmark()
111 benchmark::utils::IsaCheckFunction isa_check = nullptr) in PPMM1PBenchmark()
121 const size_t nc_stride = benchmark::utils::RoundUp(nc, nr); in PPMM1PBenchmark()
139 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(), in PPMM1PBenchmark()
158 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float)); in PPMM1PBenchmark()
174 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); in PPMM1PBenchmark()
179 state.counters["FLOPS"] = benchmark::Counter( in PPMM1PBenchmark()
180 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in PPMM1PBenchmark()
183 static void PPMM2PBenchmark(benchmark::State& state, in PPMM2PBenchmark()
187 benchmark::utils::IsaCheckFunction isa_check = nullptr) in PPMM2PBenchmark()
197 const size_t mc_stride = benchmark::utils::RoundUp(mc, mr); in PPMM2PBenchmark()
198 const size_t nc_stride = benchmark::utils::RoundUp(nc, nr); in PPMM2PBenchmark()
216 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(), in PPMM2PBenchmark()
235 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float)); in PPMM2PBenchmark()
254 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); in PPMM2PBenchmark()
259 state.counters["FLOPS"] = benchmark::Counter( in PPMM2PBenchmark()
260 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in PPMM2PBenchmark()
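
In all three harnesses the DivideRoundUp(GetMaxCacheSize(), ...) lines apparently size the working buffers so that consecutive iterations start from cache-cold data (the A matrix is then explicitly prefetched back into L1). The following is a hedged sketch of that rotation pattern: DivideRoundUp and GetMaxCacheSize appear verbatim above, but the "bench/utils.h" include path, the problem sizes, and the per-iteration body are assumptions.

#include <benchmark/benchmark.h>

#include <cstddef>
#include <vector>

#include "bench/utils.h"  // assumed location of the benchmark::utils helpers above

// Allocate enough independent copies of the output matrix to exceed the
// last-level cache, then rotate through them so each iteration writes to
// cache-cold memory.
static void cold_output_benchmark(benchmark::State& state) {
  const size_t mc = 128, nc = 128, kc = 128;  // illustrative sizes only
  const size_t c_elements = mc * nc;
  const size_t num_buffers = 1 + benchmark::utils::DivideRoundUp<size_t>(
      benchmark::utils::GetMaxCacheSize(), c_elements * sizeof(float));

  std::vector<float> c(c_elements * num_buffers);

  size_t buffer_index = 0;
  for (auto _ : state) {
    buffer_index = (buffer_index + 1) % num_buffers;
    float* c_ptr = c.data() + buffer_index * c_elements;
    c_ptr[0] = float(kc);  // stand-in for the actual GEMM/PPMM call
    benchmark::DoNotOptimize(c_ptr);
  }
}
BENCHMARK(cold_output_benchmark);
BENCHMARK_MAIN();
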
264 static void RuyBenchmark(benchmark::State& state, uint32_t threads) in RuyBenchmark()
275 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(), in RuyBenchmark()
287 // Note: context must be static to avoid the cost of re-creating it for each benchmark. in RuyBenchmark()
303 …// Thus, on the first benchmark, we compute GEMM for 500 milliseconds (to be safe) without recordi… in RuyBenchmark()
325 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float)); in RuyBenchmark()
336 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); in RuyBenchmark()
341 state.counters["FLOPS"] = benchmark::Counter( in RuyBenchmark()
342 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate); in RuyBenchmark()
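
The RuyBenchmark comments above call out two details worth keeping when writing a similar harness: the context is created once (as a function-local static) rather than per benchmark case, and the very first case runs the kernel for about 500 ms without recording anything so thread pools and CPU frequency have settled before timing. A self-contained sketch of that pattern follows; FakeContext and RunGemmOnce() are hypothetical stand-ins for the actual ruy context and ruy::Mul call.

#include <benchmark/benchmark.h>

#include <chrono>

// Stand-in for ruy::Context; only illustrates lifetime, not the real API.
struct FakeContext {
  int max_num_threads = 1;
};

// Hypothetical stand-in for one GEMM call through the library.
static void RunGemmOnce(FakeContext& context) {
  benchmark::DoNotOptimize(context.max_num_threads);
}

static void warmed_up_benchmark(benchmark::State& state) {
  // Context is a function-local static: created once, reused by every case.
  static FakeContext context;

  // On the first case only, spin the kernel for ~500 ms without recording
  // anything, so later measurements are not skewed by warm-up effects.
  static bool warmed_up = false;
  if (!warmed_up) {
    const auto start = std::chrono::steady_clock::now();
    while (std::chrono::steady_clock::now() - start < std::chrono::milliseconds(500)) {
      RunGemmOnce(context);
    }
    warmed_up = true;
  }

  for (auto _ : state) {
    RunGemmOnce(context);
  }
}
BENCHMARK(warmed_up_benchmark);
BENCHMARK_MAIN();
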
345 static void ruy_st(benchmark::State& state, const char* net) in ruy_st()
353 static void f32_gemm_1x8__aarch64_neonfma_ld64(benchmark::State& state, const char* net) { in f32_gemm_1x8__aarch64_neonfma_ld64()
356 static void f32_gemm_1x12__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) { in f32_gemm_1x12__aarch64_neonfma_cortex_a53()
359 static void f32_gemm_1x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) { in f32_gemm_1x8__aarch64_neonfma_cortex_a53()
362 static void f32_gemm_1x8__aarch64_neonfma_cortex_a57(benchmark::State& state, const char* net) { in f32_gemm_1x8__aarch64_neonfma_cortex_a57()
365 static void f32_gemm_1x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) { in f32_gemm_1x8__aarch64_neonfma_cortex_a75()
368 static void f32_gemm_4x12__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) { in f32_gemm_4x12__aarch64_neonfma_cortex_a53()
371 static void f32_gemm_4x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch64_neonfma_cortex_a53()
374 static void f32_gemm_4x8__aarch64_neonfma_cortex_a55(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch64_neonfma_cortex_a55()
377 static void f32_gemm_4x8__aarch64_neonfma_cortex_a57(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch64_neonfma_cortex_a57()
380 static void f32_gemm_4x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch64_neonfma_cortex_a75()
383 static void f32_gemm_4x8__aarch64_neonfma_ld64(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch64_neonfma_ld64()
386 static void f32_gemm_4x8__aarch64_neonfma_ld128(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch64_neonfma_ld128()
389 static void f32_gemm_5x8__aarch64_neonfma_cortex_a57(benchmark::State& state, const char* net) { in f32_gemm_5x8__aarch64_neonfma_cortex_a57()
392 static void f32_gemm_5x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) { in f32_gemm_5x8__aarch64_neonfma_cortex_a75()
395 static void f32_gemm_6x8__aarch64_neonfma_ld64(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_ld64()
398 static void f32_gemm_6x8__aarch64_neonfma_ld128(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_ld128()
401 static void f32_gemm_6x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_cortex_a53()
404 static void f32_gemm_6x8__aarch64_neonfma_cortex_a55(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_cortex_a55()
407 static void f32_gemm_6x8__aarch64_neonfma_cortex_a73(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_cortex_a73()
410 static void f32_gemm_6x8__aarch64_neonfma_cortex_a57(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_cortex_a57()
413 static void f32_gemm_6x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) { in f32_gemm_6x8__aarch64_neonfma_cortex_a75()
416 static void f32_gemm_1x8__neonfma_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_1x8__neonfma_lane_ld64()
419 static void f32_gemm_4x8__neonfma_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_4x8__neonfma_lane_ld64()
422 static void f32_gemm_4x8__neonfma_lane_ld128(benchmark::State& state, const char* net) { in f32_gemm_4x8__neonfma_lane_ld128()
425 static void f32_gemm_5x8__neonfma_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_5x8__neonfma_lane_ld64()
428 static void f32_gemm_6x8__neonfma_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_6x8__neonfma_lane_ld64()
431 static void f32_gemm_6x8__neonfma_lane_ld128(benchmark::State& state, const char* net) { in f32_gemm_6x8__neonfma_lane_ld128()
464 static void f32_gemm_4x4__aarch32_vfp_ld64(benchmark::State& state, const char* net) { in f32_gemm_4x4__aarch32_vfp_ld64()
465 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x4__aarch32_vfp_ld64, 4, 4, 1, 1, benchmark::uti… in f32_gemm_4x4__aarch32_vfp_ld64()
467 static void f32_gemm_4x8__aarch32_neon_ld64(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch32_neon_ld64()
468 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_ld64, 4, 8, 1, 1, benchmark::ut… in f32_gemm_4x8__aarch32_neon_ld64()
470 static void f32_gemm_4x8__aarch32_neon_cortex_a7(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch32_neon_cortex_a7()
471 …e, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, 4, 8, 1, 1, benchmark::utils::CheckNEO… in f32_gemm_4x8__aarch32_neon_cortex_a7()
473 static void f32_gemm_4x8__aarch32_neon_cortex_a53(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch32_neon_cortex_a53()
474 …, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, 4, 8, 1, 1, benchmark::utils::CheckNEO… in f32_gemm_4x8__aarch32_neon_cortex_a53()
476 static void f32_gemm_4x8__aarch32_neon_cortex_a55(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch32_neon_cortex_a55()
477 …, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, 4, 8, 1, 1, benchmark::utils::CheckNEO… in f32_gemm_4x8__aarch32_neon_cortex_a55()
479 static void f32_gemm_4x8__aarch32_neon_cortex_a75(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch32_neon_cortex_a75()
480 …, xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, 4, 8, 1, 1, benchmark::utils::CheckNEO… in f32_gemm_4x8__aarch32_neon_cortex_a75()
482 static void f32_gemm_4x8__aarch32_neon_pld_cortex_a75(benchmark::State& state, const char* net) { in f32_gemm_4x8__aarch32_neon_pld_cortex_a75()
483 …n_f32_gemm_minmax_ukernel_4x8__aarch32_neon_pld_cortex_a75, 4, 8, 1, 1, benchmark::utils::CheckNEO… in f32_gemm_4x8__aarch32_neon_pld_cortex_a75()
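
The CheckNEON arguments visible (truncated) in the aarch32 wrappers above are ISA-check callbacks matching the isa_check parameter in the GEMMBenchmark signature earlier in this listing. How the harness uses that callback is not shown here; the sketch below illustrates one common guard pattern under that assumption, with SupportsFakeIsa() and CheckFakeIsa() as hypothetical names.

#include <benchmark/benchmark.h>

// Hypothetical ISA-check callback type; the real harness takes a
// benchmark::utils::IsaCheckFunction (see the GEMMBenchmark signature above).
using IsaCheckFunction = bool (*)(benchmark::State&);

// Pretend the required extension is missing so the skip path is exercised.
static bool SupportsFakeIsa() { return false; }

static bool CheckFakeIsa(benchmark::State& state) {
  if (!SupportsFakeIsa()) {
    state.SkipWithError("unsupported ISA extension");
    return false;
  }
  return true;
}

static void guarded_benchmark(benchmark::State& state, IsaCheckFunction isa_check) {
  // Bail out before touching any kernel code the CPU cannot execute; the case
  // is then reported as skipped instead of dying on an illegal instruction.
  if (isa_check != nullptr && !isa_check(state)) {
    return;
  }
  int work = 0;
  for (auto _ : state) {
    benchmark::DoNotOptimize(work += 1);
  }
}
BENCHMARK_CAPTURE(guarded_benchmark, fake_isa, CheckFakeIsa);
BENCHMARK_MAIN();
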
496 static void f32_gemm_1x8__neon_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_1x8__neon_lane_ld64()
497 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, 1, 8, 1, 1, benchmark::utils… in f32_gemm_1x8__neon_lane_ld64()
499 static void f32_gemm_4x8__neon_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_4x8__neon_lane_ld64()
500 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, 4, 8, 1, 1, benchmark::utils… in f32_gemm_4x8__neon_lane_ld64()
502 static void f32_gemm_4x8__neon_lane_ld128(benchmark::State& state, const char* net) { in f32_gemm_4x8__neon_lane_ld128()
503 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, 4, 8, 1, 1, benchmark::util… in f32_gemm_4x8__neon_lane_ld128()
505 static void f32_gemm_5x8__neon_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_5x8__neon_lane_ld64()
506 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, 5, 8, 1, 1, benchmark::utils… in f32_gemm_5x8__neon_lane_ld64()
508 static void f32_gemm_6x8__neon_lane_ld64(benchmark::State& state, const char* net) { in f32_gemm_6x8__neon_lane_ld64()
509 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld64, 6, 8, 1, 1, benchmark::utils… in f32_gemm_6x8__neon_lane_ld64()
511 static void f32_gemm_6x8__neon_lane_ld128(benchmark::State& state, const char* net) { in f32_gemm_6x8__neon_lane_ld128()
512 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, 6, 8, 1, 1, benchmark::util… in f32_gemm_6x8__neon_lane_ld128()
514 static void f32_gemm_1x8__neonfma_dup_ld64(benchmark::State& state, const char* net) { in f32_gemm_1x8__neonfma_dup_ld64()
515 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64, 1, 8, 1, 1, benchmark::uti… in f32_gemm_1x8__neonfma_dup_ld64()
517 static void f32_gemm_4x8__neonfma_dup_ld64(benchmark::State& state, const char* net) { in f32_gemm_4x8__neonfma_dup_ld64()
518 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, 4, 8, 1, 1, benchmark::uti… in f32_gemm_4x8__neonfma_dup_ld64()
520 static void f32_gemm_4x8__neonfma_dup_ld128(benchmark::State& state, const char* net) { in f32_gemm_4x8__neonfma_dup_ld128()
521 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld128, 4, 8, 1, 1, benchmark::ut… in f32_gemm_4x8__neonfma_dup_ld128()
523 static void f32_gemm_6x8__neonfma_dup_ld64(benchmark::State& state, const char* net) { in f32_gemm_6x8__neonfma_dup_ld64()
524 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld64, 6, 8, 1, 1, benchmark::uti… in f32_gemm_6x8__neonfma_dup_ld64()
526 static void f32_gemm_6x8__neonfma_dup_ld128(benchmark::State& state, const char* net) { in f32_gemm_6x8__neonfma_dup_ld128()
527 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld128, 6, 8, 1, 1, benchmark::ut… in f32_gemm_6x8__neonfma_dup_ld128()
529 static void f32_gemm_1x8s4__neon(benchmark::State& state, const char* net) { in f32_gemm_1x8s4__neon()
530 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8s4__neon, 1, 8, 1, 4, benchmark::utils::CheckN… in f32_gemm_1x8s4__neon()
532 static void f32_gemm_1x8s4__neonfma(benchmark::State& state, const char* net) { in f32_gemm_1x8s4__neonfma()
533 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, 1, 8, 1, 4, benchmark::utils::Che… in f32_gemm_1x8s4__neonfma()
535 static void f32_gemm_4x8s4__neon(benchmark::State& state, const char* net) { in f32_gemm_4x8s4__neon()
536 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__neon, 4, 8, 1, 4, benchmark::utils::CheckN… in f32_gemm_4x8s4__neon()
538 static void f32_gemm_4x8s4__neonfma(benchmark::State& state, const char* net) { in f32_gemm_4x8s4__neonfma()
539 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__neonfma, 4, 8, 1, 4, benchmark::utils::Che… in f32_gemm_4x8s4__neonfma()
541 static void f32_gemm_6x8s4__neon(benchmark::State& state, const char* net) { in f32_gemm_6x8s4__neon()
542 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__neon, 6, 8, 1, 4, benchmark::utils::CheckN… in f32_gemm_6x8s4__neon()
544 static void f32_gemm_6x8s4__neonfma(benchmark::State& state, const char* net) { in f32_gemm_6x8s4__neonfma()
545 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, 6, 8, 1, 4, benchmark::utils::Che… in f32_gemm_6x8s4__neonfma()
547 static void f32_gemm_8x8s4__neon(benchmark::State& state, const char* net) { in f32_gemm_8x8s4__neon()
548 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_8x8s4__neon, 8, 8, 1, 4, benchmark::utils::CheckN… in f32_gemm_8x8s4__neon()
550 static void f32_gemm_8x8s4__neonfma(benchmark::State& state, const char* net) { in f32_gemm_8x8s4__neonfma()
551 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, 8, 8, 1, 4, benchmark::utils::Che… in f32_gemm_8x8s4__neonfma()
553 static void f32_ppmm_4x8_unipass__neonfma(benchmark::State& state, const char* net) { in f32_ppmm_4x8_unipass__neonfma()
554 …_minmax_ukernel_4x8__neonfma, xnn_x32_packx_ukernel_4x__neon_st4, 4, 8, benchmark::utils::CheckNEO… in f32_ppmm_4x8_unipass__neonfma()
556 static void f32_ppmm_4x8_twopass__neonfma(benchmark::State& state, const char* net) { in f32_ppmm_4x8_twopass__neonfma()
557 …_minmax_ukernel_4x8__neonfma, xnn_x32_packx_ukernel_4x__neon_st4, 4, 8, benchmark::utils::CheckNEO… in f32_ppmm_4x8_twopass__neonfma()
584 static void f32_gemm_1x8__sse_load1(benchmark::State& state, const char* net) { in f32_gemm_1x8__sse_load1()
587 static void f32_gemm_3x8__sse_load1(benchmark::State& state, const char* net) { in f32_gemm_3x8__sse_load1()
590 static void f32_gemm_4x8__sse_load1(benchmark::State& state, const char* net) { in f32_gemm_4x8__sse_load1()
593 static void f32_gemm_5x8__sse_load1(benchmark::State& state, const char* net) { in f32_gemm_5x8__sse_load1()
597 static void f32_gemm_1x8__sse_dup(benchmark::State& state, const char* net) { in f32_gemm_1x8__sse_dup()
600 static void f32_gemm_3x8__sse_dup(benchmark::State& state, const char* net) { in f32_gemm_3x8__sse_dup()
603 static void f32_gemm_4x8__sse_dup(benchmark::State& state, const char* net) { in f32_gemm_4x8__sse_dup()
606 static void f32_gemm_5x8__sse_dup(benchmark::State& state, const char* net) { in f32_gemm_5x8__sse_dup()
610 static void f32_gemm_1x8s4__sse(benchmark::State& state, const char* net) { in f32_gemm_1x8s4__sse()
613 static void f32_gemm_3x8s4__sse(benchmark::State& state, const char* net) { in f32_gemm_3x8s4__sse()
616 static void f32_gemm_4x8s4__sse(benchmark::State& state, const char* net) { in f32_gemm_4x8s4__sse()
619 static void f32_gemm_5x8s4__sse(benchmark::State& state, const char* net) { in f32_gemm_5x8s4__sse()
623 static void f32_gemm_1x8__sse2_dup(benchmark::State& state, const char* net) { in f32_gemm_1x8__sse2_dup()
626 static void f32_gemm_3x8__sse2_dup(benchmark::State& state, const char* net) { in f32_gemm_3x8__sse2_dup()
629 static void f32_gemm_4x8__sse2_dup(benchmark::State& state, const char* net) { in f32_gemm_4x8__sse2_dup()
632 static void f32_gemm_5x8__sse2_dup(benchmark::State& state, const char* net) { in f32_gemm_5x8__sse2_dup()
636 static void f32_ppmm_4x8_unipass__sse(benchmark::State& state, const char* net) { in f32_ppmm_4x8_unipass__sse()
639 static void f32_ppmm_4x8_twopass__sse(benchmark::State& state, const char* net) { in f32_ppmm_4x8_twopass__sse()
643 static void f32_gemm_1x8__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_1x8__avx_broadcast()
644 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, 1, 8, 1, 1, benchmark::utils:… in f32_gemm_1x8__avx_broadcast()
646 static void f32_gemm_4x8__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_4x8__avx_broadcast()
647 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast, 4, 8, 1, 1, benchmark::utils:… in f32_gemm_4x8__avx_broadcast()
649 static void f32_gemm_5x8__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_5x8__avx_broadcast()
650 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, 5, 8, 1, 1, benchmark::utils:… in f32_gemm_5x8__avx_broadcast()
652 static void f32_gemm_6x8__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_6x8__avx_broadcast()
653 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, 6, 8, 1, 1, benchmark::utils:… in f32_gemm_6x8__avx_broadcast()
655 static void f32_gemm_7x8__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_7x8__avx_broadcast()
656 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast, 7, 8, 1, 1, benchmark::utils:… in f32_gemm_7x8__avx_broadcast()
658 static void f32_gemm_1x16__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_1x16__avx_broadcast()
659 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast, 1, 16, 1, 1, benchmark::util… in f32_gemm_1x16__avx_broadcast()
661 static void f32_gemm_3x16__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_3x16__avx_broadcast()
662 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, 3, 16, 1, 1, benchmark::util… in f32_gemm_3x16__avx_broadcast()
664 static void f32_gemm_4x16__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_4x16__avx_broadcast()
665 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast, 4, 16, 1, 1, benchmark::util… in f32_gemm_4x16__avx_broadcast()
667 static void f32_gemm_5x16__avx_broadcast(benchmark::State& state, const char* net) { in f32_gemm_5x16__avx_broadcast()
668 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast, 5, 16, 1, 1, benchmark::util… in f32_gemm_5x16__avx_broadcast()
671 static void f32_gemm_1x8__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_1x8__fma3_broadcast()
672 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, 1, 8, 1, 1, benchmark::utils… in f32_gemm_1x8__fma3_broadcast()
674 static void f32_gemm_4x8__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_4x8__fma3_broadcast()
675 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, 4, 8, 1, 1, benchmark::utils… in f32_gemm_4x8__fma3_broadcast()
677 static void f32_gemm_5x8__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_5x8__fma3_broadcast()
678 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, 5, 8, 1, 1, benchmark::utils… in f32_gemm_5x8__fma3_broadcast()
680 static void f32_gemm_6x8__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_6x8__fma3_broadcast()
681 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast, 6, 8, 1, 1, benchmark::utils… in f32_gemm_6x8__fma3_broadcast()
683 static void f32_gemm_7x8__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_7x8__fma3_broadcast()
684 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast, 7, 8, 1, 1, benchmark::utils… in f32_gemm_7x8__fma3_broadcast()
686 static void f32_gemm_8x8__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_8x8__fma3_broadcast()
687 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast, 8, 8, 1, 1, benchmark::utils… in f32_gemm_8x8__fma3_broadcast()
689 static void f32_gemm_1x16__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_1x16__fma3_broadcast()
690 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, 1, 16, 1, 1, benchmark::uti… in f32_gemm_1x16__fma3_broadcast()
692 static void f32_gemm_3x16__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_3x16__fma3_broadcast()
693 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, 3, 16, 1, 1, benchmark::uti… in f32_gemm_3x16__fma3_broadcast()
695 static void f32_gemm_4x16__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_4x16__fma3_broadcast()
696 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast, 4, 16, 1, 1, benchmark::uti… in f32_gemm_4x16__fma3_broadcast()
698 static void f32_gemm_5x16__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_5x16__fma3_broadcast()
699 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast, 5, 16, 1, 1, benchmark::uti… in f32_gemm_5x16__fma3_broadcast()
702 static void f32_gemm_1x16s4__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_1x16s4__fma3_broadcast()
703 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast, 1, 16, 1, 4, benchmark::u… in f32_gemm_1x16s4__fma3_broadcast()
705 static void f32_gemm_3x16s4__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_3x16s4__fma3_broadcast()
706 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast, 3, 16, 1, 4, benchmark::u… in f32_gemm_3x16s4__fma3_broadcast()
708 static void f32_gemm_4x16s4__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_4x16s4__fma3_broadcast()
709 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast, 4, 16, 1, 4, benchmark::u… in f32_gemm_4x16s4__fma3_broadcast()
711 static void f32_gemm_5x16s4__fma3_broadcast(benchmark::State& state, const char* net) { in f32_gemm_5x16s4__fma3_broadcast()
712 …GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast, 5, 16, 1, 4, benchmark::u… in f32_gemm_5x16s4__fma3_broadcast()
715 static void f32_gemm_1x16__avx512f_broadcast(benchmark::State& state, const char* net) { in f32_gemm_1x16__avx512f_broadcast()
716 …tate, xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, 1, 16, 1, 1, benchmark::utils::CheckAVX… in f32_gemm_1x16__avx512f_broadcast()
718 static void f32_gemm_4x16__avx512f_broadcast(benchmark::State& state, const char* net) { in f32_gemm_4x16__avx512f_broadcast()
719 …tate, xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast, 4, 16, 1, 1, benchmark::utils::CheckAVX… in f32_gemm_4x16__avx512f_broadcast()
721 static void f32_gemm_5x16__avx512f_broadcast(benchmark::State& state, const char* net) { in f32_gemm_5x16__avx512f_broadcast()
722 …tate, xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast, 5, 16, 1, 1, benchmark::utils::CheckAVX… in f32_gemm_5x16__avx512f_broadcast()
724 static void f32_gemm_6x16__avx512f_broadcast(benchmark::State& state, const char* net) { in f32_gemm_6x16__avx512f_broadcast()
725 …tate, xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, 6, 16, 1, 1, benchmark::utils::CheckAVX… in f32_gemm_6x16__avx512f_broadcast()
727 static void f32_gemm_7x16__avx512f_broadcast(benchmark::State& state, const char* net) { in f32_gemm_7x16__avx512f_broadcast()
728 …tate, xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, 7, 16, 1, 1, benchmark::utils::CheckAVX… in f32_gemm_7x16__avx512f_broadcast()
730 static void f32_gemm_8x16__avx512f_broadcast(benchmark::State& state, const char* net) { in f32_gemm_8x16__avx512f_broadcast()
731 …tate, xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, 8, 16, 1, 1, benchmark::utils::CheckAVX… in f32_gemm_8x16__avx512f_broadcast()
792 static void f32_gemm_3x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_3x8__wasmsimd_arm_loadsplat()
796 static void f32_gemm_4x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_4x8__wasmsimd_arm_loadsplat()
800 static void f32_gemm_5x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_5x8__wasmsimd_arm_loadsplat()
804 static void f32_gemm_6x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_6x8__wasmsimd_arm_loadsplat()
808 static void f32_gemm_3x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_3x8__wasmsimd_x86_loadsplat()
812 static void f32_gemm_4x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_4x8__wasmsimd_x86_loadsplat()
816 static void f32_gemm_5x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_5x8__wasmsimd_x86_loadsplat()
820 static void f32_gemm_6x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) { in f32_gemm_6x8__wasmsimd_x86_loadsplat()
824 static void f32_gemm_3x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) { in f32_gemm_3x8__wasmsimd_arm_splat()
828 static void f32_gemm_4x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) { in f32_gemm_4x8__wasmsimd_arm_splat()
832 static void f32_gemm_5x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) { in f32_gemm_5x8__wasmsimd_arm_splat()
836 static void f32_gemm_6x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) { in f32_gemm_6x8__wasmsimd_arm_splat()
840 static void f32_gemm_3x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) { in f32_gemm_3x8__wasmsimd_x86_splat()
844 static void f32_gemm_4x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) { in f32_gemm_4x8__wasmsimd_x86_splat()
848 static void f32_gemm_5x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) { in f32_gemm_5x8__wasmsimd_x86_splat()
852 static void f32_gemm_6x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) { in f32_gemm_6x8__wasmsimd_x86_splat()
856 static void f32_gemm_3x8s4__wasmsimd_arm(benchmark::State& state, const char* net) { in f32_gemm_3x8s4__wasmsimd_arm()
860 static void f32_gemm_4x8s4__wasmsimd_arm(benchmark::State& state, const char* net) { in f32_gemm_4x8s4__wasmsimd_arm()
864 static void f32_gemm_5x8s4__wasmsimd_arm(benchmark::State& state, const char* net) { in f32_gemm_5x8s4__wasmsimd_arm()
868 static void f32_gemm_6x8s4__wasmsimd_arm(benchmark::State& state, const char* net) { in f32_gemm_6x8s4__wasmsimd_arm()
872 static void f32_gemm_3x8s4__wasmsimd_x86(benchmark::State& state, const char* net) { in f32_gemm_3x8s4__wasmsimd_x86()
876 static void f32_gemm_4x8s4__wasmsimd_x86(benchmark::State& state, const char* net) { in f32_gemm_4x8s4__wasmsimd_x86()
880 static void f32_gemm_5x8s4__wasmsimd_x86(benchmark::State& state, const char* net) { in f32_gemm_5x8s4__wasmsimd_x86()
884 static void f32_gemm_6x8s4__wasmsimd_x86(benchmark::State& state, const char* net) { in f32_gemm_6x8s4__wasmsimd_x86()
888 static void f32_ppmm_4x8_unipass__wasmsimd_arm_splat(benchmark::State& state, const char* net) { in f32_ppmm_4x8_unipass__wasmsimd_arm_splat()
891 static void f32_ppmm_4x8_unipass__wasmsimd_x86_splat(benchmark::State& state, const char* net) { in f32_ppmm_4x8_unipass__wasmsimd_x86_splat()
895 static void f32_ppmm_4x8_twopass__wasmsimd_arm_splat(benchmark::State& state, const char* net) { in f32_ppmm_4x8_twopass__wasmsimd_arm_splat()
898 static void f32_ppmm_4x8_twopass__wasmsimd_x86_splat(benchmark::State& state, const char* net) { in f32_ppmm_4x8_twopass__wasmsimd_x86_splat()
932 static void f32_gemm_1x4__scalar(benchmark::State& state, const char* net) { in f32_gemm_1x4__scalar()
936 static void f32_gemm_2x4__scalar(benchmark::State& state, const char* net) { in f32_gemm_2x4__scalar()
940 static void f32_gemm_4x4__scalar(benchmark::State& state, const char* net) { in f32_gemm_4x4__scalar()
944 static void f32_ppmm_2x4_unipass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_2x4_unipass__scalar()
948 static void f32_ppmm_4x2_unipass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_4x2_unipass__scalar()
952 static void f32_ppmm_4x4_unipass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_4x4_unipass__scalar()
956 static void f32_ppmm_3x3_unipass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_3x3_unipass__scalar()
960 static void f32_ppmm_2x4_twopass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_2x4_twopass__scalar()
964 static void f32_ppmm_4x2_twopass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_4x2_twopass__scalar()
968 static void f32_ppmm_4x4_twopass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_4x4_twopass__scalar()
972 static void f32_ppmm_3x3_twopass__scalar(benchmark::State& state, const char* net) { in f32_ppmm_3x3_twopass__scalar()
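
Every wrapper in this listing follows the same two-step pattern: a thin static function forwards one microkernel plus its mr/nr/kr/sr tile sizes and an ISA check to the shared harness, and a BENCHMARK_GEMM macro (whose body is not shown in this listing) registers it. The sketch below is self-contained and assumes BENCHMARK_GEMM expands to Google Benchmark's BENCHMARK_CAPTURE; all names here are illustrative stand-ins, not XNNPACK's.

#include <benchmark/benchmark.h>

#include <cstddef>

// Hypothetical microkernel entry point; the real ones are the
// xnn_f32_gemm_minmax_ukernel_* functions named above.
typedef void (*gemm_ukernel_fn)(size_t mr, size_t nr);

static void fake_ukernel_4x8(size_t mr, size_t nr) { benchmark::DoNotOptimize(mr + nr); }
static void fake_ukernel_6x8(size_t mr, size_t nr) { benchmark::DoNotOptimize(mr + nr); }

// Shared harness, analogous to GEMMBenchmark(state, ukernel, mr, nr, kr, sr, isa_check).
static void GemmHarness(benchmark::State& state, gemm_ukernel_fn ukernel,
                        size_t mr, size_t nr) {
  for (auto _ : state) {
    ukernel(mr, nr);
  }
}

// One thin wrapper per microkernel, matching the (state, net) signature in the listing.
static void fake_gemm_4x8(benchmark::State& state, const char* net) {
  (void) net;  // network-name argument captured at registration; unused in this sketch
  GemmHarness(state, fake_ukernel_4x8, 4, 8);
}
static void fake_gemm_6x8(benchmark::State& state, const char* net) {
  (void) net;
  GemmHarness(state, fake_ukernel_6x8, 6, 8);
}

// Assumed shape of the BENCHMARK_GEMM registration macro.
#define FAKE_BENCHMARK_GEMM(fn) \
  BENCHMARK_CAPTURE(fn, mobilenet, "MobileNet")->UseRealTime();

FAKE_BENCHMARK_GEMM(fake_gemm_4x8)
FAKE_BENCHMARK_GEMM(fake_gemm_6x8)

BENCHMARK_MAIN();
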