// Copyright 2022 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. #include #include #include #include #include #include "bench/utils.h" #include #include #include #include #include #include void filterbank_accumulate( benchmark::State& state, xnn_u32_filterbank_accumulate_ukernel_function filterbank_accumulate, benchmark::utils::IsaCheckFunction isa_check = nullptr) { if (isa_check && !isa_check(state)) { return; } const size_t rows = state.range(0); const size_t batch = state.range(1); const size_t input_size = (rows + 1) * batch; std::vector> input(input_size); std::vector> weight_widths(rows + 1); std::vector> weights(input_size * 2); std::vector> output(rows); std::iota(input.begin(), input.end(), 0); std::fill(weight_widths.begin(), weight_widths.end(), batch); std::iota(weights.begin(), weights.end(), 0); std::iota(output.begin(), output.end(), 0); for (auto _ : state) { filterbank_accumulate(rows, input.data(), weight_widths.data(), weights.data(), output.data()); } const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); if (cpu_frequency != 0) { state.counters["cpufreq"] = cpu_frequency; } } static void BenchmarkKernelSize(benchmark::internal::Benchmark* b) { b->ArgNames({"rows", "batch"}); b->Args({1, 237}); b->Args({5, 1}); b->Args({10, 2}); b->Args({7, 3}); b->Args({5, 4}); b->Args({5, 5}); b->Args({3, 6}); b->Args({4, 7}); b->Args({2, 8}); b->Args({2, 9}); b->Args({2, 10}); b->Args({3, 11}); b->Args({1, 13}); } #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY BENCHMARK_CAPTURE(filterbank_accumulate, u32_aarch32_arm_x1, xnn_u32_filterbank_accumulate_ukernel__aarch32_arm_x1)->Apply(BenchmarkKernelSize)->UseRealTime(); BENCHMARK_CAPTURE(filterbank_accumulate, u32_aarch32_neon_x1, xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1, benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime(); BENCHMARK_CAPTURE(filterbank_accumulate, u32_aarch32_neon_x2, xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2, benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime(); #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY #if XNN_ARCH_ARM || XNN_ARCH_ARM64 BENCHMARK_CAPTURE(filterbank_accumulate, u32_neon_x1, xnn_u32_filterbank_accumulate_ukernel__neon_x1, benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime(); BENCHMARK_CAPTURE(filterbank_accumulate, u32_neon_x2, xnn_u32_filterbank_accumulate_ukernel__neon_x2, benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime(); #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 BENCHMARK_CAPTURE(filterbank_accumulate, u32_scalar_x1, xnn_u32_filterbank_accumulate_ukernel__scalar_x1)->Apply(BenchmarkKernelSize)->UseRealTime(); #ifndef XNNPACK_BENCHMARK_NO_MAIN BENCHMARK_MAIN(); #endif