• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <algorithm>
10 #include <cfloat>
11 #include <cmath>
12 #include <functional>
13 #include <random>
14 #include <vector>
15 
16 #include <cpuinfo.h>
17 
18 #include <benchmark/benchmark.h>
19 #include "bench/utils.h"
20 #include <xnnpack/AlignedAllocator.h>
21 #include <xnnpack/common.h>
22 #include <xnnpack/requantization-stubs.h>
23 
24 
25 class Requantization : public benchmark::Fixture {
26  public:
Requantization()27   inline Requantization()
28   {
29     cpuinfo_initialize();
30     const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
31     const size_t l1d_reserve = 1024;
32     n_ = (l1d_size - l1d_reserve) / (sizeof(int32_t) + sizeof(uint8_t));
33     n_ = n_ / 16 * 16;
34   }
35 
SetUp(benchmark::State & state)36   virtual void SetUp(benchmark::State& state) override
37   {
38     std::random_device random_device;
39     auto rng = std::mt19937(random_device());
40     auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(), std::ref(rng));
41 
42     input_.resize(n());
43     std::generate(input_.begin(), input_.end(), std::ref(i32rng));
44     output_.resize(n());
45     std::fill(output_.begin(), output_.end(), 0xA5);
46 
47     const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
48     if (cpu_frequency != 0) {
49       state.counters["cpufreq"] = cpu_frequency;
50     }
51   }
52 
TearDown(benchmark::State & state)53   virtual void TearDown(benchmark::State& state) override
54   {
55     state.SetItemsProcessed(uint64_t(state.iterations()) * n());
56     state.SetBytesProcessed(uint64_t(state.iterations()) * n() * (sizeof(int32_t) + sizeof(uint8_t)));
57     input_.clear();
58     output_.clear();
59   }
60 
input() const61   inline const int32_t* input() const
62   {
63     return input_.data();
64   }
65 
output()66   inline uint8_t* output()
67   {
68     return output_.data();
69   }
70 
n() const71   inline size_t n() const
72   {
73     return n_;
74   }
75 
76  protected:
77   std::vector<int32_t, AlignedAllocator<int32_t, 64>> input_;
78   std::vector<uint8_t> output_;
79   size_t n_;
80 };
81 
82 
83 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
BENCHMARK_F(Requantization,fp32__neon)84   BENCHMARK_F(Requantization, fp32__neon)(benchmark::State& state) {
85     for (auto _ : state) {
86       xnn_qu8_requantize_fp32__neon(
87           n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
88     }
89   }
90 
BENCHMARK_F(Requantization,gemmlowp__neon)91   BENCHMARK_F(Requantization, gemmlowp__neon)(benchmark::State& state) {
92     for (auto _ : state) {
93       xnn_qu8_requantize_gemmlowp__neon(
94           n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
95     }
96   }
97 
BENCHMARK_F(Requantization,rndna__neon)98   BENCHMARK_F(Requantization, rndna__neon)(benchmark::State& state) {
99     for (auto _ : state) {
100       xnn_qu8_requantize_rndna__neon(
101           n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
102     }
103   }
104 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
105 
106 
107 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
BENCHMARK_F(Requantization,fp32__sse2)108   BENCHMARK_F(Requantization, fp32__sse2)(benchmark::State& state) {
109     for (auto _ : state) {
110       xnn_qu8_requantize_fp32__sse2(
111           n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
112     }
113   }
114 
BENCHMARK_F(Requantization,gemmlowp__sse2)115   BENCHMARK_F(Requantization, gemmlowp__sse2)(benchmark::State& state) {
116     for (auto _ : state) {
117       xnn_qu8_requantize_gemmlowp__sse2(
118           n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
119     }
120   }
121 
BENCHMARK_F(Requantization,gemmlowp__ssse3)122   BENCHMARK_F(Requantization, gemmlowp__ssse3)(benchmark::State& state) {
123     for (auto _ : state) {
124       xnn_qu8_requantize_gemmlowp__ssse3(
125           n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
126     }
127   }
128 
BENCHMARK_F(Requantization,gemmlowp__sse4)129   BENCHMARK_F(Requantization, gemmlowp__sse4)(benchmark::State& state) {
130     for (auto _ : state) {
131       xnn_qu8_requantize_gemmlowp__sse4(
132           n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
133     }
134   }
135 
BENCHMARK_F(Requantization,rndna__sse2)136   BENCHMARK_F(Requantization, rndna__sse2)(benchmark::State& state) {
137     for (auto _ : state) {
138       xnn_qu8_requantize_rndna__sse2(
139           n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
140     }
141   }
142 
BENCHMARK_F(Requantization,rndna__ssse3)143   BENCHMARK_F(Requantization, rndna__ssse3)(benchmark::State& state) {
144     for (auto _ : state) {
145       xnn_qu8_requantize_rndna__ssse3(
146           n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
147     }
148   }
149 
BENCHMARK_F(Requantization,rndna__sse4)150   BENCHMARK_F(Requantization, rndna__sse4)(benchmark::State& state) {
151     for (auto _ : state) {
152       xnn_qu8_requantize_rndna__sse4(
153           n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
154     }
155   }
156 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
157 
158 
159 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
BENCHMARK_F(Requantization,fp32__wasmsimd)160   BENCHMARK_F(Requantization, fp32__wasmsimd)(benchmark::State& state) {
161     for (auto _ : state) {
162       xnn_qu8_requantize_fp32__wasmsimd(
163           n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
164     }
165   }
166 
BENCHMARK_F(Requantization,gemmlowp__wasmsimd)167   BENCHMARK_F(Requantization, gemmlowp__wasmsimd)(benchmark::State& state) {
168     for (auto _ : state) {
169       xnn_qu8_requantize_gemmlowp__wasmsimd(
170           n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
171     }
172   }
173 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
174 
175 
// Times xnn_qu8_requantize_fp32__scalar_lrintf over the fixture's n() elements per iteration.
BENCHMARK_F(Requantization, fp32__scalar_lrintf)(benchmark::State& state) {
  // Requantization parameters, identical for every timed call.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_fp32__scalar_lrintf(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
182 
// Times xnn_qu8_requantize_fp32__scalar_fmagic over the fixture's n() elements per iteration.
BENCHMARK_F(Requantization, fp32__scalar_fmagic)(benchmark::State& state) {
  // Requantization parameters, identical for every timed call.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_fp32__scalar_fmagic(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
189 
// Times xnn_qu8_requantize_gemmlowp__scalar over the fixture's n() elements per iteration.
BENCHMARK_F(Requantization, gemmlowp__scalar)(benchmark::State& state) {
  // Requantization parameters, identical for every timed call.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_gemmlowp__scalar(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
196 
// Times xnn_qu8_requantize_rndna__scalar_signed64 over the fixture's n() elements per iteration.
BENCHMARK_F(Requantization, rndna__scalar_signed64)(benchmark::State& state) {
  // Requantization parameters, identical for every timed call.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_rndna__scalar_signed64(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
203 
// Times xnn_qu8_requantize_rndna__scalar_unsigned32 over the fixture's n() elements per iteration.
BENCHMARK_F(Requantization, rndna__scalar_unsigned32)(benchmark::State& state) {
  // Requantization parameters, identical for every timed call.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_rndna__scalar_unsigned32(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
210 
// Times xnn_qu8_requantize_rndna__scalar_unsigned64 over the fixture's n() elements per iteration.
BENCHMARK_F(Requantization, rndna__scalar_unsigned64)(benchmark::State& state) {
  // Requantization parameters, identical for every timed call.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_rndna__scalar_unsigned64(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
217 
218 
// Emit Google Benchmark's BENCHMARK_MAIN() entry point unless
// XNNPACK_BENCHMARK_NO_MAIN is defined (presumably for builds that link this
// file into a binary providing its own main — confirm against the build files).
219 #ifndef XNNPACK_BENCHMARK_NO_MAIN
220 BENCHMARK_MAIN();
221 #endif
222