1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <algorithm>
10 #include <cmath>
11 #include <functional>
12 #include <limits>
13 #include <random>
14 #include <vector>
15
16 #include <xnnpack.h>
17
18 #include <benchmark/benchmark.h>
19 #include "bench/utils.h"
20
21
channel_shuffle_x8(benchmark::State & state,const char * net)22 static void channel_shuffle_x8(benchmark::State& state, const char* net) {
23 const size_t batch_size = static_cast<size_t>(state.range(0));
24 const size_t groups = static_cast<size_t>(state.range(1));
25 const size_t group_channels = static_cast<size_t>(state.range(2));
26
27 std::random_device random_device;
28 auto rng = std::mt19937(random_device());
29 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
30
31 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + batch_size * groups * group_channels);
32 std::vector<uint8_t> output(batch_size * groups * group_channels);
33 std::generate(input.begin(), input.end(), std::ref(u8rng));
34
35 xnn_status status = xnn_initialize(nullptr /* allocator */);
36 if (status != xnn_status_success) {
37 state.SkipWithError("failed to initialize XNNPACK");
38 return;
39 }
40
41 xnn_operator_t channel_shuffle_op = nullptr;
42 status = xnn_create_channel_shuffle_nc_x8(
43 groups, group_channels,
44 groups * group_channels /* input stride */,
45 groups * group_channels /* output stride */,
46 0 /* flags */, &channel_shuffle_op);
47 if (status != xnn_status_success || channel_shuffle_op == nullptr) {
48 state.SkipWithError("failed to create X8 Channel Shuffle operator");
49 return;
50 }
51
52 status = xnn_setup_channel_shuffle_nc_x8(
53 channel_shuffle_op,
54 batch_size,
55 input.data(), output.data(),
56 nullptr /* thread pool */);
57 if (status != xnn_status_success) {
58 state.SkipWithError("failed to setup X8 Channel Shuffle operator");
59 return;
60 }
61
62 for (auto _ : state) {
63 status = xnn_run_operator(channel_shuffle_op, nullptr /* thread pool */);
64 if (status != xnn_status_success) {
65 state.SkipWithError("failed to run X8 Channel Shuffle operator");
66 return;
67 }
68 }
69
70 status = xnn_delete_operator(channel_shuffle_op);
71 if (status != xnn_status_success) {
72 state.SkipWithError("failed to delete X8 Channel Shuffle operator");
73 return;
74 }
75
76 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
77 if (cpu_frequency != 0) {
78 state.counters["cpufreq"] = cpu_frequency;
79 }
80
81 const size_t elements_per_iteration = batch_size * groups * group_channels;
82 state.counters["elements"] =
83 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
84
85 const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(uint8_t);
86 state.counters["bytes"] =
87 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
88 }
89
channel_shuffle_x32(benchmark::State & state,const char * net)90 static void channel_shuffle_x32(benchmark::State& state, const char* net) {
91 const size_t batch_size = static_cast<size_t>(state.range(0));
92 const size_t groups = static_cast<size_t>(state.range(1));
93 const size_t group_channels = static_cast<size_t>(state.range(2));
94
95 std::random_device random_device;
96 auto rng = std::mt19937(random_device());
97 auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
98
99 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + batch_size * groups * group_channels);
100 std::vector<float> output(batch_size * groups * group_channels);
101 std::generate(input.begin(), input.end(), std::ref(f32rng));
102
103 xnn_status status = xnn_initialize(nullptr /* allocator */);
104 if (status != xnn_status_success) {
105 state.SkipWithError("failed to initialize XNNPACK");
106 return;
107 }
108
109 xnn_operator_t channel_shuffle_op = nullptr;
110 status = xnn_create_channel_shuffle_nc_x32(
111 groups, group_channels,
112 groups * group_channels /* input stride */,
113 groups * group_channels /* output stride */,
114 0 /* flags */, &channel_shuffle_op);
115 if (status != xnn_status_success || channel_shuffle_op == nullptr) {
116 state.SkipWithError("failed to create X32 Channel Shuffle operator");
117 return;
118 }
119
120 status = xnn_setup_channel_shuffle_nc_x32(
121 channel_shuffle_op,
122 batch_size,
123 input.data(), output.data(),
124 nullptr /* thread pool */);
125 if (status != xnn_status_success) {
126 state.SkipWithError("failed to setup X32 Channel Shuffle operator");
127 return;
128 }
129
130 for (auto _ : state) {
131 status = xnn_run_operator(channel_shuffle_op, nullptr /* thread pool */);
132 if (status != xnn_status_success) {
133 state.SkipWithError("failed to run X32 Channel Shuffle operator");
134 return;
135 }
136 }
137
138 status = xnn_delete_operator(channel_shuffle_op);
139 if (status != xnn_status_success) {
140 state.SkipWithError("failed to delete X32 Channel Shuffle operator");
141 return;
142 }
143
144 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
145 if (cpu_frequency != 0) {
146 state.counters["cpufreq"] = cpu_frequency;
147 }
148
149 const size_t elements_per_iteration = batch_size * groups * group_channels;
150 state.counters["elements"] =
151 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
152
153 const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(float);
154 state.counters["bytes"] =
155 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
156 }
157
ShuffleNetV1G2Arguments(benchmark::internal::Benchmark * b)158 static void ShuffleNetV1G2Arguments(benchmark::internal::Benchmark* b)
159 {
160 b->ArgNames({"N", "G", "GC"});
161
162 /******** Stage 2 ********/
163 /* H W G CG */
164 b->Args({56 * 56, 2, 25});
165 b->Args({28 * 28, 2, 25});
166
167 /******** Stage 3 ********/
168 /* H W G CG */
169 b->Args({28 * 28, 2, 50});
170 b->Args({14 * 14, 2, 50});
171
172 /******** Stage 4 ********/
173 /* H W G CG */
174 b->Args({14 * 14, 2, 100});
175 b->Args({ 7 * 7, 2, 100});
176 }
177
ShuffleNetV1G3Arguments(benchmark::internal::Benchmark * b)178 static void ShuffleNetV1G3Arguments(benchmark::internal::Benchmark* b)
179 {
180 b->ArgNames({"N", "G", "GC"});
181
182 /******** Stage 2 *******/
183 /* H W G CG */
184 b->Args({56 * 56, 3, 20});
185 b->Args({28 * 28, 3, 20});
186
187 /******** Stage 3 *******/
188 /* H W G CG */
189 b->Args({28 * 28, 3, 40});
190 b->Args({14 * 14, 3, 40});
191
192 /******** Stage 4 *******/
193 /* H W G CG */
194 b->Args({14 * 14, 3, 80});
195 b->Args({ 7 * 7, 3, 80});
196 }
197
ShuffleNetV1G4Arguments(benchmark::internal::Benchmark * b)198 static void ShuffleNetV1G4Arguments(benchmark::internal::Benchmark* b)
199 {
200 b->ArgNames({"N", "G", "GC"});
201
202 /******** Stage 2 *******/
203 /* H W G CG */
204 b->Args({56 * 56, 4, 17});
205 b->Args({28 * 28, 4, 17});
206
207 /******** Stage 3 *******/
208 /* H W G CG */
209 b->Args({28 * 28, 4, 34});
210 b->Args({14 * 14, 4, 34});
211
212 /******** Stage 4 *******/
213 /* H W G CG */
214 b->Args({14 * 14, 4, 68});
215 b->Args({ 7 * 7, 4, 68});
216 }
217
ShuffleNetV1G8Arguments(benchmark::internal::Benchmark * b)218 static void ShuffleNetV1G8Arguments(benchmark::internal::Benchmark* b)
219 {
220 b->ArgNames({"N", "G", "GC"});
221
222 /******** Stage 2 *******/
223 /* H W G CG */
224 b->Args({56 * 56, 8, 12});
225 b->Args({28 * 28, 8, 12});
226
227 /******** Stage 3 *******/
228 /* H W G CG */
229 b->Args({28 * 28, 8, 24});
230 b->Args({14 * 14, 8, 24});
231
232 /******** Stage 4 *******/
233 /* H W G CG */
234 b->Args({14 * 14, 8, 48});
235 b->Args({ 7 * 7, 8, 48});
236 }
237
ShuffleNetV2x0_5Arguments(benchmark::internal::Benchmark * b)238 static void ShuffleNetV2x0_5Arguments(benchmark::internal::Benchmark* b)
239 {
240 b->ArgNames({"N", "G", "GC"});
241
242 /******** Stage 2 *******/
243 /* H W G CG */
244 b->Args({28 * 28, 2, 24});
245
246 /******** Stage 3 *******/
247 /* H W G CG */
248 b->Args({14 * 14, 2, 48});
249
250 /******** Stage 4 *******/
251 /* H W G CG */
252 b->Args({ 7 * 7, 2, 96});
253 }
254
ShuffleNetV2x1_0Arguments(benchmark::internal::Benchmark * b)255 static void ShuffleNetV2x1_0Arguments(benchmark::internal::Benchmark* b)
256 {
257 b->ArgNames({"N", "G", "GC"});
258
259 /******** Stage 2 ********/
260 /* H W G CG */
261 b->Args({28 * 28, 2, 58});
262
263 /******** Stage 3 ********/
264 /* H W G CG */
265 b->Args({14 * 14, 2, 116});
266
267 /******** Stage 4 ********/
268 /* H W G CG */
269 b->Args({ 7 * 7, 2, 232});
270 }
271
ShuffleNetV2x1_5Arguments(benchmark::internal::Benchmark * b)272 static void ShuffleNetV2x1_5Arguments(benchmark::internal::Benchmark* b)
273 {
274 b->ArgNames({"N", "G", "GC"});
275
276 /******** Stage 2 ********/
277 /* H W G CG */
278 b->Args({28 * 28, 2, 88});
279
280 /******** Stage 3 ********/
281 /* H W G CG */
282 b->Args({14 * 14, 2, 176});
283
284 /******** Stage 4 ********/
285 /* H W G CG */
286 b->Args({ 7 * 7, 2, 352});
287 }
288
ShuffleNetV2x2_0Arguments(benchmark::internal::Benchmark * b)289 static void ShuffleNetV2x2_0Arguments(benchmark::internal::Benchmark* b)
290 {
291 b->ArgNames({"N", "G", "GC"});
292
293 /******** Stage 2 ********/
294 /* H W G CG */
295 b->Args({28 * 28, 2, 122});
296
297 /******** Stage 3 ********/
298 /* H W G CG */
299 b->Args({14 * 14, 2, 244});
300
301 /******** Stage 4 ********/
302 /* H W G CG */
303 b->Args({ 7 * 7, 2, 488});
304 }
305
306 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2Arguments)->UseRealTime();
307 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3Arguments)->UseRealTime();
308 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4Arguments)->UseRealTime();
309 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8Arguments)->UseRealTime();
310 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x05, "ShuffleNet v2 x0.5")->Apply(ShuffleNetV2x0_5Arguments)->UseRealTime();
311 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x10, "ShuffleNet v2 x1.0")->Apply(ShuffleNetV2x1_0Arguments)->UseRealTime();
312 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x15, "ShuffleNet v2 x1.5")->Apply(ShuffleNetV2x1_5Arguments)->UseRealTime();
313 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x20, "ShuffleNet v2 x2.0")->Apply(ShuffleNetV2x2_0Arguments)->UseRealTime();
314
315 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2Arguments)->UseRealTime();
316 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3Arguments)->UseRealTime();
317 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4Arguments)->UseRealTime();
318 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8Arguments)->UseRealTime();
319 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x05, "ShuffleNet v2 x0.5")->Apply(ShuffleNetV2x0_5Arguments)->UseRealTime();
320 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x10, "ShuffleNet v2 x1.0")->Apply(ShuffleNetV2x1_0Arguments)->UseRealTime();
321 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x15, "ShuffleNet v2 x1.5")->Apply(ShuffleNetV2x1_5Arguments)->UseRealTime();
322 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x20, "ShuffleNet v2 x2.0")->Apply(ShuffleNetV2x2_0Arguments)->UseRealTime();
323
324 #ifndef XNNPACK_BENCHMARK_NO_MAIN
325 BENCHMARK_MAIN();
326 #endif
327