• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <algorithm>
10 #include <cmath>
11 #include <functional>
12 #include <limits>
13 #include <random>
14 #include <vector>
15 
16 #include <xnnpack.h>
17 
18 #include <benchmark/benchmark.h>
19 #include "bench/utils.h"
20 
21 
channel_shuffle_x8(benchmark::State & state,const char * net)22 static void channel_shuffle_x8(benchmark::State& state, const char* net) {
23   const size_t batch_size = static_cast<size_t>(state.range(0));
24   const size_t groups = static_cast<size_t>(state.range(1));
25   const size_t group_channels = static_cast<size_t>(state.range(2));
26 
27   std::random_device random_device;
28   auto rng = std::mt19937(random_device());
29   auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
30 
31   std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + batch_size * groups * group_channels);
32   std::vector<uint8_t> output(batch_size * groups * group_channels);
33   std::generate(input.begin(), input.end(), std::ref(u8rng));
34 
35   xnn_status status = xnn_initialize(nullptr /* allocator */);
36   if (status != xnn_status_success) {
37     state.SkipWithError("failed to initialize XNNPACK");
38     return;
39   }
40 
41   xnn_operator_t channel_shuffle_op = nullptr;
42   status = xnn_create_channel_shuffle_nc_x8(
43     groups, group_channels,
44     groups * group_channels /* input stride */,
45     groups * group_channels /* output stride */,
46     0 /* flags */, &channel_shuffle_op);
47   if (status != xnn_status_success || channel_shuffle_op == nullptr) {
48     state.SkipWithError("failed to create X8 Channel Shuffle operator");
49     return;
50   }
51 
52   status = xnn_setup_channel_shuffle_nc_x8(
53     channel_shuffle_op,
54     batch_size,
55     input.data(), output.data(),
56     nullptr /* thread pool */);
57   if (status != xnn_status_success) {
58     state.SkipWithError("failed to setup X8 Channel Shuffle operator");
59     return;
60   }
61 
62   for (auto _ : state) {
63     status = xnn_run_operator(channel_shuffle_op, nullptr /* thread pool */);
64     if (status != xnn_status_success) {
65       state.SkipWithError("failed to run X8 Channel Shuffle operator");
66       return;
67     }
68   }
69 
70   status = xnn_delete_operator(channel_shuffle_op);
71   if (status != xnn_status_success) {
72     state.SkipWithError("failed to delete X8 Channel Shuffle operator");
73     return;
74   }
75 
76   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
77   if (cpu_frequency != 0) {
78     state.counters["cpufreq"] = cpu_frequency;
79   }
80 
81   const size_t elements_per_iteration = batch_size * groups * group_channels;
82   state.counters["elements"] =
83     benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
84 
85   const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(uint8_t);
86   state.counters["bytes"] =
87     benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
88 }
89 
channel_shuffle_x32(benchmark::State & state,const char * net)90 static void channel_shuffle_x32(benchmark::State& state, const char* net) {
91   const size_t batch_size = static_cast<size_t>(state.range(0));
92   const size_t groups = static_cast<size_t>(state.range(1));
93   const size_t group_channels = static_cast<size_t>(state.range(2));
94 
95   std::random_device random_device;
96   auto rng = std::mt19937(random_device());
97   auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
98 
99   std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + batch_size * groups * group_channels);
100   std::vector<float> output(batch_size * groups * group_channels);
101   std::generate(input.begin(), input.end(), std::ref(f32rng));
102 
103   xnn_status status = xnn_initialize(nullptr /* allocator */);
104   if (status != xnn_status_success) {
105     state.SkipWithError("failed to initialize XNNPACK");
106     return;
107   }
108 
109   xnn_operator_t channel_shuffle_op = nullptr;
110   status = xnn_create_channel_shuffle_nc_x32(
111     groups, group_channels,
112     groups * group_channels /* input stride */,
113     groups * group_channels /* output stride */,
114     0 /* flags */, &channel_shuffle_op);
115   if (status != xnn_status_success || channel_shuffle_op == nullptr) {
116     state.SkipWithError("failed to create X32 Channel Shuffle operator");
117     return;
118   }
119 
120   status = xnn_setup_channel_shuffle_nc_x32(
121     channel_shuffle_op,
122     batch_size,
123     input.data(), output.data(),
124     nullptr /* thread pool */);
125   if (status != xnn_status_success) {
126     state.SkipWithError("failed to setup X32 Channel Shuffle operator");
127     return;
128   }
129 
130   for (auto _ : state) {
131     status = xnn_run_operator(channel_shuffle_op, nullptr /* thread pool */);
132     if (status != xnn_status_success) {
133       state.SkipWithError("failed to run X32 Channel Shuffle operator");
134       return;
135     }
136   }
137 
138   status = xnn_delete_operator(channel_shuffle_op);
139   if (status != xnn_status_success) {
140     state.SkipWithError("failed to delete X32 Channel Shuffle operator");
141     return;
142   }
143 
144   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
145   if (cpu_frequency != 0) {
146     state.counters["cpufreq"] = cpu_frequency;
147   }
148 
149   const size_t elements_per_iteration = batch_size * groups * group_channels;
150   state.counters["elements"] =
151     benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
152 
153   const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(float);
154   state.counters["bytes"] =
155     benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
156 }
157 
ShuffleNetV1G2Arguments(benchmark::internal::Benchmark * b)158 static void ShuffleNetV1G2Arguments(benchmark::internal::Benchmark* b)
159 {
160   b->ArgNames({"N", "G", "GC"});
161 
162   /******** Stage 2 ********/
163   /*        H    W  G   CG */
164   b->Args({56 * 56, 2,  25});
165   b->Args({28 * 28, 2,  25});
166 
167   /******** Stage 3 ********/
168   /*        H    W  G   CG */
169   b->Args({28 * 28, 2,  50});
170   b->Args({14 * 14, 2,  50});
171 
172   /******** Stage 4 ********/
173   /*        H    W  G   CG */
174   b->Args({14 * 14, 2, 100});
175   b->Args({ 7 *  7, 2, 100});
176 }
177 
ShuffleNetV1G3Arguments(benchmark::internal::Benchmark * b)178 static void ShuffleNetV1G3Arguments(benchmark::internal::Benchmark* b)
179 {
180   b->ArgNames({"N", "G", "GC"});
181 
182   /******** Stage 2 *******/
183   /*        H    W  G  CG */
184   b->Args({56 * 56, 3, 20});
185   b->Args({28 * 28, 3, 20});
186 
187   /******** Stage 3 *******/
188   /*        H    W  G  CG */
189   b->Args({28 * 28, 3, 40});
190   b->Args({14 * 14, 3, 40});
191 
192   /******** Stage 4 *******/
193   /*        H    W  G  CG */
194   b->Args({14 * 14, 3, 80});
195   b->Args({ 7 *  7, 3, 80});
196 }
197 
ShuffleNetV1G4Arguments(benchmark::internal::Benchmark * b)198 static void ShuffleNetV1G4Arguments(benchmark::internal::Benchmark* b)
199 {
200   b->ArgNames({"N", "G", "GC"});
201 
202   /******** Stage 2 *******/
203   /*        H    W  G  CG */
204   b->Args({56 * 56, 4, 17});
205   b->Args({28 * 28, 4, 17});
206 
207   /******** Stage 3 *******/
208   /*        H    W  G  CG */
209   b->Args({28 * 28, 4, 34});
210   b->Args({14 * 14, 4, 34});
211 
212   /******** Stage 4 *******/
213   /*        H    W  G  CG */
214   b->Args({14 * 14, 4, 68});
215   b->Args({ 7 *  7, 4, 68});
216 }
217 
ShuffleNetV1G8Arguments(benchmark::internal::Benchmark * b)218 static void ShuffleNetV1G8Arguments(benchmark::internal::Benchmark* b)
219 {
220   b->ArgNames({"N", "G", "GC"});
221 
222   /******** Stage 2 *******/
223   /*        H    W  G  CG */
224   b->Args({56 * 56, 8, 12});
225   b->Args({28 * 28, 8, 12});
226 
227   /******** Stage 3 *******/
228   /*        H    W  G  CG */
229   b->Args({28 * 28, 8, 24});
230   b->Args({14 * 14, 8, 24});
231 
232   /******** Stage 4 *******/
233   /*        H    W  G  CG */
234   b->Args({14 * 14, 8, 48});
235   b->Args({ 7 *  7, 8, 48});
236 }
237 
ShuffleNetV2x0_5Arguments(benchmark::internal::Benchmark * b)238 static void ShuffleNetV2x0_5Arguments(benchmark::internal::Benchmark* b)
239 {
240   b->ArgNames({"N", "G", "GC"});
241 
242   /******** Stage 2 *******/
243   /*        H    W  G  CG */
244   b->Args({28 * 28, 2, 24});
245 
246   /******** Stage 3 *******/
247   /*        H    W  G  CG */
248   b->Args({14 * 14, 2, 48});
249 
250   /******** Stage 4 *******/
251   /*        H    W  G  CG */
252   b->Args({ 7 *  7, 2, 96});
253 }
254 
ShuffleNetV2x1_0Arguments(benchmark::internal::Benchmark * b)255 static void ShuffleNetV2x1_0Arguments(benchmark::internal::Benchmark* b)
256 {
257   b->ArgNames({"N", "G", "GC"});
258 
259   /******** Stage 2 ********/
260   /*        H    W  G   CG */
261   b->Args({28 * 28, 2,  58});
262 
263   /******** Stage 3 ********/
264   /*        H    W  G   CG */
265   b->Args({14 * 14, 2, 116});
266 
267   /******** Stage 4 ********/
268   /*        H    W  G   CG */
269   b->Args({ 7 *  7, 2, 232});
270 }
271 
ShuffleNetV2x1_5Arguments(benchmark::internal::Benchmark * b)272 static void ShuffleNetV2x1_5Arguments(benchmark::internal::Benchmark* b)
273 {
274   b->ArgNames({"N", "G", "GC"});
275 
276   /******** Stage 2 ********/
277   /*        H    W  G   CG */
278   b->Args({28 * 28, 2,  88});
279 
280   /******** Stage 3 ********/
281   /*        H    W  G   CG */
282   b->Args({14 * 14, 2, 176});
283 
284   /******** Stage 4 ********/
285   /*        H    W  G   CG */
286   b->Args({ 7 *  7, 2, 352});
287 }
288 
ShuffleNetV2x2_0Arguments(benchmark::internal::Benchmark * b)289 static void ShuffleNetV2x2_0Arguments(benchmark::internal::Benchmark* b)
290 {
291   b->ArgNames({"N", "G", "GC"});
292 
293   /******** Stage 2 ********/
294   /*        H    W  G   CG */
295   b->Args({28 * 28, 2, 122});
296 
297   /******** Stage 3 ********/
298   /*        H    W  G   CG */
299   b->Args({14 * 14, 2, 244});
300 
301   /******** Stage 4 ********/
302   /*        H    W  G   CG */
303   b->Args({ 7 *  7, 2, 488});
304 }
305 
306 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2Arguments)->UseRealTime();
307 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3Arguments)->UseRealTime();
308 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4Arguments)->UseRealTime();
309 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8Arguments)->UseRealTime();
310 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x05, "ShuffleNet v2 x0.5")->Apply(ShuffleNetV2x0_5Arguments)->UseRealTime();
311 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x10, "ShuffleNet v2 x1.0")->Apply(ShuffleNetV2x1_0Arguments)->UseRealTime();
312 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x15, "ShuffleNet v2 x1.5")->Apply(ShuffleNetV2x1_5Arguments)->UseRealTime();
313 BENCHMARK_CAPTURE(channel_shuffle_x8, shufflenet_v2_x20, "ShuffleNet v2 x2.0")->Apply(ShuffleNetV2x2_0Arguments)->UseRealTime();
314 
315 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2Arguments)->UseRealTime();
316 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3Arguments)->UseRealTime();
317 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4Arguments)->UseRealTime();
318 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8Arguments)->UseRealTime();
319 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x05, "ShuffleNet v2 x0.5")->Apply(ShuffleNetV2x0_5Arguments)->UseRealTime();
320 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x10, "ShuffleNet v2 x1.0")->Apply(ShuffleNetV2x1_0Arguments)->UseRealTime();
321 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x15, "ShuffleNet v2 x1.5")->Apply(ShuffleNetV2x1_5Arguments)->UseRealTime();
322 BENCHMARK_CAPTURE(channel_shuffle_x32, shufflenet_v2_x20, "ShuffleNet v2 x2.0")->Apply(ShuffleNetV2x2_0Arguments)->UseRealTime();
323 
324 #ifndef XNNPACK_BENCHMARK_NO_MAIN
325 BENCHMARK_MAIN();
326 #endif
327