1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <functional>
#include <limits>
#include <random>
#include <vector>
15
16 #include <xnnpack.h>
17
18 #include <benchmark/benchmark.h>
19 #include "bench/utils.h"
20
21
max_pooling_u8(benchmark::State & state,const char * net)22 void max_pooling_u8(benchmark::State& state, const char* net) {
23 const size_t batch_size = state.range(0);
24 const size_t input_height = state.range(1);
25 const size_t input_width = state.range(2);
26 const size_t pooling_size = state.range(3);
27 const size_t padding_size = state.range(4);
28 const size_t stride = state.range(5);
29 const size_t channels = state.range(6);
30
31 std::random_device random_device;
32 auto rng = std::mt19937(random_device());
33 auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);
34
35 const size_t output_height = (2 * padding_size + input_height - pooling_size) / stride + 1;
36 const size_t output_width = (2 * padding_size + input_width - pooling_size) / stride + 1;
37
38 std::vector<uint8_t> input(batch_size * input_height * input_width * channels);
39 std::generate(input.begin(), input.end(), std::ref(u8rng));
40 std::vector<uint8_t> output(batch_size * output_height * output_width * channels);
41 std::fill(output.begin(), output.end(), 0xA5);
42
43 xnn_status status = xnn_initialize(nullptr /* allocator */);
44 if (status != xnn_status_success) {
45 state.SkipWithError("failed to initialize XNNPACK");
46 return;
47 }
48
49 xnn_operator_t pooling_op = nullptr;
50 status = xnn_create_max_pooling2d_nhwc_u8(
51 padding_size, padding_size, padding_size, padding_size,
52 pooling_size, pooling_size,
53 stride, stride,
54 1 /* dilation height */, 1 /* dilation width */,
55 channels, channels /* input pixel stride */, channels /* output pixel stride */,
56 0, 255,
57 0 /* flags */, &pooling_op);
58 if (status != xnn_status_success) {
59 state.SkipWithError("failed to create Max Pooling operator");
60 return;
61 }
62
63 status = xnn_setup_max_pooling2d_nhwc_u8(
64 pooling_op,
65 batch_size, input_height, input_width,
66 input.data(), output.data(),
67 nullptr /* thread pool */);
68 if (status != xnn_status_success) {
69 state.SkipWithError("failed to setup Max Pooling operator");
70 return;
71 }
72
73 for (auto _ : state) {
74 status = xnn_run_operator(pooling_op, nullptr /* thread pool */);
75 if (status != xnn_status_success) {
76 state.SkipWithError("failed to run Max Pooling operator");
77 return;
78 }
79 }
80
81 status = xnn_delete_operator(pooling_op);
82 if (status != xnn_status_success) {
83 state.SkipWithError("failed to delete Max Pooling operator");
84 return;
85 }
86 pooling_op = nullptr;
87
88 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
89 state.counters["bytes"] = benchmark::Counter(
90 uint64_t(state.iterations()) *
91 batch_size * (input_height * input_width + output_height * output_width) * channels * sizeof(uint8_t),
92 benchmark::Counter::kIsRate);
93 }
94
max_pooling_f32(benchmark::State & state,const char * net)95 void max_pooling_f32(benchmark::State& state, const char* net) {
96 const size_t batch_size = state.range(0);
97 const size_t input_height = state.range(1);
98 const size_t input_width = state.range(2);
99 const size_t pooling_size = state.range(3);
100 const size_t padding_size = state.range(4);
101 const size_t stride = state.range(5);
102 const size_t channels = state.range(6);
103
104 std::random_device random_device;
105 auto rng = std::mt19937(random_device());
106 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
107
108 const size_t output_height = (2 * padding_size + input_height - pooling_size) / stride + 1;
109 const size_t output_width = (2 * padding_size + input_width - pooling_size) / stride + 1;
110
111 std::vector<float> input(batch_size * input_height * input_width * channels);
112 std::generate(input.begin(), input.end(), std::ref(f32rng));
113 std::vector<float> output(batch_size * output_height * output_width * channels);
114 std::fill(output.begin(), output.end(), nanf(""));
115
116 xnn_status status = xnn_initialize(nullptr /* allocator */);
117 if (status != xnn_status_success) {
118 state.SkipWithError("failed to initialize XNNPACK");
119 return;
120 }
121
122 xnn_operator_t pooling_op = nullptr;
123 status = xnn_create_max_pooling2d_nhwc_f32(
124 padding_size, padding_size, padding_size, padding_size,
125 pooling_size, pooling_size,
126 stride, stride,
127 1 /* dilation height */, 1 /* dilation width */,
128 channels, channels /* input pixel stride */, channels /* output pixel stride */,
129 -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
130 0 /* flags */, &pooling_op);
131 if (status != xnn_status_success) {
132 state.SkipWithError("failed to create Max Pooling operator");
133 return;
134 }
135
136 status = xnn_setup_max_pooling2d_nhwc_f32(
137 pooling_op,
138 batch_size, input_height, input_width,
139 input.data(), output.data(),
140 nullptr /* thread pool */);
141 if (status != xnn_status_success) {
142 state.SkipWithError("failed to setup Max Pooling operator");
143 return;
144 }
145
146 for (auto _ : state) {
147 status = xnn_run_operator(pooling_op, nullptr /* thread pool */);
148 if (status != xnn_status_success) {
149 state.SkipWithError("failed to run Max Pooling operator");
150 return;
151 }
152 }
153
154 status = xnn_delete_operator(pooling_op);
155 if (status != xnn_status_success) {
156 state.SkipWithError("failed to delete Max Pooling operator");
157 return;
158 }
159 pooling_op = nullptr;
160
161 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
162 state.counters["bytes"] = benchmark::Counter(
163 uint64_t(state.iterations()) *
164 batch_size * (input_height * input_width + output_height * output_width) * channels * sizeof(float),
165 benchmark::Counter::kIsRate);
166 }
167
168 // ShuffleNet v1/v2.
ShuffleNet(benchmark::internal::Benchmark * b)169 static void ShuffleNet(benchmark::internal::Benchmark* b) {
170 b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
171
172 /* N H W K P S C */
173 b->Args({1, 112, 112, 3, 1, 2, 24});
174 }
175
176 // SqueezeNet 1.0
SqueezeNetV10(benchmark::internal::Benchmark * b)177 static void SqueezeNetV10(benchmark::internal::Benchmark* b) {
178 b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
179
180 /*********** MaxPool 1 ************/
181 /* N H W K P S C */
182 b->Args({1, 111, 111, 3, 0, 2, 96});
183 /*********** MaxPool 4 ************/
184 /* N H W K P S C */
185 b->Args({1, 27, 27, 3, 0, 2, 256});
186 /*********** MaxPool 8 ************/
187 /* N H W K P S C */
188 b->Args({1, 13, 13, 3, 0, 2, 512});
189 }
190
191 // SqueezeNet 1.1
SqueezeNetV11(benchmark::internal::Benchmark * b)192 static void SqueezeNetV11(benchmark::internal::Benchmark* b) {
193 b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
194
195 /*********** MaxPool 1 ***********/
196 /* N H W K P S C */
197 b->Args({1, 111, 111, 3, 0, 2, 64});
198 /*********** MaxPool 3 ************/
199 /* N H W K P S C */
200 b->Args({1, 55, 55, 3, 0, 2, 128});
201 /*********** MaxPool 5 ************/
202 /* N H W K P S C */
203 b->Args({1, 13, 13, 3, 0, 2, 256});
204 }
205
VGG(benchmark::internal::Benchmark * b)206 static void VGG(benchmark::internal::Benchmark* b) {
207 b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
208
209 /* N H W K P S C */
210 b->Args({1, 224, 224, 2, 1, 2, 64});
211 b->Args({1, 112, 112, 2, 1, 2, 128});
212 b->Args({1, 56, 56, 2, 1, 2, 256});
213 b->Args({1, 28, 28, 2, 1, 2, 512});
214 b->Args({1, 14, 14, 2, 1, 2, 512});
215 }
216
217 BENCHMARK_CAPTURE(max_pooling_f32, shufflenet, "ShuffleNet v1/v2")->Apply(ShuffleNet)->UseRealTime();
218 BENCHMARK_CAPTURE(max_pooling_f32, squeezenet_v10, "SqueezeNet v1.0")->Apply(SqueezeNetV10)->UseRealTime();
219 BENCHMARK_CAPTURE(max_pooling_f32, squeezenet_v11, "SqueezeNet v1.1")->Apply(SqueezeNetV11)->UseRealTime();
220 BENCHMARK_CAPTURE(max_pooling_f32, vgg, "VGG")->Apply(VGG);
221
222 BENCHMARK_CAPTURE(max_pooling_u8, shufflenet, "ShuffleNet v1/v2")->Apply(ShuffleNet)->UseRealTime();
223 BENCHMARK_CAPTURE(max_pooling_u8, squeezenet_v10, "SqueezeNet v1.0")->Apply(SqueezeNetV10)->UseRealTime();
224 BENCHMARK_CAPTURE(max_pooling_u8, squeezenet_v11, "SqueezeNet v1.1")->Apply(SqueezeNetV11)->UseRealTime();
225 BENCHMARK_CAPTURE(max_pooling_u8, vgg, "VGG")->Apply(VGG);
226
227 #ifndef XNNPACK_BENCHMARK_NO_MAIN
228 BENCHMARK_MAIN();
229 #endif
230