• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>

#include <benchmark/benchmark.h>
#include "bench/utils.h"
20 
21 
max_pooling_u8(benchmark::State & state,const char * net)22 void max_pooling_u8(benchmark::State& state, const char* net) {
23   const size_t batch_size = state.range(0);
24   const size_t input_height = state.range(1);
25   const size_t input_width = state.range(2);
26   const size_t pooling_size = state.range(3);
27   const size_t padding_size = state.range(4);
28   const size_t stride = state.range(5);
29   const size_t channels = state.range(6);
30 
31   std::random_device random_device;
32   auto rng = std::mt19937(random_device());
33   auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);
34 
35   const size_t output_height = (2 * padding_size + input_height - pooling_size) / stride + 1;
36   const size_t output_width = (2 * padding_size + input_width - pooling_size) / stride + 1;
37 
38   std::vector<uint8_t> input(batch_size * input_height * input_width * channels);
39   std::generate(input.begin(), input.end(), std::ref(u8rng));
40   std::vector<uint8_t> output(batch_size * output_height * output_width * channels);
41   std::fill(output.begin(), output.end(), 0xA5);
42 
43   xnn_status status = xnn_initialize(nullptr /* allocator */);
44   if (status != xnn_status_success) {
45     state.SkipWithError("failed to initialize XNNPACK");
46     return;
47   }
48 
49   xnn_operator_t pooling_op = nullptr;
50   status = xnn_create_max_pooling2d_nhwc_u8(
51     padding_size, padding_size, padding_size, padding_size,
52     pooling_size, pooling_size,
53     stride, stride,
54     1 /* dilation height */, 1 /* dilation width */,
55     channels, channels /* input pixel stride */, channels /* output pixel stride */,
56     0, 255,
57     0 /* flags */, &pooling_op);
58   if (status != xnn_status_success) {
59     state.SkipWithError("failed to create Max Pooling operator");
60     return;
61   }
62 
63   status = xnn_setup_max_pooling2d_nhwc_u8(
64     pooling_op,
65     batch_size, input_height, input_width,
66     input.data(), output.data(),
67     nullptr /* thread pool */);
68   if (status != xnn_status_success) {
69     state.SkipWithError("failed to setup Max Pooling operator");
70     return;
71   }
72 
73   for (auto _ : state) {
74     status = xnn_run_operator(pooling_op, nullptr /* thread pool */);
75     if (status != xnn_status_success) {
76       state.SkipWithError("failed to run Max Pooling operator");
77       return;
78     }
79   }
80 
81   status = xnn_delete_operator(pooling_op);
82   if (status != xnn_status_success) {
83     state.SkipWithError("failed to delete Max Pooling operator");
84     return;
85   }
86   pooling_op = nullptr;
87 
88   state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
89   state.counters["bytes"] = benchmark::Counter(
90     uint64_t(state.iterations()) *
91       batch_size * (input_height * input_width + output_height * output_width) * channels * sizeof(uint8_t),
92     benchmark::Counter::kIsRate);
93 }
94 
max_pooling_f32(benchmark::State & state,const char * net)95 void max_pooling_f32(benchmark::State& state, const char* net) {
96   const size_t batch_size = state.range(0);
97   const size_t input_height = state.range(1);
98   const size_t input_width = state.range(2);
99   const size_t pooling_size = state.range(3);
100   const size_t padding_size = state.range(4);
101   const size_t stride = state.range(5);
102   const size_t channels = state.range(6);
103 
104   std::random_device random_device;
105   auto rng = std::mt19937(random_device());
106   auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
107 
108   const size_t output_height = (2 * padding_size + input_height - pooling_size) / stride + 1;
109   const size_t output_width = (2 * padding_size + input_width - pooling_size) / stride + 1;
110 
111   std::vector<float> input(batch_size * input_height * input_width * channels);
112   std::generate(input.begin(), input.end(), std::ref(f32rng));
113   std::vector<float> output(batch_size * output_height * output_width * channels);
114   std::fill(output.begin(), output.end(), nanf(""));
115 
116   xnn_status status = xnn_initialize(nullptr /* allocator */);
117   if (status != xnn_status_success) {
118     state.SkipWithError("failed to initialize XNNPACK");
119     return;
120   }
121 
122   xnn_operator_t pooling_op = nullptr;
123   status = xnn_create_max_pooling2d_nhwc_f32(
124     padding_size, padding_size, padding_size, padding_size,
125     pooling_size, pooling_size,
126     stride, stride,
127     1 /* dilation height */, 1 /* dilation width */,
128     channels, channels /* input pixel stride */, channels /* output pixel stride */,
129     -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
130     0 /* flags */, &pooling_op);
131   if (status != xnn_status_success) {
132     state.SkipWithError("failed to create Max Pooling operator");
133     return;
134   }
135 
136   status = xnn_setup_max_pooling2d_nhwc_f32(
137     pooling_op,
138     batch_size, input_height, input_width,
139     input.data(), output.data(),
140     nullptr /* thread pool */);
141   if (status != xnn_status_success) {
142     state.SkipWithError("failed to setup Max Pooling operator");
143     return;
144   }
145 
146   for (auto _ : state) {
147     status = xnn_run_operator(pooling_op, nullptr /* thread pool */);
148     if (status != xnn_status_success) {
149       state.SkipWithError("failed to run Max Pooling operator");
150       return;
151     }
152   }
153 
154   status = xnn_delete_operator(pooling_op);
155   if (status != xnn_status_success) {
156     state.SkipWithError("failed to delete Max Pooling operator");
157     return;
158   }
159   pooling_op = nullptr;
160 
161   state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
162   state.counters["bytes"] = benchmark::Counter(
163     uint64_t(state.iterations()) *
164       batch_size * (input_height * input_width + output_height * output_width) * channels * sizeof(float),
165     benchmark::Counter::kIsRate);
166 }
167 
168 // ShuffleNet v1/v2.
ShuffleNet(benchmark::internal::Benchmark * b)169 static void ShuffleNet(benchmark::internal::Benchmark* b) {
170   b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
171 
172   /*       N   H   W    K  P  S   C */
173   b->Args({1, 112, 112, 3, 1, 2, 24});
174 }
175 
176 // SqueezeNet 1.0
SqueezeNetV10(benchmark::internal::Benchmark * b)177 static void SqueezeNetV10(benchmark::internal::Benchmark* b) {
178   b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
179 
180   /*********** MaxPool 1 ************/
181   /*       N   H    W   K  P  S   C */
182   b->Args({1, 111, 111, 3, 0, 2,  96});
183   /*********** MaxPool 4 ************/
184   /*       N   H    W   K  P  S   C */
185   b->Args({1,  27,  27, 3, 0, 2, 256});
186   /*********** MaxPool 8 ************/
187   /*       N   H    W   K  P  S   C */
188   b->Args({1,  13,  13, 3, 0, 2, 512});
189 }
190 
191 // SqueezeNet 1.1
SqueezeNetV11(benchmark::internal::Benchmark * b)192 static void SqueezeNetV11(benchmark::internal::Benchmark* b) {
193   b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
194 
195   /*********** MaxPool 1 ***********/
196   /*       N   H    W   K  P  S   C */
197   b->Args({1, 111, 111, 3, 0, 2,  64});
198   /*********** MaxPool 3 ************/
199   /*       N   H    W   K  P  S   C */
200   b->Args({1,  55,  55, 3, 0, 2, 128});
201   /*********** MaxPool 5 ************/
202   /*       N   H    W   K  P  S   C */
203   b->Args({1,  13,  13, 3, 0, 2, 256});
204 }
205 
VGG(benchmark::internal::Benchmark * b)206 static void VGG(benchmark::internal::Benchmark* b) {
207   b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
208 
209   /*       N   H    W   K  P  S   C */
210   b->Args({1, 224, 224, 2, 1, 2,  64});
211   b->Args({1, 112, 112, 2, 1, 2, 128});
212   b->Args({1,  56,  56, 2, 1, 2, 256});
213   b->Args({1,  28,  28, 2, 1, 2, 512});
214   b->Args({1,  14,  14, 2, 1, 2, 512});
215 }
216 
217 BENCHMARK_CAPTURE(max_pooling_f32, shufflenet, "ShuffleNet v1/v2")->Apply(ShuffleNet)->UseRealTime();
218 BENCHMARK_CAPTURE(max_pooling_f32, squeezenet_v10, "SqueezeNet v1.0")->Apply(SqueezeNetV10)->UseRealTime();
219 BENCHMARK_CAPTURE(max_pooling_f32, squeezenet_v11, "SqueezeNet v1.1")->Apply(SqueezeNetV11)->UseRealTime();
220 BENCHMARK_CAPTURE(max_pooling_f32, vgg, "VGG")->Apply(VGG);
221 
222 BENCHMARK_CAPTURE(max_pooling_u8, shufflenet, "ShuffleNet v1/v2")->Apply(ShuffleNet)->UseRealTime();
223 BENCHMARK_CAPTURE(max_pooling_u8, squeezenet_v10, "SqueezeNet v1.0")->Apply(SqueezeNetV10)->UseRealTime();
224 BENCHMARK_CAPTURE(max_pooling_u8, squeezenet_v11, "SqueezeNet v1.1")->Apply(SqueezeNetV11)->UseRealTime();
225 BENCHMARK_CAPTURE(max_pooling_u8, vgg, "VGG")->Apply(VGG);
226 
227 #ifndef XNNPACK_BENCHMARK_NO_MAIN
228 BENCHMARK_MAIN();
229 #endif
230