1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 9 #pragma once 10 11 #include <gtest/gtest.h> 12 13 #include <algorithm> 14 #include <cassert> 15 #include <cstddef> 16 #include <cstdlib> 17 #include <functional> 18 #include <random> 19 #include <vector> 20 21 #include <xnnpack.h> 22 #include <xnnpack/params-init.h> 23 #include <xnnpack/params.h> 24 25 26 class MaxPoolMicrokernelTester { 27 public: 28 enum class Variant { 29 Native, 30 Scalar, 31 }; 32 output_pixels(size_t output_pixels)33 inline MaxPoolMicrokernelTester& output_pixels(size_t output_pixels) { 34 assert(output_pixels != 0); 35 this->output_pixels_ = output_pixels; 36 return *this; 37 } 38 output_pixels()39 inline size_t output_pixels() const { 40 return this->output_pixels_; 41 } 42 step(size_t step)43 inline MaxPoolMicrokernelTester& step(size_t step) { 44 assert(step != 0); 45 this->step_ = step; 46 return *this; 47 } 48 step()49 inline size_t step() const { 50 return this->step_; 51 } 52 input_offset(size_t input_offset)53 inline MaxPoolMicrokernelTester& input_offset(size_t input_offset) { 54 assert(input_offset != 0); 55 this->input_offset_ = input_offset; 56 return *this; 57 } 58 input_offset()59 inline size_t input_offset() const { 60 return this->input_offset_; 61 } 62 pooling_elements(size_t pooling_elements)63 inline MaxPoolMicrokernelTester& pooling_elements(size_t pooling_elements) { 64 assert(pooling_elements != 0); 65 this->pooling_elements_ = pooling_elements; 66 return *this; 67 } 68 pooling_elements()69 inline size_t pooling_elements() const { 70 return this->pooling_elements_; 71 } 72 packed_pooling_elements()73 inline size_t packed_pooling_elements() const { 74 if (pooling_elements() <= primary_pooling_tile()) { 75 return primary_pooling_tile(); 76 } else { 77 return (pooling_elements() - primary_pooling_tile()) % incremental_pooling_tile() == 0 ? pooling_elements() : ((pooling_elements() - primary_pooling_tile()) / incremental_pooling_tile() + 1) * incremental_pooling_tile() + primary_pooling_tile(); 78 } 79 } 80 pooling_tile(size_t primary_tile,size_t incremental_tile)81 inline MaxPoolMicrokernelTester& pooling_tile(size_t primary_tile, size_t incremental_tile) { 82 assert(primary_tile != 0); 83 this->primary_pooling_tile_ = primary_tile; 84 this->incremental_pooling_tile_ = incremental_tile; 85 return *this; 86 } 87 primary_pooling_tile(size_t primary_pooling_tile)88 inline MaxPoolMicrokernelTester& primary_pooling_tile(size_t primary_pooling_tile) { 89 assert(primary_pooling_tile != 0); 90 this->primary_pooling_tile_ = primary_pooling_tile; 91 return *this; 92 } 93 primary_pooling_tile()94 inline size_t primary_pooling_tile() const { 95 return this->primary_pooling_tile_; 96 } 97 incremental_pooling_tile(size_t incremental_pooling_tile)98 inline MaxPoolMicrokernelTester& incremental_pooling_tile(size_t incremental_pooling_tile) { 99 assert(incremental_pooling_tile != 0); 100 this->incremental_pooling_tile_ = incremental_pooling_tile; 101 return *this; 102 } 103 incremental_pooling_tile()104 inline size_t incremental_pooling_tile() const { 105 return this->incremental_pooling_tile_; 106 } 107 channels(size_t channels)108 inline MaxPoolMicrokernelTester& channels(size_t channels) { 109 assert(channels != 0); 110 this->channels_ = channels; 111 return *this; 112 } 113 channels()114 inline size_t channels() const { 115 return this->channels_; 116 } 117 output_stride(size_t output_stride)118 inline MaxPoolMicrokernelTester& output_stride(size_t output_stride) { 119 assert(output_stride != 0); 120 this->output_stride_ = output_stride; 121 return *this; 122 } 123 output_stride()124 inline size_t output_stride() const { 125 if (this->output_stride_ == 0) { 126 return channels(); 127 } else { 128 assert(this->output_stride_ >= channels()); 129 return this->output_stride_; 130 } 131 } 132 qmin(uint8_t qmin)133 inline MaxPoolMicrokernelTester& qmin(uint8_t qmin) { 134 this->qmin_ = qmin; 135 return *this; 136 } 137 qmin()138 inline uint8_t qmin() const { 139 return this->qmin_; 140 } 141 qmax(uint8_t qmax)142 inline MaxPoolMicrokernelTester& qmax(uint8_t qmax) { 143 this->qmax_ = qmax; 144 return *this; 145 } 146 qmax()147 inline uint8_t qmax() const { 148 return this->qmax_; 149 } 150 iterations(size_t iterations)151 inline MaxPoolMicrokernelTester& iterations(size_t iterations) { 152 this->iterations_ = iterations; 153 return *this; 154 } 155 iterations()156 inline size_t iterations() const { 157 return this->iterations_; 158 } 159 160 void Test(xnn_u8_maxpool_ukernel_function maxpool, Variant variant = Variant::Native) const { 161 std::random_device random_device; 162 auto rng = std::mt19937(random_device()); 163 auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng); 164 165 std::vector<const uint8_t*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements()); 166 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + 167 indirect_input.size() * channels()); 168 std::vector<uint8_t> output(XNN_EXTRA_BYTES / sizeof(uint8_t) + 169 (output_pixels() - 1) * output_stride() + channels()); 170 std::vector<uint8_t> output_ref(output_pixels() * channels()); 171 for (size_t iteration = 0; iteration < iterations(); iteration++) { 172 do { 173 std::generate(input.begin(), input.end(), std::ref(u8rng)); 174 } while (input.size() > 1 && *std::max_element(input.cbegin(), input.cend()) == *std::min_element(input.cbegin(), input.cend())); 175 std::fill(output.begin(), output.end(), 0xA5); 176 177 for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) { 178 indirect_input[i] = input.data() + i * channels() - input_offset(); 179 } 180 std::shuffle(indirect_input.begin(), 181 indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng); 182 183 // Prepare output parameters. 184 xnn_u8_output_params output_params = { }; 185 switch (variant) { 186 case Variant::Native: 187 output_params = xnn_init_u8_output_params(qmin(), qmax()); 188 break; 189 case Variant::Scalar: 190 output_params = xnn_init_scalar_u8_output_params(qmin(), qmax()); 191 break; 192 } 193 194 // Compute reference results. 195 for (size_t x = 0; x < output_pixels(); x++) { 196 for (size_t c = 0; c < channels(); c++) { 197 uint8_t max_value = 0; 198 for (size_t p = 0; p < pooling_elements(); p++) { 199 max_value = std::max(max_value, indirect_input[x * step() + p][c + input_offset()]); 200 } 201 max_value = std::min(max_value, qmax()); 202 max_value = std::max(max_value, qmin()); 203 output_ref[x * channels() + c] = max_value; 204 } 205 } 206 207 // Call optimized micro-kernel. 208 maxpool(output_pixels(), pooling_elements(), channels(), 209 indirect_input.data(), input_offset() * sizeof(uint8_t), output.data(), 210 (step() - packed_pooling_elements()) * sizeof(void*), 211 (output_stride() - channels()) * sizeof(uint8_t), 212 &output_params); 213 214 // Verify results. 215 for (size_t x = 0; x < output_pixels(); x++) { 216 for (size_t c = 0; c < channels(); c++) { 217 ASSERT_GE(uint32_t(output[x * output_stride() + c]), uint32_t(qmin())) 218 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 219 << ", pooling elements = " << pooling_elements() << ", step = " << step() 220 << ", input offset = " << input_offset(); 221 ASSERT_LE(uint32_t(output[x * output_stride() + c]), uint32_t(qmax())) 222 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 223 << ", pooling elements = " << pooling_elements() << ", step = " << step() 224 << ", input offset = " << input_offset(); 225 ASSERT_EQ(uint32_t(output_ref[x * channels() + c]), uint32_t(output[x * output_stride() + c])) 226 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 227 << ", pooling elements = " << pooling_elements() << ", step = " << step() 228 << ", input offset = " << input_offset(); 229 } 230 } 231 } 232 } 233 234 void Test(xnn_f32_maxpool_ukernel_function maxpool, Variant variant = Variant::Native) const { 235 std::random_device random_device; 236 auto rng = std::mt19937(random_device()); 237 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng); 238 239 std::vector<const float*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements()); 240 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + 241 ((output_pixels() - 1) * step() + pooling_elements()) * channels()); 242 std::vector<float> output(XNN_EXTRA_BYTES / sizeof(float) + 243 (output_pixels() - 1) * output_stride() + channels()); 244 std::vector<float> output_ref(output_pixels() * channels()); 245 for (size_t iteration = 0; iteration < iterations(); iteration++) { 246 std::generate(input.begin(), input.end(), std::ref(f32rng)); 247 std::fill(output.begin(), output.end(), nanf("")); 248 249 for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) { 250 indirect_input[i] = input.data() + i * channels() - input_offset(); 251 } 252 std::shuffle(indirect_input.begin(), 253 indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng); 254 255 // Compute reference results, without clamping. 256 for (size_t x = 0; x < output_pixels(); x++) { 257 for (size_t c = 0; c < channels(); c++) { 258 float max_value = -std::numeric_limits<float>::infinity(); 259 for (size_t p = 0; p < pooling_elements(); p++) { 260 max_value = std::max(max_value, indirect_input[x * step() + p][c + input_offset()]); 261 } 262 output_ref[x * channels() + c] = max_value; 263 } 264 } 265 266 // Compute clamping parameters. 267 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); 268 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); 269 const float accumulated_range = accumulated_max - accumulated_min; 270 const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range; 271 const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range; 272 273 274 // Prepare output parameters. 275 xnn_f32_output_params output_params = { }; 276 switch (variant) { 277 case Variant::Native: 278 output_params = xnn_init_f32_output_params(output_min, output_max); 279 break; 280 case Variant::Scalar: 281 output_params = xnn_init_scalar_f32_output_params(output_min, output_max); 282 break; 283 } 284 285 // Clamp reference results. 286 for (float& output_value : output_ref) { 287 output_value = std::max(std::min(output_value, output_max), output_min); 288 } 289 290 // Call optimized micro-kernel. 291 maxpool(output_pixels(), pooling_elements(), channels(), 292 indirect_input.data(), input_offset() * sizeof(float), output.data(), 293 (step() - packed_pooling_elements()) * sizeof(void*), 294 (output_stride() - channels()) * sizeof(float), 295 &output_params); 296 297 // Verify results. 298 for (size_t x = 0; x < output_pixels(); x++) { 299 for (size_t c = 0; c < channels(); c++) { 300 ASSERT_GE(output[x * output_stride() + c], output_min) 301 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 302 << ", pooling elements = " << pooling_elements() << ", step = " << step() 303 << ", input offset = " << input_offset(); 304 ASSERT_LE(output[x * output_stride() + c], output_max) 305 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 306 << ", pooling elements = " << pooling_elements() << ", step = " << step() 307 << ", input offset = " << input_offset(); 308 ASSERT_EQ(output_ref[x * channels() + c], output[x * output_stride() + c]) 309 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 310 << ", pooling elements = " << pooling_elements() << ", step = " << step() 311 << ", input offset = " << input_offset(); 312 } 313 } 314 } 315 } 316 317 private: 318 size_t output_pixels_{1}; 319 size_t pooling_elements_{1}; 320 size_t channels_{1}; 321 size_t input_offset_{0}; 322 size_t step_{1}; 323 size_t primary_pooling_tile_{1}; 324 size_t incremental_pooling_tile_{1}; 325 size_t output_stride_{0}; 326 uint8_t qmin_{0}; 327 uint8_t qmax_{255}; 328 size_t iterations_{3}; 329 }; 330