1 // Copyright 2019 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #pragma once 7 8 #include <gtest/gtest.h> 9 10 #include <algorithm> 11 #include <cassert> 12 #include <cstddef> 13 #include <cstdlib> 14 #include <functional> 15 #include <random> 16 #include <vector> 17 18 #include <xnnpack.h> 19 #include <xnnpack/AlignedAllocator.h> 20 #include <xnnpack/params-init.h> 21 #include <xnnpack/params.h> 22 23 24 class ArgMaxPoolMicrokernelTester { 25 public: 26 enum class Variant { 27 Native, 28 Scalar, 29 }; 30 output_pixels(size_t output_pixels)31 inline ArgMaxPoolMicrokernelTester& output_pixels(size_t output_pixels) { 32 assert(output_pixels != 0); 33 this->output_pixels_ = output_pixels; 34 return *this; 35 } 36 output_pixels()37 inline size_t output_pixels() const { 38 return this->output_pixels_; 39 } 40 step(size_t step)41 inline ArgMaxPoolMicrokernelTester& step(size_t step) { 42 assert(step != 0); 43 this->step_ = step; 44 return *this; 45 } 46 step()47 inline size_t step() const { 48 return this->step_; 49 } 50 input_offset(size_t input_offset)51 inline ArgMaxPoolMicrokernelTester& input_offset(size_t input_offset) { 52 assert(input_offset != 0); 53 this->input_offset_ = input_offset; 54 return *this; 55 } 56 input_offset()57 inline size_t input_offset() const { 58 return this->input_offset_; 59 } 60 pooling_elements(size_t pooling_elements)61 inline ArgMaxPoolMicrokernelTester& pooling_elements(size_t pooling_elements) { 62 assert(pooling_elements != 0); 63 this->pooling_elements_ = pooling_elements; 64 return *this; 65 } 66 pooling_elements()67 inline size_t pooling_elements() const { 68 return this->pooling_elements_; 69 } 70 packed_pooling_elements()71 inline size_t packed_pooling_elements() const { 72 if (pooling_elements() <= primary_pooling_tile()) { 73 return primary_pooling_tile(); 74 } else { 75 return (pooling_elements() - primary_pooling_tile()) % incremental_pooling_tile() == 0 ? pooling_elements() : ((pooling_elements() - primary_pooling_tile()) / incremental_pooling_tile() + 1) * incremental_pooling_tile() + primary_pooling_tile(); 76 } 77 } 78 pooling_tile(size_t primary_tile)79 inline ArgMaxPoolMicrokernelTester& pooling_tile(size_t primary_tile) { 80 assert(primary_tile != 0); 81 this->primary_pooling_tile_ = primary_tile; 82 this->incremental_pooling_tile_ = 0; 83 return *this; 84 } 85 pooling_tile(size_t primary_tile,size_t incremental_tile)86 inline ArgMaxPoolMicrokernelTester& pooling_tile(size_t primary_tile, size_t incremental_tile) { 87 assert(primary_tile != 0); 88 this->primary_pooling_tile_ = primary_tile; 89 this->incremental_pooling_tile_ = incremental_tile; 90 return *this; 91 } 92 primary_pooling_tile(size_t primary_pooling_tile)93 inline ArgMaxPoolMicrokernelTester& primary_pooling_tile(size_t primary_pooling_tile) { 94 assert(primary_pooling_tile != 0); 95 this->primary_pooling_tile_ = primary_pooling_tile; 96 return *this; 97 } 98 primary_pooling_tile()99 inline size_t primary_pooling_tile() const { 100 return this->primary_pooling_tile_; 101 } 102 incremental_pooling_tile(size_t incremental_pooling_tile)103 inline ArgMaxPoolMicrokernelTester& incremental_pooling_tile(size_t incremental_pooling_tile) { 104 assert(incremental_pooling_tile != 0); 105 this->incremental_pooling_tile_ = incremental_pooling_tile; 106 return *this; 107 } 108 incremental_pooling_tile()109 inline size_t incremental_pooling_tile() const { 110 return this->incremental_pooling_tile_; 111 } 112 channels(size_t channels)113 inline ArgMaxPoolMicrokernelTester& channels(size_t channels) { 114 assert(channels != 0); 115 this->channels_ = channels; 116 return *this; 117 } 118 channels()119 inline size_t channels() const { 120 return this->channels_; 121 } 122 output_stride(size_t output_stride)123 inline ArgMaxPoolMicrokernelTester& output_stride(size_t output_stride) { 124 assert(output_stride != 0); 125 this->output_stride_ = output_stride; 126 return *this; 127 } 128 output_stride()129 inline size_t output_stride() const { 130 if (this->output_stride_ == 0) { 131 return channels(); 132 } else { 133 assert(this->output_stride_ >= channels()); 134 return this->output_stride_; 135 } 136 } 137 iterations(size_t iterations)138 inline ArgMaxPoolMicrokernelTester& iterations(size_t iterations) { 139 this->iterations_ = iterations; 140 return *this; 141 } 142 iterations()143 inline size_t iterations() const { 144 return this->iterations_; 145 } 146 147 void Test(xnn_f32_argmaxpool_unipass_ukernel_function argmaxpool, Variant variant = Variant::Native) const { 148 std::random_device random_device; 149 auto rng = std::mt19937(random_device()); 150 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng); 151 152 std::vector<const float*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements()); 153 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + 154 ((output_pixels() - 1) * step() + pooling_elements()) * channels()); 155 std::vector<float> output((output_pixels() - 1) * output_stride() + channels()); 156 std::vector<uint32_t> index(output_pixels() * channels()); 157 std::vector<float> output_ref(output_pixels() * channels()); 158 std::vector<uint32_t> index_ref(output_pixels() * channels()); 159 for (size_t iteration = 0; iteration < iterations(); iteration++) { 160 std::generate(input.begin(), input.end(), std::ref(f32rng)); 161 std::fill(output.begin(), output.end(), nanf("")); 162 163 for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) { 164 indirect_input[i] = input.data() + i * channels() - input_offset(); 165 } 166 std::shuffle(indirect_input.begin(), 167 indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng); 168 169 // Compute reference results, without clamping. 170 for (size_t x = 0; x < output_pixels(); x++) { 171 for (size_t c = 0; c < channels(); c++) { 172 float max_value = indirect_input[x * step()][c + input_offset()]; 173 uint32_t max_index = 0; 174 for (size_t p = 0; p < pooling_elements(); p++) { 175 const float value = indirect_input[x * step() + p][c + input_offset()]; 176 if (value > max_value) { 177 max_value = value; 178 max_index = p; 179 } 180 } 181 output_ref[x * channels() + c] = max_value; 182 index_ref[x * channels() + c] = max_index; 183 } 184 } 185 186 // Call optimized micro-kernel. 187 argmaxpool(output_pixels(), pooling_elements(), channels(), 188 indirect_input.data(), input_offset() * sizeof(float), output.data(), index.data(), 189 step() * sizeof(void*), 190 (output_stride() - channels()) * sizeof(float)); 191 192 // Verify results. 193 for (size_t x = 0; x < output_pixels(); x++) { 194 for (size_t c = 0; c < channels(); c++) { 195 ASSERT_EQ(output_ref[x * channels() + c], output[x * output_stride() + c]) 196 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 197 << ", pooling elements = " << pooling_elements() << ", step = " << step() 198 << ", input offset = " << input_offset(); 199 ASSERT_EQ( 200 indirect_input[x * step() + index_ref[x * channels() + c]][c + input_offset()], 201 indirect_input[x * step() + index[x * channels() + c]][c + input_offset()]) 202 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 203 << ", pooling elements = " << pooling_elements() << ", step = " << step() 204 << ", input offset = " << input_offset(); 205 ASSERT_EQ(index_ref[x * channels() + c], index[x * channels() + c]) 206 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 207 << ", pooling elements = " << pooling_elements() << ", step = " << step() 208 << ", input offset = " << input_offset(); 209 } 210 } 211 } 212 } 213 214 void Test(xnn_f32_argmaxpool_multipass_ukernel_function argmaxpool, Variant variant = Variant::Native) const { 215 std::random_device random_device; 216 auto rng = std::mt19937(random_device()); 217 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng); 218 219 std::vector<const float*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements()); 220 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + 221 ((output_pixels() - 1) * step() + pooling_elements()) * channels()); 222 std::vector<float> output((output_pixels() - 1) * output_stride() + channels()); 223 std::vector<uint32_t> index(output_pixels() * channels()); 224 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> index_buffer( 225 channels() + XNN_EXTRA_BYTES / sizeof(uint32_t)); 226 std::vector<float, AlignedAllocator<float, 64>> output_buffer( 227 channels() + XNN_EXTRA_BYTES / sizeof(float)); 228 std::vector<float> output_ref(output_pixels() * channels()); 229 std::vector<uint32_t> index_ref(output_pixels() * channels()); 230 for (size_t iteration = 0; iteration < iterations(); iteration++) { 231 std::generate(input.begin(), input.end(), std::ref(f32rng)); 232 std::fill(output.begin(), output.end(), nanf("")); 233 234 for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) { 235 indirect_input[i] = input.data() + i * channels() - input_offset(); 236 } 237 std::shuffle(indirect_input.begin(), 238 indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng); 239 240 // Compute reference results, without clamping. 241 for (size_t x = 0; x < output_pixels(); x++) { 242 for (size_t c = 0; c < channels(); c++) { 243 float max_value = indirect_input[x * step()][c + input_offset()]; 244 uint32_t max_index = 0; 245 for (size_t p = 0; p < pooling_elements(); p++) { 246 const float value = indirect_input[x * step() + p][c + input_offset()]; 247 if (value > max_value) { 248 max_value = value; 249 max_index = p; 250 } 251 } 252 output_ref[x * channels() + c] = max_value; 253 index_ref[x * channels() + c] = max_index; 254 } 255 } 256 257 // Call optimized micro-kernel. 258 argmaxpool(output_pixels(), pooling_elements(), channels(), 259 indirect_input.data(), input_offset() * sizeof(float), 260 output_buffer.data(), index_buffer.data(), 261 output.data(), index.data(), 262 (step() - (packed_pooling_elements() - incremental_pooling_tile())) * sizeof(void*), 263 (output_stride() - channels()) * sizeof(float)); 264 265 // Verify results. 266 for (size_t x = 0; x < output_pixels(); x++) { 267 for (size_t c = 0; c < channels(); c++) { 268 ASSERT_EQ(output_ref[x * channels() + c], output[x * output_stride() + c]) 269 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 270 << ", pooling elements = " << pooling_elements() << ", step = " << step() 271 << ", input offset = " << input_offset(); 272 ASSERT_EQ( 273 indirect_input[x * step() + index_ref[x * channels() + c]][c + input_offset()], 274 indirect_input[x * step() + index[x * channels() + c]][c + input_offset()]) 275 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 276 << ", pooling elements = " << pooling_elements() << ", step = " << step() 277 << ", input offset = " << input_offset(); 278 ASSERT_EQ(index_ref[x * channels() + c], index[x * channels() + c]) 279 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels() 280 << ", pooling elements = " << pooling_elements() << ", step = " << step() 281 << ", input offset = " << input_offset(); 282 } 283 } 284 } 285 } 286 287 private: 288 size_t output_pixels_{1}; 289 size_t pooling_elements_{1}; 290 size_t channels_{1}; 291 size_t input_offset_{0}; 292 size_t step_{1}; 293 size_t primary_pooling_tile_{1}; 294 size_t incremental_pooling_tile_{1}; 295 size_t output_stride_{0}; 296 size_t iterations_{3}; 297 }; 298