// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/math.h>
#include <xnnpack/params.h>


class IBilinearMicrokernelTester {
 public:
  inline IBilinearMicrokernelTester& pixels(uint32_t pixels) {
    assert(pixels >= 1);
    this->pixels_ = pixels;
    return *this;
  }

  inline uint32_t pixels() const {
    return this->pixels_;
  }

  inline IBilinearMicrokernelTester& channels(uint32_t channels) {
    assert(channels >= 1);
    this->channels_ = channels;
    return *this;
  }

  inline uint32_t channels() const {
    return this->channels_;
  }

  inline IBilinearMicrokernelTester& input_offset(uint32_t input_offset) {
    this->input_offset_ = input_offset;
    return *this;
  }

  inline uint32_t input_offset() const {
    return this->input_offset_;
  }

  inline IBilinearMicrokernelTester& output_stride(uint32_t output_stride) {
    assert(output_stride != 0);
    this->output_stride_ = output_stride;
    return *this;
  }

  inline uint32_t output_stride() const {
    if (this->output_stride_ == 0) {
      return channels();
    } else {
      assert(this->output_stride_ >= channels());
      return this->output_stride_;
    }
  }

  inline IBilinearMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  inline IBilinearMicrokernelTester& input_stride(uint32_t input_stride) {
    assert(input_stride != 0);
    this->input_stride_ = input_stride;
    return *this;
  }

  inline uint32_t input_stride() const {
    if (this->input_stride_ == 0) {
      return 4 * pixels();
    } else {
      assert(this->input_stride_ >= 4 * pixels());
      return this->input_stride_;
    }
  }

  void Test(xnn_f32_ibilinear_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);

    std::vector<const float*> indirection(pixels() * 4);
    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + indirection.size() * channels());
    std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
    std::vector<float> output((pixels() - 1) * output_stride() + channels());
    std::vector<float> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      for (size_t i = 0; i < indirection.size(); i++) {
        indirection[i] = input.data() + i * channels() - input_offset();
      }
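      // Each output pixel reads four pointers from the indirection buffer (one per bilinear
      // corner). The loop above pre-biases every pointer by input_offset elements; the kernel
      // receives the offset in bytes and adds it back. Shuffling below verifies that the kernel
      // makes no assumptions about the order of the pointed-to rows.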
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float alpha_h = packed_weights[i * 2 + 0];
          const float alpha_v = packed_weights[i * 2 + 1];
          output_ref[i * channels() + c] =
            indirection[i * 4 + 0][c + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
            indirection[i * 4 + 1][c + input_offset()] * alpha_h * (1.0f - alpha_v) +
            indirection[i * 4 + 2][c + input_offset()] * (1.0f - alpha_h) * alpha_v +
            indirection[i * 4 + 3][c + input_offset()] * alpha_h * alpha_v;
        }
      }

      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels() * sizeof(float),
        indirection.data(), input_offset() * sizeof(float),
        packed_weights.data(), output.data(),
        (output_stride() - channels()) * sizeof(float));

      // Verify results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_NEAR(
              output_ref[i * channels() + c],
              output[i * output_stride() + c],
              std::abs(output_ref[i * channels() + c]) * 1.0e-4)
            << "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void Test(xnn_s8_ibilinear_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i8rng = std::bind(
      std::uniform_int_distribution<int16_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
      std::ref(rng));
    auto w11rng = std::bind(std::uniform_int_distribution<int16_t>(0, 2047), std::ref(rng));

    std::vector<const int8_t*> indirection(pixels() * 4);
    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + indirection.size() * channels());
    std::vector<int16_t, AlignedAllocator<int16_t, 64>> packed_weights(pixels() * 2);
    std::vector<int8_t> output((pixels() - 1) * output_stride() + channels());
    std::vector<int8_t> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(i8rng));
      std::generate(packed_weights.begin(), packed_weights.end(), std::ref(w11rng));
      std::fill(output.begin(), output.end(), INT8_C(0xFA));

      for (size_t i = 0; i < indirection.size(); i++) {
        indirection[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const int32_t alpha_h = packed_weights[i * 2 + 0];
          const int32_t alpha_v = packed_weights[i * 2 + 1];
          const int32_t acc = asr_s32(
            int32_t(indirection[i * 4 + 0][c + input_offset()]) * (2048 - alpha_h) * (2048 - alpha_v) +
            int32_t(indirection[i * 4 + 1][c + input_offset()]) * alpha_h * (2048 - alpha_v) +
            int32_t(indirection[i * 4 + 2][c + input_offset()]) * (2048 - alpha_h) * alpha_v +
            int32_t(indirection[i * 4 + 3][c + input_offset()]) * alpha_h * alpha_v +
            2097152, 22);
          ASSERT_GE(acc, std::numeric_limits<int8_t>::min());
          ASSERT_LE(acc, std::numeric_limits<int8_t>::max());
          output_ref[i * channels() + c] = (int8_t) acc;
        }
      }
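      // Note on the reference arithmetic: the packed weights are unsigned Q11 fixed-point values
      // in [0, 2047], so each two-weight product carries 22 fractional bits; adding 2^21 (2097152)
      // before the arithmetic shift right by 22 rounds the accumulator to nearest.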
      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels() * sizeof(int8_t),
        indirection.data(), input_offset() * sizeof(int8_t),
        packed_weights.data(), output.data(),
        (output_stride() - channels()) * sizeof(int8_t));

      // Verify results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(int32_t(output_ref[i * channels() + c]), int32_t(output[i * output_stride() + c]))
            << "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void Test(xnn_u8_ibilinear_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto u8rng = std::bind(
      std::uniform_int_distribution<uint16_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
    auto w11rng = std::bind(std::uniform_int_distribution<uint16_t>(0, 2047), std::ref(rng));

    std::vector<const uint8_t*> indirection(pixels() * 4);
    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + indirection.size() * channels());
    std::vector<int16_t, AlignedAllocator<int16_t, 64>> packed_weights(pixels() * 2);
    std::vector<uint8_t> output((pixels() - 1) * output_stride() + channels());
    std::vector<uint8_t> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(packed_weights.begin(), packed_weights.end(), std::ref(w11rng));
      std::fill(output.begin(), output.end(), UINT8_C(0xFA));

      for (size_t i = 0; i < indirection.size(); i++) {
        indirection[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const uint32_t alpha_h = uint32_t(int32_t(packed_weights[i * 2 + 0]));
          const uint32_t alpha_v = uint32_t(int32_t(packed_weights[i * 2 + 1]));
          const uint32_t acc = (2097152 +
            int32_t(indirection[i * 4 + 0][c + input_offset()]) * (2048 - alpha_h) * (2048 - alpha_v) +
            int32_t(indirection[i * 4 + 1][c + input_offset()]) * alpha_h * (2048 - alpha_v) +
            int32_t(indirection[i * 4 + 2][c + input_offset()]) * (2048 - alpha_h) * alpha_v +
            int32_t(indirection[i * 4 + 3][c + input_offset()]) * alpha_h * alpha_v) >> 22;
          ASSERT_LE(acc, std::numeric_limits<uint8_t>::max());
          output_ref[i * channels() + c] = (uint8_t) acc;
        }
      }

      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels() * sizeof(uint8_t),
        indirection.data(), input_offset() * sizeof(uint8_t),
        packed_weights.data(), output.data(),
        (output_stride() - channels()) * sizeof(uint8_t));
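      // The last kernel argument is the output increment in bytes, i.e. the gap left after each
      // pixel's channels when output_stride exceeds channels; verification below therefore indexes
      // the output with output_stride while the reference stays densely packed.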
      // Verify results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(uint32_t(output_ref[i * channels() + c]), uint32_t(output[i * output_stride() + c]))
            << "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void TestCHW(xnn_f32_ibilinear_chw_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);

    std::vector<const float*> indirection(pixels() * 2);
    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + (channels() - 1) * input_stride() + 4 * pixels());
    std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
    std::vector<float> output(pixels() * channels());
    std::vector<float> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Indirection will point to the even ("left") pixels of the input.
      // The kernels will expect "right" pixels to be placed right next to them.
      for (size_t i = 0; i < indirection.size(); i++) {
        const float* left_corner = input.data() + 2 * i - input_offset();
        indirection[i] = left_corner;
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float alpha_h = packed_weights[i * 2 + 0];
          const float alpha_v = packed_weights[i * 2 + 1];
          // `c * pixels() + i` because the output is NCHW.
          output_ref[c * pixels() + i] =
            // `c * input_stride()` because the input is NCHW.
            (indirection[i * 2 + 0] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
            (indirection[i * 2 + 0] + 1)[c * input_stride() + input_offset()] * alpha_h * (1.0f - alpha_v) +
            (indirection[i * 2 + 1] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * alpha_v +
            (indirection[i * 2 + 1] + 1)[c * input_stride() + input_offset()] * alpha_h * alpha_v;
        }
      }

      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels(),
        indirection.data(), input_offset() * sizeof(float),
        packed_weights.data(), output.data(), input_stride() * sizeof(float));

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        for (size_t i = 0; i < pixels(); i++) {
          ASSERT_NEAR(
              output_ref[c * pixels() + i],
              output[c * pixels() + i],
              std::abs(output_ref[c * pixels() + i]) * 1.0e-4)
            << "i = " << i << ", channel = " << c;
        }
      }
    }
  }

 private:
  uint32_t channels_{1};
  uint32_t pixels_{1};
  uint32_t output_stride_{0};
  uint32_t input_stride_{0};
  uint32_t input_offset_{0};
  size_t iterations_{3};
};
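// Example usage (an illustrative sketch, not part of the tester): exercising a micro-kernel
// through the fluent setters. The kernel symbol below is an assumption and must name a declared
// xnn_f32_ibilinear_ukernel_function; the parameter values are arbitrary.
//
//   TEST(F32_IBILINEAR__SCALAR_C2, pixels_gt_1_with_output_stride) {
//     IBilinearMicrokernelTester()
//       .pixels(7)
//       .channels(2)
//       .output_stride(5)
//       .Test(xnn_f32_ibilinear_ukernel__scalar_c2);
//   }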