// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/params.h>


class IBilinearMicrokernelTester {
 public:
  inline IBilinearMicrokernelTester& pixels(uint32_t pixels) {
    assert(pixels >= 1);
    this->pixels_ = pixels;
    return *this;
  }

  inline uint32_t pixels() const {
    return this->pixels_;
  }

  inline IBilinearMicrokernelTester& channels(uint32_t channels) {
    assert(channels >= 1);
    this->channels_ = channels;
    return *this;
  }

  inline uint32_t channels() const {
    return this->channels_;
  }

  inline IBilinearMicrokernelTester& input_offset(uint32_t input_offset) {
    this->input_offset_ = input_offset;
    return *this;
  }

  inline uint32_t input_offset() const {
    return this->input_offset_;
  }

  inline IBilinearMicrokernelTester& output_stride(uint32_t output_stride) {
    assert(output_stride != 0);
    this->output_stride_ = output_stride;
    return *this;
  }

  inline uint32_t output_stride() const {
    if (this->output_stride_ == 0) {
      return channels();
    } else {
      assert(this->output_stride_ >= channels());
      return this->output_stride_;
    }
  }

  inline IBilinearMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  inline IBilinearMicrokernelTester& input_stride(uint32_t input_stride) {
    assert(input_stride != 0);
    this->input_stride_ = input_stride;
    return *this;
  }

  inline uint32_t input_stride() const {
    if (this->input_stride_ == 0) {
      return 4 * pixels();
    } else {
      assert(this->input_stride_ >= 4 * pixels());
      return this->input_stride_;
    }
  }

  void Test(xnn_f32_ibilinear_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);

    std::vector<const float*> indirection(pixels() * 4);
    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + indirection.size() * channels());
    std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
    std::vector<float> output((pixels() - 1) * output_stride() + channels());
    std::vector<float> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Each output pixel reads its four corner rows through the indirection buffer.
      // input_offset() is subtracted here because the kernel adds it back to every pointer.
      for (size_t i = 0; i < indirection.size(); i++) {
        indirection[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float alpha_h = packed_weights[i * 2 + 0];
          const float alpha_v = packed_weights[i * 2 + 1];
          output_ref[i * channels() + c] =
            indirection[i * 4 + 0][c + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
            indirection[i * 4 + 1][c + input_offset()] * alpha_h * (1.0f - alpha_v) +
            indirection[i * 4 + 2][c + input_offset()] * (1.0f - alpha_h) * alpha_v +
            indirection[i * 4 + 3][c + input_offset()] * alpha_h * alpha_v;
        }
      }

      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels() * sizeof(float),
        indirection.data(), input_offset() * sizeof(float),
        packed_weights.data(), output.data(),
        (output_stride() - channels()) * sizeof(float));

      // Verify results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_NEAR(
              output_ref[i * channels() + c],
              output[i * output_stride() + c],
              std::abs(output_ref[i * channels() + c]) * 1.0e-4)
            << "i = " << i << ", channel = " << c;
        }
      }
    }
  }

  void TestCHW(xnn_f32_ibilinear_chw_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);

    std::vector<const float*> indirection(pixels() * 2);
    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + (channels() - 1) * input_stride() + 4 * pixels());
    std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
    std::vector<float> output(pixels() * channels());
    std::vector<float> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Indirection will point to the even ("left") pixels of the input.
      // The kernels will expect "right" pixels to be placed right next to them.
      for (size_t i = 0; i < indirection.size(); i++) {
        const float* left_corner = input.data() + 2 * i - input_offset();
        indirection[i] = left_corner;
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float alpha_h = packed_weights[i * 2 + 0];
          const float alpha_v = packed_weights[i * 2 + 1];
          // `c * pixels() + i` because the output is NCHW.
          output_ref[c * pixels() + i] =
            // `c * input_stride()` because the input is NCHW.
            (indirection[i * 2 + 0] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
            (indirection[i * 2 + 0] + 1)[c * input_stride() + input_offset()] * alpha_h * (1.0f - alpha_v) +
            (indirection[i * 2 + 1] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * alpha_v +
            (indirection[i * 2 + 1] + 1)[c * input_stride() + input_offset()] * alpha_h * alpha_v;
        }
      }

      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels(),
        indirection.data(), input_offset() * sizeof(float),
        packed_weights.data(), output.data(), input_stride() * sizeof(float));

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        for (size_t i = 0; i < pixels(); i++) {
          ASSERT_NEAR(
              output_ref[c * pixels() + i],
              output[c * pixels() + i],
              std::abs(output_ref[c * pixels() + i]) * 1.0e-4)
            << "i = " << i << ", channel = " << c;
        }
      }
    }
  }

 private:
  uint32_t channels_{1};
  uint32_t pixels_{1};
  uint32_t output_stride_{0};
  uint32_t input_stride_{0};
  uint32_t input_offset_{0};
  size_t iterations_{3};
};
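// Usage sketch (illustration only): tests typically configure the tester through the
// fluent setters and pass a concrete ukernel matching the xnn_f32_ibilinear_ukernel_function
// or xnn_f32_ibilinear_chw_ukernel_function signature. The ukernel names below are examples;
// actual symbols come from the XNNPACK ukernel headers. Kept as a comment so that including
// this header does not register any tests.
//
//   TEST(F32_IBILINEAR__SCALAR_C2, pixels_gt_1) {
//     IBilinearMicrokernelTester()
//       .pixels(7)
//       .channels(2)
//       .Test(xnn_f32_ibilinear_ukernel__scalar_c2);
//   }
//
//   TEST(F32_IBILINEAR_CHW__SCALAR_P1, channels_gt_1) {
//     IBilinearMicrokernelTester()
//       .pixels(4)
//       .channels(3)
//       .input_stride(23)  // must be >= 4 * pixels()
//       .TestCHW(xnn_f32_ibilinear_chw_ukernel__scalar_p1);
//   }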