1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 9 #pragma once 10 11 #include <gtest/gtest.h> 12 13 #include <algorithm> 14 #include <cassert> 15 #include <cmath> 16 #include <cstddef> 17 #include <cstdlib> 18 #include <functional> 19 #include <random> 20 #include <vector> 21 22 #include <xnnpack.h> 23 #include <xnnpack/AlignedAllocator.h> 24 #include <xnnpack/math.h> 25 #include <xnnpack/pack.h> 26 #include <xnnpack/params-init.h> 27 #include <xnnpack/params.h> 28 29 30 class DWConv2DMicrokernelTester { 31 public: 32 enum class Variant { 33 Native, 34 Scalar, 35 }; 36 padding_left(uint32_t padding_left)37 inline DWConv2DMicrokernelTester& padding_left(uint32_t padding_left) { 38 this->padding_left_ = padding_left; 39 return *this; 40 } 41 padding_left()42 inline uint32_t padding_left() const { 43 return this->padding_left_; 44 } 45 padding_right(uint32_t padding_right)46 inline DWConv2DMicrokernelTester& padding_right(uint32_t padding_right) { 47 this->padding_right_ = padding_right; 48 return *this; 49 } 50 padding_right()51 inline uint32_t padding_right() const { 52 return this->padding_right_; 53 } 54 padding_top(uint32_t padding_top)55 inline DWConv2DMicrokernelTester& padding_top(uint32_t padding_top) { 56 this->padding_top_ = padding_top; 57 return *this; 58 } 59 padding_top()60 inline uint32_t padding_top() const { 61 return this->padding_top_; 62 } 63 64 padding_bottom(uint32_t padding_bottom)65 inline DWConv2DMicrokernelTester& padding_bottom(uint32_t padding_bottom) { 66 this->padding_bottom_ = padding_bottom; 67 return *this; 68 } padding_bottom()69 inline uint32_t padding_bottom() const { 70 return this->padding_bottom_; 71 } 72 input_height(uint32_t input_height)73 inline DWConv2DMicrokernelTester& input_height(uint32_t input_height) { 74 assert(input_height >= 1); 75 this->input_height_ = input_height; 76 return *this; 77 } 78 input_height()79 inline uint32_t input_height() const { 80 return this->input_height_; 81 } 82 input_width(uint32_t input_width)83 inline DWConv2DMicrokernelTester& input_width(uint32_t input_width) { 84 assert(input_width >= 1); 85 this->input_width_ = input_width; 86 return *this; 87 } 88 input_width()89 inline uint32_t input_width() const { 90 return this->input_width_; 91 } 92 subsampling(uint32_t subsampling)93 inline DWConv2DMicrokernelTester& subsampling(uint32_t subsampling) { 94 assert(subsampling >= 1); 95 this->subsampling_ = subsampling; 96 return *this; 97 } 98 subsampling()99 inline uint32_t subsampling() const { 100 return this->subsampling_; 101 } 102 kernel_height(uint32_t kernel_height)103 inline DWConv2DMicrokernelTester& kernel_height(uint32_t kernel_height) { 104 assert(kernel_height != 0); 105 this->kernel_height_ = kernel_height; 106 return *this; 107 } 108 kernel_height()109 inline uint32_t kernel_height() const { 110 return this->kernel_height_; 111 } 112 kernel_width(uint32_t kernel_width)113 inline DWConv2DMicrokernelTester& kernel_width(uint32_t kernel_width) { 114 assert(kernel_width != 0); 115 this->kernel_width_ = kernel_width; 116 return *this; 117 } 118 kernel_width()119 inline uint32_t kernel_width() const { 120 return this->kernel_width_; 121 } 122 kernel_size()123 inline uint32_t kernel_size() const { 124 return kernel_height() * kernel_width(); 125 } 126 output_height()127 inline uint32_t output_height() const { 128 const uint32_t padded_input_height = padding_top() + input_height() + padding_bottom(); 129 if (padded_input_height <= kernel_height()) { 130 return 1; 131 } else { 132 return (padded_input_height - kernel_height()) / subsampling() + 1; 133 } 134 } 135 output_width()136 inline uint32_t output_width() const { 137 const uint32_t padded_input_width = padding_left() + input_width() + padding_right(); 138 if (padded_input_width <= kernel_width()) { 139 return 1; 140 } else { 141 return (padded_input_width - kernel_width()) / subsampling() + 1; 142 } 143 } 144 qmin(uint8_t qmin)145 inline DWConv2DMicrokernelTester& qmin(uint8_t qmin) { 146 this->qmin_ = qmin; 147 return *this; 148 } 149 qmin()150 inline uint8_t qmin() const { 151 return this->qmin_; 152 } 153 qmax(uint8_t qmax)154 inline DWConv2DMicrokernelTester& qmax(uint8_t qmax) { 155 this->qmax_ = qmax; 156 return *this; 157 } 158 qmax()159 inline uint8_t qmax() const { 160 return this->qmax_; 161 } 162 iterations(size_t iterations)163 inline DWConv2DMicrokernelTester& iterations(size_t iterations) { 164 this->iterations_ = iterations; 165 return *this; 166 } 167 iterations()168 inline size_t iterations() const { 169 return this->iterations_; 170 } 171 172 void Test(xnn_f32_dwconv2d_chw_ukernel_function dwconv, Variant variant = Variant::Native) const { 173 std::random_device random_device; 174 auto rng = std::mt19937(random_device()); 175 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng); 176 177 std::vector<float, AlignedAllocator<float, 64>> input(input_height() * input_width() + 2 * XNN_EXTRA_BYTES); 178 std::vector<float> zero(input_width() + 2 * XNN_EXTRA_BYTES); 179 std::vector<float> packed_weights(kernel_size() + 1); 180 std::vector<float, AlignedAllocator<float, 64>> output(output_height() * output_width()); 181 std::vector<float> output_ref(output_height() * output_width()); 182 183 for (size_t iteration = 0; iteration < iterations(); iteration++) { 184 std::generate(input.begin(), input.end(), std::ref(f32rng)); 185 std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng)); 186 std::fill(output.begin(), output.end(), nanf("")); 187 188 for (size_t oy = 0; oy < output_height(); oy++) { 189 for (size_t ox = 0; ox < output_width(); ox++) { 190 float acc = packed_weights[0]; 191 for (size_t ky = 0; ky < kernel_height(); ky++) { 192 const size_t iy = oy * subsampling() + ky - padding_top(); 193 for (size_t kx = 0; kx < kernel_width(); kx++) { 194 const size_t ix = ox * subsampling() + kx - padding_left(); 195 if (ix < input_width() && iy < input_height()) { 196 const float input_val = input[iy * input_width() + ix]; 197 const float kernel_val = packed_weights[1 + ky * kernel_width() + kx]; 198 acc += input_val * kernel_val; 199 } 200 } 201 } 202 output_ref[oy * output_width() + ox] = acc; 203 } 204 } 205 206 // Compute clamping parameters. 207 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); 208 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); 209 const float accumulated_range = accumulated_max - accumulated_min; 210 const float output_min = accumulated_min + accumulated_range / 255.0f * float(qmin()); 211 const float output_max = accumulated_max - accumulated_range / 255.0f * float(255 - qmax()); 212 213 // Prepare parameters. 214 xnn_f32_chw_params chw_params; 215 switch (variant) { 216 case Variant::Native: 217 xnn_init_f32_chw_params(&chw_params, input_width(), output_min, output_max); 218 break; 219 case Variant::Scalar: 220 xnn_init_scalar_f32_chw_params(&chw_params, input_width(), output_min, output_max); 221 break; 222 } 223 224 // Clamp reference results. 225 for (float& output_val : output_ref) { 226 output_val = std::max(std::min(output_val, output_max), output_min); 227 } 228 229 // Call optimized micro-kernel. 230 dwconv( 231 input_height(), input_width() * sizeof(float), 232 input.data(), packed_weights.data(), zero.data(), output.data(), 233 padding_top(), 234 &chw_params); 235 236 // Verify results. 237 for (size_t y = 0; y < output_height(); y++) { 238 for (size_t x = 0; x < output_width(); x++) { 239 ASSERT_NEAR( 240 output_ref[y * output_width() + x], 241 output[y * output_width() + x], 242 std::abs(output_ref[y * output_width() + x]) * 1.0e-5) 243 << "x = " << x << ", y = " << y; 244 } 245 } 246 } 247 } 248 249 private: 250 uint32_t padding_left_{0}; 251 uint32_t padding_right_{0}; 252 uint32_t padding_top_{0}; 253 uint32_t padding_bottom_{0}; 254 uint32_t input_height_{1}; 255 uint32_t input_width_{1}; 256 uint32_t subsampling_{1}; 257 uint32_t kernel_height_{1}; 258 uint32_t kernel_width_{1}; 259 uint8_t qmin_{0}; 260 uint8_t qmax_{255}; 261 size_t iterations_{1}; 262 }; 263