1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 9 #pragma once 10 11 #include <gtest/gtest.h> 12 13 #include <algorithm> 14 #include <cassert> 15 #include <cmath> 16 #include <cstddef> 17 #include <cstdlib> 18 #include <functional> 19 #include <limits> 20 #include <random> 21 #include <vector> 22 23 #include <fp16.h> 24 25 #include <xnnpack.h> 26 #include <xnnpack/AlignedAllocator.h> 27 #include <xnnpack/params-init.h> 28 #include <xnnpack/params.h> 29 #include <xnnpack/requantization.h> 30 31 32 class GAvgPoolMicrokernelTester { 33 public: rows(size_t rows)34 inline GAvgPoolMicrokernelTester& rows(size_t rows) { 35 assert(rows != 0); 36 this->rows_ = rows; 37 return *this; 38 } 39 rows()40 inline size_t rows() const { 41 return this->rows_; 42 } 43 channels(size_t channels)44 inline GAvgPoolMicrokernelTester& channels(size_t channels) { 45 assert(channels != 0); 46 this->channels_ = channels; 47 return *this; 48 } 49 channels()50 inline size_t channels() const { 51 return this->channels_; 52 } 53 channel_tile(size_t channel_tile)54 inline GAvgPoolMicrokernelTester& channel_tile(size_t channel_tile) { 55 assert(channel_tile != 0); 56 this->channel_tile_ = channel_tile; 57 return *this; 58 } 59 channel_tile()60 inline size_t channel_tile() const { 61 return this->channel_tile_; 62 } 63 input_stride(size_t input_stride)64 inline GAvgPoolMicrokernelTester& input_stride(size_t input_stride) { 65 assert(input_stride != 0); 66 this->input_stride_ = input_stride; 67 return *this; 68 } 69 input_stride()70 inline size_t input_stride() const { 71 if (this->input_stride_ == 0) { 72 return channels(); 73 } else { 74 assert(this->input_stride_ >= channels()); 75 return this->input_stride_; 76 } 77 } 78 input_scale(float input_scale)79 inline GAvgPoolMicrokernelTester& input_scale(float input_scale) { 80 assert(input_scale > 0.0f); 81 assert(std::isnormal(input_scale)); 82 this->input_scale_ = input_scale; 83 return *this; 84 } 85 input_scale()86 inline float input_scale() const { 87 return this->input_scale_; 88 } 89 input_zero_point(uint8_t input_zero_point)90 inline GAvgPoolMicrokernelTester& input_zero_point(uint8_t input_zero_point) { 91 this->input_zero_point_ = input_zero_point; 92 return *this; 93 } 94 input_zero_point()95 inline uint8_t input_zero_point() const { 96 return this->input_zero_point_; 97 } 98 output_scale(float output_scale)99 inline GAvgPoolMicrokernelTester& output_scale(float output_scale) { 100 assert(output_scale > 0.0f); 101 assert(std::isnormal(output_scale)); 102 this->output_scale_ = output_scale; 103 return *this; 104 } 105 output_scale()106 inline float output_scale() const { 107 return this->output_scale_; 108 } 109 output_zero_point(uint8_t output_zero_point)110 inline GAvgPoolMicrokernelTester& output_zero_point(uint8_t output_zero_point) { 111 this->output_zero_point_ = output_zero_point; 112 return *this; 113 } 114 output_zero_point()115 inline uint8_t output_zero_point() const { 116 return this->output_zero_point_; 117 } 118 qmin(uint8_t qmin)119 inline GAvgPoolMicrokernelTester& qmin(uint8_t qmin) { 120 this->qmin_ = qmin; 121 return *this; 122 } 123 qmin()124 inline uint8_t qmin() const { 125 return this->qmin_; 126 } 127 qmax(uint8_t qmax)128 inline GAvgPoolMicrokernelTester& qmax(uint8_t qmax) { 129 this->qmax_ = qmax; 130 return *this; 131 } 132 qmax()133 inline uint8_t qmax() const { 134 return this->qmax_; 135 } 136 iterations(size_t iterations)137 inline GAvgPoolMicrokernelTester& iterations(size_t iterations) { 138 this->iterations_ = iterations; 139 return *this; 140 } 141 iterations()142 inline size_t iterations() const { 143 return this->iterations_; 144 } 145 Test(xnn_qu8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,xnn_init_qu8_avgpool_minmax_params_fn init_params,xnn_qu8_requantize_fn requantize)146 void Test( 147 xnn_qu8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, 148 xnn_init_qu8_avgpool_minmax_params_fn init_params, 149 xnn_qu8_requantize_fn requantize) const 150 { 151 std::random_device random_device; 152 auto rng = std::mt19937(random_device()); 153 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng); 154 155 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + 156 (rows() - 1) * input_stride() + channels()); 157 std::vector<uint8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t)); 158 std::vector<uint8_t> output(channels()); 159 std::vector<uint8_t> output_ref(channels()); 160 std::vector<float> output_fp(channels()); 161 std::vector<int32_t> accumulators(channels()); 162 for (size_t iteration = 0; iteration < iterations(); iteration++) { 163 std::generate(input.begin(), input.end(), std::ref(u8rng)); 164 std::fill(output.begin(), output.end(), 0xA5); 165 166 // Prepare parameters. 167 union xnn_qu8_avgpool_minmax_params params; 168 init_params( 169 ¶ms, 170 -int32_t(input_zero_point()) * int32_t(rows()), 171 input_scale() / (output_scale() * float(rows())), 172 output_zero_point(), qmin(), qmax()); 173 174 // Compute reference results. 175 for (size_t c = 0; c < channels(); c++) { 176 int32_t acc = 0; 177 for (size_t n = 0; n < rows(); n++) { 178 acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point()); 179 } 180 accumulators[c] = acc; 181 output_ref[c] = requantize( 182 acc, input_scale() / (output_scale() * float(rows())), output_zero_point(), qmin(), qmax()); 183 output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point()); 184 output_fp[c] = std::min<float>(output_fp[c], float(qmax())); 185 output_fp[c] = std::max<float>(output_fp[c], float(qmin())); 186 } 187 188 // Call optimized micro-kernel. 189 gavgpool_minmax(rows(), channels(), 190 input.data(), input_stride() * sizeof(uint8_t), 191 zero.data(), 192 output.data(), 193 ¶ms); 194 195 // Verify results. 196 for (size_t c = 0; c < channels(); c++) { 197 ASSERT_LE(uint32_t(output[c]), uint32_t(qmax())) 198 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 199 ASSERT_GE(uint32_t(output[c]), uint32_t(qmin())) 200 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 201 ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.5f) 202 << "at position " << c << ", rows = " << rows() << ", channels = " << channels() 203 << ", acc = " << accumulators[c]; 204 ASSERT_EQ(uint32_t(output_ref[c]), uint32_t(output[c])) 205 << "at position " << c << ", rows = " << rows() << ", channels = " << channels() 206 << ", acc = " << accumulators[c]; 207 } 208 } 209 } 210 Test(xnn_qu8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,xnn_init_qu8_avgpool_minmax_params_fn init_params,xnn_qu8_requantize_fn requantize)211 void Test( 212 xnn_qu8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, 213 xnn_init_qu8_avgpool_minmax_params_fn init_params, 214 xnn_qu8_requantize_fn requantize) const 215 { 216 std::random_device random_device; 217 auto rng = std::mt19937(random_device()); 218 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng); 219 220 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + 221 (rows() - 1) * input_stride() + channels()); 222 std::vector<int32_t, AlignedAllocator<int32_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t)); 223 std::vector<uint8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t)); 224 std::vector<uint8_t> output(channels()); 225 std::vector<uint8_t> output_ref(channels()); 226 std::vector<float> output_fp(channels()); 227 std::vector<int32_t> accumulators(channels()); 228 for (size_t iteration = 0; iteration < iterations(); iteration++) { 229 std::generate(input.begin(), input.end(), std::ref(u8rng)); 230 std::fill(output.begin(), output.end(), 0xA5); 231 232 // Prepare parameters. 233 union xnn_qu8_avgpool_minmax_params params; 234 init_params( 235 ¶ms, 236 -int32_t(input_zero_point()) * int32_t(rows()), 237 input_scale() / (output_scale() * float(rows())), 238 output_zero_point(), qmin(), qmax()); 239 240 // Compute reference results. 241 for (size_t c = 0; c < channels(); c++) { 242 int32_t acc = 0; 243 for (size_t n = 0; n < rows(); n++) { 244 acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point()); 245 } 246 247 accumulators[c] = acc; 248 output_ref[c] = requantize( 249 acc, input_scale() / (output_scale() * float(rows())), output_zero_point(), qmin(), qmax()); 250 output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point()); 251 output_fp[c] = std::min<float>(output_fp[c], float(qmax())); 252 output_fp[c] = std::max<float>(output_fp[c], float(qmin())); 253 } 254 255 // Call optimized micro-kernel. 256 gavgpool_minmax(rows(), channels(), 257 input.data(), input_stride() * sizeof(uint8_t), 258 zero.data(), 259 buffer.data(), 260 output.data(), 261 ¶ms); 262 263 // Verify results. 264 for (size_t c = 0; c < channels(); c++) { 265 ASSERT_LE(uint32_t(output[c]), uint32_t(qmax())) 266 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 267 ASSERT_GE(uint32_t(output[c]), uint32_t(qmin())) 268 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 269 ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.5f) 270 << "at position " << c << ", rows = " << rows() << ", channels = " << channels() 271 << ", acc = " << accumulators[c]; 272 ASSERT_EQ(uint32_t(output_ref[c]), uint32_t(output[c])) 273 << "at position " << c << ", rows = " << rows() << ", channels = " << channels() 274 << ", acc = " << accumulators[c]; 275 } 276 } 277 } 278 Test(xnn_qs8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,xnn_init_qs8_avgpool_minmax_params_fn init_params,xnn_qs8_requantize_fn requantize)279 void Test( 280 xnn_qs8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, 281 xnn_init_qs8_avgpool_minmax_params_fn init_params, 282 xnn_qs8_requantize_fn requantize) const 283 { 284 std::random_device random_device; 285 auto rng = std::mt19937(random_device()); 286 auto i8rng = std::bind( 287 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), rng); 288 289 std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + 290 (rows() - 1) * input_stride() + channels()); 291 std::vector<int8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(int8_t)); 292 std::vector<int8_t> output(channels()); 293 std::vector<int8_t> output_ref(channels()); 294 std::vector<float> output_fp(channels()); 295 std::vector<int32_t> accumulators(channels()); 296 for (size_t iteration = 0; iteration < iterations(); iteration++) { 297 std::generate(input.begin(), input.end(), std::ref(i8rng)); 298 std::fill(output.begin(), output.end(), 0xA5); 299 300 // Prepare parameters. 301 union xnn_qs8_avgpool_minmax_params params; 302 init_params( 303 ¶ms, 304 -int32_t(input_zero_point() - 0x80) * int32_t(rows()), 305 input_scale() / (output_scale() * float(rows())), 306 int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80)); 307 308 // Compute reference results. 309 for (size_t c = 0; c < channels(); c++) { 310 int32_t acc = 0; 311 for (size_t n = 0; n < rows(); n++) { 312 acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point() - 0x80); 313 } 314 accumulators[c] = acc; 315 output_ref[c] = requantize( 316 acc, input_scale() / (output_scale() * float(rows())), int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80)); 317 output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point() - 0x80); 318 output_fp[c] = std::min<float>(output_fp[c], float(qmax() - 0x80)); 319 output_fp[c] = std::max<float>(output_fp[c], float(qmin() - 0x80)); 320 } 321 322 // Call optimized micro-kernel. 323 gavgpool_minmax(rows(), channels(), 324 input.data(), input_stride() * sizeof(int8_t), 325 zero.data(), 326 output.data(), 327 ¶ms); 328 329 // Verify results. 330 for (size_t c = 0; c < channels(); c++) { 331 ASSERT_LE(int32_t(output[c]), int32_t(qmax() - 0x80)) 332 << "at channel " << c << " / " << channels() << ", rows = " << rows(); 333 ASSERT_GE(int32_t(output[c]), int32_t(qmin() - 0x80)) 334 << "at channel " << c << " / " << channels() << ", rows = " << rows(); 335 ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.5f) 336 << "at channel " << c << " / " << channels() << ", rows = " << rows() 337 << ", accumulator = " << accumulators[c]; 338 ASSERT_EQ(int32_t(output_ref[c]), int32_t(output[c])) 339 << "at channel " << c << " / " << channels() << ", rows = " << rows() 340 << ", accumulator = " << accumulators[c]; 341 } 342 } 343 } 344 Test(xnn_qs8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,xnn_init_qs8_avgpool_minmax_params_fn init_params,xnn_qs8_requantize_fn requantize)345 void Test( 346 xnn_qs8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, 347 xnn_init_qs8_avgpool_minmax_params_fn init_params, 348 xnn_qs8_requantize_fn requantize) const 349 { 350 std::random_device random_device; 351 auto rng = std::mt19937(random_device()); 352 auto i8rng = std::bind( 353 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), rng); 354 355 std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + 356 (rows() - 1) * input_stride() + channels()); 357 std::vector<int32_t, AlignedAllocator<int32_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(int8_t)); 358 std::vector<int8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(int8_t)); 359 std::vector<int8_t> output(channels()); 360 std::vector<int8_t> output_ref(channels()); 361 std::vector<float> output_fp(channels()); 362 std::vector<int32_t> accumulators(channels()); 363 for (size_t iteration = 0; iteration < iterations(); iteration++) { 364 std::generate(input.begin(), input.end(), std::ref(i8rng)); 365 std::fill(output.begin(), output.end(), 0xA5); 366 367 // Prepare parameters. 368 union xnn_qs8_avgpool_minmax_params params; 369 init_params( 370 ¶ms, 371 -int32_t(input_zero_point() - 0x80) * int32_t(rows()), 372 input_scale() / (output_scale() * float(rows())), 373 int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80)); 374 375 // Compute reference results. 376 for (size_t c = 0; c < channels(); c++) { 377 int32_t acc = 0; 378 for (size_t n = 0; n < rows(); n++) { 379 acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point() - 0x80); 380 } 381 accumulators[c] = acc; 382 output_ref[c] = requantize( 383 acc, input_scale() / (output_scale() * float(rows())), int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80)); 384 output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point() - 0x80); 385 output_fp[c] = std::min<float>(output_fp[c], float(qmax() - 0x80)); 386 output_fp[c] = std::max<float>(output_fp[c], float(qmin() - 0x80)); 387 } 388 389 // Call optimized micro-kernel. 390 gavgpool_minmax(rows(), channels(), 391 input.data(), input_stride() * sizeof(int8_t), 392 zero.data(), 393 buffer.data(), 394 output.data(), 395 ¶ms); 396 397 // Verify results. 398 for (size_t c = 0; c < channels(); c++) { 399 ASSERT_LE(int32_t(output[c]), int32_t(qmax() - 0x80)) 400 << "at channel " << c << " / " << channels() << ", rows = " << rows(); 401 ASSERT_GE(int32_t(output[c]), int32_t(qmin() - 0x80)) 402 << "at channel " << c << " / " << channels() << ", rows = " << rows(); 403 ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.5f) 404 << "at channel " << c << " / " << channels() << ", rows = " << rows() 405 << ", accumulator = " << accumulators[c]; 406 ASSERT_EQ(int32_t(output_ref[c]), int32_t(output[c])) 407 << "at channel " << c << " / " << channels() << ", rows = " << rows() 408 << ", accumulator = " << accumulators[c]; 409 } 410 } 411 } 412 Test(xnn_f16_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,xnn_init_f16_scaleminmax_params_fn init_params)413 void Test(xnn_f16_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, xnn_init_f16_scaleminmax_params_fn init_params) const { 414 std::random_device random_device; 415 auto rng = std::mt19937(random_device()); 416 auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng); 417 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng); 418 419 std::vector<uint16_t> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 420 std::vector<uint16_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 421 std::vector<uint16_t> output(channels()); 422 std::vector<float> output_ref(channels()); 423 424 std::fill(zero.begin(), zero.end(), 0); 425 for (size_t iteration = 0; iteration < iterations(); iteration++) { 426 std::generate(input.begin(), input.end(), std::ref(f16rng)); 427 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */); 428 429 // Compute reference results, without clamping. 430 for (size_t c = 0; c < channels(); c++) { 431 float acc = 0.0f; 432 for (size_t n = 0; n < rows(); n++) { 433 acc += fp16_ieee_to_fp32_value(input[n * input_stride() + c]); 434 } 435 output_ref[c] = acc / float(rows()); 436 } 437 438 // Compute clamping parameters. 439 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); 440 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); 441 const float accumulated_range = accumulated_max - accumulated_min; 442 const float output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + float(qmin()) / 255.0f * accumulated_range)); 443 const float output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range)); 444 445 // Clamp reference results. 446 for (float& output_values : output_ref) { 447 output_values = std::max(std::min(output_values, output_max), output_min); 448 } 449 450 // Prepare parameters. 451 xnn_f16_scaleminmax_params params; 452 init_params(¶ms, 453 fp16_ieee_from_fp32_value(1.0f / float(rows())), 454 fp16_ieee_from_fp32_value(output_min), 455 fp16_ieee_from_fp32_value(output_max)); 456 457 // Call optimized micro-kernel. 458 gavgpool_minmax(rows(), channels(), 459 input.data(), input_stride() * sizeof(uint16_t), 460 zero.data(), 461 output.data(), 462 ¶ms); 463 464 // Verify results. 465 for (size_t c = 0; c < channels(); c++) { 466 ASSERT_LE(fp16_ieee_to_fp32_value(output[c]), output_max) 467 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 468 ASSERT_GE(fp16_ieee_to_fp32_value(output[c]), output_min) 469 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 470 ASSERT_NEAR(fp16_ieee_to_fp32_value(output[c]), output_ref[c], std::max(1.0e-4f, std::abs(output_ref[c]) * 1.0e-2f)) 471 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 472 } 473 } 474 } 475 Test(xnn_f16_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,xnn_init_f16_scaleminmax_params_fn init_params)476 void Test(xnn_f16_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, xnn_init_f16_scaleminmax_params_fn init_params) const { 477 std::random_device random_device; 478 auto rng = std::mt19937(random_device()); 479 auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng); 480 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng); 481 482 std::vector<uint16_t> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 483 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 484 std::vector<uint16_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 485 std::vector<uint16_t> output(channels()); 486 std::vector<float> output_ref(channels()); 487 for (size_t iteration = 0; iteration < iterations(); iteration++) { 488 std::generate(input.begin(), input.end(), std::ref(f16rng)); 489 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */); 490 491 // Compute reference results, without clamping. 492 for (size_t c = 0; c < channels(); c++) { 493 float acc = 0.0f; 494 for (size_t n = 0; n < rows(); n++) { 495 acc += fp16_ieee_to_fp32_value(input[n * input_stride() + c]); 496 } 497 output_ref[c] = acc / float(rows()); 498 } 499 500 // Compute clamping parameters. 501 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); 502 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); 503 const float accumulated_range = accumulated_max - accumulated_min; 504 const float output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + float(qmin()) / 255.0f * accumulated_range)); 505 const float output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range)); 506 507 // Prepare parameters. 508 xnn_f16_scaleminmax_params params; 509 init_params(¶ms, 510 fp16_ieee_from_fp32_value(1.0f / float(rows())), 511 fp16_ieee_from_fp32_value(output_min), 512 fp16_ieee_from_fp32_value(output_max)); 513 514 // Clamp reference results. 515 for (float& output_values : output_ref) { 516 output_values = std::max(std::min(output_values, output_max), output_min); 517 } 518 519 // Call optimized micro-kernel. 520 gavgpool_minmax(rows(), channels(), 521 input.data(), input_stride() * sizeof(uint16_t), 522 zero.data(), 523 buffer.data(), 524 output.data(), 525 ¶ms); 526 527 // Verify results. 528 for (size_t c = 0; c < channels(); c++) { 529 ASSERT_LE(fp16_ieee_to_fp32_value(output[c]), output_max) 530 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 531 ASSERT_GE(fp16_ieee_to_fp32_value(output[c]), output_min) 532 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 533 ASSERT_NEAR(fp16_ieee_to_fp32_value(output[c]), output_ref[c], std::abs(output_ref[c]) * 1.0e-0f) 534 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 535 } 536 } 537 } 538 Test(xnn_f32_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,xnn_init_f32_scaleminmax_params_fn init_params)539 void Test(xnn_f32_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, xnn_init_f32_scaleminmax_params_fn init_params) const { 540 std::random_device random_device; 541 auto rng = std::mt19937(random_device()); 542 auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng); 543 544 std::vector<float> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float)); 545 std::vector<float> zero(channels() + XNN_EXTRA_BYTES / sizeof(float)); 546 std::vector<float> output(channels()); 547 std::vector<float> output_ref(channels()); 548 549 std::fill(zero.begin(), zero.end(), 0.0f); 550 for (size_t iteration = 0; iteration < iterations(); iteration++) { 551 std::generate(input.begin(), input.end(), std::ref(f32rng)); 552 std::fill(output.begin(), output.end(), std::nanf("")); 553 554 // Compute reference results, without clamping. 555 for (size_t c = 0; c < channels(); c++) { 556 float acc = 0.0f; 557 for (size_t n = 0; n < rows(); n++) { 558 acc += input[n * input_stride() + c]; 559 } 560 output_ref[c] = acc / float(rows()); 561 } 562 563 // Compute clamping parameters. 564 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); 565 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); 566 const float accumulated_range = accumulated_max - accumulated_min; 567 const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range; 568 const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range; 569 570 // Clamp reference results. 571 for (float& output_values : output_ref) { 572 output_values = std::max(std::min(output_values, output_max), output_min); 573 } 574 575 // Prepare parameters. 576 union xnn_f32_scaleminmax_params params; 577 init_params(¶ms, 1.0f / float(rows()), output_min, output_max); 578 579 // Call optimized micro-kernel. 580 gavgpool_minmax(rows(), channels(), 581 input.data(), input_stride() * sizeof(float), 582 zero.data(), 583 output.data(), 584 ¶ms); 585 586 // Verify results. 587 for (size_t c = 0; c < channels(); c++) { 588 ASSERT_LE(output[c], output_max) 589 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 590 ASSERT_GE(output[c], output_min) 591 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 592 ASSERT_NEAR(output[c], output_ref[c], std::abs(output_ref[c]) * 1.0e-6f) 593 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 594 } 595 } 596 } 597 Test(xnn_f32_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,xnn_init_f32_scaleminmax_params_fn init_params)598 void Test(xnn_f32_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, xnn_init_f32_scaleminmax_params_fn init_params) const { 599 std::random_device random_device; 600 auto rng = std::mt19937(random_device()); 601 auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng); 602 603 std::vector<float> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float)); 604 std::vector<float, AlignedAllocator<float, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(float)); 605 std::vector<float> zero(channels() + XNN_EXTRA_BYTES / sizeof(float)); 606 std::vector<float> output(channels()); 607 std::vector<float> output_ref(channels()); 608 for (size_t iteration = 0; iteration < iterations(); iteration++) { 609 std::generate(input.begin(), input.end(), std::ref(f32rng)); 610 std::fill(output.begin(), output.end(), std::nanf("")); 611 612 // Compute reference results, without clamping. 613 for (size_t c = 0; c < channels(); c++) { 614 float acc = 0.0f; 615 for (size_t n = 0; n < rows(); n++) { 616 acc += input[n * input_stride() + c]; 617 } 618 output_ref[c] = acc / float(rows()); 619 } 620 621 // Compute clamping parameters. 622 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend()); 623 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend()); 624 const float accumulated_range = accumulated_max - accumulated_min; 625 const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range; 626 const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range; 627 628 // Prepare parameters. 629 union xnn_f32_scaleminmax_params params; 630 init_params(¶ms, 1.0f / float(rows()), output_min, output_max); 631 632 // Clamp reference results. 633 for (float& output_values : output_ref) { 634 output_values = std::max(std::min(output_values, output_max), output_min); 635 } 636 637 // Call optimized micro-kernel. 638 gavgpool_minmax(rows(), channels(), 639 input.data(), input_stride() * sizeof(float), 640 zero.data(), 641 buffer.data(), 642 output.data(), 643 ¶ms); 644 645 // Verify results. 646 for (size_t c = 0; c < channels(); c++) { 647 ASSERT_LE(output[c], output_max) 648 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 649 ASSERT_GE(output[c], output_min) 650 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 651 ASSERT_NEAR(output[c], output_ref[c], std::abs(output_ref[c]) * 1.0e-6f) 652 << "at position " << c << ", rows = " << rows() << ", channels = " << channels(); 653 } 654 } 655 } 656 657 private: 658 size_t rows_{1}; 659 size_t channels_{1}; 660 size_t channel_tile_{1}; 661 size_t input_stride_{0}; 662 float input_scale_{1.25f}; 663 float output_scale_{0.75f}; 664 uint8_t input_zero_point_{121}; 665 uint8_t output_zero_point_{133}; 666 uint8_t qmin_{0}; 667 uint8_t qmax_{255}; 668 size_t iterations_{15}; 669 }; 670