// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>
#include <xnnpack/requantization.h>


class GAvgPoolMicrokernelTester {
 public:
  enum class Variant {
    Native,
    Scalar,
  };

  inline GAvgPoolMicrokernelTester& rows(size_t rows) {
    assert(rows != 0);
    this->rows_ = rows;
    return *this;
  }

  inline size_t rows() const {
    return this->rows_;
  }

  inline GAvgPoolMicrokernelTester& channels(size_t channels) {
    assert(channels != 0);
    this->channels_ = channels;
    return *this;
  }

  inline size_t channels() const {
    return this->channels_;
  }

  inline GAvgPoolMicrokernelTester& channel_tile(size_t channel_tile) {
    assert(channel_tile != 0);
    this->channel_tile_ = channel_tile;
    return *this;
  }

  inline size_t channel_tile() const {
    return this->channel_tile_;
  }

  inline GAvgPoolMicrokernelTester& input_stride(size_t input_stride) {
    assert(input_stride != 0);
    this->input_stride_ = input_stride;
    return *this;
  }

  inline size_t input_stride() const {
    if (this->input_stride_ == 0) {
      return channels();
    } else {
      assert(this->input_stride_ >= channels());
      return this->input_stride_;
    }
  }

  inline GAvgPoolMicrokernelTester& input_scale(float input_scale) {
    assert(input_scale > 0.0f);
    assert(std::isnormal(input_scale));
    this->input_scale_ = input_scale;
    return *this;
  }

  inline float input_scale() const {
    return this->input_scale_;
  }

  inline GAvgPoolMicrokernelTester& input_zero_point(uint8_t input_zero_point) {
    this->input_zero_point_ = input_zero_point;
    return *this;
  }

  inline uint8_t input_zero_point() const {
    return this->input_zero_point_;
  }

  inline GAvgPoolMicrokernelTester& output_scale(float output_scale) {
    assert(output_scale > 0.0f);
    assert(std::isnormal(output_scale));
    this->output_scale_ = output_scale;
    return *this;
  }

  inline float output_scale() const {
    return this->output_scale_;
  }

  inline GAvgPoolMicrokernelTester& output_zero_point(uint8_t output_zero_point) {
    this->output_zero_point_ = output_zero_point;
    return *this;
  }

  inline uint8_t output_zero_point() const {
    return this->output_zero_point_;
  }

  inline GAvgPoolMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline GAvgPoolMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline GAvgPoolMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }
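  // Tests a QU8 global-average-pooling micro-kernel that reduces all rows in a
  // single pass. The reference computation below implies, per channel:
  //   acc = bias + sum(input[n][c], n = 0..rows-1),  bias = -input_zero_point * rows
  //   output = clamp(requantize(acc * scale) + output_zero_point, qmin, qmax)
  // with scale = input_scale / (output_scale * rows). For example, with the
  // default input_zero_point of 121, rows = 7, and every input equal to 121,
  // the bias of -847 cancels the sum of 847, so acc = 0 and the output is the
  // output zero point (133 by default), before clamping.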
  void Test(xnn_qu8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      (rows() - 1) * input_stride() + channels());
    std::vector<uint8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> output(channels());
    std::vector<uint8_t> output_ref(channels());
    std::vector<float> output_fp(channels());
    std::vector<int32_t> accumulators(channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Prepare parameters.
      union xnn_qu8_avgpool_params quantization_params = { };
      switch (variant) {
        case Variant::Native:
          quantization_params = xnn_init_qu8_avgpool_params(
            -int32_t(input_zero_point()) * int32_t(rows()),
            input_scale() / (output_scale() * float(rows())),
            output_zero_point(), qmin(), qmax());
          break;
        case Variant::Scalar:
          quantization_params = xnn_init_scalar_qu8_avgpool_params(
            -int32_t(input_zero_point()) * int32_t(rows()),
            input_scale() / (output_scale() * float(rows())),
            output_zero_point(), qmin(), qmax());
          break;
      }
      const union xnn_qu8_avgpool_params scalar_quantization_params =
        xnn_init_scalar_qu8_avgpool_params(
          -int32_t(input_zero_point()) * int32_t(rows()),
          input_scale() / (output_scale() * float(rows())),
          output_zero_point(), qmin(), qmax());

      // Compute reference results.
      for (size_t c = 0; c < channels(); c++) {
        int32_t acc = scalar_quantization_params.scalar.bias;
        for (size_t n = 0; n < rows(); n++) {
          acc += input[n * input_stride() + c];
        }
        accumulators[c] = acc;
        output_ref[c] = xnn_qu8_quantize_avgpool(acc, scalar_quantization_params);
        output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point());
        output_fp[c] = std::min<float>(output_fp[c], float(qmax()));
        output_fp[c] = std::max<float>(output_fp[c], float(qmin()));
      }

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(uint8_t),
        zero.data(),
        output.data(),
        &quantization_params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(uint32_t(output[c]), uint32_t(qmax()))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_GE(uint32_t(output[c]), uint32_t(qmin()))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.5f)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
          << ", acc = " << accumulators[c];
        ASSERT_EQ(uint32_t(output_ref[c]), uint32_t(output[c]))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
          << ", acc = " << accumulators[c];
      }
    }
  }
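  // Tests a QU8 global-average-pooling micro-kernel that spills partial sums
  // into a caller-provided scratch buffer between passes. The buffer holds one
  // int32 accumulator per channel and is 64-byte aligned here, on the
  // assumption that vectorized kernels access it with aligned loads and stores.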
  void Test(xnn_qu8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      (rows() - 1) * input_stride() + channels());
    std::vector<int32_t, AlignedAllocator<int32_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> output(channels());
    std::vector<uint8_t> output_ref(channels());
    std::vector<float> output_fp(channels());
    std::vector<int32_t> accumulators(channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Prepare parameters.
      union xnn_qu8_avgpool_params quantization_params = { };
      switch (variant) {
        case Variant::Native:
          quantization_params = xnn_init_qu8_avgpool_params(
            -int32_t(input_zero_point()) * int32_t(rows()),
            input_scale() / (output_scale() * float(rows())),
            output_zero_point(), qmin(), qmax());
          break;
        case Variant::Scalar:
          quantization_params = xnn_init_scalar_qu8_avgpool_params(
            -int32_t(input_zero_point()) * int32_t(rows()),
            input_scale() / (output_scale() * float(rows())),
            output_zero_point(), qmin(), qmax());
          break;
      }
      const union xnn_qu8_avgpool_params scalar_quantization_params =
        xnn_init_scalar_qu8_avgpool_params(
          -int32_t(input_zero_point()) * int32_t(rows()),
          input_scale() / (output_scale() * float(rows())),
          output_zero_point(), qmin(), qmax());

      // Compute reference results.
      for (size_t c = 0; c < channels(); c++) {
        int32_t acc = scalar_quantization_params.scalar.bias;
        for (size_t n = 0; n < rows(); n++) {
          acc += input[n * input_stride() + c];
        }

        accumulators[c] = acc;
        output_ref[c] = xnn_qu8_quantize_avgpool(acc, scalar_quantization_params);
        output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point());
        output_fp[c] = std::min<float>(output_fp[c], float(qmax()));
        output_fp[c] = std::max<float>(output_fp[c], float(qmin()));
      }

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(uint8_t),
        zero.data(),
        buffer.data(),
        output.data(),
        &quantization_params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(uint32_t(output[c]), uint32_t(qmax()))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_GE(uint32_t(output[c]), uint32_t(qmin()))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.5f)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
          << ", acc = " << accumulators[c];
        ASSERT_EQ(uint32_t(output_ref[c]), uint32_t(output[c]))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
          << ", acc = " << accumulators[c];
      }
    }
  }
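  // The QS8 tests reuse the tester's unsigned configuration: zero points and
  // qmin/qmax are stored as uint8_t, so they are shifted by 0x80 (128) below
  // to map them onto the signed int8 range [-128, 127] expected by QS8 kernels.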
  void Test(xnn_qs8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i8rng = std::bind(
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), rng);

    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
      (rows() - 1) * input_stride() + channels());
    std::vector<int8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> output(channels());
    std::vector<int8_t> output_ref(channels());
    std::vector<float> output_fp(channels());
    std::vector<int32_t> accumulators(channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(i8rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Prepare parameters.
      union xnn_qs8_avgpool_params quantization_params = { };
      switch (variant) {
        case Variant::Native:
          quantization_params = xnn_init_qs8_avgpool_params(
            -int32_t(input_zero_point() - 0x80) * int32_t(rows()),
            input_scale() / (output_scale() * float(rows())),
            int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
          break;
        case Variant::Scalar:
          quantization_params = xnn_init_scalar_qs8_avgpool_params(
            -int32_t(input_zero_point() - 0x80) * int32_t(rows()),
            input_scale() / (output_scale() * float(rows())),
            int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
          break;
      }
      const union xnn_qs8_avgpool_params scalar_quantization_params =
        xnn_init_scalar_qs8_avgpool_params(
          -int32_t(input_zero_point() - 0x80) * int32_t(rows()),
          input_scale() / (output_scale() * float(rows())),
          int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));

      // Compute reference results.
      for (size_t c = 0; c < channels(); c++) {
        int32_t acc = scalar_quantization_params.scalar.bias;
        for (size_t n = 0; n < rows(); n++) {
          acc += input[n * input_stride() + c];
        }
        accumulators[c] = acc;
        output_ref[c] = xnn_qs8_quantize_avgpool(acc, scalar_quantization_params);
        output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point() - 0x80);
        output_fp[c] = std::min<float>(output_fp[c], float(qmax() - 0x80));
        output_fp[c] = std::max<float>(output_fp[c], float(qmin() - 0x80));
      }

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(int8_t),
        zero.data(),
        output.data(),
        &quantization_params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(int32_t(output[c]), int32_t(qmax() - 0x80))
          << "at channel " << c << " / " << channels() << ", rows = " << rows();
        ASSERT_GE(int32_t(output[c]), int32_t(qmin() - 0x80))
          << "at channel " << c << " / " << channels() << ", rows = " << rows();
        ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.5f)
          << "at channel " << c << " / " << channels() << ", rows = " << rows()
          << ", accumulator = " << accumulators[c];
        ASSERT_EQ(int32_t(output_ref[c]), int32_t(output[c]))
          << "at channel " << c << " / " << channels() << ", rows = " << rows()
          << ", accumulator = " << accumulators[c];
      }
    }
  }
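  // Multipass variant of the QS8 test above; see the QU8 multipass test for
  // the role of the 64-byte-aligned int32 scratch buffer.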
  void Test(xnn_qs8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i8rng = std::bind(
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), rng);

    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
      (rows() - 1) * input_stride() + channels());
    std::vector<int32_t, AlignedAllocator<int32_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> output(channels());
    std::vector<int8_t> output_ref(channels());
    std::vector<float> output_fp(channels());
    std::vector<int32_t> accumulators(channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(i8rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Prepare parameters.
      union xnn_qs8_avgpool_params quantization_params = { };
      switch (variant) {
        case Variant::Native:
          quantization_params = xnn_init_qs8_avgpool_params(
            -int32_t(input_zero_point() - 0x80) * int32_t(rows()),
            input_scale() / (output_scale() * float(rows())),
            int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
          break;
        case Variant::Scalar:
          quantization_params = xnn_init_scalar_qs8_avgpool_params(
            -int32_t(input_zero_point() - 0x80) * int32_t(rows()),
            input_scale() / (output_scale() * float(rows())),
            int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
          break;
      }
      const union xnn_qs8_avgpool_params scalar_quantization_params =
        xnn_init_scalar_qs8_avgpool_params(
          -int32_t(input_zero_point() - 0x80) * int32_t(rows()),
          input_scale() / (output_scale() * float(rows())),
          int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));

      // Compute reference results.
      for (size_t c = 0; c < channels(); c++) {
        int32_t acc = scalar_quantization_params.scalar.bias;
        for (size_t n = 0; n < rows(); n++) {
          acc += input[n * input_stride() + c];
        }
        accumulators[c] = acc;
        output_ref[c] = xnn_qs8_quantize_avgpool(acc, scalar_quantization_params);
        output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point() - 0x80);
        output_fp[c] = std::min<float>(output_fp[c], float(qmax() - 0x80));
        output_fp[c] = std::max<float>(output_fp[c], float(qmin() - 0x80));
      }

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(int8_t),
        zero.data(),
        buffer.data(),
        output.data(),
        &quantization_params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(int32_t(output[c]), int32_t(qmax() - 0x80))
          << "at channel " << c << " / " << channels() << ", rows = " << rows();
        ASSERT_GE(int32_t(output[c]), int32_t(qmin() - 0x80))
          << "at channel " << c << " / " << channels() << ", rows = " << rows();
        ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.5f)
          << "at channel " << c << " / " << channels() << ", rows = " << rows()
          << ", accumulator = " << accumulators[c];
        ASSERT_EQ(int32_t(output_ref[c]), int32_t(output[c]))
          << "at channel " << c << " / " << channels() << ", rows = " << rows()
          << ", accumulator = " << accumulators[c];
      }
    }
  }
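  // The floating-point tests have no fixed quantization grid, so clamping
  // bounds are derived each iteration from the observed range of the reference
  // outputs, with qmin()/qmax() selecting fractions of that range:
  //   output_min = acc_min + (qmin / 255) * (acc_max - acc_min)
  //   output_max = acc_max - ((255 - qmax) / 255) * (acc_max - acc_min)
  // For F16, the bounds are additionally rounded to half precision so that the
  // reference clamp matches what the kernel can actually represent.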
  void Test(xnn_f16_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> output(channels());
    std::vector<float> output_ref(channels());

    std::fill(zero.begin(), zero.end(), 0);
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f16rng));
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results, without clamping.
      for (size_t c = 0; c < channels(); c++) {
        float acc = 0.0f;
        for (size_t n = 0; n < rows(); n++) {
          acc += fp16_ieee_to_fp32_value(input[n * input_stride() + c]);
        }
        output_ref[c] = acc / float(rows());
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + float(qmin()) / 255.0f * accumulated_range));
      const float output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range));

      // Clamp reference results.
      for (float& output_values : output_ref) {
        output_values = std::max(std::min(output_values, output_max), output_min);
      }

      // Prepare parameters.
      xnn_f16_scaleminmax_params params = xnn_init_f16_scaleminmax_params(
        fp16_ieee_from_fp32_value(1.0f / float(rows())),
        fp16_ieee_from_fp32_value(output_min),
        fp16_ieee_from_fp32_value(output_max));

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(uint16_t),
        zero.data(),
        output.data(),
        &params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(fp16_ieee_to_fp32_value(output[c]), output_max)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_GE(fp16_ieee_to_fp32_value(output[c]), output_min)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_NEAR(fp16_ieee_to_fp32_value(output[c]), output_ref[c], std::max(1.0e-4f, std::abs(output_ref[c]) * 1.0e-2f))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
      }
    }
  }
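  // Multipass variant of the F16 test above; partial results are staged in a
  // 64-byte-aligned half-precision (uint16_t) scratch buffer between passes.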
  void Test(xnn_f16_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> output(channels());
    std::vector<float> output_ref(channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f16rng));
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results, without clamping.
      for (size_t c = 0; c < channels(); c++) {
        float acc = 0.0f;
        for (size_t n = 0; n < rows(); n++) {
          acc += fp16_ieee_to_fp32_value(input[n * input_stride() + c]);
        }
        output_ref[c] = acc / float(rows());
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + float(qmin()) / 255.0f * accumulated_range));
      const float output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range));

      // Prepare parameters.
      xnn_f16_scaleminmax_params params = xnn_init_f16_scaleminmax_params(
        fp16_ieee_from_fp32_value(1.0f / float(rows())),
        fp16_ieee_from_fp32_value(output_min),
        fp16_ieee_from_fp32_value(output_max));

      // Clamp reference results.
      for (float& output_values : output_ref) {
        output_values = std::max(std::min(output_values, output_max), output_min);
      }

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(uint16_t),
        zero.data(),
        buffer.data(),
        output.data(),
        &params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(fp16_ieee_to_fp32_value(output[c]), output_max)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_GE(fp16_ieee_to_fp32_value(output[c]), output_min)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_NEAR(fp16_ieee_to_fp32_value(output[c]), output_ref[c], std::abs(output_ref[c]) * 1.0e-0f)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
      }
    }
  }
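  // The F32 tests exercise both parameter-initialization paths:
  // Variant::Native uses the default (possibly architecture-specific) layout
  // from xnn_init_f32_scaleminmax_params, while Variant::Scalar forces the
  // portable scalar layout via xnn_init_scalar_f32_scaleminmax_params.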
  void Test(xnn_f32_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng);

    std::vector<float> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> zero(channels() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> output(channels());
    std::vector<float> output_ref(channels());

    std::fill(zero.begin(), zero.end(), 0.0f);
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results, without clamping.
      for (size_t c = 0; c < channels(); c++) {
        float acc = 0.0f;
        for (size_t n = 0; n < rows(); n++) {
          acc += input[n * input_stride() + c];
        }
        output_ref[c] = acc / float(rows());
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range;
      const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range;

      // Clamp reference results.
      for (float& output_values : output_ref) {
        output_values = std::max(std::min(output_values, output_max), output_min);
      }

      // Prepare parameters.
      union xnn_f32_scaleminmax_params params = { };
      switch (variant) {
        case Variant::Native:
          params = xnn_init_f32_scaleminmax_params(
            1.0f / float(rows()), output_min, output_max);
          break;
        case Variant::Scalar:
          params = xnn_init_scalar_f32_scaleminmax_params(
            1.0f / float(rows()), output_min, output_max);
          break;
      }

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(float),
        zero.data(),
        output.data(),
        &params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(output[c], output_max)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_GE(output[c], output_min)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_NEAR(output[c], output_ref[c], std::abs(output_ref[c]) * 1.0e-6f)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
      }
    }
  }
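  // Multipass variant of the F32 test above, with a 64-byte-aligned float
  // scratch buffer for partial sums.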
  void Test(xnn_f32_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng);

    std::vector<float> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float, AlignedAllocator<float, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> zero(channels() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> output(channels());
    std::vector<float> output_ref(channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results, without clamping.
      for (size_t c = 0; c < channels(); c++) {
        float acc = 0.0f;
        for (size_t n = 0; n < rows(); n++) {
          acc += input[n * input_stride() + c];
        }
        output_ref[c] = acc / float(rows());
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range;
      const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range;

      // Prepare parameters.
      union xnn_f32_scaleminmax_params params = { };
      switch (variant) {
        case Variant::Native:
          params = xnn_init_f32_scaleminmax_params(
            1.0f / float(rows()), output_min, output_max);
          break;
        case Variant::Scalar:
          params = xnn_init_scalar_f32_scaleminmax_params(
            1.0f / float(rows()), output_min, output_max);
          break;
      }

      // Clamp reference results.
      for (float& output_values : output_ref) {
        output_values = std::max(std::min(output_values, output_max), output_min);
      }

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(float),
        zero.data(),
        buffer.data(),
        output.data(),
        &params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(output[c], output_max)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_GE(output[c], output_min)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_NEAR(output[c], output_ref[c], std::abs(output_ref[c]) * 1.0e-6f)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
      }
    }
  }
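 // The defaults below give an asymmetric quantized configuration (distinct
 // input/output scales and zero points), while qmin = 0 and qmax = 255 leave
 // the output range effectively unclamped unless a test overrides them.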
 private:
  size_t rows_{1};
  size_t channels_{1};
  size_t channel_tile_{1};
  size_t input_stride_{0};
  float input_scale_{1.25f};
  float output_scale_{0.75f};
  uint8_t input_zero_point_{121};
  uint8_t output_zero_point_{133};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  size_t iterations_{15};
};
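// Example usage (a minimal sketch; xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8
// is a hypothetical kernel symbol standing in for whichever micro-kernel a
// test file actually instantiates):
//
//   TEST(QU8_GAVGPOOL_7X__NEON_C8, channels_eq_8) {
//     GAvgPoolMicrokernelTester()
//       .rows(7)
//       .channels(8)
//       .Test(xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8);
//   }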