1 // Copyright 2019 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #pragma once 7 8 #include <gtest/gtest.h> 9 10 #include <algorithm> 11 #include <cassert> 12 #include <cstddef> 13 #include <cstdlib> 14 #include <functional> 15 #include <random> 16 #include <vector> 17 18 #include <fp16.h> 19 20 #include <xnnpack.h> 21 #include <xnnpack/params-init.h> 22 #include <xnnpack/params.h> 23 24 25 class VBinaryCMicrokernelTester { 26 public: 27 enum class OpType { 28 AddC, 29 DivC, 30 RDivC, 31 MaxC, 32 MinC, 33 MulC, 34 SqrDiffC, 35 SubC, 36 RSubC, 37 }; 38 batch_size(size_t batch_size)39 inline VBinaryCMicrokernelTester& batch_size(size_t batch_size) { 40 assert(batch_size != 0); 41 this->batch_size_ = batch_size; 42 return *this; 43 } 44 batch_size()45 inline size_t batch_size() const { 46 return this->batch_size_; 47 } 48 inplace(bool inplace)49 inline VBinaryCMicrokernelTester& inplace(bool inplace) { 50 this->inplace_ = inplace; 51 return *this; 52 } 53 inplace()54 inline bool inplace() const { 55 return this->inplace_; 56 } 57 qmin(uint8_t qmin)58 inline VBinaryCMicrokernelTester& qmin(uint8_t qmin) { 59 this->qmin_ = qmin; 60 return *this; 61 } 62 qmin()63 inline uint8_t qmin() const { 64 return this->qmin_; 65 } 66 qmax(uint8_t qmax)67 inline VBinaryCMicrokernelTester& qmax(uint8_t qmax) { 68 this->qmax_ = qmax; 69 return *this; 70 } 71 qmax()72 inline uint8_t qmax() const { 73 return this->qmax_; 74 } 75 iterations(size_t iterations)76 inline VBinaryCMicrokernelTester& iterations(size_t iterations) { 77 this->iterations_ = iterations; 78 return *this; 79 } 80 iterations()81 inline size_t iterations() const { 82 return this->iterations_; 83 } 84 Test(xnn_f16_vbinary_ukernel_function vbinaryc,OpType op_type)85 void Test(xnn_f16_vbinary_ukernel_function vbinaryc, OpType op_type) const { 86 std::random_device random_device; 87 auto rng = std::mt19937(random_device()); 88 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng); 89 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng); 90 91 std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 92 const uint16_t b = f16rng(); 93 std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0)); 94 std::vector<float> y_ref(batch_size()); 95 for (size_t iteration = 0; iteration < iterations(); iteration++) { 96 std::generate(a.begin(), a.end(), std::ref(f16rng)); 97 if (inplace()) { 98 std::generate(y.begin(), y.end(), std::ref(f16rng)); 99 } else { 100 std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */); 101 } 102 const uint16_t* a_data = inplace() ? y.data() : a.data(); 103 104 // Compute reference results. 105 for (size_t i = 0; i < batch_size(); i++) { 106 switch (op_type) { 107 case OpType::AddC: 108 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b); 109 break; 110 case OpType::DivC: 111 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b); 112 break; 113 case OpType::RDivC: 114 y_ref[i] = fp16_ieee_to_fp32_value(b) / fp16_ieee_to_fp32_value(a_data[i]); 115 break; 116 case OpType::MaxC: 117 y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b)); 118 break; 119 case OpType::MinC: 120 y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b)); 121 break; 122 case OpType::MulC: 123 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b); 124 break; 125 case OpType::SqrDiffC: 126 { 127 const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b); 128 y_ref[i] = diff * diff; 129 break; 130 } 131 case OpType::SubC: 132 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b); 133 break; 134 case OpType::RSubC: 135 y_ref[i] = fp16_ieee_to_fp32_value(b) - fp16_ieee_to_fp32_value(a_data[i]); 136 break; 137 } 138 } 139 // Call optimized micro-kernel. 140 vbinaryc(batch_size() * sizeof(uint16_t), a_data, &b, y.data(), nullptr); 141 142 // Verify results. 143 for (size_t i = 0; i < batch_size(); i++) { 144 ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f)) 145 << "at " << i << " / " << batch_size(); 146 } 147 } 148 } 149 Test(xnn_f16_vbinary_minmax_ukernel_function vbinaryc_minmax,OpType op_type,xnn_init_f16_minmax_params_fn init_params)150 void Test(xnn_f16_vbinary_minmax_ukernel_function vbinaryc_minmax, OpType op_type, xnn_init_f16_minmax_params_fn init_params) const { 151 std::random_device random_device; 152 auto rng = std::mt19937(random_device()); 153 auto f32rng = std::bind(std::uniform_real_distribution<float>(1.0e-3f, 1.0f), rng); 154 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng); 155 156 std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 157 const uint16_t b = f16rng(); 158 std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0)); 159 std::vector<float> y_ref(batch_size()); 160 for (size_t iteration = 0; iteration < iterations(); iteration++) { 161 std::generate(a.begin(), a.end(), std::ref(f16rng)); 162 if (inplace()) { 163 std::generate(y.begin(), y.end(), std::ref(f16rng)); 164 } else { 165 std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */); 166 } 167 const uint16_t* a_data = inplace() ? y.data() : a.data(); 168 169 // Compute reference results. 170 for (size_t i = 0; i < batch_size(); i++) { 171 switch (op_type) { 172 case OpType::AddC: 173 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b); 174 break; 175 case OpType::DivC: 176 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b); 177 break; 178 case OpType::RDivC: 179 y_ref[i] = fp16_ieee_to_fp32_value(b) / fp16_ieee_to_fp32_value(a_data[i]); 180 break; 181 case OpType::MaxC: 182 y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b)); 183 break; 184 case OpType::MinC: 185 y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b)); 186 break; 187 case OpType::MulC: 188 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b); 189 break; 190 case OpType::SqrDiffC: 191 { 192 const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b); 193 y_ref[i] = diff * diff; 194 break; 195 } 196 case OpType::SubC: 197 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b); 198 break; 199 case OpType::RSubC: 200 y_ref[i] = fp16_ieee_to_fp32_value(b) - fp16_ieee_to_fp32_value(a_data[i]); 201 break; 202 } 203 } 204 const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend()); 205 const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend()); 206 const float accumulated_range = accumulated_max - accumulated_min; 207 const float y_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ? 208 (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) : 209 +std::numeric_limits<float>::infinity())); 210 const float y_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ? 211 (accumulated_min + accumulated_range / 255.0f * float(qmin())) : 212 -std::numeric_limits<float>::infinity())); 213 for (size_t i = 0; i < batch_size(); i++) { 214 y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min); 215 } 216 217 // Prepare parameters. 218 xnn_f16_minmax_params params; 219 init_params(¶ms, 220 fp16_ieee_from_fp32_value(y_min), fp16_ieee_from_fp32_value(y_max)); 221 222 // Call optimized micro-kernel. 223 vbinaryc_minmax(batch_size() * sizeof(uint16_t), a_data, &b, y.data(), ¶ms); 224 225 // Verify results. 226 for (size_t i = 0; i < batch_size(); i++) { 227 ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f)) 228 << "at " << i << " / " << batch_size(); 229 } 230 } 231 } 232 233 void Test(xnn_f32_vbinary_ukernel_function vbinaryc, OpType op_type, xnn_init_f32_default_params_fn init_params = nullptr) const { 234 std::random_device random_device; 235 auto rng = std::mt19937(random_device()); 236 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng); 237 238 std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float)); 239 const float b = f32rng(); 240 std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0)); 241 std::vector<float> y_ref(batch_size()); 242 for (size_t iteration = 0; iteration < iterations(); iteration++) { 243 std::generate(a.begin(), a.end(), std::ref(f32rng)); 244 if (inplace()) { 245 std::generate(y.begin(), y.end(), std::ref(f32rng)); 246 } else { 247 std::fill(y.begin(), y.end(), nanf("")); 248 } 249 const float* a_data = inplace() ? y.data() : a.data(); 250 251 // Compute reference results. 252 for (size_t i = 0; i < batch_size(); i++) { 253 switch (op_type) { 254 case OpType::AddC: 255 y_ref[i] = a_data[i] + b; 256 break; 257 case OpType::DivC: 258 y_ref[i] = a_data[i] / b; 259 break; 260 case OpType::RDivC: 261 y_ref[i] = b / a_data[i]; 262 break; 263 case OpType::MaxC: 264 y_ref[i] = std::max<float>(a_data[i], b); 265 break; 266 case OpType::MinC: 267 y_ref[i] = std::min<float>(a_data[i], b); 268 break; 269 case OpType::MulC: 270 y_ref[i] = a_data[i] * b; 271 break; 272 case OpType::SqrDiffC: 273 { 274 const float diff = a_data[i] - b; 275 y_ref[i] = diff * diff; 276 break; 277 } 278 case OpType::SubC: 279 y_ref[i] = a_data[i] - b; 280 break; 281 case OpType::RSubC: 282 y_ref[i] = b - a_data[i]; 283 break; 284 } 285 } 286 287 // Prepare parameters. 288 xnn_f32_default_params params; 289 if (init_params) { 290 init_params(¶ms); 291 } 292 293 // Call optimized micro-kernel. 294 vbinaryc(batch_size() * sizeof(float), a_data, &b, y.data(), init_params != nullptr ? ¶ms : nullptr); 295 296 // Verify results. 297 for (size_t i = 0; i < batch_size(); i++) { 298 ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f) 299 << "at " << i << " / " << batch_size(); 300 } 301 } 302 } 303 Test(xnn_f32_vbinary_relu_ukernel_function vbinaryc_relu,OpType op_type)304 void Test(xnn_f32_vbinary_relu_ukernel_function vbinaryc_relu, OpType op_type) const { 305 std::random_device random_device; 306 auto rng = std::mt19937(random_device()); 307 auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng); 308 309 std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float)); 310 const float b = f32rng(); 311 std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0)); 312 std::vector<float> y_ref(batch_size()); 313 for (size_t iteration = 0; iteration < iterations(); iteration++) { 314 std::generate(a.begin(), a.end(), std::ref(f32rng)); 315 if (inplace()) { 316 std::generate(y.begin(), y.end(), std::ref(f32rng)); 317 } else { 318 std::fill(y.begin(), y.end(), nanf("")); 319 } 320 const float* a_data = inplace() ? y.data() : a.data(); 321 322 // Compute reference results. 323 for (size_t i = 0; i < batch_size(); i++) { 324 switch (op_type) { 325 case OpType::AddC: 326 y_ref[i] = a_data[i] + b; 327 break; 328 case OpType::DivC: 329 y_ref[i] = a_data[i] / b; 330 break; 331 case OpType::RDivC: 332 y_ref[i] = b / a_data[i]; 333 break; 334 case OpType::MaxC: 335 y_ref[i] = std::max<float>(a_data[i], b); 336 break; 337 case OpType::MinC: 338 y_ref[i] = std::min<float>(a_data[i], b); 339 break; 340 case OpType::MulC: 341 y_ref[i] = a_data[i] * b; 342 break; 343 case OpType::SqrDiffC: 344 { 345 const float diff = a_data[i] - b; 346 y_ref[i] = diff * diff; 347 break; 348 } 349 case OpType::SubC: 350 y_ref[i] = a_data[i] - b; 351 break; 352 case OpType::RSubC: 353 y_ref[i] = b - a_data[i]; 354 break; 355 } 356 } 357 for (size_t i = 0; i < batch_size(); i++) { 358 y_ref[i] = std::max(y_ref[i], 0.0f); 359 } 360 361 // Call optimized micro-kernel. 362 vbinaryc_relu(batch_size() * sizeof(float), a_data, &b, y.data(), nullptr); 363 364 // Verify results. 365 for (size_t i = 0; i < batch_size(); i++) { 366 ASSERT_GE(y[i], 0.0f) 367 << "at " << i << " / " << batch_size(); 368 ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f) 369 << "at " << i << " / " << batch_size(); 370 } 371 } 372 } 373 Test(xnn_f32_vbinary_minmax_ukernel_function vbinaryc_minmax,OpType op_type,xnn_init_f32_minmax_params_fn init_params)374 void Test(xnn_f32_vbinary_minmax_ukernel_function vbinaryc_minmax, OpType op_type, xnn_init_f32_minmax_params_fn init_params) const { 375 std::random_device random_device; 376 auto rng = std::mt19937(random_device()); 377 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng); 378 379 std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float)); 380 const float b = f32rng(); 381 std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0)); 382 std::vector<float> y_ref(batch_size()); 383 for (size_t iteration = 0; iteration < iterations(); iteration++) { 384 std::generate(a.begin(), a.end(), std::ref(f32rng)); 385 if (inplace()) { 386 std::generate(y.begin(), y.end(), std::ref(f32rng)); 387 } else { 388 std::fill(y.begin(), y.end(), nanf("")); 389 } 390 const float* a_data = inplace() ? y.data() : a.data(); 391 392 // Compute reference results. 393 for (size_t i = 0; i < batch_size(); i++) { 394 switch (op_type) { 395 case OpType::AddC: 396 y_ref[i] = a_data[i] + b; 397 break; 398 case OpType::DivC: 399 y_ref[i] = a_data[i] / b; 400 break; 401 case OpType::RDivC: 402 y_ref[i] = b / a_data[i]; 403 break; 404 case OpType::MaxC: 405 y_ref[i] = std::max<float>(a_data[i], b); 406 break; 407 case OpType::MinC: 408 y_ref[i] = std::min<float>(a_data[i], b); 409 break; 410 case OpType::MulC: 411 y_ref[i] = a_data[i] * b; 412 break; 413 case OpType::SqrDiffC: 414 { 415 const float diff = a_data[i] - b; 416 y_ref[i] = diff * diff; 417 break; 418 } 419 case OpType::SubC: 420 y_ref[i] = a_data[i] - b; 421 break; 422 case OpType::RSubC: 423 y_ref[i] = b - a_data[i]; 424 break; 425 } 426 } 427 const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend()); 428 const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend()); 429 const float accumulated_range = accumulated_max - accumulated_min; 430 const float y_max = accumulated_range > 0.0f ? 431 (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) : 432 +std::numeric_limits<float>::infinity(); 433 const float y_min = accumulated_range > 0.0f ? 434 (accumulated_min + accumulated_range / 255.0f * float(qmin())) : 435 -std::numeric_limits<float>::infinity(); 436 for (size_t i = 0; i < batch_size(); i++) { 437 y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min); 438 } 439 440 // Prepare parameters. 441 xnn_f32_minmax_params params; 442 init_params(¶ms, y_min, y_max); 443 444 // Call optimized micro-kernel. 445 vbinaryc_minmax(batch_size() * sizeof(float), a_data, &b, y.data(), ¶ms); 446 447 // Verify results. 448 for (size_t i = 0; i < batch_size(); i++) { 449 ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f) 450 << "at " << i << " / " << batch_size(); 451 } 452 } 453 } 454 455 private: 456 size_t batch_size_{1}; 457 bool inplace_{false}; 458 uint8_t qmin_{0}; 459 uint8_t qmax_{255}; 460 size_t iterations_{15}; 461 }; 462