// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>


class VBinaryMicrokernelTester {
 public:
  enum class OpType {
    Add,
    Div,
    Max,
    Min,
    Mul,
    Sub,
    SqrDiff,
  };

  inline VBinaryMicrokernelTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline VBinaryMicrokernelTester& inplace_a(bool inplace_a) {
    this->inplace_a_ = inplace_a;
    return *this;
  }

  inline bool inplace_a() const {
    return this->inplace_a_;
  }

  inline VBinaryMicrokernelTester& inplace_b(bool inplace_b) {
    this->inplace_b_ = inplace_b;
    return *this;
  }

  inline bool inplace_b() const {
    return this->inplace_b_;
  }

  inline VBinaryMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline VBinaryMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline VBinaryMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  void Test(xnn_f16_vbinary_ukernel_function vbinary, OpType op_type) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f16rng));
      std::generate(b.begin(), b.end(), std::ref(f16rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* a_data = inplace_a() ? y.data() : a.data();
      const uint16_t* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
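      // The reference path converts each half-precision input to fp32 and
      // evaluates the operation in single precision, so the reference is more
      // accurate than the fp16 kernel output; the relative 1% tolerance in
      // the verification below absorbs that difference.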
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Div:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Mul:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::SqrDiff:
          {
            const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            break;
        }
      }

      // Call optimized micro-kernel.
      vbinary(batch_size() * sizeof(uint16_t), a_data, b_data, y.data(), nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size();
      }
    }
  }

  void Test(xnn_f16_vbinary_minmax_ukernel_function vbinary_minmax, OpType op_type, xnn_init_f16_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f16rng));
      std::generate(b.begin(), b.end(), std::ref(f16rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* a_data = inplace_a() ? y.data() : a.data();
      const uint16_t* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
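      // As in the unclamped f16 test above, compute fp32 reference results
      // first; the [y_min, y_max] clamping range is then derived from the
      // observed output range and the qmin()/qmax() knobs, and rounded
      // through fp16 so it matches what the kernel parameters can represent.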
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Div:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Mul:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::SqrDiff:
          {
            const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            break;
        }
      }

      const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float y_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
        (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
        +std::numeric_limits<float>::infinity()));
      const float y_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
        (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
        -std::numeric_limits<float>::infinity()));
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
      }

      // Prepare parameters.
      xnn_f16_minmax_params params;
      init_params(&params,
        fp16_ieee_from_fp32_value(y_min), fp16_ieee_from_fp32_value(y_max));

      // Call optimized micro-kernel.
      vbinary_minmax(batch_size() * sizeof(uint16_t), a_data, b_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size();
      }
    }
  }

  void Test(xnn_f32_vbinary_ukernel_function vbinary, OpType op_type, xnn_init_f32_default_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);

    std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f32rng));
      std::generate(b.begin(), b.end(), std::ref(f32rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* a_data = inplace_a() ? y.data() : a.data();
      const float* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
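      // Inputs are drawn from [0.01f, 1.0f), so Div never sees a zero
      // denominator and all operations stay in a well-scaled range.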
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = a_data[i] + b_data[i];
            break;
          case OpType::Div:
            y_ref[i] = a_data[i] / b_data[i];
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(a_data[i], b_data[i]);
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(a_data[i], b_data[i]);
            break;
          case OpType::Mul:
            y_ref[i] = a_data[i] * b_data[i];
            break;
          case OpType::SqrDiff:
          {
            const float diff = a_data[i] - b_data[i];
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = a_data[i] - b_data[i];
            break;
        }
      }

      // Prepare parameters.
      xnn_f32_default_params params;
      if (init_params) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vbinary(batch_size() * sizeof(float), a_data, b_data, y.data(), init_params != nullptr ? &params : nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
          << "at " << i << " / " << batch_size();
      }
    }
  }

  void Test(xnn_f32_vbinary_relu_ukernel_function vbinary_relu, OpType op_type) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng);

    std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f32rng));
      std::generate(b.begin(), b.end(), std::ref(f32rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* a_data = inplace_a() ? y.data() : a.data();
      const float* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = a_data[i] + b_data[i];
            break;
          case OpType::Div:
            y_ref[i] = a_data[i] / b_data[i];
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(a_data[i], b_data[i]);
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(a_data[i], b_data[i]);
            break;
          case OpType::Mul:
            y_ref[i] = a_data[i] * b_data[i];
            break;
          case OpType::SqrDiff:
          {
            const float diff = a_data[i] - b_data[i];
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = a_data[i] - b_data[i];
            break;
        }
      }
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(y_ref[i], 0.0f);
      }

      // Call optimized micro-kernel.
      vbinary_relu(batch_size() * sizeof(float), a_data, b_data, y.data(), nullptr);

      // Verify results.
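      // Each output must be non-negative (the ReLU post-operation) and match
      // the clamped fp32 reference value.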
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_GE(y[i], 0.0f)
          << "at " << i << " / " << batch_size();
        ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
          << "at " << i << " / " << batch_size();
      }
    }
  }

  void Test(xnn_f32_vbinary_minmax_ukernel_function vbinary_minmax, OpType op_type, xnn_init_f32_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);

    std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f32rng));
      std::generate(b.begin(), b.end(), std::ref(f32rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* a_data = inplace_a() ? y.data() : a.data();
      const float* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = a_data[i] + b_data[i];
            break;
          case OpType::Div:
            y_ref[i] = a_data[i] / b_data[i];
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(a_data[i], b_data[i]);
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(a_data[i], b_data[i]);
            break;
          case OpType::Mul:
            y_ref[i] = a_data[i] * b_data[i];
            break;
          case OpType::SqrDiff:
          {
            const float diff = a_data[i] - b_data[i];
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = a_data[i] - b_data[i];
            break;
        }
      }
      const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float y_max = accumulated_range > 0.0f ?
        (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
        +std::numeric_limits<float>::infinity();
      const float y_min = accumulated_range > 0.0f ?
        (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
        -std::numeric_limits<float>::infinity();
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
      }

      // Prepare parameters.
      xnn_f32_minmax_params params;
      init_params(&params, y_min, y_max);

      // Call optimized micro-kernel.
      vbinary_minmax(batch_size() * sizeof(float), a_data, b_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
          << "at " << i << " / " << batch_size();
      }
    }
  }

 private:
  size_t batch_size_{1};
  bool inplace_a_{false};
  bool inplace_b_{false};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  size_t iterations_{15};
};
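// A minimal usage sketch (hypothetical test case, not part of this header):
// the micro-kernel and parameter-initializer names below, such as
// xnn_f32_vadd_minmax_ukernel__scalar_x1 and
// xnn_init_f32_minmax_scalar_params, are assumptions whose exact spellings
// vary across XNNPACK revisions.
//
//   TEST(F32_VADD_MINMAX__SCALAR_X1, batch_eq_4) {
//     VBinaryMicrokernelTester()
//       .batch_size(4)
//       .Test(xnn_f32_vadd_minmax_ukernel__scalar_x1,
//             VBinaryMicrokernelTester::OpType::Add,
//             xnn_init_f32_minmax_scalar_params);
//   }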