1 // Copyright 2021 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #pragma once 7 8 #include <gtest/gtest.h> 9 10 #include <algorithm> 11 #include <cassert> 12 #include <cmath> 13 #include <cstddef> 14 #include <cstdlib> 15 #include <functional> 16 #include <random> 17 #include <vector> 18 19 #include <fp16.h> 20 21 #include <xnnpack.h> 22 23 24 class ConvertOperatorTester { 25 public: channels(size_t channels)26 inline ConvertOperatorTester& channels(size_t channels) { 27 assert(channels != 0); 28 this->channels_ = channels; 29 return *this; 30 } 31 channels()32 inline size_t channels() const { 33 return this->channels_; 34 } 35 input_stride(size_t input_stride)36 inline ConvertOperatorTester& input_stride(size_t input_stride) { 37 assert(input_stride != 0); 38 this->input_stride_ = input_stride; 39 return *this; 40 } 41 input_stride()42 inline size_t input_stride() const { 43 if (this->input_stride_ == 0) { 44 return this->channels_; 45 } else { 46 assert(this->input_stride_ >= this->channels_); 47 return this->input_stride_; 48 } 49 } 50 output_stride(size_t output_stride)51 inline ConvertOperatorTester& output_stride(size_t output_stride) { 52 assert(output_stride != 0); 53 this->output_stride_ = output_stride; 54 return *this; 55 } 56 output_stride()57 inline size_t output_stride() const { 58 if (this->output_stride_ == 0) { 59 return this->channels_; 60 } else { 61 assert(this->output_stride_ >= this->channels_); 62 return this->output_stride_; 63 } 64 } 65 batch_size(size_t batch_size)66 inline ConvertOperatorTester& batch_size(size_t batch_size) { 67 assert(batch_size != 0); 68 this->batch_size_ = batch_size; 69 return *this; 70 } 71 batch_size()72 inline size_t batch_size() const { 73 return this->batch_size_; 74 } 75 scale(float scale)76 inline ConvertOperatorTester& scale(float scale) { 77 assert(scale >= 0.0f); 78 assert(std::isnormal(scale)); 79 this->scale_ = scale; 80 return *this; 81 } 82 scale()83 inline float scale() const { 84 return this->scale_; 85 } 86 zero_point(int16_t zero_point)87 inline ConvertOperatorTester& zero_point(int16_t zero_point) { 88 this->zero_point_ = zero_point; 89 return *this; 90 } 91 zero_point()92 inline int16_t zero_point() const { 93 return this->zero_point_; 94 } 95 qmin(int16_t qmin)96 inline ConvertOperatorTester& qmin(int16_t qmin) { 97 this->qmin_ = qmin; 98 return *this; 99 } 100 qmin()101 inline int16_t qmin() const { 102 return this->qmin_; 103 } 104 qmax(int16_t qmax)105 inline ConvertOperatorTester& qmax(int16_t qmax) { 106 this->qmax_ = qmax; 107 return *this; 108 } 109 qmax()110 inline int16_t qmax() const { 111 return this->qmax_; 112 } 113 iterations(size_t iterations)114 inline ConvertOperatorTester& iterations(size_t iterations) { 115 this->iterations_ = iterations; 116 return *this; 117 } 118 iterations()119 inline size_t iterations() const { 120 return this->iterations_; 121 } 122 TestF16toF32()123 void TestF16toF32() const { 124 std::random_device random_device; 125 auto rng = std::mt19937(random_device()); 126 auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng); 127 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng); 128 129 std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + 130 (batch_size() - 1) * input_stride() + channels()); 131 std::vector<float> output((batch_size() - 1) * output_stride() + channels()); 132 std::vector<float> output_ref(batch_size() * channels()); 133 for (size_t iteration = 0; iteration < iterations(); iteration++) { 134 std::generate(input.begin(), input.end(), std::ref(f16rng)); 135 std::fill(output.begin(), output.end(), std::nanf("")); 136 137 // Compute reference results. 138 for (size_t i = 0; i < batch_size(); i++) { 139 for (size_t c = 0; c < channels(); c++) { 140 output_ref[i * channels() + c] = fp16_ieee_to_fp32_value(input[i * input_stride() + c]); 141 } 142 } 143 144 // Create, setup, run, and destroy Convert operator. 145 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 146 xnn_operator_t convert_op = nullptr; 147 148 ASSERT_EQ(xnn_status_success, 149 xnn_create_convert_nc_f16_f32( 150 channels(), input_stride(), output_stride(), 151 0, &convert_op)); 152 ASSERT_NE(nullptr, convert_op); 153 154 // Smart pointer to automatically delete convert op. 155 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convert_op(convert_op, xnn_delete_operator); 156 157 ASSERT_EQ(xnn_status_success, 158 xnn_setup_convert_nc_f16_f32( 159 convert_op, 160 batch_size(), 161 input.data(), output.data(), 162 nullptr /* thread pool */)); 163 164 ASSERT_EQ(xnn_status_success, 165 xnn_run_operator(convert_op, nullptr /* thread pool */)); 166 167 // Verify results. 168 for (size_t i = 0; i < batch_size(); i++) { 169 for (size_t c = 0; c < channels(); c++) { 170 ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c]) 171 << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 172 } 173 } 174 } 175 } 176 TestF32toF16()177 void TestF32toF16() const { 178 std::random_device random_device; 179 auto rng = std::mt19937(random_device()); 180 auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng); 181 182 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + 183 (batch_size() - 1) * input_stride() + channels()); 184 std::vector<uint16_t> output((batch_size() - 1) * output_stride() + channels()); 185 std::vector<uint16_t> output_ref(batch_size() * channels()); 186 for (size_t iteration = 0; iteration < iterations(); iteration++) { 187 std::generate(input.begin(), input.end(), std::ref(f32rng)); 188 std::fill(output.begin(), output.end(), UINT16_C(0x7E)); 189 190 // Compute reference results. 191 for (size_t i = 0; i < batch_size(); i++) { 192 for (size_t c = 0; c < channels(); c++) { 193 output_ref[i * channels() + c] = fp16_ieee_from_fp32_value(input[i * input_stride() + c]); 194 } 195 } 196 197 // Create, setup, run, and destroy Convert operator. 198 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 199 xnn_operator_t convert_op = nullptr; 200 201 ASSERT_EQ(xnn_status_success, 202 xnn_create_convert_nc_f32_f16( 203 channels(), input_stride(), output_stride(), 204 0, &convert_op)); 205 ASSERT_NE(nullptr, convert_op); 206 207 // Smart pointer to automatically delete convert op. 208 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convert_op(convert_op, xnn_delete_operator); 209 210 ASSERT_EQ(xnn_status_success, 211 xnn_setup_convert_nc_f32_f16( 212 convert_op, 213 batch_size(), 214 input.data(), output.data(), 215 nullptr /* thread pool */)); 216 217 ASSERT_EQ(xnn_status_success, 218 xnn_run_operator(convert_op, nullptr /* thread pool */)); 219 220 // Verify results. 221 for (size_t i = 0; i < batch_size(); i++) { 222 for (size_t c = 0; c < channels(); c++) { 223 ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c]) 224 << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 225 } 226 } 227 } 228 } 229 TestF32toQS8()230 void TestF32toQS8() const { 231 ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min()); 232 ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max()); 233 ASSERT_LT(qmin(), qmax()); 234 235 ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min()); 236 ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max()); 237 238 std::random_device random_device; 239 auto rng = std::mt19937(random_device()); 240 auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng); 241 242 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + 243 (batch_size() - 1) * input_stride() + channels()); 244 std::vector<int8_t> output((batch_size() - 1) * output_stride() + channels()); 245 std::vector<int8_t> output_ref(batch_size() * channels()); 246 for (size_t iteration = 0; iteration < iterations(); iteration++) { 247 std::generate(input.begin(), input.end(), std::ref(f32rng)); 248 std::fill(output.begin(), output.end(), UINT16_C(0x7E)); 249 250 // Compute reference results. 251 const float inv_scale = 1.0f / scale(); 252 for (size_t i = 0; i < batch_size(); i++) { 253 for (size_t c = 0; c < channels(); c++) { 254 float scaled_input = input[i * input_stride() + c] * inv_scale; 255 scaled_input = std::min<float>(scaled_input, float(qmax() - zero_point())); 256 scaled_input = std::max<float>(scaled_input, float(qmin() - zero_point())); 257 output_ref[i * channels() + c] = int8_t(std::lrintf(scaled_input) + long(zero_point())); 258 } 259 } 260 261 // Create, setup, run, and destroy Convert operator. 262 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 263 xnn_operator_t convert_op = nullptr; 264 265 ASSERT_EQ(xnn_status_success, 266 xnn_create_convert_nc_f32_qs8( 267 channels(), input_stride(), output_stride(), 268 scale(), int8_t(zero_point()), int8_t(qmin()), int8_t(qmax()), 269 0, &convert_op)); 270 ASSERT_NE(nullptr, convert_op); 271 272 // Smart pointer to automatically delete convert op. 273 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convert_op(convert_op, xnn_delete_operator); 274 275 ASSERT_EQ(xnn_status_success, 276 xnn_setup_convert_nc_f32_qs8( 277 convert_op, 278 batch_size(), 279 input.data(), output.data(), 280 nullptr /* thread pool */)); 281 282 ASSERT_EQ(xnn_status_success, 283 xnn_run_operator(convert_op, nullptr /* thread pool */)); 284 285 // Verify results. 286 for (size_t i = 0; i < batch_size(); i++) { 287 for (size_t c = 0; c < channels(); c++) { 288 ASSERT_EQ(int32_t(output_ref[i * channels() + c]), int32_t(output[i * output_stride() + c])) 289 << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 290 } 291 } 292 } 293 } 294 TestF32toQU8()295 void TestF32toQU8() const { 296 ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min()); 297 ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max()); 298 ASSERT_LT(qmin(), qmax()); 299 300 ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min()); 301 ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max()); 302 303 std::random_device random_device; 304 auto rng = std::mt19937(random_device()); 305 auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng); 306 307 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + 308 (batch_size() - 1) * input_stride() + channels()); 309 std::vector<uint8_t> output((batch_size() - 1) * output_stride() + channels()); 310 std::vector<uint8_t> output_ref(batch_size() * channels()); 311 for (size_t iteration = 0; iteration < iterations(); iteration++) { 312 std::generate(input.begin(), input.end(), std::ref(f32rng)); 313 std::fill(output.begin(), output.end(), UINT16_C(0x7E)); 314 315 // Compute reference results. 316 const float inv_scale = 1.0f / scale(); 317 for (size_t i = 0; i < batch_size(); i++) { 318 for (size_t c = 0; c < channels(); c++) { 319 float scaled_input = input[i * input_stride() + c] * inv_scale; 320 scaled_input = std::min<float>(scaled_input, float(qmax() - zero_point())); 321 scaled_input = std::max<float>(scaled_input, float(qmin() - zero_point())); 322 output_ref[i * channels() + c] = uint8_t(std::lrintf(scaled_input) + long(zero_point())); 323 } 324 } 325 326 // Create, setup, run, and destroy Convert operator. 327 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 328 xnn_operator_t convert_op = nullptr; 329 330 ASSERT_EQ(xnn_status_success, 331 xnn_create_convert_nc_f32_qu8( 332 channels(), input_stride(), output_stride(), 333 scale(), uint8_t(zero_point()), uint8_t(qmin()), uint8_t(qmax()), 334 0, &convert_op)); 335 ASSERT_NE(nullptr, convert_op); 336 337 // Smart pointer to automatically delete convert op. 338 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convert_op(convert_op, xnn_delete_operator); 339 340 ASSERT_EQ(xnn_status_success, 341 xnn_setup_convert_nc_f32_qu8( 342 convert_op, 343 batch_size(), 344 input.data(), output.data(), 345 nullptr /* thread pool */)); 346 347 ASSERT_EQ(xnn_status_success, 348 xnn_run_operator(convert_op, nullptr /* thread pool */)); 349 350 // Verify results. 351 for (size_t i = 0; i < batch_size(); i++) { 352 for (size_t c = 0; c < channels(); c++) { 353 ASSERT_EQ(uint32_t(output_ref[i * channels() + c]), uint32_t(output[i * output_stride() + c])) 354 << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 355 } 356 } 357 } 358 } 359 TestQS8toF32()360 void TestQS8toF32() const { 361 ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min()); 362 ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max()); 363 364 std::random_device random_device; 365 auto rng = std::mt19937(random_device()); 366 auto i8rng = std::bind( 367 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), 368 std::ref(rng)); 369 370 std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + 371 (batch_size() - 1) * input_stride() + channels()); 372 std::vector<float> output((batch_size() - 1) * output_stride() + channels()); 373 std::vector<float> output_ref(batch_size() * channels()); 374 for (size_t iteration = 0; iteration < iterations(); iteration++) { 375 std::generate(input.begin(), input.end(), std::ref(i8rng)); 376 std::fill(output.begin(), output.end(), std::nanf("")); 377 378 // Compute reference results. 379 for (size_t i = 0; i < batch_size(); i++) { 380 for (size_t c = 0; c < channels(); c++) { 381 output_ref[i * channels() + c] = float(input[i * input_stride() + c] - zero_point()) * scale(); 382 } 383 } 384 385 // Create, setup, run, and destroy Convert operator. 386 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 387 xnn_operator_t convert_op = nullptr; 388 389 ASSERT_EQ(xnn_status_success, 390 xnn_create_convert_nc_qs8_f32( 391 channels(), input_stride(), output_stride(), 392 scale(), int8_t(zero_point()), 393 0, &convert_op)); 394 ASSERT_NE(nullptr, convert_op); 395 396 // Smart pointer to automatically delete convert op. 397 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convert_op(convert_op, xnn_delete_operator); 398 399 ASSERT_EQ(xnn_status_success, 400 xnn_setup_convert_nc_qs8_f32( 401 convert_op, 402 batch_size(), 403 input.data(), output.data(), 404 nullptr /* thread pool */)); 405 406 ASSERT_EQ(xnn_status_success, 407 xnn_run_operator(convert_op, nullptr /* thread pool */)); 408 409 // Verify results. 410 for (size_t i = 0; i < batch_size(); i++) { 411 for (size_t c = 0; c < channels(); c++) { 412 ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c]) 413 << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 414 } 415 } 416 } 417 } 418 TestQU8toF32()419 void TestQU8toF32() const { 420 ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min()); 421 ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max()); 422 423 std::random_device random_device; 424 auto rng = std::mt19937(random_device()); 425 auto u8rng = std::bind( 426 std::uniform_int_distribution<int32_t>(std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max()), 427 std::ref(rng)); 428 429 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + 430 (batch_size() - 1) * input_stride() + channels()); 431 std::vector<float> output((batch_size() - 1) * output_stride() + channels()); 432 std::vector<float> output_ref(batch_size() * channels()); 433 for (size_t iteration = 0; iteration < iterations(); iteration++) { 434 std::generate(input.begin(), input.end(), std::ref(u8rng)); 435 std::fill(output.begin(), output.end(), std::nanf("")); 436 437 // Compute reference results. 438 for (size_t i = 0; i < batch_size(); i++) { 439 for (size_t c = 0; c < channels(); c++) { 440 output_ref[i * channels() + c] = float(input[i * input_stride() + c] - zero_point()) * scale(); 441 } 442 } 443 444 // Create, setup, run, and destroy Convert operator. 445 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 446 xnn_operator_t convert_op = nullptr; 447 448 ASSERT_EQ(xnn_status_success, 449 xnn_create_convert_nc_qu8_f32( 450 channels(), input_stride(), output_stride(), 451 scale(), uint8_t(zero_point()), 452 0, &convert_op)); 453 ASSERT_NE(nullptr, convert_op); 454 455 // Smart pointer to automatically delete convert op. 456 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convert_op(convert_op, xnn_delete_operator); 457 458 ASSERT_EQ(xnn_status_success, 459 xnn_setup_convert_nc_qu8_f32( 460 convert_op, 461 batch_size(), 462 input.data(), output.data(), 463 nullptr /* thread pool */)); 464 465 ASSERT_EQ(xnn_status_success, 466 xnn_run_operator(convert_op, nullptr /* thread pool */)); 467 468 // Verify results. 469 for (size_t i = 0; i < batch_size(); i++) { 470 for (size_t c = 0; c < channels(); c++) { 471 ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c]) 472 << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 473 } 474 } 475 } 476 } 477 478 private: 479 size_t batch_size_{1}; 480 size_t channels_{1}; 481 size_t input_stride_{0}; 482 size_t output_stride_{0}; 483 float scale_{150.0f}; 484 int16_t zero_point_{1}; 485 int16_t qmin_{std::numeric_limits<int16_t>::min()}; 486 int16_t qmax_{std::numeric_limits<int16_t>::max()}; 487 size_t iterations_{15}; 488 }; 489