// Copyright 2016 The Gemmlowp Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // test_fixedpoint.cc: unit tests covering the fixedpoint/ directory. #define GEMMLOWP_ENABLE_FIXEDPOINT_CONSTANTS_CHECKS #include #include #include #include #include #include #include "../fixedpoint/fixedpoint.h" #include "test.h" namespace gemmlowp { namespace { template T Load(const typename FixedPointRawTypeTraits::ScalarRawType* src) { return *src; } template void Store(typename FixedPointRawTypeTraits::ScalarRawType* dst, T v) { *dst = v; } #ifdef GEMMLOWP_NEON template <> int32x4_t Load(const std::int32_t* src) { return vld1q_s32(src); } template <> int16x8_t Load(const std::int16_t* src) { return vld1q_s16(src); } template <> void Store(std::int32_t* dst, int32x4_t v) { vst1q_s32(dst, v); } template <> void Store(std::int16_t* dst, int16x8_t v) { vst1q_s16(dst, v); } #endif #ifdef GEMMLOWP_SSE4 template <> __m128i Load<__m128i>(const std::int32_t* src) { return _mm_loadu_si128(reinterpret_cast(src)); } template <> void Store<__m128i>(std::int32_t* dst, __m128i v) { _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), v); } template <> int16x8_m128i Load(const std::int16_t* src) { return to_int16x8_m128i( _mm_loadu_si128(reinterpret_cast(src))); } template <> void Store(std::int16_t* dst, int16x8_m128i v) { _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), v.v); } #endif #ifdef GEMMLOWP_MSA template <> v4i32 Load(const std::int32_t* src) { return __builtin_msa_ld_w(const_cast(src), 0); } template <> v8i16 Load(const std::int16_t* src) { return __builtin_msa_ld_h(const_cast(src), 0); } template <> void Store(std::int32_t* dst, v4i32 v) { __builtin_msa_st_w(v, dst, 0); } template <> void Store(std::int16_t* dst, v8i16 v) { __builtin_msa_st_h(v, dst, 0); } #endif #ifdef GEMMLOWP_AVX2 template <> __m256i Load<__m256i>(const std::int32_t* src) { return _mm256_loadu_si256(reinterpret_cast(src)); } template <> int16x16_m256i Load(const std::int16_t* src) { return to_int16x16_m256i( _mm256_loadu_si256(reinterpret_cast(src))); } template <> void Store<__m256i>(std::int32_t* dst, __m256i v) { _mm256_storeu_si256(reinterpret_cast<__m256i*>(dst), v); } template <> void Store(std::int16_t* dst, int16x16_m256i v) { _mm256_storeu_si256(reinterpret_cast<__m256i*>(dst), v.v); } #endif template class TestFixedPoint { public: using SimdType = tSimdType; using SimdTypeTraits = FixedPointRawTypeTraits; using ScalarType = typename SimdTypeTraits::ScalarRawType; static constexpr int kSimdLanes = SimdTypeTraits::kLanes; static constexpr int kScalarTypeBits = 8 * sizeof(ScalarType); // Explanation of UnaryOpBase, its *Op subclasses below, and TestUnaryOp: // Most (though not all) of the fixedpoint functionality being tested // consists of functions taking one fixedpoint value and returning one // fixedpoint value, e.g. "exp" or "tanh". We call them "unary operators". // We factor a lot of testing boilerplate into a common TestUnaryOp function // taking a "unary op" object that fully describes the function to be tested. // These objects inherit UnaryOpBase mostly as a means to share some default // values for some properties. // // An important design element here is that the fixed-point values are passed // around as raw integers (e.g. int32_t or SIMD types such as int32x4_t), not // as higher-level FixedPoint objects. The motivation for this design is 1) to // avoid having to templatize everything in the tIntegerBits parameter of // class FixedPoint, and 2) to allow directly testing low-level functions // operating on raw types (e.g. RoundingDivideByPOT) without needlessly // requiring // wrapping raw values in FixedPoint objects. class UnaryOpBase { public: // Min bound of the input range of this op. For example, an op only handling // nonnegative values would return 0. ScalarType MinInput() const { return std::numeric_limits::min(); } // Max bound of the input range of this op. For example, an op only handling // nonpositive values would return 0. ScalarType MaxInput() const { return std::numeric_limits::max(); } // Tolerated difference between actual and reference ScalarType values. // Note that the corresponding real-numbers tolerance depends on the number // of integer bits of the fixed-point representation of the results of this // op. // For example, for an op returning fixed-point values with 0 integer bits, // the correspondence between real-number values and raw values is // real_number = (2^31) * raw_value. ScalarType Tolerance() const { return 0; } }; // Op wrapping RoundingDivideByPOT class RoundingDivideByPOTOp final : public UnaryOpBase { public: RoundingDivideByPOTOp(int exponent) : exponent_(exponent) {} ScalarType ReferenceOp(ScalarType x) const { const double d = static_cast(x) / (1ll << exponent_); return static_cast(std::round(d)); } template RawType Op(RawType x) const { return RoundingDivideByPOT(x, exponent_); } private: const int exponent_; }; // Op wrapping SaturatingRoundingMultiplyByPOT template class SaturatingRoundingMultiplyByPOTOp final : public UnaryOpBase { public: ScalarType ReferenceOp(ScalarType x) const { const double d = static_cast(x) * std::pow(2., tExponent); const double clamp_min = std::numeric_limits::min(); const double clamp_max = std::numeric_limits::max(); const double clamped = std::min(clamp_max, std::max(clamp_min, d)); return static_cast(std::round(clamped)); } template RawType Op(RawType x) const { return SaturatingRoundingMultiplyByPOT(x); } }; // Op wrapping exp_on_interval_between_negative_one_quarter_and_0_excl class ExpOnIntervalBetweenNegativeOneQuarterAnd0ExclOp final : public UnaryOpBase { public: ScalarType MinInput() const { return -(1 << (kScalarTypeBits - 3)); } ScalarType MaxInput() const { return 0; } ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 500 : 1; } ScalarType ReferenceOp(ScalarType x) const { using F = FixedPoint; const double d = ToDouble(F::FromRaw(x)); const double e = std::exp(d); return F::FromDouble(e).raw(); } template RawType Op(RawType x) const { using F = FixedPoint; const F f = F::FromRaw(x); const F e = exp_on_interval_between_negative_one_quarter_and_0_excl(f); return e.raw(); } }; // Op wrapping exp_on_negative_values template class ExpOnNegativeValuesOp final : public UnaryOpBase { public: ScalarType MaxInput() const { return 0; } ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 500 : 2; } ScalarType ReferenceOp(ScalarType x) const { using F = FixedPoint; using F0 = FixedPoint; const double d = ToDouble(F::FromRaw(x)); const double e = std::exp(d); return F0::FromDouble(e).raw(); } template RawType Op(RawType x) const { using F = FixedPoint; const F f = F::FromRaw(x); return exp_on_negative_values(f).raw(); } }; // Op wrapping one_minus_x_over_one_plus_x_for_x_in_0_1 class OneMinusXOverOnePlusXForXIn01Op final : public UnaryOpBase { public: ScalarType MinInput() const { return 0; } ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 12 : 11; } ScalarType ReferenceOp(ScalarType x) const { using F = FixedPoint; const double d = ToDouble(F::FromRaw(x)); const double e = (1 - d) / (1 + d); return F::FromDouble(e).raw(); } template RawType Op(RawType x) const { using F = FixedPoint; const F f = F::FromRaw(x); return one_minus_x_over_one_plus_x_for_x_in_0_1(f).raw(); } }; // Op wrapping tanh template class TanhOp final : public UnaryOpBase { public: ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 310 : 12; } ScalarType ReferenceOp(ScalarType x) const { using F = FixedPoint; using F0 = FixedPoint; const double d = ToDouble(F::FromRaw(x)); const double e = std::tanh(d); return F0::FromDouble(e).raw(); } template RawType Op(RawType x) const { using F = FixedPoint; const F f = F::FromRaw(x); return tanh(f).raw(); } }; // Op wrapping one_over_one_plus_x_for_x_in_0_1 class OneOverOnePlusXForXIn01Op final : public UnaryOpBase { public: ScalarType MinInput() const { return 0; } ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 6 : 5; } ScalarType ReferenceOp(ScalarType x) const { using F = FixedPoint; const double d = ToDouble(F::FromRaw(x)); const double e = 1 / (1 + d); return F::FromDouble(e).raw(); } template RawType Op(RawType x) const { using F = FixedPoint; const F f = F::FromRaw(x); return one_over_one_plus_x_for_x_in_0_1(f).raw(); } }; // Op wrapping logistic template class LogisticOp final : public UnaryOpBase { public: ScalarType Tolerance() const { return kScalarTypeBits == 32 ? 155 : 6; } ScalarType ReferenceOp(ScalarType x) const { using F = FixedPoint; using F0 = FixedPoint; const double d = ToDouble(F::FromRaw(x)); const double e = 1 / (1 + std::exp(-d)); return F0::FromDouble(e).raw(); } template RawType Op(RawType x) const { using F = FixedPoint; const F f = F::FromRaw(x); return logistic(f).raw(); } }; // Tests a given op, on a given list of int32 input values. template void TestUnaryOp(const tUnaryOpType& unary_op, const std::vector& testvals) { Check(0 == (testvals.size() % kSimdLanes)); for (std::size_t i = 0; i < testvals.size(); i += kSimdLanes) { // First, clamp input values accoding to the MinInput() and MaxInput() // bounds returned by the op. ScalarType input[kSimdLanes] = {0}; for (std::size_t j = 0; j < kSimdLanes; j++) { const ScalarType raw_input = testvals[i + j]; input[j] = std::min(unary_op.MaxInput(), std::max(unary_op.MinInput(), raw_input)); } // Compute reference results and check that the actual results on // scalar inputs agree with them, to the Tolerance() returned by the op. ScalarType reference[kSimdLanes] = {0}; ScalarType actual_scalar[kSimdLanes] = {0}; for (std::size_t j = 0; j < kSimdLanes; j++) { reference[j] = unary_op.ReferenceOp(input[j]); actual_scalar[j] = unary_op.Op(input[j]); const std::int64_t diff = static_cast(actual_scalar[j]) - static_cast(reference[j]); if (std::abs(diff) > unary_op.Tolerance()) { fprintf(stderr, "abs(diff) (%" PRId64 ") > tolerance (%d)\n", diff, unary_op.Tolerance()); } Check(std::abs(diff) <= unary_op.Tolerance()); } // Check that the actual results on SIMD inputs agree *exactly* with the // actual results on scalar inputs. I.e. SIMD must make absolutely no // difference // to the results, regardless of the fact that both scalar and SIMD // results may differ from the reference results. ScalarType actual_simd[kSimdLanes] = {0}; Store(actual_simd, unary_op.Op(Load(input))); for (std::size_t j = 0; j < kSimdLanes; j++) { if (actual_simd[j] != actual_scalar[j]) { fprintf(stderr, "SIMD (%d) != scalar (%d)\n", actual_simd[j], actual_scalar[j]); } Check(actual_simd[j] == actual_scalar[j]); } } } template void test_convert(FixedPoint x) { typedef FixedPoint F; F y = F::FromDouble(ToDouble(x)); Check(y == x); } template void test_Rescale(FixedPoint a) { FixedPoint actual = Rescale(a); FixedPoint expected = FixedPoint::FromDouble(ToDouble(a)); Check(actual == expected); } template void test_Rescale(const std::vector& testvals) { for (auto a : testvals) { FixedPoint aq; aq.raw() = a; test_Rescale(aq); } } template void test_mul(FixedPoint a, FixedPoint b) { static const int ProductIntegerBits = tIntegerBits_a + tIntegerBits_b; using ProductFixedPoint = FixedPoint; ProductFixedPoint ab; ab = a * b; double a_double = ToDouble(a); double b_double = ToDouble(b); double ab_double = a_double * b_double; ProductFixedPoint expected = ProductFixedPoint::FromDouble(ab_double); std::int64_t diff = std::int64_t(ab.raw()) - std::int64_t(expected.raw()); Check(std::abs(diff) <= 1); } template void test_mul(const std::vector& testvals) { for (auto a : testvals) { for (auto b : testvals) { FixedPoint aq; FixedPoint bq; aq.raw() = a; bq.raw() = b; test_mul(aq, bq); } } } template void test_ExactMulByPot(FixedPoint a) { double x = ToDouble(a) * std::pow(2.0, tExponent); double y = ToDouble(ExactMulByPot(a)); Check(x == y); } template void test_ExactMulByPot(const std::vector& testvals) { for (auto a : testvals) { FixedPoint aq; aq.raw() = a; test_ExactMulByPot(aq); } } // Make the list of test values to test each op against. std::vector MakeTestVals() { std::vector testvals; for (int i = 0; i < kScalarTypeBits - 1; i++) { testvals.push_back((1 << i) - 2); testvals.push_back((1 << i) - 1); testvals.push_back((1 << i)); testvals.push_back((1 << i) + 1); testvals.push_back((1 << i) + 2); testvals.push_back(-(1 << i) - 2); testvals.push_back(-(1 << i) - 1); testvals.push_back(-(1 << i)); testvals.push_back(-(1 << i) + 1); testvals.push_back(-(1 << i) + 2); } testvals.push_back(std::numeric_limits::min()); testvals.push_back(std::numeric_limits::min() + 1); testvals.push_back(std::numeric_limits::min() + 2); testvals.push_back(std::numeric_limits::max() - 2); testvals.push_back(std::numeric_limits::max() - 1); testvals.push_back(std::numeric_limits::max()); std::mt19937 random_engine; std::uniform_int_distribution uniform_distribution( std::numeric_limits::min(), std::numeric_limits::max()); for (int i = 0; i < 1000; i++) { testvals.push_back(uniform_distribution(random_engine)); } // SIMD tests will require the length of testvals to be a multiple // of SIMD vector size. while (testvals.size() % kSimdLanes) { testvals.push_back(0); } std::sort(testvals.begin(), testvals.end()); return testvals; } void RunTests(const char* msg) { const std::vector testvals = MakeTestVals(); for (int s = 0; s < kScalarTypeBits; s++) { TestUnaryOp(RoundingDivideByPOTOp(s), testvals); } TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<1 - kScalarTypeBits>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<2 - kScalarTypeBits>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<3 - kScalarTypeBits>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<14 - kScalarTypeBits>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<15 - kScalarTypeBits>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-15>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-4>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-3>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-2>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<-1>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<0>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<1>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<2>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<3>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<4>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp<15>(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp(), testvals); TestUnaryOp(SaturatingRoundingMultiplyByPOTOp(), testvals); TestUnaryOp(ExpOnIntervalBetweenNegativeOneQuarterAnd0ExclOp(), testvals); TestUnaryOp(ExpOnNegativeValuesOp<0>(), testvals); TestUnaryOp(ExpOnNegativeValuesOp<1>(), testvals); TestUnaryOp(ExpOnNegativeValuesOp<2>(), testvals); TestUnaryOp(ExpOnNegativeValuesOp<3>(), testvals); TestUnaryOp(ExpOnNegativeValuesOp<4>(), testvals); TestUnaryOp(ExpOnNegativeValuesOp<5>(), testvals); TestUnaryOp(ExpOnNegativeValuesOp<6>(), testvals); TestUnaryOp(OneMinusXOverOnePlusXForXIn01Op(), testvals); TestUnaryOp(TanhOp<0>(), testvals); TestUnaryOp(TanhOp<1>(), testvals); TestUnaryOp(TanhOp<2>(), testvals); TestUnaryOp(TanhOp<3>(), testvals); TestUnaryOp(TanhOp<4>(), testvals); TestUnaryOp(TanhOp<5>(), testvals); TestUnaryOp(TanhOp<6>(), testvals); TestUnaryOp(OneOverOnePlusXForXIn01Op(), testvals); TestUnaryOp(LogisticOp<0>(), testvals); TestUnaryOp(LogisticOp<1>(), testvals); TestUnaryOp(LogisticOp<2>(), testvals); TestUnaryOp(LogisticOp<3>(), testvals); TestUnaryOp(LogisticOp<4>(), testvals); TestUnaryOp(LogisticOp<5>(), testvals); TestUnaryOp(LogisticOp<6>(), testvals); for (auto a : testvals) { FixedPoint x; x.raw() = a; test_convert(x); } test_mul<0, 0>(testvals); test_mul<0, 1>(testvals); test_mul<2, 0>(testvals); test_mul<1, 1>(testvals); test_mul<4, 4>(testvals); test_mul<3, 5>(testvals); test_mul<7, 2>(testvals); test_mul(testvals); test_Rescale<0, 0>(testvals); test_Rescale<0, 1>(testvals); test_Rescale<2, 0>(testvals); test_Rescale<4, 4>(testvals); test_Rescale<4, 5>(testvals); test_Rescale<6, 3>(testvals); test_Rescale<13, 9>(testvals); test_ExactMulByPot<0, 0>(testvals); test_ExactMulByPot<0, 4>(testvals); test_ExactMulByPot<1, 4>(testvals); test_ExactMulByPot<3, 2>(testvals); test_ExactMulByPot<-4, 5>(testvals); test_ExactMulByPot<-2, 6>(testvals); fprintf(stderr, "PASS (%s)\n", msg); } }; } // end anonymous namespace } // end namespace gemmlowp int main() { gemmlowp::TestFixedPoint().RunTests("Scalar int32"); gemmlowp::TestFixedPoint().RunTests("Scalar int16"); #ifdef GEMMLOWP_SSE4 gemmlowp::TestFixedPoint<__m128i>().RunTests("SSE4 __m128i = int32x4"); gemmlowp::TestFixedPoint().RunTests( "SSE4 __m128i = int16x8"); #endif #ifdef GEMMLOWP_NEON gemmlowp::TestFixedPoint().RunTests("NEON int32x4_t"); gemmlowp::TestFixedPoint().RunTests("NEON int16x8_t"); #endif #ifdef GEMMLOWP_MSA gemmlowp::TestFixedPoint().RunTests("MSA v4i32"); gemmlowp::TestFixedPoint().RunTests("MSA v8i16"); #endif #ifdef GEMMLOWP_AVX2 gemmlowp::TestFixedPoint<__m256i>().RunTests("AVX __m256i"); gemmlowp::TestFixedPoint().RunTests( "AVX2 __m256i = int16x16"); #endif }