// Copyright 2021 The libgav1 Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "src/dsp/arm/common_neon.h" #include "gtest/gtest.h" #include "src/utils/cpu.h" #if LIBGAV1_ENABLE_NEON #include #include "tests/block_utils.h" namespace libgav1 { namespace dsp { namespace { constexpr int kMaxBlockWidth = 16; constexpr int kMaxBlockHeight = 16; template class TransposeTest : public testing::Test { public: TransposeTest() { for (int y = 0; y < kMaxBlockHeight; ++y) { for (int x = 0; x < kMaxBlockWidth; ++x) { src_block_[y][x] = y * 16 + x; expected_transpose_[y][x] = x * 16 + y; } } } TransposeTest(const TransposeTest&) = delete; TransposeTest& operator=(const TransposeTest&) = delete; ~TransposeTest() override = default; protected: Pixel src_block_[kMaxBlockHeight][kMaxBlockWidth]; Pixel expected_transpose_[kMaxBlockHeight][kMaxBlockWidth]; }; using TransposeTestLowBitdepth = TransposeTest; TEST_F(TransposeTestLowBitdepth, Transpose4x4Test) { uint8x8_t a = Load4<1>(src_block_[1], Load4(src_block_[0])); uint8x8_t b = Load4<1>(src_block_[3], Load4(src_block_[2])); Transpose4x4(&a, &b); uint8_t output_4x4[4][4]; StoreLo4(output_4x4[0], a); StoreLo4(output_4x4[1], b); StoreHi4(output_4x4[2], a); StoreHi4(output_4x4[3], b); EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_4x4[0], 4, 4, kMaxBlockWidth, 4, false)); } TEST_F(TransposeTestLowBitdepth, Transpose8x4Test) { uint8x8_t a0 = Load4<1>(src_block_[4], Load4(src_block_[0])); uint8x8_t a1 = Load4<1>(src_block_[5], Load4(src_block_[1])); uint8x8_t a2 = Load4<1>(src_block_[6], Load4(src_block_[2])); uint8x8_t a3 = Load4<1>(src_block_[7], Load4(src_block_[3])); Transpose8x4(&a0, &a1, &a2, &a3); uint8_t output_8x4[4][8]; vst1_u8(output_8x4[0], a0); vst1_u8(output_8x4[1], a1); vst1_u8(output_8x4[2], a2); vst1_u8(output_8x4[3], a3); EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_8x4[0], 8, 4, kMaxBlockWidth, 8, false)); } TEST_F(TransposeTestLowBitdepth, Transpose8x8Test) { uint8x8_t input_8x8[8]; for (int i = 0; i < 8; ++i) { input_8x8[i] = vld1_u8(src_block_[i]); } Transpose8x8(input_8x8); uint8_t output_8x8[8][8]; for (int i = 0; i < 8; ++i) { vst1_u8(output_8x8[i], input_8x8[i]); } EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_8x8[0], 8, 8, kMaxBlockWidth, 8, false)); } TEST_F(TransposeTestLowBitdepth, Transpose8x16Test) { uint8x16_t input_8x16[8]; for (int i = 0; i < 8; ++i) { input_8x16[i] = vcombine_u8(vld1_u8(src_block_[i]), vld1_u8(src_block_[i + 8])); } Transpose8x16(input_8x16); uint8_t output_16x8[8][16]; for (int i = 0; i < 8; ++i) { vst1q_u8(output_16x8[i], input_8x16[i]); } EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_16x8[0], 16, 8, kMaxBlockWidth, 16, false)); } using TransposeTestHighBitdepth = TransposeTest; TEST_F(TransposeTestHighBitdepth, Transpose4x4Test) { uint16x4_t input_4x4[4]; input_4x4[0] = vld1_u16(src_block_[0]); input_4x4[1] = vld1_u16(src_block_[1]); input_4x4[2] = vld1_u16(src_block_[2]); input_4x4[3] = vld1_u16(src_block_[3]); Transpose4x4(input_4x4); uint16_t output_4x4[4][4]; for (int i = 0; i < 4; ++i) { vst1_u16(output_4x4[i], input_4x4[i]); } EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_4x4[0], 4, 4, kMaxBlockWidth, 4, false)); } TEST_F(TransposeTestHighBitdepth, Transpose4x8Test) { uint16x8_t input_4x8[4]; for (int i = 0; i < 4; ++i) { input_4x8[i] = vld1q_u16(src_block_[i]); } Transpose4x8(input_4x8); uint16_t output_4x8[4][8]; for (int i = 0; i < 4; ++i) { vst1q_u16(output_4x8[i], input_4x8[i]); memcpy(&expected_transpose_[i][4], &expected_transpose_[i + 4][0], 4 * sizeof(expected_transpose_[0][0])); } EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_4x8[0], 8, 4, kMaxBlockWidth, 8, false)); } TEST_F(TransposeTestHighBitdepth, LoopFilterTranspose4x8Test) { uint16x8_t input_4x8[4]; for (int i = 0; i < 4; ++i) { input_4x8[i] = vld1q_u16(src_block_[i]); } LoopFilterTranspose4x8(input_4x8); uint16_t output_4x8[4][8]; for (int i = 0; i < 4; ++i) { vst1q_u16(output_4x8[i], input_4x8[i]); } // a[0]: 03 13 23 33 04 14 24 34 p0q0 // a[1]: 02 12 22 32 05 15 25 35 p1q1 // a[2]: 01 11 21 31 06 16 26 36 p2q2 // a[3]: 00 10 20 30 07 17 27 37 p3q3 static constexpr uint16_t expected_output[4][8] = { {0x03, 0x13, 0x23, 0x33, 0x04, 0x14, 0x24, 0x34}, {0x02, 0x12, 0x22, 0x32, 0x05, 0x15, 0x25, 0x35}, {0x01, 0x11, 0x21, 0x31, 0x06, 0x16, 0x26, 0x36}, {0x00, 0x10, 0x20, 0x30, 0x07, 0x17, 0x27, 0x37}, }; EXPECT_TRUE(test_utils::CompareBlocks(expected_output[0], output_4x8[0], 8, 4, 8, 8, false)); } TEST_F(TransposeTestHighBitdepth, Transpose8x8Test) { uint16x8_t input_8x8[8]; for (int i = 0; i < 8; ++i) { input_8x8[i] = vld1q_u16(src_block_[i]); } Transpose8x8(input_8x8); uint16_t output_8x8[8][8]; for (int i = 0; i < 8; ++i) { vst1q_u16(output_8x8[i], input_8x8[i]); } EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_8x8[0], 8, 8, kMaxBlockWidth, 8, false)); } TEST_F(TransposeTestHighBitdepth, Transpose8x8SignedTest) { int16x8_t input_8x8[8]; for (int i = 0; i < 8; ++i) { input_8x8[i] = vreinterpretq_s16_u16(vld1q_u16(src_block_[i])); } Transpose8x8(input_8x8); uint16_t output_8x8[8][8]; for (int i = 0; i < 8; ++i) { vst1q_u16(output_8x8[i], vreinterpretq_u16_s16(input_8x8[i])); } EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_8x8[0], 8, 8, kMaxBlockWidth, 8, false)); } } // namespace } // namespace dsp } // namespace libgav1 #else // !LIBGAV1_ENABLE_NEON TEST(CommonDspTest, NEON) { GTEST_SKIP() << "Build this module for Arm with NEON enabled to enable the tests."; } #endif // LIBGAV1_ENABLE_NEON