1 #include <gtest/gtest.h> 2 #include "cpu.h" 3 #include "cpu_core.h" 4 #include "deblocking_common.h" 5 #include "macros.h" 6 7 #define WRAP_LUMA_FUNC(func) \ 8 void func ## _wrap (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) { \ 9 func (pPixY, iStride, iAlpha, iBeta); \ 10 } 11 #define WRAP_CHROMA_FUNC(func) \ 12 void func ## _wrap (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) { \ 13 func (pPixCb, pPixCr, iStride, iAlpha, iBeta); \ 14 } 15 16 #define GENERATE_LUMA_UT(name, func, ref, CPUFLAGS, HORIZ) \ 17 TEST(DeblockTest, name) { \ 18 int32_t iRunTimes = 1000; \ 19 ENFORCE_STACK_ALIGN_1D (uint8_t, pTestBuffer, 16*17, 16); \ 20 ENFORCE_STACK_ALIGN_1D (uint8_t, pRefBuffer, 16*17, 16); \ 21 int32_t iNumberofCPUCore = 1; \ 22 uint32_t uiCPUFlags = WelsCPUFeatureDetect(&iNumberofCPUCore); \ 23 if ((uiCPUFlags & CPUFLAGS) == 0) \ 24 return; \ 25 while (iRunTimes--) { \ 26 int iAlpha = rand() & 255; \ 27 int iBeta = rand() & 255; \ 28 ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); \ 29 for (int i = 0; i < 4; i++) \ 30 iTc[i] = -1 + (rand() % 28); \ 31 if (iRunTimes == 1) { /* special case to test all pixels */ \ 32 iAlpha = iBeta = 255; \ 33 for (int i = 0; i < 4; i++) \ 34 iTc[i] = 27; \ 35 } \ 36 for (int i = 0; i < 16*17; i++) \ 37 pRefBuffer[i] = pTestBuffer[i] = rand() & 255; \ 38 uint8_t* pRefStart = HORIZ ? pRefBuffer + 16 : pRefBuffer + 16*8; \ 39 uint8_t* pTestStart = HORIZ ? pTestBuffer + 16 : pTestBuffer + 16*8; \ 40 func (pTestStart, 16, iAlpha, iBeta, iTc); \ 41 ref (pRefStart, 16, iAlpha, iBeta, iTc); \ 42 bool ok = true; \ 43 for (int i = 0; i < 16*17; i++) { \ 44 if (pTestBuffer[i] != pRefBuffer[i]) { \ 45 ok = false; \ 46 break; \ 47 } \ 48 } \ 49 EXPECT_EQ(ok, true); \ 50 } \ 51 } 52 53 #define GENERATE_CHROMA_UT(name, func, ref, CPUFLAGS, HORIZ) \ 54 TEST(DeblockTest, name) { \ 55 int32_t iRunTimes = 1000; \ 56 ENFORCE_STACK_ALIGN_1D (uint8_t, pTestBuffer, 8 + 8*8*2, 16); \ 57 ENFORCE_STACK_ALIGN_1D (uint8_t, pRefBuffer, 8 + 8*8*2, 16); \ 58 int32_t iNumberofCPUCore = 1; \ 59 uint32_t uiCPUFlags = WelsCPUFeatureDetect(&iNumberofCPUCore); \ 60 if ((uiCPUFlags & CPUFLAGS) == 0) \ 61 return; \ 62 while (iRunTimes--) { \ 63 int iAlpha = rand() & 255; \ 64 int iBeta = rand() & 255; \ 65 ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); \ 66 for (int i = 0; i < 4; i++) \ 67 iTc[i] = -1 + (rand() % 28); \ 68 if (iRunTimes == 1) { /* special case to test all pixels */ \ 69 iAlpha = iBeta = 255; \ 70 for (int i = 0; i < 4; i++) \ 71 iTc[i] = 27; \ 72 } \ 73 for (int i = 0; i < 8 + 8*8*2; i++) \ 74 pRefBuffer[i] = pTestBuffer[i] = rand() & 255; \ 75 uint8_t* pCbRefStart = HORIZ ? pRefBuffer + 8 : pRefBuffer + 8*4; \ 76 uint8_t* pCbTestStart = HORIZ ? pTestBuffer + 8 : pTestBuffer + 8*4; \ 77 uint8_t* pCrRefStart = pCbRefStart + 8*8; \ 78 uint8_t* pCrTestStart = pCbTestStart + 8*8; \ 79 func (pCbTestStart, pCrTestStart, 8, iAlpha, iBeta, iTc); \ 80 ref (pCbRefStart, pCrRefStart, 8, iAlpha, iBeta, iTc); \ 81 bool ok = true; \ 82 for (int i = 0; i < 8 + 8*8*2; i++) { \ 83 if (pTestBuffer[i] != pRefBuffer[i]) { \ 84 ok = false; \ 85 break; \ 86 } \ 87 } \ 88 EXPECT_EQ(ok, true); \ 89 } \ 90 } 91 92 WRAP_LUMA_FUNC (DeblockLumaEq4V_c) 93 WRAP_LUMA_FUNC (DeblockLumaEq4H_c) 94 WRAP_CHROMA_FUNC (DeblockChromaEq4V_c) 95 WRAP_CHROMA_FUNC (DeblockChromaEq4H_c) 96 97 #if defined(X86_ASM) 98 WRAP_LUMA_FUNC (DeblockLumaEq4V_ssse3) 99 WRAP_LUMA_FUNC (DeblockLumaEq4H_ssse3) 100 WRAP_CHROMA_FUNC (DeblockChromaEq4V_ssse3) 101 WRAP_CHROMA_FUNC (DeblockChromaEq4H_ssse3) 102 103 GENERATE_LUMA_UT (LumaLt4V_ssse3, DeblockLumaLt4V_ssse3, DeblockLumaLt4V_c, WELS_CPU_SSSE3, 0) 104 GENERATE_LUMA_UT (LumaLt4H_ssse3, DeblockLumaLt4H_ssse3, DeblockLumaLt4H_c, WELS_CPU_SSSE3, 1) 105 GENERATE_LUMA_UT (LumaEq4V_ssse3, DeblockLumaEq4V_ssse3_wrap, DeblockLumaEq4V_c_wrap, WELS_CPU_SSSE3, 0) 106 GENERATE_LUMA_UT (LumaEq4H_ssse3, DeblockLumaEq4H_ssse3_wrap, DeblockLumaEq4H_c_wrap, WELS_CPU_SSSE3, 1) 107 108 GENERATE_CHROMA_UT (ChromaLt4V_ssse3, DeblockChromaLt4V_ssse3, DeblockChromaLt4V_c, WELS_CPU_SSSE3, 0) 109 GENERATE_CHROMA_UT (ChromaLt4H_ssse3, DeblockChromaLt4H_ssse3, DeblockChromaLt4H_c, WELS_CPU_SSSE3, 1) 110 GENERATE_CHROMA_UT (ChromaEq4V_ssse3, DeblockChromaEq4V_ssse3_wrap, DeblockChromaEq4V_c_wrap, WELS_CPU_SSSE3, 0) 111 GENERATE_CHROMA_UT (ChromaEq4H_ssse3, DeblockChromaEq4H_ssse3_wrap, DeblockChromaEq4H_c_wrap, WELS_CPU_SSSE3, 1) 112 #endif 113 114 #if defined(HAVE_NEON) 115 WRAP_LUMA_FUNC (DeblockLumaEq4V_neon) 116 WRAP_LUMA_FUNC (DeblockLumaEq4H_neon) 117 WRAP_CHROMA_FUNC (DeblockChromaEq4V_neon) 118 WRAP_CHROMA_FUNC (DeblockChromaEq4H_neon) 119 120 GENERATE_LUMA_UT (LumaLt4V_neon, DeblockLumaLt4V_neon, DeblockLumaLt4V_c, WELS_CPU_NEON, 0) 121 GENERATE_LUMA_UT (LumaLt4H_neon, DeblockLumaLt4H_neon, DeblockLumaLt4H_c, WELS_CPU_NEON, 1) 122 GENERATE_LUMA_UT (LumaEq4V_neon, DeblockLumaEq4V_neon_wrap, DeblockLumaEq4V_c_wrap, WELS_CPU_NEON, 0) 123 GENERATE_LUMA_UT (LumaEq4H_neon, DeblockLumaEq4H_neon_wrap, DeblockLumaEq4H_c_wrap, WELS_CPU_NEON, 1) 124 125 GENERATE_CHROMA_UT (ChromaLt4V_neon, DeblockChromaLt4V_neon, DeblockChromaLt4V_c, WELS_CPU_NEON, 0) 126 GENERATE_CHROMA_UT (ChromaLt4H_neon, DeblockChromaLt4H_neon, DeblockChromaLt4H_c, WELS_CPU_NEON, 1) 127 GENERATE_CHROMA_UT (ChromaEq4V_neon, DeblockChromaEq4V_neon_wrap, DeblockChromaEq4V_c_wrap, WELS_CPU_NEON, 0) 128 GENERATE_CHROMA_UT (ChromaEq4H_neon, DeblockChromaEq4H_neon_wrap, DeblockChromaEq4H_c_wrap, WELS_CPU_NEON, 1) 129 #endif 130 131 #if defined(HAVE_NEON_AARCH64) 132 WRAP_LUMA_FUNC (DeblockLumaEq4V_AArch64_neon) 133 WRAP_LUMA_FUNC (DeblockLumaEq4H_AArch64_neon) 134 WRAP_CHROMA_FUNC (DeblockChromaEq4V_AArch64_neon) 135 WRAP_CHROMA_FUNC (DeblockChromaEq4H_AArch64_neon) 136 137 GENERATE_LUMA_UT (LumaLt4V_AArch64_neon, DeblockLumaLt4V_AArch64_neon, DeblockLumaLt4V_c, WELS_CPU_NEON, 0) 138 GENERATE_LUMA_UT (LumaLt4H_AArch64_neon, DeblockLumaLt4H_AArch64_neon, DeblockLumaLt4H_c, WELS_CPU_NEON, 1) 139 GENERATE_LUMA_UT (LumaEq4V_AArch64_neon, DeblockLumaEq4V_AArch64_neon_wrap, DeblockLumaEq4V_c_wrap, WELS_CPU_NEON, 0) 140 GENERATE_LUMA_UT (LumaEq4H_AArch64_neon, DeblockLumaEq4H_AArch64_neon_wrap, DeblockLumaEq4H_c_wrap, WELS_CPU_NEON, 1) 141 142 GENERATE_CHROMA_UT (ChromaLt4V_AArch64_neon, DeblockChromaLt4V_AArch64_neon, DeblockChromaLt4V_c, WELS_CPU_NEON, 0) 143 GENERATE_CHROMA_UT (ChromaLt4H_AArch64_neon, DeblockChromaLt4H_AArch64_neon, DeblockChromaLt4H_c, WELS_CPU_NEON, 1) 144 GENERATE_CHROMA_UT (ChromaEq4V_AArch64_neon, DeblockChromaEq4V_AArch64_neon_wrap, DeblockChromaEq4V_c_wrap, 145 WELS_CPU_NEON, 0) 146 GENERATE_CHROMA_UT (ChromaEq4H_AArch64_neon, DeblockChromaEq4H_AArch64_neon_wrap, DeblockChromaEq4H_c_wrap, 147 WELS_CPU_NEON, 1) 148 #endif 149 150 #if defined(HAVE_MMI) 151 WRAP_LUMA_FUNC (DeblockLumaEq4V_mmi) 152 WRAP_LUMA_FUNC (DeblockLumaEq4H_mmi) 153 WRAP_CHROMA_FUNC (DeblockChromaEq4V_mmi) 154 WRAP_CHROMA_FUNC (DeblockChromaEq4H_mmi) 155 156 GENERATE_LUMA_UT (LumaLt4V_mmi, DeblockLumaLt4V_mmi, DeblockLumaLt4V_c, WELS_CPU_MMI, 0) 157 GENERATE_LUMA_UT (LumaLt4H_mmi, DeblockLumaLt4H_mmi, DeblockLumaLt4H_c, WELS_CPU_MMI, 1) 158 GENERATE_LUMA_UT (LumaEq4V_mmi, DeblockLumaEq4V_mmi_wrap, DeblockLumaEq4V_c_wrap, WELS_CPU_MMI, 0) 159 GENERATE_LUMA_UT (LumaEq4H_mmi, DeblockLumaEq4H_mmi_wrap, DeblockLumaEq4H_c_wrap, WELS_CPU_MMI, 1) 160 161 GENERATE_CHROMA_UT (ChromaLt4V_mmi, DeblockChromaLt4V_mmi, DeblockChromaLt4V_c, WELS_CPU_MMI, 0) 162 GENERATE_CHROMA_UT (ChromaLt4H_mmi, DeblockChromaLt4H_mmi, DeblockChromaLt4H_c, WELS_CPU_MMI, 1) 163 GENERATE_CHROMA_UT (ChromaEq4V_mmi, DeblockChromaEq4V_mmi_wrap, DeblockChromaEq4V_c_wrap, WELS_CPU_MMI, 0) 164 GENERATE_CHROMA_UT (ChromaEq4H_mmi, DeblockChromaEq4H_mmi_wrap, DeblockChromaEq4H_c_wrap, WELS_CPU_MMI, 1) 165 #endif//HAVE_MMI 166 167 #if defined(HAVE_MSA) 168 WRAP_LUMA_FUNC (DeblockLumaEq4V_msa) 169 WRAP_LUMA_FUNC (DeblockLumaEq4H_msa) 170 WRAP_CHROMA_FUNC (DeblockChromaEq4V_msa) 171 WRAP_CHROMA_FUNC (DeblockChromaEq4H_msa) 172 173 GENERATE_LUMA_UT (LumaLt4V_msa, DeblockLumaLt4V_msa, DeblockLumaLt4V_c, WELS_CPU_MSA, 0) 174 GENERATE_LUMA_UT (LumaLt4H_msa, DeblockLumaLt4H_msa, DeblockLumaLt4H_c, WELS_CPU_MSA, 1) 175 GENERATE_LUMA_UT (LumaEq4V_msa, DeblockLumaEq4V_msa_wrap, DeblockLumaEq4V_c_wrap, WELS_CPU_MSA, 0) 176 GENERATE_LUMA_UT (LumaEq4H_msa, DeblockLumaEq4H_msa_wrap, DeblockLumaEq4H_c_wrap, WELS_CPU_MSA, 1) 177 178 GENERATE_CHROMA_UT (ChromaLt4V_msa, DeblockChromaLt4V_msa, DeblockChromaLt4V_c, WELS_CPU_MSA, 0) 179 GENERATE_CHROMA_UT (ChromaLt4H_msa, DeblockChromaLt4H_msa, DeblockChromaLt4H_c, WELS_CPU_MSA, 1) 180 GENERATE_CHROMA_UT (ChromaEq4V_msa, DeblockChromaEq4V_msa_wrap, DeblockChromaEq4V_c_wrap, WELS_CPU_MSA, 0) 181 GENERATE_CHROMA_UT (ChromaEq4H_msa, DeblockChromaEq4H_msa_wrap, DeblockChromaEq4H_c_wrap, WELS_CPU_MSA, 1) 182 #endif//HAVE_MSA 183