// Unit tests (SVC_ME_FunTest): motion-estimation helper routines are compared against local reference implementations.
#include <gtest/gtest.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "cpu_core.h"
#include "cpu.h"
#include "macros.h"
#include "ls_defines.h"
#include "svc_motion_estimate.h"

using namespace WelsEnc;
// Number of iterations each generated test runs; every iteration works on
// freshly randomized input data.
#define SVC_ME_TEST_NUM 10
// Fills the first Len bytes of p with pseudo-random values drawn from rand().
// Nothing is written when Len is zero or negative. The generator is not seeded
// here, so the produced sequence depends on the caller's srand() state.
static void FillWithRandomData (uint8_t* p, int32_t Len) {
  for (int32_t i = 0; i < Len; ++i) {
    p[i] = static_cast<uint8_t> (rand() % 256);
  }
}

// Preprocess related: reference (_ref) implementations used as anchors by the tests below.
// Reference sum of one 8x8 block of bytes.
//   pRef        - top-left sample of the block
//   kiRefStride - distance in bytes between the starts of consecutive rows
// Returns the sum of all 64 samples.
int32_t SumOf8x8SingleBlock_ref (uint8_t* pRef, const int32_t kiRefStride) {
  int32_t iSum = 0;
  for (int32_t iRow = 0; iRow < 8; iRow++) {
    for (int32_t iCol = 0; iCol < 8; iCol++) {
      iSum += pRef[iCol];
    }
    pRef += kiRefStride;
  }
  return iSum;
}
// Reference sum of one 16x16 block of bytes.
//   pRef        - top-left sample of the block
//   kiRefStride - distance in bytes between the starts of consecutive rows
// Returns the sum of all 256 samples.
int32_t SumOf16x16SingleBlock_ref (uint8_t* pRef, const int32_t kiRefStride) {
  int32_t iSum = 0;
  for (int32_t iRow = 0; iRow < 16; iRow++) {
    for (int32_t iCol = 0; iCol < 16; iCol++) {
      iSum += pRef[iCol];
    }
    pRef += kiRefStride;
  }
  return iSum;
}

// Reference computation of the 8x8 block sum for every position of a
// kiWidth x kiHeight grid.
//   pRefPicture          - picture samples; the block for position (x, y) starts
//                          at pRefPicture[y * kiRefStride + x], so the buffer must
//                          extend 7 rows and 7 columns past the last position
//   kiWidth, kiHeight    - number of positions per row / number of rows
//   kiRefStride          - distance in bytes between consecutive picture rows
//   pFeatureOfBlock      - output, kiWidth * kiHeight sums stored row by row
//   pTimesOfFeatureValue - histogram indexed by sum; the matching entry is
//                          incremented for each position (it is not cleared here)
// The block is summed directly in this function so that the reference does not
// depend on the production single-block routines it is used to validate.
void SumOf8x8BlockOfFrame_ref (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
                               const int32_t kiRefStride,
                               uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) {
  for (int32_t y = 0; y < kiHeight; y++) {
    const uint8_t* pPictureRow = pRefPicture + kiRefStride * y;
    uint16_t* pFeatureRow = pFeatureOfBlock + kiWidth * y;
    for (int32_t x = 0; x < kiWidth; x++) {
      const uint8_t* pBlockRow = pPictureRow + x;
      int32_t iSum = 0;
      for (int32_t iRow = 0; iRow < 8; iRow++) {
        for (int32_t iCol = 0; iCol < 8; iCol++) {
          iSum += pBlockRow[iCol];
        }
        pBlockRow += kiRefStride;
      }

      pFeatureRow[x] = static_cast<uint16_t> (iSum);
      pTimesOfFeatureValue[iSum]++;
    }
  }
}

// Reference computation of the 16x16 block sum for every position of a
// kiWidth x kiHeight grid.
//   pRefPicture          - picture samples; the block for position (x, y) starts
//                          at pRefPicture[y * kiRefStride + x], so the buffer must
//                          extend 15 rows and 15 columns past the last position
//   kiWidth, kiHeight    - number of positions per row / number of rows
//   kiRefStride          - distance in bytes between consecutive picture rows
//   pFeatureOfBlock      - output, kiWidth * kiHeight sums stored row by row
//   pTimesOfFeatureValue - histogram indexed by sum; the matching entry is
//                          incremented for each position (it is not cleared here)
// The block is summed directly in this function so that the reference does not
// depend on the production single-block routines it is used to validate.
// TODO: identical to SumOf8x8BlockOfFrame_ref except for the block size, refactor-able?
void SumOf16x16BlockOfFrame_ref (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
                                 const int32_t kiRefStride,
                                 uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) {
  for (int32_t y = 0; y < kiHeight; y++) {
    const uint8_t* pPictureRow = pRefPicture + kiRefStride * y;
    uint16_t* pFeatureRow = pFeatureOfBlock + kiWidth * y;
    for (int32_t x = 0; x < kiWidth; x++) {
      const uint8_t* pBlockRow = pPictureRow + x;
      int32_t iSum = 0;
      for (int32_t iRow = 0; iRow < 16; iRow++) {
        for (int32_t iCol = 0; iCol < 16; iCol++) {
          iSum += pBlockRow[iCol];
        }
        pBlockRow += kiRefStride;
      }

      // The largest possible sum is 256 * 255 = 65280, which still fits in uint16_t.
      pFeatureRow[x] = static_cast<uint16_t> (iSum);
      pTimesOfFeatureValue[iSum]++;
    }
  }
}

// Reference set-up of the per-feature location lists inside one shared buffer.
//   pTimesOfFeatureValue     - number of occurrences of each feature value
//   pBuf                     - buffer that will hold all location pairs
//   kiListSize               - number of feature values to process
//   pLocationOfFeature       - output, start of the slot range of each feature
//   pFeatureValuePointerList - output, same start addresses (a second copy)
// Feature i receives 2 * pTimesOfFeatureValue[i] uint16_t slots; the ranges are
// laid out back to back in increasing order of i, starting at pBuf.
void InitializeHashforFeature_ref (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
                                   uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList) {
  uint16_t* pNextSlot = pBuf;
  for (int32_t iFeature = 0; iFeature < kiListSize; ++iFeature) {
    pLocationOfFeature[iFeature] = pNextSlot;
    pFeatureValuePointerList[iFeature] = pNextSlot;
    // two uint16_t entries are reserved per occurrence
    pNextSlot += (pTimesOfFeatureValue[iFeature] << 1);
  }
}
// Reference fill of the per-feature location lists.
//   pFeatureOfBlock          - kiWidth * kiHeight feature values stored row by row
//   kiWidth, kiHeight        - dimensions of the feature grid
//   pFeatureValuePointerList - per-feature write cursors; each must point at
//                              free space for that feature's entries
// For every grid position (x, y), visited row by row, the pair (x * 4, y * 4)
// is written at the cursor of the position's feature value and that cursor is
// advanced by two entries. The cursors are therefore modified by this call.
void FillQpelLocationByFeatureValue_ref (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
    uint16_t** pFeatureValuePointerList) {
  const uint16_t* pFeatureRow = pFeatureOfBlock;
  for (int32_t y = 0; y < kiHeight; y++) {
    const uint16_t uiQpelY = static_cast<uint16_t> (y << 2);
    for (int32_t x = 0; x < kiWidth; x++) {
      uint16_t*& pCursor = pFeatureValuePointerList[pFeatureRow[x]];
      pCursor[0] = static_cast<uint16_t> (x << 2);
      pCursor[1] = uiQpelY;
      pCursor += 2;
    }
    pFeatureRow += kiWidth;
  }
}

// Generates the test SVC_ME_FunTest.<method>, which checks a single-block sum
// routine against a reference.
//   anchor - reference function, called as anchor (buffer, stride)
//   method - function under test, called the same way; also used as the test name
//   flag   - CPU feature mask required by `method`; 0 means "always run". When
//            the mask is non-zero and WelsCPUFeatureDetect() reports none of its
//            bits, the test returns early without checking anything.
// Each of the SVC_ME_TEST_NUM iterations refills a 16-row buffer with a stride
// of 320 bytes with random data and requires both functions to return the same
// value.
#define GENERATE_SumOfSingleBlock(anchor, method, flag) \
TEST (SVC_ME_FunTest, method) { \
  const uint32_t kuiCPUFlags = WelsCPUFeatureDetect (NULL); \
  if ((flag) != 0 && (kuiCPUFlags & (flag)) == 0) \
    return; \
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiRefBuf, 16 * 320, 16); \
  for (int32_t k = 0; k < SVC_ME_TEST_NUM; k++) { \
    FillWithRandomData (uiRefBuf, 16 * 320); \
    const int32_t kiExpected = anchor (uiRefBuf, 320); \
    const int32_t kiActual = method (uiRefBuf, 320); \
    ASSERT_EQ (kiExpected, kiActual); \
  } \
}

124 GENERATE_SumOfSingleBlock (SumOf8x8SingleBlock_ref, SumOf8x8SingleBlock_c, 0)
125 GENERATE_SumOfSingleBlock (SumOf16x16SingleBlock_ref, SumOf16x16SingleBlock_c, 0)
126 
127 #ifdef X86_ASM
128 GENERATE_SumOfSingleBlock (SumOf8x8SingleBlock_ref, SumOf8x8SingleBlock_sse2, WELS_CPU_SSE2)
129 GENERATE_SumOfSingleBlock (SumOf16x16SingleBlock_ref, SumOf16x16SingleBlock_sse2, WELS_CPU_SSE2)
130 #endif
131 
132 #ifdef HAVE_NEON
133 GENERATE_SumOfSingleBlock (SumOf8x8SingleBlock_ref, SumOf8x8SingleBlock_neon, WELS_CPU_NEON)
134 GENERATE_SumOfSingleBlock (SumOf16x16SingleBlock_ref, SumOf16x16SingleBlock_neon, WELS_CPU_NEON)
135 #endif
136 
137 #ifdef HAVE_NEON_AARCH64
138 GENERATE_SumOfSingleBlock (SumOf8x8SingleBlock_ref, SumOf8x8SingleBlock_AArch64_neon, WELS_CPU_NEON)
139 GENERATE_SumOfSingleBlock (SumOf16x16SingleBlock_ref, SumOf16x16SingleBlock_AArch64_neon, WELS_CPU_NEON)
140 #endif
141 
142 
// Declares two pointers in the current scope:
//   _nbuff - array of (_sz + _al - 1) elements of type _tp obtained with new[];
//            the caller has to release it with delete[] _nbuff
//   _nm    - pointer into that array whose address is a multiple of _al bytes
//            and which is followed by at least _sz elements
// _al must be a power of two. The expansion already ends with ';', so the macro
// is used without a trailing semicolon.
#define ENFORCE_NEW_ALIGN_1D(_tp, _nm, _nbuff, _sz, _al) \
_tp *_nbuff = new _tp[(_sz)+(_al)-1]; \
_tp *_nm = (_tp *) ((uintptr_t) (_nbuff + ((_al)-1)) & ~ (uintptr_t) ((_al)-1));

// Generates the test SVC_ME_FunTest.<method>_<kiWidth>x<kiHeight>, which checks a
// whole-frame block-sum routine against a reference.
//   anchor            - reference function
//   method            - function under test; also part of the test name
//   kiWidth, kiHeight - grid dimensions passed to both functions (must be plain
//                       tokens because they are pasted into the test name)
//   flag              - CPU feature mask required by `method`; 0 means "always
//                       run". When the mask is non-zero and WelsCPUFeatureDetect()
//                       reports none of its bits, the test returns early.
// Both functions get the same random picture, whose stride is kiWidth rounded up
// to a multiple of 16 plus 16 and which has kiHeight + 16 rows. Their per-block
// sums and their 65536-entry histograms must be identical.
// EXPECT_EQ combined with HasFailure() is used instead of ASSERT_EQ: the first
// mismatch still ends all loops, but control reaches the delete[] calls, so the
// buffers are released on failure as well. The histograms are heap allocated to
// keep 512 KiB off the stack.
#define GENERATE_SumOfFrame(anchor, method, kiWidth, kiHeight, flag) \
TEST (SVC_ME_FunTest, method##_##kiWidth##x##kiHeight) { \
  const uint32_t kuiCPUFlags = WelsCPUFeatureDetect (NULL); \
  if ((flag) != 0 && (kuiCPUFlags & (flag)) == 0) \
    return; \
  const int32_t kiRefStride = ((((kiWidth) + 15) >> 4) << 4) + 16; \
  const int32_t kiRefSize = ((kiHeight) + 16) * kiRefStride; \
  const int32_t kiBlockNum = (kiWidth) * (kiHeight); \
  ENFORCE_NEW_ALIGN_1D (uint8_t, pRefPicture, pRefPictureBuff, kiRefSize, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pFeatureOfBlock1, pFeatureOfBlockBuff1, kiBlockNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pFeatureOfBlock2, pFeatureOfBlockBuff2, kiBlockNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint32_t, pTimesOfFeatureValue1, pTimesOfFeatureValueBuff1, 65536, 16) \
  ENFORCE_NEW_ALIGN_1D (uint32_t, pTimesOfFeatureValue2, pTimesOfFeatureValueBuff2, 65536, 16) \
  for (int32_t k = 0; k < SVC_ME_TEST_NUM && !::testing::Test::HasFailure(); k++) { \
    FillWithRandomData (pRefPicture, kiRefSize); \
    memset (pTimesOfFeatureValue1, 0, 65536 * sizeof (uint32_t)); \
    memset (pTimesOfFeatureValue2, 0, 65536 * sizeof (uint32_t)); \
    anchor (pRefPicture, kiWidth, kiHeight, kiRefStride, pFeatureOfBlock1, pTimesOfFeatureValue1); \
    method (pRefPicture, kiWidth, kiHeight, kiRefStride, pFeatureOfBlock2, pTimesOfFeatureValue2); \
    for (int32_t j = 0; j < kiBlockNum && !::testing::Test::HasFailure(); j++) { \
      EXPECT_EQ (pFeatureOfBlock1[j], pFeatureOfBlock2[j]); \
    } \
    for (int32_t j = 0; j < 65536 && !::testing::Test::HasFailure(); j++) { \
      EXPECT_EQ (pTimesOfFeatureValue1[j], pTimesOfFeatureValue2[j]); \
    } \
  } \
  delete[] pRefPictureBuff; \
  delete[] pFeatureOfBlockBuff1; \
  delete[] pFeatureOfBlockBuff2; \
  delete[] pTimesOfFeatureValueBuff1; \
  delete[] pTimesOfFeatureValueBuff2; \
}

// Generates the test SVC_ME_FunTest.<method>_<kiWidth>x<kiHeight>, which checks a
// hash-initialisation routine against a reference.
//   anchor            - reference function
//   method            - function under test; also part of the test name
//   kiWidth, kiHeight - grid dimensions (must be plain tokens because they are
//                       pasted into the test name)
//   flag              - CPU feature mask required by `method`; 0 means "always
//                       run". When the mask is non-zero and WelsCPUFeatureDetect()
//                       reports none of its bits, the test returns early.
// In each iteration SumOf8x8BlockOfFrame_c builds the occurrence histogram of a
// random picture. Both functions are then run on that histogram with the same
// location buffer, so the pointers they store can be compared directly.
#define GENERATE_InitializeHashforFeature(anchor, method, kiWidth, kiHeight, flag) \
TEST (SVC_ME_FunTest, method##_##kiWidth##x##kiHeight) { \
  const uint32_t kuiCPUFlags = WelsCPUFeatureDetect (NULL); \
  if ((flag) != 0 && (kuiCPUFlags & (flag)) == 0) \
    return; \
  const int32_t kiRefStride = ((((kiWidth) + 15) >> 4) << 4) + 16; \
  const int32_t kiRefSize = ((kiHeight) + 16) * kiRefStride; \
  const int32_t kiBlockNum = (kiWidth) * (kiHeight); \
  const int32_t kiFeatureNum = 65536; \
  ENFORCE_NEW_ALIGN_1D (uint8_t, pRefPicture, pRefPictureBuff, kiRefSize, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pFeatureOfBlock, pFeatureOfBlockBuff, kiBlockNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pLocation1, pLocationBuff1, kiBlockNum * 2, 16) \
  ENFORCE_NEW_ALIGN_1D (uint32_t, pTimesOfFeatureValue, pTimesOfFeatureValueBuff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pLocationFeature0, pLocationFeature0Buff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pLocationFeature1, pLocationFeature1Buff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pFeaturePointValueList0, pFeaturePointValueList0Buff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pFeaturePointValueList1, pFeaturePointValueList1Buff, kiFeatureNum, 16) \
  for (int32_t k = 0; k < SVC_ME_TEST_NUM; k++) { \
    FillWithRandomData (pRefPicture, kiRefSize); \
    memset (pTimesOfFeatureValue, 0, kiFeatureNum * sizeof (uint32_t)); \
    memset (pLocationFeature0, 0, kiFeatureNum * sizeof (uint16_t*)); \
    memset (pFeaturePointValueList0, 0, kiFeatureNum * sizeof (uint16_t*)); \
    memset (pLocationFeature1, 0, kiFeatureNum * sizeof (uint16_t*)); \
    memset (pFeaturePointValueList1, 0, kiFeatureNum * sizeof (uint16_t*)); \
    SumOf8x8BlockOfFrame_c (pRefPicture, kiWidth, kiHeight, kiRefStride, pFeatureOfBlock, pTimesOfFeatureValue); \
    anchor (pTimesOfFeatureValue, pLocation1, kiFeatureNum, pLocationFeature0, pFeaturePointValueList0); \
    method (pTimesOfFeatureValue, pLocation1, kiFeatureNum, pLocationFeature1, pFeaturePointValueList1); \
    for (int32_t j = 0; j < kiFeatureNum; j++) { \
      EXPECT_EQ (pLocationFeature0[j], pLocationFeature1[j]); \
      EXPECT_EQ (pFeaturePointValueList0[j], pFeaturePointValueList1[j]); \
    } \
  } \
  delete[] pRefPictureBuff; \
  delete[] pFeatureOfBlockBuff; \
  delete[] pLocationBuff1; \
  delete[] pTimesOfFeatureValueBuff; \
  delete[] pLocationFeature0Buff; \
  delete[] pFeaturePointValueList0Buff; \
  delete[] pLocationFeature1Buff; \
  delete[] pFeaturePointValueList1Buff; \
}

// Generates the test SVC_ME_FunTest.<method>_<kiWidth>x<kiHeight>, which checks a
// location-fill routine against a reference.
//   anchor            - reference function
//   method            - function under test; also part of the test name
//   kiWidth, kiHeight - grid dimensions (must be plain tokens because they are
//                       pasted into the test name)
//   flag              - CPU feature mask required by `method`; 0 means "always
//                       run". When the mask is non-zero and WelsCPUFeatureDetect()
//                       reports none of its bits, the test returns early.
// In each iteration SumOf8x8BlockOfFrame_c and InitializeHashforFeature_c prepare
// two identical sets of write cursors over two separate location buffers. The
// reference fills the first buffer, the function under test fills the second,
// and both buffers must then hold the same kiWidth * kiHeight * 2 entries.
#define GENERATE_FillQpelLocationByFeatureValue(anchor, method, kiWidth, kiHeight, flag) \
TEST (SVC_ME_FunTest, method##_##kiWidth##x##kiHeight) { \
  const uint32_t kuiCPUFlags = WelsCPUFeatureDetect (NULL); \
  if ((flag) != 0 && (kuiCPUFlags & (flag)) == 0) \
    return; \
  const int32_t kiRefStride = ((((kiWidth) + 15) >> 4) << 4) + 16; \
  const int32_t kiRefSize = ((kiHeight) + 16) * kiRefStride; \
  const int32_t kiBlockNum = (kiWidth) * (kiHeight); \
  const int32_t kiFeatureNum = 65536; \
  ENFORCE_NEW_ALIGN_1D (uint8_t, pRefPicture, pRefPictureBuff, kiRefSize, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pFeatureOfBlock, pFeatureOfBlockBuff, kiBlockNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pLocation1, pLocationBuff1, kiBlockNum * 2, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pLocation2, pLocationBuff2, kiBlockNum * 2, 16) \
  ENFORCE_NEW_ALIGN_1D (uint32_t, pTimesOfFeatureValue, pTimesOfFeatureValueBuff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pLocationFeature0, pLocationFeature0Buff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pLocationFeature1, pLocationFeature1Buff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pFeaturePointValueList0, pFeaturePointValueList0Buff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pFeaturePointValueList1, pFeaturePointValueList1Buff, kiFeatureNum, 16) \
  for (int32_t k = 0; k < SVC_ME_TEST_NUM; k++) { \
    FillWithRandomData (pRefPicture, kiRefSize); \
    memset (pTimesOfFeatureValue, 0, kiFeatureNum * sizeof (uint32_t)); \
    memset (pLocationFeature0, 0, kiFeatureNum * sizeof (uint16_t*)); \
    memset (pFeaturePointValueList0, 0, kiFeatureNum * sizeof (uint16_t*)); \
    memset (pLocationFeature1, 0, kiFeatureNum * sizeof (uint16_t*)); \
    memset (pFeaturePointValueList1, 0, kiFeatureNum * sizeof (uint16_t*)); \
    SumOf8x8BlockOfFrame_c (pRefPicture, kiWidth, kiHeight, kiRefStride, pFeatureOfBlock, pTimesOfFeatureValue); \
    InitializeHashforFeature_c (pTimesOfFeatureValue, pLocation1, kiFeatureNum, pLocationFeature0, pFeaturePointValueList0); \
    InitializeHashforFeature_c (pTimesOfFeatureValue, pLocation2, kiFeatureNum, pLocationFeature1, pFeaturePointValueList1); \
    anchor (pFeatureOfBlock, kiWidth, kiHeight, pFeaturePointValueList0); \
    method (pFeatureOfBlock, kiWidth, kiHeight, pFeaturePointValueList1); \
    for (int32_t j = 0; j < kiBlockNum * 2; j++) { \
      EXPECT_EQ (pLocation1[j], pLocation2[j]); \
    } \
  } \
  delete[] pRefPictureBuff; \
  delete[] pFeatureOfBlockBuff; \
  delete[] pLocationBuff1; \
  delete[] pLocationBuff2; \
  delete[] pTimesOfFeatureValueBuff; \
  delete[] pLocationFeature0Buff; \
  delete[] pFeaturePointValueList0Buff; \
  delete[] pLocationFeature1Buff; \
  delete[] pFeaturePointValueList1Buff; \
}

256 GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_c, 10, 10, 0)
257 GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_c, 16, 16, 0)
258 GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_c, 640, 320, 0)
259 GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_c, 640, 320, 0)
260 #ifdef X86_ASM
261 GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_sse2, 10, 10, WELS_CPU_SSE2)
262 GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_sse2, 16,
263     16, WELS_CPU_SSE2)
264 GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_sse2, 640, 320, WELS_CPU_SSE2)
265 GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_sse2, 640,
266     320, WELS_CPU_SSE2)
267 #endif
268 
269 GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_c, 1, 1, 0)
270 GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_c, 1, 1, 0)
271 GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_c, 1, 320, 0)
272 GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_c, 1, 320, 0)
273 GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_c, 640, 320, 0)
274 GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_c, 640, 320, 0)
275 
// Whole-frame block-sum tests for the x86 SIMD versions (SSE2 and SSE4.1); each
// test is skipped at run time when the CPU lacks the required feature.
#ifdef X86_ASM
GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_sse2, 6, 6, WELS_CPU_SSE2)
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_sse2, 6, 6, WELS_CPU_SSE2)
GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_sse2, 6, 320, WELS_CPU_SSE2)
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_sse2, 6, 320, WELS_CPU_SSE2)
GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_sse2, 640, 320, WELS_CPU_SSE2)
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_sse2, 640, 320, WELS_CPU_SSE2)

GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_sse4, 8, 2, WELS_CPU_SSE41)
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_sse4, 16, 2, WELS_CPU_SSE41)
GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_sse4, 8, 320, WELS_CPU_SSE41)
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_sse4, 16, 320, WELS_CPU_SSE41)
GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_sse4, 640, 320, WELS_CPU_SSE41)
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_sse4, 640, 320, WELS_CPU_SSE41)
#endif

// NEON (32-bit ARM) versions of the frame-sum, hash-initialisation and
// location-fill tests; skipped at run time when WELS_CPU_NEON is not reported.
#ifdef HAVE_NEON
GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_neon, 1, 1, WELS_CPU_NEON)
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_neon, 1, 1, WELS_CPU_NEON)
GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_neon, 1, 320, WELS_CPU_NEON)
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_neon, 1, 320, WELS_CPU_NEON)
GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_neon, 640, 320, WELS_CPU_NEON)
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_neon, 640, 320, WELS_CPU_NEON)
GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_neon, 10, 10, WELS_CPU_NEON)
GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_neon, 16,
    16, WELS_CPU_NEON)
GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_neon, 640, 320, WELS_CPU_NEON)
GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_neon, 640,
    320, WELS_CPU_NEON)
#endif

// AArch64 NEON versions of the frame-sum, hash-initialisation and location-fill
// tests; skipped at run time when WELS_CPU_NEON is not reported.
#ifdef HAVE_NEON_AARCH64
GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_AArch64_neon, 1, 1, WELS_CPU_NEON)
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_AArch64_neon, 1, 1, WELS_CPU_NEON)
GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_AArch64_neon, 1, 320, WELS_CPU_NEON)
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_AArch64_neon, 1, 320, WELS_CPU_NEON)
GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_AArch64_neon, 640, 320, WELS_CPU_NEON)
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_AArch64_neon, 640, 320, WELS_CPU_NEON)
GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_AArch64_neon, 10, 10,
    WELS_CPU_NEON)
GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref,
    FillQpelLocationByFeatureValue_AArch64_neon, 16, 16, WELS_CPU_NEON)
GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_AArch64_neon, 640, 320,
    WELS_CPU_NEON)
GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref,
    FillQpelLocationByFeatureValue_AArch64_neon, 640, 320, WELS_CPU_NEON)
#endif