// Unit tests for the WelsVP downsampling routines (dyadic, one-third,
// quarter, and general-ratio bilinear downsamplers).
#include <gtest/gtest.h>
#include "cpu.h"
#include "cpu_core.h"
#include "util.h"
#include "macros.h"
#include "IWelsVP.h"
#include "downsample.h"

using namespace WelsVP;

// Reference (plain C) 2:1 dyadic bilinear downsampler.
// Each destination pixel is the rounded average of a 2x2 source block:
// first the two pixels of each source row are averaged horizontally, then
// the two row results are averaged vertically; each step rounds via (+1)>>1.
// pDst/pSrc are top-left pointers; strides are in bytes; kiSrcWidth and
// kiSrcHeight are the SOURCE dimensions (destination is half-size).
void DyadicBilinearDownsampler_ref (uint8_t* pDst, const int32_t kiDstStride,
                                    uint8_t* pSrc, const int32_t kiSrcStride,
                                    const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
  uint8_t* pDstLine = pDst;
  uint8_t* pSrcLine = pSrc;
  const int32_t kiSrcStridex2 = kiSrcStride << 1;  // two source rows consumed per dst row
  const int32_t kiDstWidth    = kiSrcWidth  >> 1;
  const int32_t kiDstHeight   = kiSrcHeight >> 1;

  for (int32_t j = 0; j < kiDstHeight; j ++) {
    for (int32_t i = 0; i < kiDstWidth; i ++) {
      const int32_t kiSrcX = i << 1;
      // Rounded horizontal averages of the top and bottom source rows.
      const int32_t kiTempRow1 = (pSrcLine[kiSrcX] + pSrcLine[kiSrcX + 1] + 1) >> 1;
      const int32_t kiTempRow2 = (pSrcLine[kiSrcX + kiSrcStride] + pSrcLine[kiSrcX + kiSrcStride + 1] + 1) >> 1;

      // Rounded vertical average of the two row averages.
      pDstLine[i] = (uint8_t) ((kiTempRow1 + kiTempRow2 + 1) >> 1);
    }
    pDstLine += kiDstStride;
    pSrcLine += kiSrcStridex2;
  }
}

// Alternative reference 2:1 dyadic bilinear downsampler that averages in the
// opposite order: each column pair is averaged vertically first, then the two
// column results are averaged horizontally (each step rounds via (+1)>>1).
// Used as the reference for SIMD kernels that compute in this order, since
// the intermediate rounding can differ from DyadicBilinearDownsampler_ref.
void DyadicBilinearDownsampler2_ref (uint8_t* pDst, const int32_t kiDstStride,
                                     const uint8_t* pSrc, const int32_t kiSrcStride,
                                     const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
  uint8_t* pDstLine = pDst;
  const uint8_t* pSrcLine1 = pSrc;                 // even source row
  const uint8_t* pSrcLine2 = pSrc + kiSrcStride;   // odd source row
  const int32_t kiDstWidth  = kiSrcWidth >> 1;
  const int32_t kiDstHeight = kiSrcHeight >> 1;

  for (int32_t j = 0; j < kiDstHeight; j++) {
    for (int32_t i = 0; i < kiDstWidth; i++) {
      // Rounded vertical averages of the left and right source columns.
      const int32_t kiTempCol1 = (pSrcLine1[2 * i + 0] + pSrcLine2[2 * i + 0] + 1) >> 1;
      const int32_t kiTempCol2 = (pSrcLine1[2 * i + 1] + pSrcLine2[2 * i + 1] + 1) >> 1;
      // Rounded horizontal average of the two column averages.
      pDstLine[i] = (uint8_t) ((kiTempCol1 + kiTempCol2 + 1) >> 1);
    }
    pDstLine += kiDstStride;
    pSrcLine1 += 2 * kiSrcStride;
    pSrcLine2 += 2 * kiSrcStride;
  }
}

GeneralBilinearFastDownsampler_ref(uint8_t * pDst,const int32_t kiDstStride,const int32_t kiDstWidth,const int32_t kiDstHeight,uint8_t * pSrc,const int32_t kiSrcStride,const int32_t kiSrcWidth,const int32_t kiSrcHeight)54 void GeneralBilinearFastDownsampler_ref (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
55     const int32_t kiDstHeight,
56     uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
57   const uint32_t kuiScaleBitWidth = 16, kuiScaleBitHeight = 15;
58   const uint32_t kuiScaleWidth = (1 << kuiScaleBitWidth), kuiScaleHeight = (1 << kuiScaleBitHeight);
59   int32_t fScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
60   int32_t fScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
61   uint32_t x;
62   int32_t iYInverse, iXInverse;
63 
64   uint8_t* pByDst = pDst;
65   uint8_t* pByLineDst = pDst;
66 
67   iYInverse = 1 << (kuiScaleBitHeight - 1);
68   for (int32_t i = 0; i < kiDstHeight - 1; i++) {
69     int32_t iYy = iYInverse >> kuiScaleBitHeight;
70     int32_t fv = iYInverse & (kuiScaleHeight - 1);
71 
72     uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
73 
74     pByDst = pByLineDst;
75     iXInverse = 1 << (kuiScaleBitWidth - 1);
76     for (int32_t j = 0; j < kiDstWidth - 1; j++) {
77       int32_t iXx = iXInverse >> kuiScaleBitWidth;
78       int32_t iFu = iXInverse & (kuiScaleWidth - 1);
79 
80       uint8_t* pByCurrent = pBySrc + iXx;
81       uint8_t a, b, c, d;
82 
83       a = *pByCurrent;
84       b = * (pByCurrent + 1);
85       c = * (pByCurrent + kiSrcStride);
86       d = * (pByCurrent + kiSrcStride + 1);
87 
88       x  = (((uint32_t) (kuiScaleWidth - 1 - iFu)) * (kuiScaleHeight - 1 - fv) >> kuiScaleBitWidth) * a;
89       x += (((uint32_t) (iFu)) * (kuiScaleHeight - 1 - fv) >> kuiScaleBitWidth) * b;
90       x += (((uint32_t) (kuiScaleWidth - 1 - iFu)) * (fv) >> kuiScaleBitWidth) * c;
91       x += (((uint32_t) (iFu)) * (fv) >> kuiScaleBitWidth) * d;
92       x >>= (kuiScaleBitHeight - 1);
93       x += 1;
94       x >>= 1;
95       //x = (((__int64)(SCALE_BIG - 1 - iFu))*(SCALE_BIG - 1 - fv)*a + ((__int64)iFu)*(SCALE_BIG - 1 -fv)*b + ((__int64)(SCALE_BIG - 1 -iFu))*fv*c +
96       // ((__int64)iFu)*fv*d + (1 << (2*SCALE_BIT_BIG-1)) ) >> (2*SCALE_BIT_BIG);
97       x = WELS_CLAMP (x, 0, 255);
98       *pByDst++ = (uint8_t)x;
99 
100       iXInverse += fScalex;
101     }
102     *pByDst = * (pBySrc + (iXInverse >> kuiScaleBitWidth));
103     pByLineDst += kiDstStride;
104     iYInverse += fScaley;
105   }
106 
107   // last row special
108   {
109     int32_t iYy = iYInverse >> kuiScaleBitHeight;
110     uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
111 
112     pByDst = pByLineDst;
113     iXInverse = 1 << (kuiScaleBitWidth - 1);
114     for (int32_t j = 0; j < kiDstWidth; j++) {
115       int32_t iXx = iXInverse >> kuiScaleBitWidth;
116       *pByDst++ = * (pBySrc + iXx);
117 
118       iXInverse += fScalex;
119     }
120   }
121 }
122 
GeneralBilinearAccurateDownsampler_ref(uint8_t * pDst,const int32_t kiDstStride,const int32_t kiDstWidth,const int32_t kiDstHeight,uint8_t * pSrc,const int32_t kiSrcStride,const int32_t kiSrcWidth,const int32_t kiSrcHeight)123 void GeneralBilinearAccurateDownsampler_ref (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
124     const int32_t kiDstHeight,
125     uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
126   const int32_t kiScaleBit = 15;
127   const int32_t kiScale = (1 << kiScaleBit);
128   int32_t iScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kiScale);
129   int32_t iScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kiScale);
130   int64_t x;
131   int32_t iYInverse, iXInverse;
132 
133   uint8_t* pByDst = pDst;
134   uint8_t* pByLineDst = pDst;
135 
136   iYInverse = 1 << (kiScaleBit - 1);
137   for (int32_t i = 0; i < kiDstHeight - 1; i++) {
138     int32_t iYy = iYInverse >> kiScaleBit;
139     int32_t iFv = iYInverse & (kiScale - 1);
140 
141     uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
142 
143     pByDst = pByLineDst;
144     iXInverse = 1 << (kiScaleBit - 1);
145     for (int32_t j = 0; j < kiDstWidth - 1; j++) {
146       int32_t iXx = iXInverse >> kiScaleBit;
147       int32_t iFu = iXInverse & (kiScale - 1);
148 
149       uint8_t* pByCurrent = pBySrc + iXx;
150       uint8_t a, b, c, d;
151 
152       a = *pByCurrent;
153       b = * (pByCurrent + 1);
154       c = * (pByCurrent + kiSrcStride);
155       d = * (pByCurrent + kiSrcStride + 1);
156 
157       x = (((int64_t) (kiScale - 1 - iFu)) * (kiScale - 1 - iFv) * a + ((int64_t)iFu) * (kiScale - 1 - iFv) * b + ((int64_t) (
158              kiScale - 1 - iFu)) * iFv * c +
159            ((int64_t)iFu) * iFv * d + (int64_t) (1 << (2 * kiScaleBit - 1))) >> (2 * kiScaleBit);
160       x = WELS_CLAMP (x, 0, 255);
161       *pByDst++ = (uint8_t)x;
162 
163       iXInverse += iScalex;
164     }
165     *pByDst = * (pBySrc + (iXInverse >> kiScaleBit));
166     pByLineDst += kiDstStride;
167     iYInverse += iScaley;
168   }
169 
170   // last row special
171   {
172     int32_t iYy = iYInverse >> kiScaleBit;
173     uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
174 
175     pByDst = pByLineDst;
176     iXInverse = 1 << (kiScaleBit - 1);
177     for (int32_t j = 0; j < kiDstWidth; j++) {
178       int32_t iXx = iXInverse >> kiScaleBit;
179       *pByDst++ = * (pBySrc + iXx);
180 
181       iXInverse += iScalex;
182     }
183   }
184 }
185 
// Expands to a gtest case DownSampleTest.<func>: fills two identical
// random buffers, runs |ref_func| (C reference) and |func| (implementation
// under test) with the same strides/dimensions, and asserts the half-size
// outputs match byte-for-byte. When ASM is non-zero the test returns early
// (skips) unless the detected CPU feature flags contain CPUFLAGS.
#define GENERATE_DyadicBilinearDownsampler_UT_with_ref(func, ASM, CPUFLAGS, ref_func) \
TEST (DownSampleTest, func) { \
  if (ASM) {\
    int32_t iCpuCores = 0; \
    uint32_t m_uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores); \
    if (0 == (m_uiCpuFeatureFlag & CPUFLAGS)) \
    return; \
  } \
  ENFORCE_STACK_ALIGN_1D (uint8_t, dst_c, 50000, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, src_c, 50000, 16); \
  int dst_stride_c; \
  int src_stride_c; \
  int src_width_c; \
  int src_height_c; \
  ENFORCE_STACK_ALIGN_1D (uint8_t, dst_a, 50000, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, src_a, 50000, 16); \
  int dst_stride_a; \
  int src_stride_a; \
  int src_width_a; \
  int src_height_a; \
  dst_stride_c = dst_stride_a = 560; \
  src_stride_c = src_stride_a = 560; \
  src_width_c = src_width_a = 512; \
  src_height_c = src_height_a = 80; \
  for (int j = 0; j < 50000; j++) { \
    dst_c[j] = dst_a[j] = rand() % 256; \
    src_c[j] = src_a[j] = rand() % 256; \
  } \
  ref_func (dst_c, dst_stride_c, src_c, src_stride_c, src_width_c, src_height_c); \
  func (dst_a, dst_stride_a, src_a, src_stride_a, src_width_a, src_height_a); \
  for (int j = 0; j < (src_height_c >> 1); j++) { \
    for (int m = 0; m < (src_width_c >> 1); m++) { \
      ASSERT_EQ (dst_c[m + j * dst_stride_c], dst_a[m + j * dst_stride_a]); \
    } \
  } \
}

// Convenience wrappers binding each of the two C reference orderings
// (row-first vs. column-first averaging) to the generic generator above.
#define GENERATE_DyadicBilinearDownsampler_UT(func, ASM, CPUFLAGS) \
  GENERATE_DyadicBilinearDownsampler_UT_with_ref(func, ASM, CPUFLAGS, DyadicBilinearDownsampler_ref)
#define GENERATE_DyadicBilinearDownsampler2_UT(func, ASM, CPUFLAGS) \
  GENERATE_DyadicBilinearDownsampler_UT_with_ref(func, ASM, CPUFLAGS, DyadicBilinearDownsampler2_ref)

// Expands to a gtest case comparing |func| against the C one-third
// downsampler on identical random input; the height argument passed to both
// is src_height/3 and the comparison covers a (width/3) x (height/3) output.
// NOTE(review): both callees receive height/3, mirroring how production code
// invokes DyadicBilinearOneThirdDownsampler_c — confirm against downsample.h.
#define GENERATE_DyadicBilinearOneThirdDownsampler_UT(func, ASM, CPUFLAGS) \
TEST (DownSampleTest, func) { \
  if (ASM) {\
    int32_t iCpuCores = 0; \
    uint32_t m_uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores); \
    if (0 == (m_uiCpuFeatureFlag & CPUFLAGS)) \
    return; \
  } \
  ENFORCE_STACK_ALIGN_1D (uint8_t, dst_c, 50000, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, src_c, 50000, 16); \
  int dst_stride_c; \
  int src_stride_c; \
  int src_width_c; \
  int src_height_c; \
  ENFORCE_STACK_ALIGN_1D (uint8_t, dst_a, 50000, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, src_a, 50000, 16); \
  int dst_stride_a; \
  int src_stride_a; \
  int src_width_a; \
  int src_height_a; \
  dst_stride_c = dst_stride_a = 560; \
  src_stride_c = src_stride_a = 560; \
  src_width_c = src_width_a = 480; \
  src_height_c = src_height_a = 30; \
  for (int j = 0; j < 50000; j++) { \
    dst_c[j] = dst_a[j] = rand() % 256; \
    src_c[j] = src_a[j] = rand() % 256; \
  } \
  DyadicBilinearOneThirdDownsampler_c (dst_c, dst_stride_c, src_c, src_stride_c, src_width_c, src_height_c/3); \
  func (dst_a, dst_stride_a, src_a, src_stride_a, src_width_a, src_height_a/3); \
  for (int j = 0; j < (src_height_c /3 ); j++) { \
    for (int m = 0; m < (src_width_c /3); m++) { \
      ASSERT_EQ (dst_c[m + j * dst_stride_c], dst_a[m + j * dst_stride_a]); \
    } \
  } \
}

// Expands to a gtest case comparing |func| against the C quarter (4:1)
// downsampler on identical random input; the comparison covers a
// (width>>2) x (height>>2) output region.
#define GENERATE_DyadicBilinearQuarterDownsampler_UT(func, ASM, CPUFLAGS) \
TEST (DownSampleTest, func) { \
  if (ASM) {\
    int32_t iCpuCores = 0; \
    uint32_t m_uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores); \
    if (0 == (m_uiCpuFeatureFlag & CPUFLAGS)) \
    return; \
  } \
  ENFORCE_STACK_ALIGN_1D (uint8_t, dst_c, 50000, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, src_c, 50000, 16); \
  int dst_stride_c; \
  int src_stride_c; \
  int src_width_c; \
  int src_height_c; \
  ENFORCE_STACK_ALIGN_1D (uint8_t, dst_a, 50000, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, src_a, 50000, 16); \
  int dst_stride_a; \
  int src_stride_a; \
  int src_width_a; \
  int src_height_a; \
  dst_stride_c = dst_stride_a = 560; \
  src_stride_c = src_stride_a = 560; \
  src_width_c = src_width_a = 640; \
  src_height_c = src_height_a = 80; \
  for (int j = 0; j < 50000; j++) { \
    dst_c[j] = dst_a[j] = rand() % 256; \
    src_c[j] = src_a[j] = rand() % 256; \
  } \
  DyadicBilinearQuarterDownsampler_c (dst_c, dst_stride_c, src_c, src_stride_c, src_width_c, src_height_c); \
  func (dst_a, dst_stride_a, src_a, src_stride_a, src_width_a, src_height_a); \
  for (int j = 0; j < (src_height_c >> 2); j++) { \
    for (int m = 0; m < (src_width_c >> 2); m++) { \
      ASSERT_EQ (dst_c[m + j * dst_stride_c], dst_a[m + j * dst_stride_a]); \
    } \
  } \
}

// Expands to a gtest case for the general-ratio downsamplers: five rounds,
// each picking a random destination size in [src>>(i+1), 2*(src>>(i+1)))
// so a range of scaling ratios is exercised, then comparing |func| against
// |ref| pixel-for-pixel over the full destination. When ASM is non-zero the
// test returns early unless the CPU supports CPUFLAGS.
#define GENERATE_GeneralBilinearDownsampler_UT(func, ref, ASM, CPUFLAGS) \
TEST (DownSampleTest, func) { \
  if (ASM) {\
    int32_t iCpuCores = 0; \
    uint32_t m_uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores); \
    if (0 == (m_uiCpuFeatureFlag & CPUFLAGS)) \
    return; \
  } \
  ENFORCE_STACK_ALIGN_1D (uint8_t, dst_c, 70000, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, src_c, 70000, 16); \
  int dst_stride_c; \
  int dst_width_c; \
  int dst_height_c; \
  int src_stride_c; \
  int src_width_c; \
  int src_height_c; \
  ENFORCE_STACK_ALIGN_1D (uint8_t, dst_a, 70000, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, src_a, 70000, 16); \
  int dst_stride_a; \
  int dst_width_a; \
  int dst_height_a; \
  int src_stride_a; \
  int src_width_a; \
  int src_height_a; \
  for (int i = 0; i < 5; i++) { \
    dst_stride_c = dst_stride_a = 320; \
    src_stride_c = src_stride_a = 320; \
    src_width_c = src_width_a = 320; \
    src_height_c = src_height_a = 180; \
    dst_width_c = dst_width_a = (src_width_c >> (i + 1)) + rand() % (src_width_c >> (i + 1)); \
    dst_height_c = dst_height_a = (src_height_c >> (i + 1)) + rand() % (src_height_c >> (i + 1)); \
    for (int j = 0; j < 70000; j++) { \
      dst_c[j] = dst_a[j] = rand() % 256; \
      src_c[j] = src_a[j] = rand() % 256; \
    } \
    ref (dst_c, dst_stride_c, dst_width_c, dst_height_c, src_c, src_stride_c, src_width_c, src_height_c); \
    func (dst_a, dst_stride_a, dst_width_a, dst_height_a, src_a, src_stride_a, src_width_a, src_height_a); \
    for (int j = 0; j < dst_height_c; j++) { \
      for (int m = 0; m < dst_width_c ; m++) { \
        ASSERT_EQ (dst_c[m + j * dst_stride_c], dst_a[m + j * dst_stride_a]); \
      } \
    } \
  } \
}

347 GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsampler_c, 0, 0)
348 GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearFastDownsampler_c, GeneralBilinearFastDownsampler_ref, 0, 0)
349 GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearAccurateDownsampler_c, GeneralBilinearAccurateDownsampler_ref, 0,
350                                         0)
351 
352 #if defined(X86_ASM)
353 GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsamplerWidthx32_sse, 1, WELS_CPU_SSE)
354 GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsamplerWidthx16_sse, 1, WELS_CPU_SSE)
355 GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsamplerWidthx8_sse, 1, WELS_CPU_SSE)
356 
357 GENERATE_DyadicBilinearDownsampler2_UT (DyadicBilinearDownsamplerWidthx32_ssse3, 1, WELS_CPU_SSSE3)
358 GENERATE_DyadicBilinearDownsampler2_UT (DyadicBilinearDownsamplerWidthx16_ssse3, 1, WELS_CPU_SSSE3)
359 
360 GENERATE_DyadicBilinearOneThirdDownsampler_UT (DyadicBilinearOneThirdDownsampler_ssse3, 1, WELS_CPU_SSSE3)
361 GENERATE_DyadicBilinearOneThirdDownsampler_UT (DyadicBilinearOneThirdDownsampler_sse4, 1, WELS_CPU_SSE41)
362 
363 GENERATE_DyadicBilinearQuarterDownsampler_UT (DyadicBilinearQuarterDownsampler_sse, 1, WELS_CPU_SSE)
364 GENERATE_DyadicBilinearQuarterDownsampler_UT (DyadicBilinearQuarterDownsampler_ssse3, 1, WELS_CPU_SSSE3)
365 GENERATE_DyadicBilinearQuarterDownsampler_UT (DyadicBilinearQuarterDownsampler_sse4, 1, WELS_CPU_SSE41)
366 
367 GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearFastDownsamplerWrap_sse2, GeneralBilinearFastDownsampler_ref, 1,
368                                         WELS_CPU_SSE2)
369 GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearAccurateDownsamplerWrap_sse2,
370                                         GeneralBilinearAccurateDownsampler_ref, 1, WELS_CPU_SSE2)
371 GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearFastDownsamplerWrap_ssse3, GeneralBilinearFastDownsampler_ref, 1,
372                                         WELS_CPU_SSSE3)
373 GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearAccurateDownsamplerWrap_sse41,
374                                         GeneralBilinearAccurateDownsampler_ref, 1, WELS_CPU_SSE41)
375 #ifdef HAVE_AVX2
376 GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearFastDownsamplerWrap_avx2, GeneralBilinearFastDownsampler_ref, 1,
377                                         WELS_CPU_AVX2)
378 GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearAccurateDownsamplerWrap_avx2,
379                                         GeneralBilinearAccurateDownsampler_ref, 1, WELS_CPU_AVX2)
380 #endif
381 
382 #endif
383 
384 #if defined(HAVE_NEON)
385 GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsamplerWidthx32_neon, 1, WELS_CPU_NEON)
386 GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsampler_neon, 1, WELS_CPU_NEON)
387 
388 GENERATE_DyadicBilinearOneThirdDownsampler_UT (DyadicBilinearOneThirdDownsampler_neon, 1, WELS_CPU_NEON)
389 
390 GENERATE_DyadicBilinearQuarterDownsampler_UT (DyadicBilinearQuarterDownsampler_neon, 1, WELS_CPU_NEON)
391 
392 GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearAccurateDownsamplerWrap_neon,
393                                         GeneralBilinearAccurateDownsampler_ref, 1, WELS_CPU_NEON)
394 #endif
395 
396 #if defined(HAVE_NEON_AARCH64)
397 GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsamplerWidthx32_AArch64_neon, 1, WELS_CPU_NEON)
398 GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsampler_AArch64_neon, 1, WELS_CPU_NEON)
399 
400 GENERATE_DyadicBilinearOneThirdDownsampler_UT (DyadicBilinearOneThirdDownsampler_AArch64_neon, 1, WELS_CPU_NEON)
401 
402 GENERATE_DyadicBilinearQuarterDownsampler_UT (DyadicBilinearQuarterDownsampler_AArch64_neon, 1, WELS_CPU_NEON)
403 
404 GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearAccurateDownsamplerWrap_AArch64_neon,
405                                         GeneralBilinearAccurateDownsampler_ref, 1, WELS_CPU_NEON)
406 #endif
407