• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2  * \copy
3  *     Copyright (c)  2008-2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  *  downsample_yuv.c
32  *
33  *  Abstract
34  *      Implementation for source yuv data downsampling used before spatial encoding.
35  *
36  *  History
37  *      10/24/2008 Created
38  *
39  *****************************************************************************/
40 
41 #include "downsample.h"
42 
43 
44 WELSVP_NAMESPACE_BEGIN
45 
46 
/*!
 * \brief  Downsample a plane by two in each direction.
 *
 * Every output pixel is built from a 2x2 cell of the source: the two pixels of
 * the upper row and the two pixels of the lower row are each averaged with
 * rounding, then those two row averages are averaged with rounding again.
 *
 * \param  pDst         first byte of the destination plane
 * \param  kiDstStride  distance in bytes between destination rows
 * \param  pSrc         first byte of the source plane
 * \param  kiSrcStride  distance in bytes between source rows
 * \param  kiSrcWidth   source width;  kiSrcWidth  >> 1 columns are written
 * \param  kiSrcHeight  source height; kiSrcHeight >> 1 rows are written
 */
void DyadicBilinearDownsampler_c (uint8_t* pDst, const int32_t kiDstStride,
                                  uint8_t* pSrc, const int32_t kiSrcStride,
                                  const int32_t kiSrcWidth, const int32_t kiSrcHeight)
{
  const int32_t kiOutCols    = kiSrcWidth  >> 1;
  const int32_t kiOutRows    = kiSrcHeight >> 1;
  const int32_t kiRowAdvance = kiSrcStride << 1;   // two source rows per output row
  uint8_t* pOutRow           = pDst;
  uint8_t* pUpperRow         = pSrc;

  for (int32_t iRow = 0; iRow < kiOutRows; ++iRow) {
    const uint8_t* pUpper = pUpperRow;
    const uint8_t* pLower = pUpperRow + kiSrcStride;

    for (int32_t iCol = 0; iCol < kiOutCols; ++iCol) {
      // horizontal rounded averages of the two rows of the cell
      const int32_t kiUpperAvg = (pUpper[0] + pUpper[1] + 1) >> 1;
      const int32_t kiLowerAvg = (pLower[0] + pLower[1] + 1) >> 1;

      // vertical rounded average of the two row averages
      pOutRow[iCol] = (uint8_t) ((kiUpperAvg + kiLowerAvg + 1) >> 1);

      pUpper += 2;
      pLower += 2;
    }

    pOutRow   += kiDstStride;
    pUpperRow += kiRowAdvance;
  }
}
70 
/*!
 * \brief  Downsample a plane by four in each direction.
 *
 * For every 4x4 cell of the source only the 2x2 pixels in its top-left corner
 * are used: each of the two rows is averaged with rounding, then the two row
 * averages are averaged with rounding.
 *
 * \param  pDst         first byte of the destination plane
 * \param  kiDstStride  distance in bytes between destination rows
 * \param  pSrc         first byte of the source plane
 * \param  kiSrcStride  distance in bytes between source rows
 * \param  kiSrcWidth   source width;  kiSrcWidth  >> 2 columns are written
 * \param  kiSrcHeight  source height; kiSrcHeight >> 2 rows are written
 */
void DyadicBilinearQuarterDownsampler_c (uint8_t* pDst, const int32_t kiDstStride,
    uint8_t* pSrc, const int32_t kiSrcStride,
    const int32_t kiSrcWidth, const int32_t kiSrcHeight)
{
  const int32_t kiOutCols    = kiSrcWidth  >> 2;
  const int32_t kiOutRows    = kiSrcHeight >> 2;
  const int32_t kiRowAdvance = kiSrcStride << 2;   // four source rows per output row
  uint8_t* pOutRow           = pDst;
  uint8_t* pUpperRow         = pSrc;

  for (int32_t iRow = 0; iRow < kiOutRows; ++iRow) {
    const uint8_t* pUpper = pUpperRow;
    const uint8_t* pLower = pUpperRow + kiSrcStride;

    for (int32_t iCol = 0; iCol < kiOutCols; ++iCol) {
      // only columns 0 and 1 of each group of four contribute
      const int32_t kiUpperAvg = (pUpper[0] + pUpper[1] + 1) >> 1;
      const int32_t kiLowerAvg = (pLower[0] + pLower[1] + 1) >> 1;

      pOutRow[iCol] = (uint8_t) ((kiUpperAvg + kiLowerAvg + 1) >> 1);

      pUpper += 4;
      pLower += 4;
    }

    pOutRow   += kiDstStride;
    pUpperRow += kiRowAdvance;
  }
}
94 
/*!
 * \brief  Downsample a plane by three in each direction.
 *
 * For every 3x3 cell of the source only the 2x2 pixels in its top-left corner
 * are used: each of the two rows is averaged with rounding, then the two row
 * averages are averaged with rounding.
 *
 * \param  pDst         first byte of the destination plane
 * \param  kiDstStride  distance in bytes between destination rows
 * \param  pSrc         first byte of the source plane
 * \param  kiSrcStride  distance in bytes between source rows
 * \param  kiSrcWidth   source width; kiSrcWidth / 3 columns are written
 * \param  kiDstHeight  number of DESTINATION rows to produce (unlike the
 *                      other dyadic routines this is not the source height)
 */
void DyadicBilinearOneThirdDownsampler_c (uint8_t* pDst, const int32_t kiDstStride,
    uint8_t* pSrc, const int32_t kiSrcStride,
    const int32_t kiSrcWidth, const int32_t kiDstHeight)
{
  const int32_t kiOutCols    = kiSrcWidth / 3;
  const int32_t kiRowAdvance = kiSrcStride * 3;    // three source rows per output row
  uint8_t* pOutRow           = pDst;
  uint8_t* pUpperRow         = pSrc;

  for (int32_t iRow = 0; iRow < kiDstHeight; ++iRow) {
    const uint8_t* pUpper = pUpperRow;
    const uint8_t* pLower = pUpperRow + kiSrcStride;

    for (int32_t iCol = 0; iCol < kiOutCols; ++iCol) {
      // only columns 0 and 1 of each group of three contribute
      const int32_t kiUpperAvg = (pUpper[0] + pUpper[1] + 1) >> 1;
      const int32_t kiLowerAvg = (pLower[0] + pLower[1] + 1) >> 1;

      pOutRow[iCol] = (uint8_t) ((kiUpperAvg + kiLowerAvg + 1) >> 1);

      pUpper += 3;
      pLower += 3;
    }

    pOutRow   += kiDstStride;
    pUpperRow += kiRowAdvance;
  }
}
117 
/*!
 * \brief  Arbitrary-ratio bilinear resampler, reduced-precision (32-bit) variant.
 *
 * Source positions are tracked in fixed point: 16 fractional bits horizontally
 * and 15 fractional bits vertically.  Stepping starts at half a unit in each
 * direction and advances by the rounded src/dst ratio.  All rows except the
 * last, and all columns except the last, are interpolated from the 2x2 source
 * neighbourhood; the last column of every row and the whole last row are plain
 * copies of the addressed source sample, so the interpolation never reads the
 * "+1" neighbour there.
 *
 * \param  pDst         first byte of the destination plane
 * \param  kiDstStride  distance in bytes between destination rows
 * \param  kiDstWidth   destination width in pixels
 * \param  kiDstHeight  destination height in pixels
 * \param  pSrc         first byte of the source plane
 * \param  kiSrcStride  distance in bytes between source rows
 * \param  kiSrcWidth   source width in pixels
 * \param  kiSrcHeight  source height in pixels
 *
 * Nothing is written when kiDstWidth or kiDstHeight is not positive.
 */
void GeneralBilinearFastDownsampler_c (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
                                       const int32_t kiDstHeight,
                                       uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
  // An empty destination would otherwise divide by zero when computing the
  // scale factors and still store the "last column" / "last row" samples.
  if (kiDstWidth <= 0 || kiDstHeight <= 0) {
    return;
  }

  const uint32_t kuiScaleBitWidth = 16, kuiScaleBitHeight = 15;
  const uint32_t kuiScaleWidth = (1 << kuiScaleBitWidth), kuiScaleHeight = (1 << kuiScaleBitHeight);
  // per-output-pixel step through the source, in fixed point
  int32_t fScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
  int32_t fScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
  uint32_t x;
  int32_t iYInverse, iXInverse;

  uint8_t* pByDst = pDst;
  uint8_t* pByLineDst = pDst;

  iYInverse = 1 << (kuiScaleBitHeight - 1);   // start half a unit in
  for (int32_t i = 0; i < kiDstHeight - 1; i++) {
    int32_t iYy = iYInverse >> kuiScaleBitHeight;          // integer source row
    int32_t fv = iYInverse & (kuiScaleHeight - 1);         // vertical fraction

    uint8_t* pBySrc = pSrc + iYy * kiSrcStride;

    pByDst = pByLineDst;
    iXInverse = 1 << (kuiScaleBitWidth - 1);  // start half a unit in
    for (int32_t j = 0; j < kiDstWidth - 1; j++) {
      int32_t iXx = iXInverse >> kuiScaleBitWidth;         // integer source column
      int32_t iFu = iXInverse & (kuiScaleWidth - 1);       // horizontal fraction

      uint8_t* pByCurrent = pBySrc + iXx;
      uint8_t a, b, c, d;

      // 2x2 neighbourhood: a b / c d
      a = *pByCurrent;
      b = * (pByCurrent + 1);
      c = * (pByCurrent + kiSrcStride);
      d = * (pByCurrent + kiSrcStride + 1);

      // Each weight product is shifted down by kuiScaleBitWidth before it is
      // multiplied by the pixel so that the whole sum stays within 32 bits;
      // the remaining kuiScaleBitHeight bits are removed below, the final
      // one with rounding.
      x  = (((uint32_t) (kuiScaleWidth - 1 - iFu)) * (kuiScaleHeight - 1 - fv) >> kuiScaleBitWidth) * a;
      x += (((uint32_t) (iFu)) * (kuiScaleHeight - 1 - fv) >> kuiScaleBitWidth) * b;
      x += (((uint32_t) (kuiScaleWidth - 1 - iFu)) * (fv) >> kuiScaleBitWidth) * c;
      x += (((uint32_t) (iFu)) * (fv) >> kuiScaleBitWidth) * d;
      x >>= (kuiScaleBitHeight - 1);
      x += 1;
      x >>= 1;
      x = WELS_CLAMP (x, 0, 255);
      *pByDst++ = (uint8_t)x;

      iXInverse += fScalex;
    }
    // last column: copy the addressed sample, no right-hand neighbour is read
    *pByDst = * (pBySrc + (iXInverse >> kuiScaleBitWidth));
    pByLineDst += kiDstStride;
    iYInverse += fScaley;
  }

  // last row special: copy the addressed samples, no row below is read
  {
    int32_t iYy = iYInverse >> kuiScaleBitHeight;
    uint8_t* pBySrc = pSrc + iYy * kiSrcStride;

    pByDst = pByLineDst;
    iXInverse = 1 << (kuiScaleBitWidth - 1);
    for (int32_t j = 0; j < kiDstWidth; j++) {
      int32_t iXx = iXInverse >> kuiScaleBitWidth;
      *pByDst++ = * (pBySrc + iXx);

      iXInverse += fScalex;
    }
  }
}
186 
/*!
 * \brief  Arbitrary-ratio bilinear resampler, full-precision (64-bit) variant.
 *
 * Source positions are tracked in fixed point with 15 fractional bits in both
 * directions.  Stepping starts at half a unit and advances by the rounded
 * src/dst ratio.  All rows except the last, and all columns except the last,
 * are interpolated from the 2x2 source neighbourhood using a 64-bit
 * accumulator; the last column of every row and the whole last row are plain
 * copies of the addressed source sample, so the interpolation never reads the
 * "+1" neighbour there.
 *
 * \param  pDst         first byte of the destination plane
 * \param  kiDstStride  distance in bytes between destination rows
 * \param  kiDstWidth   destination width in pixels
 * \param  kiDstHeight  destination height in pixels
 * \param  pSrc         first byte of the source plane
 * \param  kiSrcStride  distance in bytes between source rows
 * \param  kiSrcWidth   source width in pixels
 * \param  kiSrcHeight  source height in pixels
 *
 * Nothing is written when kiDstWidth or kiDstHeight is not positive.
 */
void GeneralBilinearAccurateDownsampler_c (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
    const int32_t kiDstHeight,
    uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
  // An empty destination would otherwise divide by zero when computing the
  // scale factors and still store the "last column" / "last row" samples.
  if (kiDstWidth <= 0 || kiDstHeight <= 0) {
    return;
  }

  const int32_t kiScaleBit = 15;
  const int32_t kiScale = (1 << kiScaleBit);
  // per-output-pixel step through the source, in fixed point
  int32_t iScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kiScale);
  int32_t iScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kiScale);
  int64_t x;
  int32_t iYInverse, iXInverse;

  uint8_t* pByDst = pDst;
  uint8_t* pByLineDst = pDst;

  iYInverse = 1 << (kiScaleBit - 1);          // start half a unit in
  for (int32_t i = 0; i < kiDstHeight - 1; i++) {
    int32_t iYy = iYInverse >> kiScaleBit;                 // integer source row
    int32_t iFv = iYInverse & (kiScale - 1);               // vertical fraction

    uint8_t* pBySrc = pSrc + iYy * kiSrcStride;

    pByDst = pByLineDst;
    iXInverse = 1 << (kiScaleBit - 1);        // start half a unit in
    for (int32_t j = 0; j < kiDstWidth - 1; j++) {
      int32_t iXx = iXInverse >> kiScaleBit;               // integer source column
      int32_t iFu = iXInverse & (kiScale - 1);             // horizontal fraction

      uint8_t* pByCurrent = pBySrc + iXx;
      uint8_t a, b, c, d;

      // 2x2 neighbourhood: a b / c d
      a = *pByCurrent;
      b = * (pByCurrent + 1);
      c = * (pByCurrent + kiSrcStride);
      d = * (pByCurrent + kiSrcStride + 1);

      // weighted sum of the four neighbours in 64 bits, plus half of the
      // divisor for rounding, then drop both sets of fractional bits
      x = (((int64_t) (kiScale - 1 - iFu)) * (kiScale - 1 - iFv) * a
           + ((int64_t)iFu) * (kiScale - 1 - iFv) * b
           + ((int64_t) (kiScale - 1 - iFu)) * iFv * c
           + ((int64_t)iFu) * iFv * d
           + (int64_t) (1 << (2 * kiScaleBit - 1))) >> (2 * kiScaleBit);
      x = WELS_CLAMP (x, 0, 255);
      *pByDst++ = (uint8_t)x;

      iXInverse += iScalex;
    }
    // last column: copy the addressed sample, no right-hand neighbour is read
    *pByDst = * (pBySrc + (iXInverse >> kiScaleBit));
    pByLineDst += kiDstStride;
    iYInverse += iScaley;
  }

  // last row special: copy the addressed samples, no row below is read
  {
    int32_t iYy = iYInverse >> kiScaleBit;
    uint8_t* pBySrc = pSrc + iYy * kiSrcStride;

    pByDst = pByLineDst;
    iXInverse = 1 << (kiScaleBit - 1);
    for (int32_t j = 0; j < kiDstWidth; j++) {
      int32_t iXx = iXInverse >> kiScaleBit;
      *pByDst++ = * (pBySrc + iXx);

      iXInverse += iScalex;
    }
  }
}
249 
250 #if defined(X86_ASM) || defined(HAVE_NEON) || defined(HAVE_NEON_AARCH64)
/*!
 * \brief  Adapter from the size-based downsampler signature to a worker that
 *         takes precomputed fixed-point scale factors.
 *
 * Computes the horizontal and vertical src/dst ratios, rounded to fixed point
 * with kiScaleBitWidth / kiScaleBitHeight fractional bits, and hands them to
 * \p func together with the destination geometry and the source pointer/stride.
 *
 * \param  pDst, kiDstStride, kiDstWidth, kiDstHeight   destination plane
 * \param  pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight   source plane
 * \param  kiScaleBitWidth   fractional bits of the horizontal scale factor
 * \param  kiScaleBitHeight  fractional bits of the vertical scale factor
 * \param  func              worker that performs the actual resampling
 *
 * \p func is not called when kiDstWidth or kiDstHeight is not positive.
 */
static void GeneralBilinearDownsamplerWrap (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
    const int32_t kiDstHeight,
    uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight,
    const int32_t kiScaleBitWidth, const int32_t kiScaleBitHeight,
    void (*func) (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth, int32_t iDstHeight,
                  uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX, uint32_t uiScaleY)) {
  // An empty destination has nothing to fill, and computing its ratio would
  // divide by zero.
  if (kiDstWidth <= 0 || kiDstHeight <= 0) {
    return;
  }

  const uint32_t kuiScaleWidth = (1 << kiScaleBitWidth), kuiScaleHeight = (1 << kiScaleBitHeight);

  uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
  uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);

  func (pDst, kiDstStride, kiDstWidth, kiDstHeight, pSrc, kiSrcStride, uiScalex, uiScaley);
}
264 
// Emits GeneralBilinearFastDownsamplerWrap_<suffix>: a size-based entry point
// that forwards to GeneralBilinearDownsamplerWrap with 16 horizontal and 15
// vertical scale bits and GeneralBilinearFastDownsampler_<suffix> as worker.
#define DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP(suffix) \
  void GeneralBilinearFastDownsamplerWrap_ ## suffix ( \
      uint8_t* pDst, \
      const int32_t kiDstStride, const int32_t kiDstWidth, const int32_t kiDstHeight, \
      uint8_t* pSrc, \
      const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) { \
    GeneralBilinearDownsamplerWrap ( \
        pDst, kiDstStride, kiDstWidth, kiDstHeight, \
        pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, \
        16, 15, \
        GeneralBilinearFastDownsampler_ ## suffix); \
  }
272 
// Emits GeneralBilinearAccurateDownsamplerWrap_<suffix>: a size-based entry
// point that forwards to GeneralBilinearDownsamplerWrap with 15 scale bits in
// both directions and GeneralBilinearAccurateDownsampler_<suffix> as worker.
#define DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP(suffix) \
  void GeneralBilinearAccurateDownsamplerWrap_ ## suffix ( \
      uint8_t* pDst, \
      const int32_t kiDstStride, const int32_t kiDstWidth, const int32_t kiDstHeight, \
      uint8_t* pSrc, \
      const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) { \
    GeneralBilinearDownsamplerWrap ( \
        pDst, kiDstStride, kiDstWidth, kiDstHeight, \
        pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, \
        15, 15, \
        GeneralBilinearAccurateDownsampler_ ## suffix); \
  }
280 #endif
281 
// Instantiate the size-based wrappers for each available SIMD worker.
#ifdef X86_ASM
// x86: fast variants for sse2/ssse3, accurate variants for sse2/sse41
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (sse2)
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse2)
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (ssse3)
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse41)
#ifdef HAVE_AVX2
// avx2: both variants
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (avx2)
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (avx2)
#endif
#endif //X86_ASM

#ifdef HAVE_NEON
// 32-bit NEON: accurate variant only
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (neon)
#endif

#ifdef HAVE_NEON_AARCH64
// AArch64 NEON: accurate variant only
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (AArch64_neon)
#endif
300 WELSVP_NAMESPACE_END
301