1 /*!
2 * \copy
3 * Copyright (c) 2008-2013, Cisco Systems
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 * downsample_yuv.c
32 *
33 * Abstract
34 * Implementation for source yuv data downsampling used before spatial encoding.
35 *
36 * History
37 * 10/24/2008 Created
38 *
39 *****************************************************************************/
40
41 #include "downsample.h"
42
43
44 WELSVP_NAMESPACE_BEGIN
45
46
/*!
 * \brief   Downsample a plane by two in each dimension.
 *
 * Every destination pixel is built from one 2x2 block of source pixels: the
 * two pixels of each block row are averaged with rounding, and the two row
 * averages are then averaged with rounding again.
 *
 * \param   pDst          first pixel of the destination plane
 * \param   kiDstStride   offset added to the destination pointer after each output row
 * \param   pSrc          first pixel of the source plane
 * \param   kiSrcStride   offset between vertically adjacent source pixels
 * \param   kiSrcWidth    source width; the output has kiSrcWidth >> 1 columns
 * \param   kiSrcHeight   source height; the output has kiSrcHeight >> 1 rows
 */
void DyadicBilinearDownsampler_c (uint8_t* pDst, const int32_t kiDstStride,
                                  uint8_t* pSrc, const int32_t kiSrcStride,
                                  const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
  const int32_t kiOutCols    = kiSrcWidth >> 1;
  const int32_t kiOutRows    = kiSrcHeight >> 1;
  const int32_t kiRowAdvance = kiSrcStride << 1;  // two source rows are consumed per output row
  uint8_t* pOutRow           = pDst;
  const uint8_t* pInRow      = pSrc;

  for (int32_t iRow = 0; iRow < kiOutRows; ++iRow) {
    for (int32_t iCol = 0; iCol < kiOutCols; ++iCol) {
      // Top-left pixel of the 2x2 block feeding this output pixel.
      const uint8_t* pBlock   = pInRow + (iCol << 1);
      const int32_t kiTopAvg  = (pBlock[0] + pBlock[1] + 1) >> 1;
      const int32_t kiBotAvg  = (pBlock[kiSrcStride] + pBlock[kiSrcStride + 1] + 1) >> 1;

      pOutRow[iCol] = (uint8_t) ((kiTopAvg + kiBotAvg + 1) >> 1);
    }
    pOutRow += kiDstStride;
    pInRow  += kiRowAdvance;
  }
}
70
/*!
 * \brief   Downsample a plane by four in each dimension.
 *
 * Only the top-left 2x2 pixels of every 4x4 source block contribute to the
 * corresponding destination pixel: each of the two rows is averaged with
 * rounding, then the two row averages are averaged with rounding.
 *
 * \param   pDst          first pixel of the destination plane
 * \param   kiDstStride   offset added to the destination pointer after each output row
 * \param   pSrc          first pixel of the source plane
 * \param   kiSrcStride   offset between vertically adjacent source pixels
 * \param   kiSrcWidth    source width; the output has kiSrcWidth >> 2 columns
 * \param   kiSrcHeight   source height; the output has kiSrcHeight >> 2 rows
 */
void DyadicBilinearQuarterDownsampler_c (uint8_t* pDst, const int32_t kiDstStride,
    uint8_t* pSrc, const int32_t kiSrcStride,
    const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
  const int32_t kiOutCols    = kiSrcWidth >> 2;
  const int32_t kiOutRows    = kiSrcHeight >> 2;
  const int32_t kiRowAdvance = kiSrcStride << 2;  // four source rows are skipped per output row
  uint8_t* pOutRow           = pDst;
  const uint8_t* pInRow      = pSrc;

  for (int32_t iRow = 0; iRow < kiOutRows; ++iRow) {
    for (int32_t iCol = 0; iCol < kiOutCols; ++iCol) {
      // Top-left pixel of the 4x4 block; only its first 2x2 pixels are read.
      const uint8_t* pBlock  = pInRow + (iCol << 2);
      const int32_t kiTopAvg = (pBlock[0] + pBlock[1] + 1) >> 1;
      const int32_t kiBotAvg = (pBlock[kiSrcStride] + pBlock[kiSrcStride + 1] + 1) >> 1;

      pOutRow[iCol] = (uint8_t) ((kiTopAvg + kiBotAvg + 1) >> 1);
    }
    pOutRow += kiDstStride;
    pInRow  += kiRowAdvance;
  }
}
94
/*!
 * \brief   Downsample a plane by three in each dimension.
 *
 * Only the top-left 2x2 pixels of every 3x3 source block contribute to the
 * corresponding destination pixel: each of the two rows is averaged with
 * rounding, then the two row averages are averaged with rounding.
 *
 * Unlike the other dyadic downsamplers in this file, the last argument is the
 * number of DESTINATION rows to produce, not the source height.
 *
 * \param   pDst          first pixel of the destination plane
 * \param   kiDstStride   offset added to the destination pointer after each output row
 * \param   pSrc          first pixel of the source plane
 * \param   kiSrcStride   offset between vertically adjacent source pixels
 * \param   kiSrcWidth    source width; the output has kiSrcWidth / 3 columns
 * \param   kiDstHeight   number of output rows to write
 */
void DyadicBilinearOneThirdDownsampler_c (uint8_t* pDst, const int32_t kiDstStride,
    uint8_t* pSrc, const int32_t kiSrcStride,
    const int32_t kiSrcWidth, const int32_t kiDstHeight) {
  const int32_t kiOutCols    = kiSrcWidth / 3;
  const int32_t kiRowAdvance = kiSrcStride * 3;  // three source rows are skipped per output row
  uint8_t* pOutRow           = pDst;
  const uint8_t* pInRow      = pSrc;

  for (int32_t iRow = 0; iRow < kiDstHeight; ++iRow) {
    for (int32_t iCol = 0; iCol < kiOutCols; ++iCol) {
      // Top-left pixel of the 3x3 block; only its first 2x2 pixels are read.
      const uint8_t* pBlock  = pInRow + iCol * 3;
      const int32_t kiTopAvg = (pBlock[0] + pBlock[1] + 1) >> 1;
      const int32_t kiBotAvg = (pBlock[kiSrcStride] + pBlock[kiSrcStride + 1] + 1) >> 1;

      pOutRow[iCol] = (uint8_t) ((kiTopAvg + kiBotAvg + 1) >> 1);
    }
    pOutRow += kiDstStride;
    pInRow  += kiRowAdvance;
  }
}
117
GeneralBilinearFastDownsampler_c(uint8_t * pDst,const int32_t kiDstStride,const int32_t kiDstWidth,const int32_t kiDstHeight,uint8_t * pSrc,const int32_t kiSrcStride,const int32_t kiSrcWidth,const int32_t kiSrcHeight)118 void GeneralBilinearFastDownsampler_c (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
119 const int32_t kiDstHeight,
120 uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
121 const uint32_t kuiScaleBitWidth = 16, kuiScaleBitHeight = 15;
122 const uint32_t kuiScaleWidth = (1 << kuiScaleBitWidth), kuiScaleHeight = (1 << kuiScaleBitHeight);
123 int32_t fScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
124 int32_t fScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
125 uint32_t x;
126 int32_t iYInverse, iXInverse;
127
128 uint8_t* pByDst = pDst;
129 uint8_t* pByLineDst = pDst;
130
131 iYInverse = 1 << (kuiScaleBitHeight - 1);
132 for (int32_t i = 0; i < kiDstHeight - 1; i++) {
133 int32_t iYy = iYInverse >> kuiScaleBitHeight;
134 int32_t fv = iYInverse & (kuiScaleHeight - 1);
135
136 uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
137
138 pByDst = pByLineDst;
139 iXInverse = 1 << (kuiScaleBitWidth - 1);
140 for (int32_t j = 0; j < kiDstWidth - 1; j++) {
141 int32_t iXx = iXInverse >> kuiScaleBitWidth;
142 int32_t iFu = iXInverse & (kuiScaleWidth - 1);
143
144 uint8_t* pByCurrent = pBySrc + iXx;
145 uint8_t a, b, c, d;
146
147 a = *pByCurrent;
148 b = * (pByCurrent + 1);
149 c = * (pByCurrent + kiSrcStride);
150 d = * (pByCurrent + kiSrcStride + 1);
151
152 x = (((uint32_t) (kuiScaleWidth - 1 - iFu)) * (kuiScaleHeight - 1 - fv) >> kuiScaleBitWidth) * a;
153 x += (((uint32_t) (iFu)) * (kuiScaleHeight - 1 - fv) >> kuiScaleBitWidth) * b;
154 x += (((uint32_t) (kuiScaleWidth - 1 - iFu)) * (fv) >> kuiScaleBitWidth) * c;
155 x += (((uint32_t) (iFu)) * (fv) >> kuiScaleBitWidth) * d;
156 x >>= (kuiScaleBitHeight - 1);
157 x += 1;
158 x >>= 1;
159 //x = (((__int64)(SCALE_BIG - 1 - iFu))*(SCALE_BIG - 1 - fv)*a + ((__int64)iFu)*(SCALE_BIG - 1 -fv)*b + ((__int64)(SCALE_BIG - 1 -iFu))*fv*c +
160 // ((__int64)iFu)*fv*d + (1 << (2*SCALE_BIT_BIG-1)) ) >> (2*SCALE_BIT_BIG);
161 x = WELS_CLAMP (x, 0, 255);
162 *pByDst++ = (uint8_t)x;
163
164 iXInverse += fScalex;
165 }
166 *pByDst = * (pBySrc + (iXInverse >> kuiScaleBitWidth));
167 pByLineDst += kiDstStride;
168 iYInverse += fScaley;
169 }
170
171 // last row special
172 {
173 int32_t iYy = iYInverse >> kuiScaleBitHeight;
174 uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
175
176 pByDst = pByLineDst;
177 iXInverse = 1 << (kuiScaleBitWidth - 1);
178 for (int32_t j = 0; j < kiDstWidth; j++) {
179 int32_t iXx = iXInverse >> kuiScaleBitWidth;
180 *pByDst++ = * (pBySrc + iXx);
181
182 iXInverse += fScalex;
183 }
184 }
185 }
186
GeneralBilinearAccurateDownsampler_c(uint8_t * pDst,const int32_t kiDstStride,const int32_t kiDstWidth,const int32_t kiDstHeight,uint8_t * pSrc,const int32_t kiSrcStride,const int32_t kiSrcWidth,const int32_t kiSrcHeight)187 void GeneralBilinearAccurateDownsampler_c (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
188 const int32_t kiDstHeight,
189 uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
190 const int32_t kiScaleBit = 15;
191 const int32_t kiScale = (1 << kiScaleBit);
192 int32_t iScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kiScale);
193 int32_t iScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kiScale);
194 int64_t x;
195 int32_t iYInverse, iXInverse;
196
197 uint8_t* pByDst = pDst;
198 uint8_t* pByLineDst = pDst;
199
200 iYInverse = 1 << (kiScaleBit - 1);
201 for (int32_t i = 0; i < kiDstHeight - 1; i++) {
202 int32_t iYy = iYInverse >> kiScaleBit;
203 int32_t iFv = iYInverse & (kiScale - 1);
204
205 uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
206
207 pByDst = pByLineDst;
208 iXInverse = 1 << (kiScaleBit - 1);
209 for (int32_t j = 0; j < kiDstWidth - 1; j++) {
210 int32_t iXx = iXInverse >> kiScaleBit;
211 int32_t iFu = iXInverse & (kiScale - 1);
212
213 uint8_t* pByCurrent = pBySrc + iXx;
214 uint8_t a, b, c, d;
215
216 a = *pByCurrent;
217 b = * (pByCurrent + 1);
218 c = * (pByCurrent + kiSrcStride);
219 d = * (pByCurrent + kiSrcStride + 1);
220
221 x = (((int64_t) (kiScale - 1 - iFu)) * (kiScale - 1 - iFv) * a + ((int64_t)iFu) * (kiScale - 1 - iFv) * b + ((int64_t) (
222 kiScale - 1 - iFu)) * iFv * c +
223 ((int64_t)iFu) * iFv * d + (int64_t) (1 << (2 * kiScaleBit - 1))) >> (2 * kiScaleBit);
224 x = WELS_CLAMP (x, 0, 255);
225 *pByDst++ = (uint8_t)x;
226
227 iXInverse += iScalex;
228 }
229 *pByDst = * (pBySrc + (iXInverse >> kiScaleBit));
230 pByLineDst += kiDstStride;
231 iYInverse += iScaley;
232 }
233
234 // last row special
235 {
236 int32_t iYy = iYInverse >> kiScaleBit;
237 uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
238
239 pByDst = pByLineDst;
240 iXInverse = 1 << (kiScaleBit - 1);
241 for (int32_t j = 0; j < kiDstWidth; j++) {
242 int32_t iXx = iXInverse >> kiScaleBit;
243 *pByDst++ = * (pBySrc + iXx);
244
245 iXInverse += iScalex;
246 }
247 }
248 }
249
250 #if defined(X86_ASM) || defined(HAVE_NEON) || defined(HAVE_NEON_AARCH64)
GeneralBilinearDownsamplerWrap(uint8_t * pDst,const int32_t kiDstStride,const int32_t kiDstWidth,const int32_t kiDstHeight,uint8_t * pSrc,const int32_t kiSrcStride,const int32_t kiSrcWidth,const int32_t kiSrcHeight,const int32_t kiScaleBitWidth,const int32_t kiScaleBitHeight,void (* func)(uint8_t * pDst,int32_t iDstStride,int32_t iDstWidth,int32_t iDstHeight,uint8_t * pSrc,int32_t iSrcStride,uint32_t uiScaleX,uint32_t uiScaleY))251 static void GeneralBilinearDownsamplerWrap (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
252 const int32_t kiDstHeight,
253 uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight,
254 const int32_t kiScaleBitWidth, const int32_t kiScaleBitHeight,
255 void (*func) (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth, int32_t iDstHeight,
256 uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX, uint32_t uiScaleY)) {
257 const uint32_t kuiScaleWidth = (1 << kiScaleBitWidth), kuiScaleHeight = (1 << kiScaleBitHeight);
258
259 uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
260 uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
261
262 func (pDst, kiDstStride, kiDstWidth, kiDstHeight, pSrc, kiSrcStride, uiScalex, uiScaley);
263 }
264
// Defines GeneralBilinearFastDownsamplerWrap_<suffix>: a function with the
// common downsampler signature that forwards to
// GeneralBilinearFastDownsampler_<suffix> through
// GeneralBilinearDownsamplerWrap, using 16 horizontal and 15 vertical
// fractional scale bits.
#define DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP(suffix) \
  void GeneralBilinearFastDownsamplerWrap_ ## suffix ( \
      uint8_t* pDst, \
      const int32_t kiDstStride, \
      const int32_t kiDstWidth, \
      const int32_t kiDstHeight, \
      uint8_t* pSrc, \
      const int32_t kiSrcStride, \
      const int32_t kiSrcWidth, \
      const int32_t kiSrcHeight) { \
    GeneralBilinearDownsamplerWrap ( \
        pDst, kiDstStride, kiDstWidth, kiDstHeight, \
        pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, \
        16, 15, \
        GeneralBilinearFastDownsampler_ ## suffix); \
  }
272
// Defines GeneralBilinearAccurateDownsamplerWrap_<suffix>: a function with the
// common downsampler signature that forwards to
// GeneralBilinearAccurateDownsampler_<suffix> through
// GeneralBilinearDownsamplerWrap, using 15 fractional scale bits in both
// directions.
#define DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP(suffix) \
  void GeneralBilinearAccurateDownsamplerWrap_ ## suffix ( \
      uint8_t* pDst, \
      const int32_t kiDstStride, \
      const int32_t kiDstWidth, \
      const int32_t kiDstHeight, \
      uint8_t* pSrc, \
      const int32_t kiSrcStride, \
      const int32_t kiSrcWidth, \
      const int32_t kiSrcHeight) { \
    GeneralBilinearDownsamplerWrap ( \
        pDst, kiDstStride, kiDstWidth, kiDstHeight, \
        pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, \
        15, 15, \
        GeneralBilinearAccurateDownsampler_ ## suffix); \
  }
280 #endif
281
// Wrapper instantiations, one per optimized routine that is built.

// x86: SSE2 has both variants, SSSE3 only the fast one, SSE4.1 only the accurate one.
#if defined(X86_ASM)
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (sse2)
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse2)
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (ssse3)
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse41)
#endif // X86_ASM

// x86 with AVX2: both variants.
#if defined(X86_ASM) && defined(HAVE_AVX2)
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (avx2)
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (avx2)
#endif // X86_ASM && HAVE_AVX2

// 32-bit ARM NEON: accurate variant only.
#if defined(HAVE_NEON)
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (neon)
#endif // HAVE_NEON

// AArch64 NEON: accurate variant only.
#if defined(HAVE_NEON_AARCH64)
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (AArch64_neon)
#endif // HAVE_NEON_AARCH64
300 WELSVP_NAMESPACE_END
301