1 /*!
2 * \copy
3 * Copyright (c) 2013, Cisco Systems
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 */
32
33 #include "downsample.h"
34 #include "cpu.h"
35 #include <assert.h>
36
37 WELSVP_NAMESPACE_BEGIN
// Dimensions used to size the intermediate sample buffers in
// AllocateSampleBuffer(): one plane of WIDTH * HEIGHT bytes plus two planes of
// a quarter of that, for each of the two buffer sets. Process() only takes the
// multi-stage path through those buffers when half of the source width and
// half of the source height do not exceed these limits.
38 #define MAX_SAMPLE_WIDTH 1920
39 #define MAX_SAMPLE_HEIGHT 1088
40
41 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
42
CDownsampling(int32_t iCpuFlag)43 CDownsampling::CDownsampling (int32_t iCpuFlag) {
44 m_iCPUFlag = iCpuFlag;
45 m_eMethod = METHOD_DOWNSAMPLE;
46 WelsMemset (&m_pfDownsample, 0, sizeof (m_pfDownsample));
47 InitDownsampleFuncs (m_pfDownsample, m_iCPUFlag);
48 WelsMemset(m_pSampleBuffer,0,sizeof(m_pSampleBuffer));
49 m_bNoSampleBuffer = AllocateSampleBuffer();
50 }
51
~CDownsampling()52 CDownsampling::~CDownsampling() {
53 FreeSampleBuffer();
54 }
AllocateSampleBuffer()55 bool CDownsampling::AllocateSampleBuffer() {
56 for (int32_t i = 0; i < 2; i++) {
57 m_pSampleBuffer[i][0] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT);
58 if (!m_pSampleBuffer[i][0])
59 goto FREE_RET;
60 m_pSampleBuffer[i][1] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT / 4);
61 if (!m_pSampleBuffer[i][1])
62 goto FREE_RET;
63 m_pSampleBuffer[i][2] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT / 4);
64 if (!m_pSampleBuffer[i][2])
65 goto FREE_RET;
66 }
67 return false;
68 FREE_RET:
69 FreeSampleBuffer();
70 return true;
71
72 }
FreeSampleBuffer()73 void CDownsampling::FreeSampleBuffer() {
74 for (int32_t i = 0; i < 2; i++) {
75 WelsFree (m_pSampleBuffer[i][0]);
76 m_pSampleBuffer[i][0] = NULL;
77 WelsFree (m_pSampleBuffer[i][1]);
78 m_pSampleBuffer[i][1] = NULL;
79 WelsFree (m_pSampleBuffer[i][2]);
80 m_pSampleBuffer[i][2] = NULL;
81 }
82 }
83
InitDownsampleFuncs(SDownsampleFuncs & sDownsampleFunc,int32_t iCpuFlag)84 void CDownsampling::InitDownsampleFuncs (SDownsampleFuncs& sDownsampleFunc, int32_t iCpuFlag) {
85 sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsampler_c;
86 sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_c;
87 sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_c;
88 sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_c;
89 sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsampler_c;
90 sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsampler_c;
91 #if defined(X86_ASM)
92 if (iCpuFlag & WELS_CPU_SSE) {
93 sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_sse;
94 sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsamplerWidthx16_sse;
95 sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_sse;
96 }
97 if (iCpuFlag & WELS_CPU_SSE2) {
98 sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse2;
99 sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_sse2;
100 }
101 if (iCpuFlag & WELS_CPU_SSSE3) {
102 sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_ssse3;
103 sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsamplerWidthx16_ssse3;
104 sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_ssse3;
105 sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_ssse3;
106 sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_ssse3;
107 }
108 if (iCpuFlag & WELS_CPU_SSE41) {
109 sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_sse4;
110 sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_sse4;
111 sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse41;
112 }
113 #ifdef HAVE_AVX2
114 if (iCpuFlag & WELS_CPU_AVX2) {
115 sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_avx2;
116 sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_avx2;
117 }
118 #endif
119 #endif//X86_ASM
120
121 #if defined(HAVE_NEON)
122 if (iCpuFlag & WELS_CPU_NEON) {
123 sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_neon;
124 sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_neon;
125 sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_neon;
126 sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_neon;
127 sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_neon;
128 sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearAccurateDownsamplerWrap_neon;
129 }
130 #endif
131
132 #if defined(HAVE_NEON_AARCH64)
133 if (iCpuFlag & WELS_CPU_NEON) {
134 sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_AArch64_neon;
135 sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_AArch64_neon;
136 sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_AArch64_neon;
137 sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_AArch64_neon;
138 sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_AArch64_neon;
139 sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearAccurateDownsamplerWrap_AArch64_neon;
140 }
141 #endif
142 }
143
Process(int32_t iType,SPixMap * pSrcPixMap,SPixMap * pDstPixMap)144 EResult CDownsampling::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pDstPixMap) {
145 int32_t iSrcWidthY = pSrcPixMap->sRect.iRectWidth;
146 int32_t iSrcHeightY = pSrcPixMap->sRect.iRectHeight;
147 int32_t iDstWidthY = pDstPixMap->sRect.iRectWidth;
148 int32_t iDstHeightY = pDstPixMap->sRect.iRectHeight;
149
150 int32_t iSrcWidthUV = iSrcWidthY >> 1;
151 int32_t iSrcHeightUV = iSrcHeightY >> 1;
152 int32_t iDstWidthUV = iDstWidthY >> 1;
153 int32_t iDstHeightUV = iDstHeightY >> 1;
154
155 if (iSrcWidthY <= iDstWidthY || iSrcHeightY <= iDstHeightY) {
156 return RET_INVALIDPARAM;
157 }
158 if ((iSrcWidthY >> 1) > MAX_SAMPLE_WIDTH || (iSrcHeightY >> 1) > MAX_SAMPLE_HEIGHT || m_bNoSampleBuffer) {
159 if ((iSrcWidthY >> 1) == iDstWidthY && (iSrcHeightY >> 1) == iDstHeightY) {
160 // use half average functions
161 DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
162 (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);
163 DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
164 (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);
165 DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
166 (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
167 } else if ((iSrcWidthY >> 2) == iDstWidthY && (iSrcHeightY >> 2) == iDstHeightY) {
168
169 m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
170 (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);
171
172 m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
173 (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);
174
175 m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
176 (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
177
178 } else if ((iSrcWidthY / 3) == iDstWidthY && (iSrcHeightY / 3) == iDstHeightY) {
179
180 m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
181 (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iDstHeightY);
182
183 m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
184 (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iDstHeightUV);
185
186 m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
187 (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iDstHeightUV);
188
189 } else {
190 m_pfDownsample.pfGeneralRatioLuma ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], iDstWidthY, iDstHeightY,
191 (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);
192
193 m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], iDstWidthUV, iDstHeightUV,
194 (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);
195
196 m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], iDstWidthUV, iDstHeightUV,
197 (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
198 }
199 } else {
200
201 int32_t iIdx = 0;
202 int32_t iHalfSrcWidth = iSrcWidthY >> 1;
203 int32_t iHalfSrcHeight = iSrcHeightY >> 1;
204 uint8_t* pSrcY = (uint8_t*)pSrcPixMap->pPixel[0];
205 uint8_t* pSrcU = (uint8_t*)pSrcPixMap->pPixel[1];
206 uint8_t* pSrcV = (uint8_t*)pSrcPixMap->pPixel[2];
207 int32_t iSrcStrideY = pSrcPixMap->iStride[0];
208 int32_t iSrcStrideU = pSrcPixMap->iStride[1];
209 int32_t iSrcStrideV = pSrcPixMap->iStride[2];
210
211 int32_t iDstStrideY = pDstPixMap->iStride[0];
212 int32_t iDstStrideU = pDstPixMap->iStride[1];
213 int32_t iDstStrideV = pDstPixMap->iStride[2];
214
215 uint8_t* pDstY = (uint8_t*)m_pSampleBuffer[iIdx][0];
216 uint8_t* pDstU = (uint8_t*)m_pSampleBuffer[iIdx][1];
217 uint8_t* pDstV = (uint8_t*)m_pSampleBuffer[iIdx][2];
218 iIdx++;
219 do {
220 if ((iHalfSrcWidth == iDstWidthY) && (iHalfSrcHeight == iDstHeightY)) { //end
221 // use half average functions
222 DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
223 (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY);
224 DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
225 (uint8_t*)pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV);
226 DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
227 (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV);
228 break;
229 } else if ((iHalfSrcWidth > iDstWidthY) && (iHalfSrcHeight > iDstHeightY)){
230 // use half average functions
231 iDstStrideY = WELS_ALIGN (iHalfSrcWidth, 32);
232 iDstStrideU = WELS_ALIGN (iHalfSrcWidth >> 1, 32);
233 iDstStrideV = WELS_ALIGN (iHalfSrcWidth >> 1, 32);
234 DownsampleHalfAverage ((uint8_t*)pDstY, iDstStrideY,
235 (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY);
236 DownsampleHalfAverage ((uint8_t*)pDstU, iDstStrideU,
237 (uint8_t*)pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV);
238 DownsampleHalfAverage ((uint8_t*)pDstV, iDstStrideV,
239 (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV);
240
241 pSrcY = (uint8_t*)pDstY;
242 pSrcU = (uint8_t*)pDstU;
243 pSrcV = (uint8_t*)pDstV;
244
245
246 iSrcWidthY = iHalfSrcWidth;
247 iSrcWidthUV = iHalfSrcWidth >> 1;
248 iSrcHeightY = iHalfSrcHeight;
249 iSrcHeightUV = iHalfSrcHeight >> 1;
250
251 iSrcStrideY = iDstStrideY;
252 iSrcStrideU = iDstStrideU;
253 iSrcStrideV = iDstStrideV;
254
255 iHalfSrcWidth >>= 1;
256 iHalfSrcHeight >>= 1;
257
258 iIdx = iIdx % 2;
259 pDstY = (uint8_t*)m_pSampleBuffer[iIdx][0];
260 pDstU = (uint8_t*)m_pSampleBuffer[iIdx][1];
261 pDstV = (uint8_t*)m_pSampleBuffer[iIdx][2];
262 iIdx++;
263 } else {
264 m_pfDownsample.pfGeneralRatioLuma ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], iDstWidthY, iDstHeightY,
265 (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY);
266
267 m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], iDstWidthUV, iDstHeightUV,
268 (uint8_t*)pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV);
269
270 m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], iDstWidthUV, iDstHeightUV,
271 (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV);
272 break;
273 }
274 } while (true);
275 }
276 return RET_SUCCESS;
277 }
278
DownsampleHalfAverage(uint8_t * pDst,int32_t iDstStride,uint8_t * pSrc,int32_t iSrcStride,int32_t iSrcWidth,int32_t iSrcHeight)279 void CDownsampling::DownsampleHalfAverage (uint8_t* pDst, int32_t iDstStride,
280 uint8_t* pSrc, int32_t iSrcStride, int32_t iSrcWidth, int32_t iSrcHeight) {
281 if ((iSrcStride & 31) == 0) {
282 assert ((iDstStride & 15) == 0);
283 m_pfDownsample.pfHalfAverageWidthx32 (pDst, iDstStride,
284 pSrc, iSrcStride, WELS_ALIGN (iSrcWidth & ~1, 32), iSrcHeight);
285 } else {
286 assert ((iSrcStride & 15) == 0);
287 assert ((iDstStride & 7) == 0);
288 m_pfDownsample.pfHalfAverageWidthx16 (pDst, iDstStride,
289 pSrc, iSrcStride, WELS_ALIGN (iSrcWidth & ~1, 16), iSrcHeight);
290 }
291 }
292
293
294 WELSVP_NAMESPACE_END
295