• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2  * \copy
3  *     Copyright (c)  2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  */
32 
33 #include "downsample.h"
34 #include "cpu.h"
35 #include <assert.h>
36 
37 WELSVP_NAMESPACE_BEGIN
38 #define MAX_SAMPLE_WIDTH 1920
39 #define MAX_SAMPLE_HEIGHT 1088
40 
41 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
42 
CDownsampling(int32_t iCpuFlag)43 CDownsampling::CDownsampling (int32_t iCpuFlag) {
44   m_iCPUFlag = iCpuFlag;
45   m_eMethod   = METHOD_DOWNSAMPLE;
46   WelsMemset (&m_pfDownsample, 0, sizeof (m_pfDownsample));
47   InitDownsampleFuncs (m_pfDownsample, m_iCPUFlag);
48   WelsMemset(m_pSampleBuffer,0,sizeof(m_pSampleBuffer));
49   m_bNoSampleBuffer = AllocateSampleBuffer();
50 }
51 
~CDownsampling()52 CDownsampling::~CDownsampling() {
53   FreeSampleBuffer();
54 }
AllocateSampleBuffer()55 bool CDownsampling::AllocateSampleBuffer() {
56   for (int32_t i = 0; i < 2; i++) {
57     m_pSampleBuffer[i][0] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT);
58     if (!m_pSampleBuffer[i][0])
59       goto FREE_RET;
60     m_pSampleBuffer[i][1] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT / 4);
61     if (!m_pSampleBuffer[i][1])
62       goto FREE_RET;
63     m_pSampleBuffer[i][2] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT / 4);
64     if (!m_pSampleBuffer[i][2])
65       goto FREE_RET;
66   }
67   return false;
68 FREE_RET:
69   FreeSampleBuffer();
70   return true;
71 
72 }
FreeSampleBuffer()73 void CDownsampling::FreeSampleBuffer() {
74   for (int32_t i = 0; i < 2; i++) {
75     WelsFree (m_pSampleBuffer[i][0]);
76     m_pSampleBuffer[i][0] = NULL;
77     WelsFree (m_pSampleBuffer[i][1]);
78     m_pSampleBuffer[i][1] = NULL;
79     WelsFree (m_pSampleBuffer[i][2]);
80     m_pSampleBuffer[i][2] = NULL;
81   }
82 }
83 
InitDownsampleFuncs(SDownsampleFuncs & sDownsampleFunc,int32_t iCpuFlag)84 void CDownsampling::InitDownsampleFuncs (SDownsampleFuncs& sDownsampleFunc,  int32_t iCpuFlag) {
85   sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsampler_c;
86   sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_c;
87   sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_c;
88   sDownsampleFunc.pfQuarterDownsampler  = DyadicBilinearQuarterDownsampler_c;
89   sDownsampleFunc.pfGeneralRatioChroma  = GeneralBilinearAccurateDownsampler_c;
90   sDownsampleFunc.pfGeneralRatioLuma    = GeneralBilinearFastDownsampler_c;
91 #if defined(X86_ASM)
92   if (iCpuFlag & WELS_CPU_SSE) {
93     sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_sse;
94     sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsamplerWidthx16_sse;
95     sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_sse;
96   }
97   if (iCpuFlag & WELS_CPU_SSE2) {
98     sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse2;
99     sDownsampleFunc.pfGeneralRatioLuma   = GeneralBilinearFastDownsamplerWrap_sse2;
100   }
101   if (iCpuFlag & WELS_CPU_SSSE3) {
102     sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_ssse3;
103     sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsamplerWidthx16_ssse3;
104     sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_ssse3;
105     sDownsampleFunc.pfQuarterDownsampler  = DyadicBilinearQuarterDownsampler_ssse3;
106     sDownsampleFunc.pfGeneralRatioLuma    = GeneralBilinearFastDownsamplerWrap_ssse3;
107   }
108   if (iCpuFlag & WELS_CPU_SSE41) {
109     sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_sse4;
110     sDownsampleFunc.pfQuarterDownsampler  = DyadicBilinearQuarterDownsampler_sse4;
111     sDownsampleFunc.pfGeneralRatioChroma  = GeneralBilinearAccurateDownsamplerWrap_sse41;
112   }
113 #ifdef HAVE_AVX2
114   if (iCpuFlag & WELS_CPU_AVX2) {
115     sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_avx2;
116     sDownsampleFunc.pfGeneralRatioLuma   = GeneralBilinearFastDownsamplerWrap_avx2;
117   }
118 #endif
119 #endif//X86_ASM
120 
121 #if defined(HAVE_NEON)
122   if (iCpuFlag & WELS_CPU_NEON) {
123     sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_neon;
124     sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_neon;
125     sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_neon;
126     sDownsampleFunc.pfQuarterDownsampler  = DyadicBilinearQuarterDownsampler_neon;
127     sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_neon;
128     sDownsampleFunc.pfGeneralRatioLuma   = GeneralBilinearAccurateDownsamplerWrap_neon;
129   }
130 #endif
131 
132 #if defined(HAVE_NEON_AARCH64)
133   if (iCpuFlag & WELS_CPU_NEON) {
134     sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_AArch64_neon;
135     sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_AArch64_neon;
136     sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_AArch64_neon;
137     sDownsampleFunc.pfQuarterDownsampler  = DyadicBilinearQuarterDownsampler_AArch64_neon;
138     sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_AArch64_neon;
139     sDownsampleFunc.pfGeneralRatioLuma   = GeneralBilinearAccurateDownsamplerWrap_AArch64_neon;
140   }
141 #endif
142 }
143 
Process(int32_t iType,SPixMap * pSrcPixMap,SPixMap * pDstPixMap)144 EResult CDownsampling::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pDstPixMap) {
145   int32_t iSrcWidthY = pSrcPixMap->sRect.iRectWidth;
146   int32_t iSrcHeightY = pSrcPixMap->sRect.iRectHeight;
147   int32_t iDstWidthY = pDstPixMap->sRect.iRectWidth;
148   int32_t iDstHeightY = pDstPixMap->sRect.iRectHeight;
149 
150   int32_t iSrcWidthUV = iSrcWidthY >> 1;
151   int32_t iSrcHeightUV = iSrcHeightY >> 1;
152   int32_t iDstWidthUV = iDstWidthY >> 1;
153   int32_t iDstHeightUV = iDstHeightY >> 1;
154 
155   if (iSrcWidthY <= iDstWidthY || iSrcHeightY <= iDstHeightY) {
156     return RET_INVALIDPARAM;
157   }
158   if ((iSrcWidthY >> 1) > MAX_SAMPLE_WIDTH || (iSrcHeightY >> 1) > MAX_SAMPLE_HEIGHT || m_bNoSampleBuffer) {
159     if ((iSrcWidthY >> 1) == iDstWidthY && (iSrcHeightY >> 1) == iDstHeightY) {
160       // use half average functions
161       DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
162           (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);
163       DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
164           (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);
165       DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
166           (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
167     } else if ((iSrcWidthY >> 2) == iDstWidthY && (iSrcHeightY >> 2) == iDstHeightY) {
168 
169       m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
170                                            (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);
171 
172       m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
173                                            (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);
174 
175       m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
176                                            (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
177 
178     } else if ((iSrcWidthY / 3) == iDstWidthY && (iSrcHeightY / 3) == iDstHeightY) {
179 
180       m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
181                                             (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iDstHeightY);
182 
183       m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
184                                             (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iDstHeightUV);
185 
186       m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
187                                             (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iDstHeightUV);
188 
189     } else {
190       m_pfDownsample.pfGeneralRatioLuma ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], iDstWidthY, iDstHeightY,
191                                          (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);
192 
193       m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], iDstWidthUV, iDstHeightUV,
194                                            (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);
195 
196       m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], iDstWidthUV, iDstHeightUV,
197                                            (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
198     }
199   } else {
200 
201     int32_t iIdx = 0;
202     int32_t iHalfSrcWidth = iSrcWidthY >> 1;
203     int32_t iHalfSrcHeight = iSrcHeightY >> 1;
204     uint8_t* pSrcY = (uint8_t*)pSrcPixMap->pPixel[0];
205     uint8_t* pSrcU = (uint8_t*)pSrcPixMap->pPixel[1];
206     uint8_t* pSrcV = (uint8_t*)pSrcPixMap->pPixel[2];
207     int32_t iSrcStrideY = pSrcPixMap->iStride[0];
208     int32_t iSrcStrideU = pSrcPixMap->iStride[1];
209     int32_t iSrcStrideV = pSrcPixMap->iStride[2];
210 
211     int32_t iDstStrideY = pDstPixMap->iStride[0];
212     int32_t iDstStrideU = pDstPixMap->iStride[1];
213     int32_t iDstStrideV = pDstPixMap->iStride[2];
214 
215     uint8_t* pDstY = (uint8_t*)m_pSampleBuffer[iIdx][0];
216     uint8_t* pDstU = (uint8_t*)m_pSampleBuffer[iIdx][1];
217     uint8_t* pDstV = (uint8_t*)m_pSampleBuffer[iIdx][2];
218     iIdx++;
219     do {
220       if ((iHalfSrcWidth == iDstWidthY) && (iHalfSrcHeight == iDstHeightY)) { //end
221         // use half average functions
222         DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
223             (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY);
224         DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
225             (uint8_t*)pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV);
226         DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
227             (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV);
228         break;
229       } else if ((iHalfSrcWidth > iDstWidthY) && (iHalfSrcHeight > iDstHeightY)){
230         // use half average functions
231         iDstStrideY = WELS_ALIGN (iHalfSrcWidth, 32);
232         iDstStrideU = WELS_ALIGN (iHalfSrcWidth >> 1, 32);
233         iDstStrideV = WELS_ALIGN (iHalfSrcWidth >> 1, 32);
234         DownsampleHalfAverage ((uint8_t*)pDstY, iDstStrideY,
235             (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY);
236         DownsampleHalfAverage ((uint8_t*)pDstU, iDstStrideU,
237             (uint8_t*)pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV);
238         DownsampleHalfAverage ((uint8_t*)pDstV, iDstStrideV,
239             (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV);
240 
241         pSrcY = (uint8_t*)pDstY;
242         pSrcU = (uint8_t*)pDstU;
243         pSrcV = (uint8_t*)pDstV;
244 
245 
246         iSrcWidthY = iHalfSrcWidth;
247         iSrcWidthUV = iHalfSrcWidth >> 1;
248         iSrcHeightY = iHalfSrcHeight;
249         iSrcHeightUV = iHalfSrcHeight >> 1;
250 
251         iSrcStrideY = iDstStrideY;
252         iSrcStrideU = iDstStrideU;
253         iSrcStrideV = iDstStrideV;
254 
255         iHalfSrcWidth >>= 1;
256         iHalfSrcHeight >>= 1;
257 
258         iIdx = iIdx % 2;
259         pDstY = (uint8_t*)m_pSampleBuffer[iIdx][0];
260         pDstU = (uint8_t*)m_pSampleBuffer[iIdx][1];
261         pDstV = (uint8_t*)m_pSampleBuffer[iIdx][2];
262         iIdx++;
263       } else {
264         m_pfDownsample.pfGeneralRatioLuma ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], iDstWidthY, iDstHeightY,
265                                            (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY);
266 
267         m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], iDstWidthUV, iDstHeightUV,
268                                              (uint8_t*)pSrcU, iSrcStrideU,  iSrcWidthUV, iSrcHeightUV);
269 
270         m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], iDstWidthUV, iDstHeightUV,
271                                              (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV);
272         break;
273       }
274     } while (true);
275   }
276   return RET_SUCCESS;
277 }
278 
DownsampleHalfAverage(uint8_t * pDst,int32_t iDstStride,uint8_t * pSrc,int32_t iSrcStride,int32_t iSrcWidth,int32_t iSrcHeight)279 void CDownsampling::DownsampleHalfAverage (uint8_t* pDst, int32_t iDstStride,
280         uint8_t* pSrc, int32_t iSrcStride, int32_t iSrcWidth, int32_t iSrcHeight) {
281   if ((iSrcStride & 31) == 0) {
282     assert ((iDstStride & 15) == 0);
283     m_pfDownsample.pfHalfAverageWidthx32 (pDst, iDstStride,
284         pSrc, iSrcStride, WELS_ALIGN (iSrcWidth & ~1, 32), iSrcHeight);
285   } else {
286     assert ((iSrcStride & 15) == 0);
287     assert ((iDstStride &  7) == 0);
288     m_pfDownsample.pfHalfAverageWidthx16 (pDst, iDstStride,
289         pSrc, iSrcStride, WELS_ALIGN (iSrcWidth & ~1, 16), iSrcHeight);
290   }
291 }
292 
293 
294 WELSVP_NAMESPACE_END
295