1 /*!
2 * \copy
3 * Copyright (c) 2009-2013, Cisco Systems
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 *
32 * \file encode_mb.c
33 *
* \brief Implementation for pCurMb encoding
35 *
36 * \date 05/19/2009 Created
37 *************************************************************************************
38 */
39
40
41 #include "svc_encode_mb.h"
42 #include "encode_mb_aux.h"
43 #include "decode_mb_aux.h"
44 #include "ls_defines.h"
45
46 namespace WelsEnc {
WelsDctMb(int16_t * pRes,uint8_t * pEncMb,int32_t iEncStride,uint8_t * pBestPred,PDctFunc pfDctFourT4)47 void WelsDctMb (int16_t* pRes, uint8_t* pEncMb, int32_t iEncStride, uint8_t* pBestPred, PDctFunc pfDctFourT4) {
48 pfDctFourT4 (pRes, pEncMb, iEncStride, pBestPred, 16);
49 pfDctFourT4 (pRes + 64, pEncMb + 8, iEncStride, pBestPred + 8, 16);
50 pfDctFourT4 (pRes + 128, pEncMb + 8 * iEncStride, iEncStride, pBestPred + 128, 16);
51 pfDctFourT4 (pRes + 192, pEncMb + 8 * iEncStride + 8, iEncStride, pBestPred + 136, 16);
52 }
53
WelsEncRecI16x16Y(sWelsEncCtx * pEncCtx,SMB * pCurMb,SMbCache * pMbCache)54 void WelsEncRecI16x16Y (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache) {
55 ENFORCE_STACK_ALIGN_1D (int16_t, aDctT4Dc, 16, 16)
56 SWelsFuncPtrList* pFuncList = pEncCtx->pFuncList;
57 SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
58 const int32_t kiEncStride = pCurDqLayer->iEncStride[0];
59 int16_t* pRes = pMbCache->pCoeffLevel;
60 uint8_t* pPred = pMbCache->SPicData.pCsMb[0];
61 const int32_t kiRecStride = pCurDqLayer->iCsStride[0];
62 int16_t* pBlock = pMbCache->pDct->iLumaBlock[0];
63 uint8_t* pBestPred = pMbCache->pMemPredLuma;
64 const uint8_t* kpNoneZeroCountIdx = &g_kuiMbCountScan4Idx[0];
65 uint8_t i, uiQp = pCurMb->uiLumaQp;
66 uint32_t uiNoneZeroCount, uiNoneZeroCountMbAc = 0, uiCountI16x16Dc;
67
68 const int16_t* pMF = g_kiQuantMF[uiQp];
69 const int16_t* pFF = g_iQuantIntraFF[uiQp];
70
71 WelsDctMb (pRes, pMbCache->SPicData.pEncMb[0], kiEncStride, pBestPred, pEncCtx->pFuncList->pfDctFourT4);
72
73 pFuncList->pfTransformHadamard4x4Dc (aDctT4Dc, pRes);
74 pFuncList->pfQuantizationDc4x4 (aDctT4Dc, pFF[0] << 1, pMF[0]>>1);
75 pFuncList->pfScan4x4 (pMbCache->pDct->iLumaI16x16Dc, aDctT4Dc);
76 uiCountI16x16Dc = pFuncList->pfGetNoneZeroCount (pMbCache->pDct->iLumaI16x16Dc);
77
78 for (i = 0; i < 4; i++) {
79 pFuncList->pfQuantizationFour4x4 (pRes, pFF, pMF);
80 pFuncList->pfScan4x4Ac (pBlock, pRes);
81 pFuncList->pfScan4x4Ac (pBlock + 16, pRes + 16);
82 pFuncList->pfScan4x4Ac (pBlock + 32, pRes + 32);
83 pFuncList->pfScan4x4Ac (pBlock + 48, pRes + 48);
84 pRes += 64;
85 pBlock += 64;
86 }
87 pRes -= 256;
88 pBlock -= 256;
89
90 for (i = 0; i < 16; i++) {
91 uiNoneZeroCount = pFuncList->pfGetNoneZeroCount (pBlock);
92 pCurMb->pNonZeroCount[*kpNoneZeroCountIdx++] = uiNoneZeroCount;
93 uiNoneZeroCountMbAc += uiNoneZeroCount;
94 pBlock += 16;
95 }
96
97 if (uiCountI16x16Dc > 0) {
98 if (uiQp < 12) {
99 WelsIHadamard4x4Dc (aDctT4Dc);
100 WelsDequantLumaDc4x4 (aDctT4Dc, uiQp);
101 } else
102 pFuncList->pfDequantizationIHadamard4x4 (aDctT4Dc, g_kuiDequantCoeff[uiQp][0] >> 2);
103 }
104
105 if (uiNoneZeroCountMbAc > 0) {
106 pCurMb->uiCbp = 15;
107 pFuncList->pfDequantizationFour4x4 (pRes, g_kuiDequantCoeff[uiQp]);
108 pFuncList->pfDequantizationFour4x4 (pRes + 64, g_kuiDequantCoeff[uiQp]);
109 pFuncList->pfDequantizationFour4x4 (pRes + 128, g_kuiDequantCoeff[uiQp]);
110 pFuncList->pfDequantizationFour4x4 (pRes + 192, g_kuiDequantCoeff[uiQp]);
111
112 pRes[0] = aDctT4Dc[0];
113 pRes[16] = aDctT4Dc[1];
114 pRes[32] = aDctT4Dc[4];
115 pRes[48] = aDctT4Dc[5];
116 pRes[64] = aDctT4Dc[2];
117 pRes[80] = aDctT4Dc[3];
118 pRes[96] = aDctT4Dc[6];
119 pRes[112] = aDctT4Dc[7];
120 pRes[128] = aDctT4Dc[8];
121 pRes[144] = aDctT4Dc[9];
122 pRes[160] = aDctT4Dc[12];
123 pRes[176] = aDctT4Dc[13];
124 pRes[192] = aDctT4Dc[10];
125 pRes[208] = aDctT4Dc[11];
126 pRes[224] = aDctT4Dc[14];
127 pRes[240] = aDctT4Dc[15];
128
129 pFuncList->pfIDctFourT4 (pPred, kiRecStride, pBestPred, 16, pRes);
130 pFuncList->pfIDctFourT4 (pPred + 8, kiRecStride, pBestPred + 8, 16, pRes + 64);
131 pFuncList->pfIDctFourT4 (pPred + kiRecStride * 8, kiRecStride, pBestPred + 128, 16, pRes + 128);
132 pFuncList->pfIDctFourT4 (pPred + kiRecStride * 8 + 8, kiRecStride, pBestPred + 136, 16, pRes + 192);
133 } else if (uiCountI16x16Dc > 0) {
134 pFuncList->pfIDctI16x16Dc (pPred, kiRecStride, pBestPred, 16, aDctT4Dc);
135 } else {
136 pFuncList->pfCopy16x16Aligned (pPred, kiRecStride, pBestPred, 16);
137 }
138 }
WelsEncRecI4x4Y(sWelsEncCtx * pEncCtx,SMB * pCurMb,SMbCache * pMbCache,uint8_t uiI4x4Idx)139 void WelsEncRecI4x4Y (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, uint8_t uiI4x4Idx) {
140 SWelsFuncPtrList* pFuncList = pEncCtx->pFuncList;
141 SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
142 int32_t iEncStride = pCurDqLayer->iEncStride[0];
143 uint8_t uiQp = pCurMb->uiLumaQp;
144
145 int16_t* pResI4x4 = pMbCache->pCoeffLevel;
146 uint8_t* pPredI4x4;
147
148 uint8_t* pPred = pMbCache->SPicData.pCsMb[0];
149 int32_t iRecStride = pCurDqLayer->iCsStride[0];
150
151 uint32_t uiOffset = g_kuiMbCountScan4Idx[uiI4x4Idx];
152 uint8_t* pEncMb = pMbCache->SPicData.pEncMb[0];
153 uint8_t* pBestPred = pMbCache->pBestPredI4x4Blk4;
154 int16_t* pBlock = pMbCache->pDct->iLumaBlock[uiI4x4Idx];
155
156 const int16_t* pMF = g_kiQuantMF[uiQp];
157 const int16_t* pFF = g_iQuantIntraFF[uiQp];
158
159 int32_t* pStrideEncBlockOffset = pEncCtx->pStrideTab->pStrideEncBlockOffset[pEncCtx->uiDependencyId];
160 int32_t* pStrideDecBlockOffset = pEncCtx->pStrideTab->pStrideDecBlockOffset[pEncCtx->uiDependencyId][0 ==
161 pEncCtx->uiTemporalId];
162 int32_t iNoneZeroCount = 0;
163
164 pFuncList->pfDctT4 (pResI4x4, & (pEncMb[pStrideEncBlockOffset[uiI4x4Idx]]), iEncStride, pBestPred, 4);
165 pFuncList->pfQuantization4x4 (pResI4x4, pFF, pMF);
166 pFuncList->pfScan4x4 (pBlock, pResI4x4);
167
168 iNoneZeroCount = pFuncList->pfGetNoneZeroCount (pBlock);
169 pCurMb->pNonZeroCount[uiOffset] = iNoneZeroCount;
170
171 pPredI4x4 = pPred + pStrideDecBlockOffset[uiI4x4Idx];
172 if (iNoneZeroCount > 0) {
173 pCurMb->uiCbp |= 1 << (uiI4x4Idx >> 2);
174 pFuncList->pfDequantization4x4 (pResI4x4, g_kuiDequantCoeff[uiQp]);
175 pFuncList->pfIDctT4 (pPredI4x4, iRecStride, pBestPred, 4, pResI4x4);
176 } else
177 pFuncList->pfCopy4x4 (pPredI4x4, iRecStride, pBestPred, 4);
178 }
179
WelsEncInterY(SWelsFuncPtrList * pFuncList,SMB * pCurMb,SMbCache * pMbCache)180 void WelsEncInterY (SWelsFuncPtrList* pFuncList, SMB* pCurMb, SMbCache* pMbCache) {
181 PQuantizationMaxFunc pfQuantizationFour4x4Max = pFuncList->pfQuantizationFour4x4Max;
182 PSetMemoryZero pfSetMemZeroSize8 = pFuncList->pfSetMemZeroSize8;
183 PSetMemoryZero pfSetMemZeroSize64 = pFuncList->pfSetMemZeroSize64;
184 PScanFunc pfScan4x4 = pFuncList->pfScan4x4;
185 PCalculateSingleCtrFunc pfCalculateSingleCtr4x4 = pFuncList->pfCalculateSingleCtr4x4;
186 PGetNoneZeroCountFunc pfGetNoneZeroCount = pFuncList->pfGetNoneZeroCount;
187 PDeQuantizationFunc pfDequantizationFour4x4 = pFuncList->pfDequantizationFour4x4;
188 int16_t* pRes = pMbCache->pCoeffLevel;
189 int32_t iSingleCtrMb = 0, iSingleCtr8x8[4];
190 int16_t* pBlock = pMbCache->pDct->iLumaBlock[0];
191 uint8_t uiQp = pCurMb->uiLumaQp;
192 const int16_t* pMF = g_kiQuantMF[uiQp];
193 const int16_t* pFF = g_kiQuantInterFF[uiQp];
194 int16_t aMax[16];
195 int32_t i, j, iNoneZeroCountMbDcAc = 0, iNoneZeroCount = 0;
196
197 for (i = 0; i < 4; i++) {
198 pfQuantizationFour4x4Max (pRes, pFF, pMF, aMax + (i << 2));
199 iSingleCtr8x8[i] = 0;
200 for (j = 0; j < 4; j++) {
201 if (aMax[ (i << 2) + j] == 0)
202 pfSetMemZeroSize8 (pBlock, 32);
203 else {
204 pfScan4x4 (pBlock, pRes);
205 if (aMax[ (i << 2) + j] > 1)
206 iSingleCtr8x8[i] += 9;
207 else if (iSingleCtr8x8[i] < 6)
208 iSingleCtr8x8[i] += pfCalculateSingleCtr4x4 (pBlock);
209 }
210 pRes += 16;
211 pBlock += 16;
212 }
213 iSingleCtrMb += iSingleCtr8x8[i];
214 }
215 pBlock -= 256;
216 pRes -= 256;
217
218 memset (pCurMb->pNonZeroCount, 0, 16);
219
220
221 if (iSingleCtrMb < 6) { //from JVT-O079
222 iNoneZeroCountMbDcAc = 0;
223 pfSetMemZeroSize64 (pRes, 768); // confirmed_safe_unsafe_usage
224 } else {
225 const uint8_t* kpNoneZeroCountIdx = g_kuiMbCountScan4Idx;
226 for (i = 0; i < 4; i++) {
227 if (iSingleCtr8x8[i] >= 4) {
228 for (j = 0; j < 4; j++) {
229 iNoneZeroCount = pfGetNoneZeroCount (pBlock);
230 pCurMb->pNonZeroCount[*kpNoneZeroCountIdx++] = iNoneZeroCount;
231 iNoneZeroCountMbDcAc += iNoneZeroCount;
232 pBlock += 16;
233 }
234 pfDequantizationFour4x4 (pRes, g_kuiDequantCoeff[uiQp]);
235 pCurMb->uiCbp |= 1 << i;
236 } else { // set zero for an 8x8 pBlock
237 pfSetMemZeroSize64 (pRes, 128); // confirmed_safe_unsafe_usage
238 kpNoneZeroCountIdx += 4;
239 pBlock += 64;
240 }
241 pRes += 64;
242 }
243 }
244 }
245
WelsEncRecUV(SWelsFuncPtrList * pFuncList,SMB * pCurMb,SMbCache * pMbCache,int16_t * pRes,int32_t iUV)246 void WelsEncRecUV (SWelsFuncPtrList* pFuncList, SMB* pCurMb, SMbCache* pMbCache, int16_t* pRes, int32_t iUV) {
247 PQuantizationHadamardFunc pfQuantizationHadamard2x2 = pFuncList->pfQuantizationHadamard2x2;
248 PQuantizationMaxFunc pfQuantizationFour4x4Max = pFuncList->pfQuantizationFour4x4Max;
249 PSetMemoryZero pfSetMemZeroSize8 = pFuncList->pfSetMemZeroSize8;
250 PSetMemoryZero pfSetMemZeroSize64 = pFuncList->pfSetMemZeroSize64;
251 PScanFunc pfScan4x4Ac = pFuncList->pfScan4x4Ac;
252 PCalculateSingleCtrFunc pfCalculateSingleCtr4x4 = pFuncList->pfCalculateSingleCtr4x4;
253 PGetNoneZeroCountFunc pfGetNoneZeroCount = pFuncList->pfGetNoneZeroCount;
254 PDeQuantizationFunc pfDequantizationFour4x4 = pFuncList->pfDequantizationFour4x4;
255 const int32_t kiInterFlag = !IS_INTRA (pCurMb->uiMbType);
256 const uint8_t kiQp = pCurMb->uiChromaQp;
257 uint8_t i, uiNoneZeroCount, uiNoneZeroCountMbAc = 0, uiNoneZeroCountMbDc = 0;
258 uint8_t uiNoneZeroCountOffset = (iUV - 1) << 1; //UV==1 or 2
259 uint8_t uiSubMbIdx = 16 + ((iUV - 1) << 2); //uiSubMbIdx == 16 or 20
260 int16_t* iChromaDc = pMbCache->pDct->iChromaDc[iUV - 1], *pBlock = pMbCache->pDct->iChromaBlock[ (iUV - 1) << 2];
261 int16_t aDct2x2[4], j, aMax[4];
262 int32_t iSingleCtr8x8 = 0;
263 const int16_t* pMF = g_kiQuantMF[kiQp];
264 const int16_t* pFF = g_kiQuantInterFF[ (!kiInterFlag) * 6 + kiQp];
265
266 uiNoneZeroCountMbDc = pfQuantizationHadamard2x2 (pRes, pFF[0] << 1, pMF[0]>>1, aDct2x2, iChromaDc);
267
268 pfQuantizationFour4x4Max (pRes, pFF, pMF, aMax);
269
270 for (j = 0; j < 4; j++) {
271 if (aMax[j] == 0)
272 pfSetMemZeroSize8 (pBlock, 32);
273 else {
274 pfScan4x4Ac (pBlock, pRes);
275 if (kiInterFlag) {
276 if (aMax[j] > 1)
277 iSingleCtr8x8 += 9;
278 else if (iSingleCtr8x8 < 7)
279 iSingleCtr8x8 += pfCalculateSingleCtr4x4 (pBlock);
280 } else
281 iSingleCtr8x8 = INT_MAX;
282 }
283 pRes += 16;
284 pBlock += 16;
285 }
286 pRes -= 64;
287
288 if (iSingleCtr8x8 < 7) { //from JVT-O079
289 pfSetMemZeroSize64 (pRes, 128); // confirmed_safe_unsafe_usage
290 ST16 (&pCurMb->pNonZeroCount[16 + uiNoneZeroCountOffset], 0);
291 ST16 (&pCurMb->pNonZeroCount[20 + uiNoneZeroCountOffset], 0);
292 } else {
293 const uint8_t* kpNoneZeroCountIdx = &g_kuiMbCountScan4Idx[uiSubMbIdx];
294 pBlock -= 64;
295 for (i = 0; i < 4; i++) {
296 uiNoneZeroCount = pfGetNoneZeroCount (pBlock);
297 pCurMb->pNonZeroCount[*kpNoneZeroCountIdx++] = uiNoneZeroCount;
298 uiNoneZeroCountMbAc += uiNoneZeroCount;
299 pBlock += 16;
300 }
301 pfDequantizationFour4x4 (pRes, g_kuiDequantCoeff[pCurMb->uiChromaQp]);
302 pCurMb->uiCbp &= 0x0F;
303 pCurMb->uiCbp |= 0x20;
304 }
305
306 if (uiNoneZeroCountMbDc > 0) {
307 WelsDequantIHadamard2x2Dc (aDct2x2, g_kuiDequantCoeff[kiQp][0]);
308 if (2 != (pCurMb->uiCbp >> 4))
309 pCurMb->uiCbp |= (0x01 << 4) ;
310 pRes[0] = aDct2x2[0];
311 pRes[16] = aDct2x2[1];
312 pRes[32] = aDct2x2[2];
313 pRes[48] = aDct2x2[3];
314 }
315 }
316
317
WelsRecPskip(SDqLayer * pCurLayer,SWelsFuncPtrList * pFuncList,SMB * pCurMb,SMbCache * pMbCache)318 void WelsRecPskip (SDqLayer* pCurLayer, SWelsFuncPtrList* pFuncList, SMB* pCurMb, SMbCache* pMbCache) {
319 int32_t* iRecStride = pCurLayer->iCsStride;
320 uint8_t** pCsMb = &pMbCache->SPicData.pCsMb[0];
321
322 pFuncList->pfCopy16x16Aligned (pCsMb[0], *iRecStride++, pMbCache->pSkipMb, 16);
323 pFuncList->pfCopy8x8Aligned (pCsMb[1], *iRecStride++, pMbCache->pSkipMb + 256, 8);
324 pFuncList->pfCopy8x8Aligned (pCsMb[2], *iRecStride, pMbCache->pSkipMb + 320, 8);
325 pFuncList->pfSetMemZeroSize8 (pCurMb->pNonZeroCount, 24);
326 }
327
WelsTryPYskip(sWelsEncCtx * pEncCtx,SMB * pCurMb,SMbCache * pMbCache)328 bool WelsTryPYskip (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache) {
329 int32_t iSingleCtrMb = 0;
330 int16_t* pRes = pMbCache->pCoeffLevel;
331 const uint8_t kuiQp = pCurMb->uiLumaQp;
332
333 int16_t* pBlock = pMbCache->pDct->iLumaBlock[0];
334 uint16_t aMax[4], i, j;
335 const int16_t* pMF = g_kiQuantMF[kuiQp];
336 const int16_t* pFF = g_kiQuantInterFF[kuiQp];
337
338 for (i = 0; i < 4; i++) {
339 pEncCtx->pFuncList->pfQuantizationFour4x4Max (pRes, pFF, pMF, (int16_t*)aMax);
340
341 for (j = 0; j < 4; j++) {
342 if (aMax[j] > 1) return false; // iSingleCtrMb += 9, can't be P_SKIP
343 else if (aMax[j] == 1) {
344 pEncCtx->pFuncList->pfScan4x4 (pBlock, pRes); //
345 iSingleCtrMb += pEncCtx->pFuncList->pfCalculateSingleCtr4x4 (pBlock);
346 }
347 if (iSingleCtrMb >= 6) return false; //from JVT-O079
348 pRes += 16;
349 pBlock += 16;
350 }
351 }
352 return true;
353 }
354
WelsTryPUVskip(sWelsEncCtx * pEncCtx,SMB * pCurMb,SMbCache * pMbCache,int32_t iUV)355 bool WelsTryPUVskip (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, int32_t iUV) {
356 int16_t* pRes = ((iUV == 1) ? & (pMbCache->pCoeffLevel[256]) : & (pMbCache->pCoeffLevel[256 + 64]));
357
358 const uint8_t kuiQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp +
359 pEncCtx->pCurDqLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset)];
360
361 const int16_t* pMF = g_kiQuantMF[kuiQp];
362 const int16_t* pFF = g_kiQuantInterFF[kuiQp];
363
364 if (pEncCtx->pFuncList->pfQuantizationHadamard2x2Skip (pRes, pFF[0] << 1, pMF[0]>>1))
365 return false;
366 else {
367 uint16_t aMax[4], j;
368 int32_t iSingleCtrMb = 0;
369 int16_t* pBlock = pMbCache->pDct->iChromaBlock[ (iUV - 1) << 2];
370 pEncCtx->pFuncList->pfQuantizationFour4x4Max (pRes, pFF, pMF, (int16_t*)aMax);
371
372 for (j = 0; j < 4; j++) {
373 if (aMax[j] > 1) return false; // iSingleCtrMb += 9, can't be P_SKIP
374 else if (aMax[j] == 1) {
375 pEncCtx->pFuncList->pfScan4x4Ac (pBlock, pRes);
376 iSingleCtrMb += pEncCtx->pFuncList->pfCalculateSingleCtr4x4 (pBlock);
377 }
378 if (iSingleCtrMb >= 7) return false; //from JVT-O079
379 pRes += 16;
380 pBlock += 16;
381 }
382 return true;
383 }
384 }
385
386 } // namespace WelsEnc
387