1 /*!
2 * \copy
3 * Copyright (c) 2009-2013, Cisco Systems
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 *
32 * \file deblocking.c
33 *
34 * \brief Interfaces introduced in frame deblocking filtering
35 *
36 * \date 08/03/2009 Created
37 *
38 *************************************************************************************
39 */
40
41 #include "deblocking.h"
42 #include "cpu_core.h"
43
44 namespace WelsEnc {
45
// Function-like accessors for the three lookup tables defined in this file.
// Each one expands to a plain array subscript on the table of the same name, so
// g_kiTc0Table(i)[j] is g_kiTc0Table[(i)][j]. The macro is only applied when the
// name is followed by '(', so the table definitions themselves are unaffected.
#define g_kuiAlphaTable(x) g_kuiAlphaTable[(x)]
#define g_kiBetaTable(x) g_kiBetaTable[(x)]
#define g_kiTc0Table(x) g_kiTc0Table[(x)]
49
/*
 * MB_BS_MV: non-zero when the motion vector sCurMv[uiBIdx] differs from
 * sNeighMv[uiBnIdx] by 4 or more in either the X or the Y component.
 * All arguments are parenthesized so that compound expressions can be passed.
 */
#define MB_BS_MV(sCurMv, sNeighMv, uiBIdx, uiBnIdx) \
(\
  ( WELS_ABS( (sCurMv)[(uiBIdx)].iMvX - (sNeighMv)[(uiBnIdx)].iMvX ) >= 4 ) ||\
  ( WELS_ABS( (sCurMv)[(uiBIdx)].iMvY - (sNeighMv)[(uiBnIdx)].iMvY ) >= 4 )\
)

/*
 * SMB_EDGE_MV: 1 when two motion vectors of the same array differ by 4 or more
 * in either component, else 0. The test is done by masking off the two low bits
 * of each absolute difference. uiRefIndex is accepted but intentionally never
 * expanded (callers may pass a name that is not declared).
 */
#define SMB_EDGE_MV(uiRefIndex, sMotionVector, uiBIdx, uiBnIdx) \
(\
  !!((WELS_ABS((sMotionVector)[(uiBIdx)].iMvX - (sMotionVector)[(uiBnIdx)].iMvX) & (~3)) | \
     (WELS_ABS((sMotionVector)[(uiBIdx)].iMvY - (sMotionVector)[(uiBnIdx)].iMvY) & (~3)))\
)

/*
 * BS_EDGE: boundary strength of an inner edge.
 *   bsx1 != 0 : (bsx1 | mvFlag) << 1   (2 when bsx1 is 1)
 *   bsx1 == 0 : mvFlag                 (0 or 1, see SMB_EDGE_MV)
 * bsx1 is evaluated twice, so it must not have side effects.
 */
#define BS_EDGE(bsx1, uiRefIndex, sMotionVector, uiBIdx, uiBnIdx) \
( ((bsx1) | SMB_EDGE_MV(uiRefIndex, sMotionVector, uiBIdx, uiBnIdx)) << ((bsx1) ? 1 : 0) )
63
/*
 * GET_ALPHA_BETA_FROM_QP: look up the alpha and beta filter thresholds.
 *   iIdexA <- clip(QP + iAlphaOffset) to 0..51   (also the row index for the tc0 table)
 *   iAlpha <- g_kuiAlphaTable[iIdexA]
 *   iBeta  <- g_kiBetaTable[clip(QP + iBetaOffset)]
 * iIdexA, iAlpha and iBeta must be assignable lvalues. QP is evaluated twice.
 * Written as do { } while (0) so that the invocation plus its trailing ';' is a
 * single statement, and every argument is parenthesized.
 */
#define GET_ALPHA_BETA_FROM_QP(QP, iAlphaOffset, iBetaOffset, iIdexA, iAlpha, iBeta) \
do {\
  (iIdexA) = ((QP) + (iAlphaOffset));\
  (iIdexA) = CLIP3_QP_0_51(iIdexA);\
  (iAlpha) = g_kuiAlphaTable(iIdexA);\
  (iBeta)  = g_kiBetaTable((CLIP3_QP_0_51((QP) + (iBetaOffset))));\
} while (0)
71
// Alpha threshold, indexed by the clipped (QP + alpha offset) value produced in
// GET_ALPHA_BETA_FROM_QP. Entries 0..51 are the table proper; the 12 extra
// entries repeat the last value (255).
// NOTE(review): the padding presumably guards against indices above 51 - confirm.
static const uint8_t g_kuiAlphaTable[52 + 12] = { //this table refers to Table 8-16 in H.264/AVC standard
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
  7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
  25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
  80, 90, 101, 113, 127, 144, 162, 182, 203, 226,
  255, 255
  , 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
};
81
// Beta threshold, indexed by the clipped (QP + beta offset) value produced in
// GET_ALPHA_BETA_FROM_QP. Entries 0..51 are the table proper; the 12 extra
// entries repeat the last value (18).
// NOTE(review): the padding presumably guards against indices above 51 - confirm.
static const int8_t g_kiBetaTable[52 + 12] = { //this table refers to Table 8-16 in H.264/AVC standard
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
  3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
  8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
  13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
  18, 18
  , 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18
};
91
// tc0 clipping values. The first index is iIdexA (clipped QP + alpha offset), the
// second is the boundary strength 0..3 (see TC0_TBL_LOOKUP). Column 0 is -1 in
// every row. Rows 52..63 repeat row 51.
// NOTE(review): the padding presumably guards against indices above 51 - confirm.
static const int8_t g_kiTc0Table[52 + 12][4] = { //this table refers Table 8-17 in H.264/AVC standard
  { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
  { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
  { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 1 },
  { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 1, 1 }, { -1, 0, 1, 1 }, { -1, 1, 1, 1 },
  { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 },
  { -1, 1, 1, 2 }, { -1, 1, 2, 3 }, { -1, 1, 2, 3 }, { -1, 2, 2, 3 }, { -1, 2, 2, 4 }, { -1, 2, 3, 4 },
  { -1, 2, 3, 4 }, { -1, 3, 3, 5 }, { -1, 3, 4, 6 }, { -1, 3, 4, 6 }, { -1, 4, 5, 7 }, { -1, 4, 5, 8 },
  { -1, 4, 6, 9 }, { -1, 5, 7, 10 }, { -1, 6, 8, 11 }, { -1, 6, 8, 13 }, { -1, 7, 10, 14 }, { -1, 8, 11, 16 },
  { -1, 9, 12, 18 }, { -1, 10, 13, 20 }, { -1, 11, 15, 23 }, { -1, 13, 17, 25 }
  , { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }
  , { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }
};
105
// 4x4 block indices on either side of a macroblock boundary, used by
// DeblockingBSMarginalMBAvcbase. The first index is the edge (0 is used with the
// left neighbour, 1 with the top neighbour). Within a row, entries [0..3] index
// blocks of the current macroblock and entries [4..7] index the matching blocks
// of the neighbouring macroblock.
static const uint8_t g_kuiTableBIdx[2][8] = {
  {
    0, 4, 8, 12, // edge 0: blocks of the current MB
    3, 7, 11, 15 // edge 0: blocks of the neighbouring MB
  },

  {
    0, 1, 2, 3 , // edge 1: blocks of the current MB
    12, 13, 14, 15 // edge 1: blocks of the neighbouring MB
  },
};
117
/*
 * TC0_TBL_LOOKUP: fill iTc[0..3] with the tc0 value for each of the four
 * boundary strengths pBS[0..3], taken from row iIdexA of g_kiTc0Table, plus
 * bchroma (callers pass 0 for luma and 1 for chroma).
 * Written as do { } while (0) so that the invocation plus its trailing ';' is a
 * single statement, and every argument is parenthesized so pointer expressions
 * such as (p + 4) can be passed.
 */
#define TC0_TBL_LOOKUP(iTc, iIdexA, pBS, bchroma) \
do {\
  (iTc)[0] = g_kiTc0Table(iIdexA)[(pBS)[0]] + (bchroma);\
  (iTc)[1] = g_kiTc0Table(iIdexA)[(pBS)[1]] + (bchroma);\
  (iTc)[2] = g_kiTc0Table(iIdexA)[(pBS)[2]] + (bchroma);\
  (iTc)[3] = g_kiTc0Table(iIdexA)[(pBS)[3]] + (bchroma);\
} while (0)
125
/*!
 * \brief  Derive the inner-edge boundary strengths of a macroblock from its
 *         per-block coefficient flags only (no motion-vector check).
 *
 * For the three inner vertical edges (uiBS[0][1..3]) and the three inner
 * horizontal edges (uiBS[1][1..3]) the flags of the two 4x4 blocks that meet at
 * the edge are OR-ed and shifted left by iLShiftFactor. Edge 0 of both
 * directions is not written.
 *
 * \param pNnzTab        16 per-block flags, index = 4 * row + column
 * \param uiBS           output, addressed as uiBS[direction][edge][position along the edge]
 * \param iLShiftFactor  left shift applied to the OR-ed flags
 *
 * \note  Each row of pNnzTab and each uiBS[1][edge] is accessed as one uint32_t,
 *        so the horizontal-edge shift operates on four packed bytes at once.
 */
void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t uiBS[2][4][4], int32_t iLShiftFactor) {
  uint32_t uiRowNnz[4];
  int32_t iRow, iEdge;

  // fetch the four rows of flags as packed words before anything is written
  for (iRow = 0; iRow < 4; iRow++)
    uiRowNnz[iRow] = * (uint32_t*) (pNnzTab + (iRow << 2));

  // vertical edges: left/right neighbours inside one row
  for (iRow = 0; iRow < 4; iRow++) {
    const int8_t* pRowNnz = pNnzTab + (iRow << 2);
    for (iEdge = 1; iEdge < 4; iEdge++)
      uiBS[0][iEdge][iRow] = (pRowNnz[iEdge - 1] | pRowNnz[iEdge]) << iLShiftFactor;
  }

  // horizontal edges: upper/lower rows, four positions per store
  for (iEdge = 1; iEdge < 4; iEdge++)
    * (uint32_t*)uiBS[1][iEdge] = (uiRowNnz[iEdge - 1] | uiRowNnz[iEdge]) << iLShiftFactor;
}
154
DeblockingBSInsideMBNormal(SMB * pCurMb,uint8_t uiBS[2][4][4],int8_t * pNnzTab)155 void inline DeblockingBSInsideMBNormal (SMB* pCurMb, uint8_t uiBS[2][4][4], int8_t* pNnzTab) {
156 uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
157 ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
158
159 uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
160 uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
161 uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
162 uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
163
164 for (int i = 0; i < 3; i++)
165 uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
166 uiBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 1, 0);
167 uiBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 2, 1);
168 uiBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 3, 2);
169
170 for (int i = 0; i < 3; i++)
171 uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
172 uiBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 5, 4);
173 uiBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 6, 5);
174 uiBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 7, 6);
175
176 for (int i = 0; i < 3; i++)
177 uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
178 uiBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 9, 8);
179 uiBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 10, 9);
180 uiBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 11, 10);
181
182 for (int i = 0; i < 3; i++)
183 uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
184 uiBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 13, 12);
185 uiBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 14, 13);
186 uiBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 15, 14);
187
188 //horizontal
189 * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
190 uiBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 4, 0);
191 uiBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 5, 1);
192 uiBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 6, 2);
193 uiBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 7, 3);
194
195 * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
196 uiBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 8, 4);
197 uiBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 9, 5);
198 uiBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 10, 6);
199 uiBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 11, 7);
200
201 * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
202 uiBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 12, 8);
203 uiBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 13, 9);
204 uiBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 14, 10);
205 uiBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 15, 11);
206 }
207
DeblockingBSMarginalMBAvcbase(SMB * pCurMb,SMB * pNeighMb,int32_t iEdge)208 uint32_t DeblockingBSMarginalMBAvcbase (SMB* pCurMb, SMB* pNeighMb, int32_t iEdge) {
209 int32_t i;
210 uint32_t uiBSx4;
211 uint8_t* pBS = (uint8_t*) (&uiBSx4);
212 const uint8_t* pBIdx = &g_kuiTableBIdx[iEdge][0];
213 const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4];
214
215
216 for (i = 0; i < 4; i++) {
217 if (pCurMb->pNonZeroCount[*pBIdx] | pNeighMb->pNonZeroCount[*pBnIdx]) {
218 pBS[i] = 2;
219 } else {
220 pBS[i] =
221 #ifndef SINGLE_REF_FRAME
222 (pCurMb->uiRefIndex[g_kiTableBlock8x8Idx[1][iEdge][i]] - pNeighMb->uiRefIndex[g_kiTableBlock8x8NIdx[1][iEdge][i]]) ||
223 #endif
224 MB_BS_MV (pCurMb->sMv, pNeighMb->sMv, *pBIdx, *pBnIdx);
225 }
226 pBIdx++;
227 pBnIdx++;
228 }
229 return uiBSx4;
230 }
231
FilteringEdgeLumaH(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)232 void FilteringEdgeLumaH (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride,
233 uint8_t* pBS) {
234 int32_t iIdexA;
235 int32_t iAlpha;
236 int32_t iBeta;
237 ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
238
239 GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
240 iBeta);
241
242 if (iAlpha | iBeta) {
243 TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 0);
244 pfDeblocking->pfLumaDeblockingLT4Ver (pPix, iStride, iAlpha, iBeta, iTc);
245 }
246 return;
247 }
FilteringEdgeLumaV(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)248 void FilteringEdgeLumaV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride,
249 uint8_t* pBS) {
250 int32_t iIdexA;
251 int32_t iAlpha;
252 int32_t iBeta;
253 ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
254
255 GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
256 iBeta);
257
258 if (iAlpha | iBeta) {
259 TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 0);
260 pfDeblocking->pfLumaDeblockingLT4Hor (pPix, iStride, iAlpha, iBeta, iTc);
261 }
262 return;
263 }
264
FilteringEdgeLumaIntraH(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)265 void FilteringEdgeLumaIntraH (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride,
266 uint8_t* pBS) {
267 int32_t iIdexA;
268 int32_t iAlpha;
269 int32_t iBeta;
270
271 GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
272 iBeta);
273
274 if (iAlpha | iBeta) {
275 pfDeblocking->pfLumaDeblockingEQ4Ver (pPix, iStride, iAlpha, iBeta);
276 }
277 return;
278 }
279
FilteringEdgeLumaIntraV(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)280 void FilteringEdgeLumaIntraV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride,
281 uint8_t* pBS) {
282 int32_t iIdexA;
283 int32_t iAlpha;
284 int32_t iBeta;
285
286 GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
287 iBeta);
288
289 if (iAlpha | iBeta) {
290 pfDeblocking->pfLumaDeblockingEQ4Hor (pPix, iStride, iAlpha, iBeta);
291 }
292 return;
293 }
FilteringEdgeChromaH(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)294 void FilteringEdgeChromaH (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr,
295 int32_t iStride, uint8_t* pBS) {
296 int32_t iIdexA;
297 int32_t iAlpha;
298 int32_t iBeta;
299 ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
300
301 GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
302 iBeta);
303
304 if (iAlpha | iBeta) {
305 TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 1);
306 pfDeblocking->pfChromaDeblockingLT4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta, iTc);
307 }
308 return;
309 }
FilteringEdgeChromaV(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)310 void FilteringEdgeChromaV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr,
311 int32_t iStride, uint8_t* pBS) {
312 int32_t iIdexA;
313 int32_t iAlpha;
314 int32_t iBeta;
315 ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
316
317 GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
318 iBeta);
319
320 if (iAlpha | iBeta) {
321 TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 1);
322 pfDeblocking->pfChromaDeblockingLT4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta, iTc);
323 }
324 return;
325 }
326
FilteringEdgeChromaIntraH(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)327 void FilteringEdgeChromaIntraH (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb,
328 uint8_t* pPixCr, int32_t iStride, uint8_t* pBS) {
329 int32_t iIdexA;
330 int32_t iAlpha;
331 int32_t iBeta;
332
333 GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
334 iBeta);
335
336 if (iAlpha | iBeta) {
337 pfDeblocking->pfChromaDeblockingEQ4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta);
338 }
339 return;
340 }
341
FilteringEdgeChromaIntraV(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)342 void FilteringEdgeChromaIntraV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb,
343 uint8_t* pPixCr, int32_t iStride, uint8_t* pBS) {
344 int32_t iIdexA;
345 int32_t iAlpha;
346 int32_t iBeta;
347
348 GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
349 iBeta);
350
351 if (iAlpha | iBeta) {
352 pfDeblocking->pfChromaDeblockingEQ4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta);
353 }
354 return;
355 }
356
DeblockingInterMb(DeblockingFunc * pfDeblocking,SMB * pCurMb,SDeblockingFilter * pFilter,uint8_t uiBS[2][4][4])357 void DeblockingInterMb (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter, uint8_t uiBS[2][4][4]) {
358 int8_t iCurLumaQp = pCurMb->uiLumaQp;
359 int8_t iCurChromaQp = pCurMb->uiChromaQp;
360 int32_t iLineSize = pFilter->iCsStride[0];
361 int32_t iLineSizeUV = pFilter->iCsStride[1];
362 int32_t iMbStride = pFilter->iMbStride;
363
364 int32_t iMbX = pCurMb->iMbX;
365 int32_t iMbY = pCurMb->iMbY;
366
367 bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0)&& (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc))};
368 bool bTopBsValid[2] = { (iMbY > 0), ((iMbY > 0)&& (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))};
369
370 int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc];
371 int32_t iTopFlag = bTopBsValid[pFilter->uiFilterIdc];
372
373 uint8_t* pDestY, *pDestCb, *pDestCr;
374 pDestY = pFilter->pCsData[0];
375 pDestCb = pFilter->pCsData[1];
376 pDestCr = pFilter->pCsData[2];
377
378 if (iLeftFlag) {
379 pFilter->uiLumaQP = (iCurLumaQp + (pCurMb - 1)->uiLumaQp + 1) >> 1;
380 pFilter->uiChromaQP = (iCurChromaQp + (pCurMb - 1)->uiChromaQp + 1) >> 1;
381
382 if (uiBS[0][0][0] == 0x04) {
383 FilteringEdgeLumaIntraV (pfDeblocking, pFilter, pDestY, iLineSize , NULL);
384 FilteringEdgeChromaIntraV (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSizeUV, NULL);
385 } else {
386 if (* (uint32_t*)uiBS[0][0] != 0) {
387 FilteringEdgeLumaV (pfDeblocking, pFilter, pDestY, iLineSize, uiBS[0][0]);
388 FilteringEdgeChromaV (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSizeUV, uiBS[0][0]);
389 }
390 }
391 }
392
393 pFilter->uiLumaQP = iCurLumaQp;
394 pFilter->uiChromaQP = iCurChromaQp;
395
396 if (* (uint32_t*)uiBS[0][1] != 0) {
397 FilteringEdgeLumaV (pfDeblocking, pFilter, &pDestY[1 << 2], iLineSize, uiBS[0][1]);
398 }
399
400 if (* (uint32_t*)uiBS[0][2] != 0) {
401 FilteringEdgeLumaV (pfDeblocking, pFilter, &pDestY[2 << 2], iLineSize, uiBS[0][2]);
402 FilteringEdgeChromaV (pfDeblocking, pFilter, &pDestCb[2 << 1], &pDestCr[2 << 1], iLineSizeUV, uiBS[0][2]);
403 }
404
405 if (* (uint32_t*)uiBS[0][3] != 0) {
406 FilteringEdgeLumaV (pfDeblocking, pFilter, &pDestY[3 << 2], iLineSize, uiBS[0][3]);
407 }
408
409 if (iTopFlag) {
410 pFilter->uiLumaQP = (iCurLumaQp + (pCurMb - iMbStride)->uiLumaQp + 1) >> 1;
411 pFilter->uiChromaQP = (iCurChromaQp + (pCurMb - iMbStride)->uiChromaQp + 1) >> 1;
412
413 if (uiBS[1][0][0] == 0x04) {
414 FilteringEdgeLumaIntraH (pfDeblocking, pFilter, pDestY, iLineSize , NULL);
415 FilteringEdgeChromaIntraH (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSizeUV, NULL);
416 } else {
417 if (* (uint32_t*)uiBS[1][0] != 0) {
418 FilteringEdgeLumaH (pfDeblocking, pFilter, pDestY, iLineSize, uiBS[1][0]);
419 FilteringEdgeChromaH (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSizeUV, uiBS[1][0]);
420 }
421 }
422 }
423
424 pFilter->uiLumaQP = iCurLumaQp;
425 pFilter->uiChromaQP = iCurChromaQp;
426
427 if (* (uint32_t*)uiBS[1][1] != 0) {
428 FilteringEdgeLumaH (pfDeblocking, pFilter, &pDestY[ (1 << 2)*iLineSize], iLineSize, uiBS[1][1]);
429 }
430
431 if (* (uint32_t*)uiBS[1][2] != 0) {
432 FilteringEdgeLumaH (pfDeblocking, pFilter, &pDestY[ (2 << 2)*iLineSize], iLineSize, uiBS[1][2]);
433 FilteringEdgeChromaH (pfDeblocking, pFilter, &pDestCb[ (2 << 1)*iLineSizeUV], &pDestCr[ (2 << 1)*iLineSizeUV],
434 iLineSizeUV, uiBS[1][2]);
435 }
436
437 if (* (uint32_t*)uiBS[1][3] != 0) {
438 FilteringEdgeLumaH (pfDeblocking, pFilter, &pDestY[ (3 << 2)*iLineSize], iLineSize, uiBS[1][3]);
439 }
440 }
441
FilteringEdgeLumaHV(DeblockingFunc * pfDeblocking,SMB * pCurMb,SDeblockingFilter * pFilter)442 void FilteringEdgeLumaHV (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter) {
443 int32_t iLineSize = pFilter->iCsStride[0];
444 int32_t iMbStride = pFilter->iMbStride;
445
446 uint8_t* pDestY;
447 int8_t iCurQp;
448 int32_t iIdexA, iAlpha, iBeta;
449
450 int32_t iMbX = pCurMb->iMbX;
451 int32_t iMbY = pCurMb->iMbY;
452
453 bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0)&& (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc))};
454 bool bTopBsValid[2] = { (iMbY > 0), ((iMbY > 0)&& (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))};
455
456 int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc];
457 int32_t iTopFlag = bTopBsValid[pFilter->uiFilterIdc];
458
459 ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
460 ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
461
462 pDestY = pFilter->pCsData[0];
463 iCurQp = pCurMb->uiLumaQp;
464
465 * (uint32_t*)uiBSx4 = 0x03030303;
466
467 // luma v
468 if (iLeftFlag) {
469 pFilter->uiLumaQP = (iCurQp + (pCurMb - 1)->uiLumaQp + 1) >> 1;
470 FilteringEdgeLumaIntraV (pfDeblocking, pFilter, pDestY, iLineSize, NULL);
471 }
472
473 pFilter->uiLumaQP = iCurQp;
474 GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
475 iBeta);
476 if (iAlpha | iBeta) {
477 TC0_TBL_LOOKUP (iTc, iIdexA, uiBSx4, 0);
478 pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc);
479 pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[2 << 2], iLineSize, iAlpha, iBeta, iTc);
480 pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc);
481
482 }
483
484 // luma h
485 if (iTopFlag) {
486 pFilter->uiLumaQP = (iCurQp + (pCurMb - iMbStride)->uiLumaQp + 1) >> 1;
487 FilteringEdgeLumaIntraH (pfDeblocking, pFilter, pDestY, iLineSize, NULL);
488 }
489
490 pFilter->uiLumaQP = iCurQp;
491 if (iAlpha | iBeta) {
492 pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
493 pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (2 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
494 pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
495 }
496 }
FilteringEdgeChromaHV(DeblockingFunc * pfDeblocking,SMB * pCurMb,SDeblockingFilter * pFilter)497 void FilteringEdgeChromaHV (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter) {
498 int32_t iLineSize = pFilter->iCsStride[1];
499 int32_t iMbStride = pFilter->iMbStride;
500
501 uint8_t* pDestCb, *pDestCr;
502 int8_t iCurQp;
503 int32_t iIdexA, iAlpha, iBeta;
504
505 int32_t iMbX = pCurMb->iMbX;
506 int32_t iMbY = pCurMb->iMbY;
507
508 bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0)&& (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc))};
509 bool bTopBsValid[2] = { (iMbY > 0), ((iMbY > 0)&& (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))};
510
511 int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc];
512 int32_t iTopFlag = bTopBsValid[pFilter->uiFilterIdc];
513
514 ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
515 ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
516
517 pDestCb = pFilter->pCsData[1];
518 pDestCr = pFilter->pCsData[2];
519 iCurQp = pCurMb->uiChromaQp;
520 * (uint32_t*)uiBSx4 = 0x03030303;
521
522 // chroma v
523 if (iLeftFlag) {
524 pFilter->uiChromaQP = (iCurQp + (pCurMb - 1)->uiChromaQp + 1) >> 1;
525 FilteringEdgeChromaIntraV (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSize, NULL);
526 }
527
528 pFilter->uiChromaQP = iCurQp;
529 GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
530 iBeta);
531 if (iAlpha | iBeta) {
532 TC0_TBL_LOOKUP (iTc, iIdexA, uiBSx4, 1);
533 pfDeblocking->pfChromaDeblockingLT4Hor (&pDestCb[2 << 1], &pDestCr[2 << 1], iLineSize, iAlpha, iBeta, iTc);
534 }
535
536 // chroma h
537 if (iTopFlag) {
538 pFilter->uiChromaQP = (iCurQp + (pCurMb - iMbStride)->uiChromaQp + 1) >> 1;
539 FilteringEdgeChromaIntraH (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSize, NULL);
540 }
541
542 pFilter->uiChromaQP = iCurQp;
543 if (iAlpha | iBeta) {
544 pfDeblocking->pfChromaDeblockingLT4Ver (&pDestCb[ (2 << 1)*iLineSize], &pDestCr[ (2 << 1)*iLineSize], iLineSize, iAlpha,
545 iBeta, iTc);
546 }
547 }
548
549 // merge h&v lookup table operation to save performance
DeblockingIntraMb(DeblockingFunc * pfDeblocking,SMB * pCurMb,SDeblockingFilter * pFilter)550 void DeblockingIntraMb (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter) {
551 FilteringEdgeLumaHV (pfDeblocking, pCurMb, pFilter);
552 FilteringEdgeChromaHV (pfDeblocking, pCurMb, pFilter);
553 }
554
#if defined(HAVE_NEON) && defined(SINGLE_REF_FRAME)
/*!
 * \brief  Boundary-strength calculation backed by DeblockingBSCalcEnc_neon.
 *
 * After the NEON routine has filled uiBS, the two MB-boundary edges are fixed
 * up here: a disabled boundary is cleared to 0, and a boundary whose
 * neighbouring macroblock is intra is forced to strength 4 at all positions.
 *
 * \param pFunc        not used by this function
 * \param pCurMb       current macroblock
 * \param uiBS         output boundary strengths [direction][edge][position]
 * \param uiCurMbType  not used by this function
 * \param iMbStride    distance from pCurMb to the macroblock above, in macroblocks
 * \param iLeftFlag    non-zero when the left MB boundary is to be filtered
 * \param iTopFlag     non-zero when the top MB boundary is to be filtered
 */
void DeblockingBSCalc_neon (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
                            int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) {
  DeblockingBSCalcEnc_neon (pCurMb->pNonZeroCount, pCurMb->sMv,
                            (iLeftFlag ? LEFT_MB_POS : 0) | (iTopFlag ? TOP_MB_POS : 0), iMbStride, uiBS);

  // left boundary
  if (!iLeftFlag) {
    * (uint32_t*)uiBS[0][0] = 0;
  } else if (IS_INTRA ((pCurMb - 1)->uiMbType)) {
    * (uint32_t*)uiBS[0][0] = 0x04040404;
  }

  // top boundary
  if (!iTopFlag) {
    * (uint32_t*)uiBS[1][0] = 0;
  } else if (IS_INTRA ((pCurMb - iMbStride)->uiMbType)) {
    * (uint32_t*)uiBS[1][0] = 0x04040404;
  }
}
#endif
576
#if defined(HAVE_NEON_AARCH64) && defined(SINGLE_REF_FRAME)
/*!
 * \brief  Boundary-strength calculation backed by DeblockingBSCalcEnc_AArch64_neon.
 *
 * After the NEON routine has filled uiBS, the two MB-boundary edges are fixed
 * up here: a disabled boundary is cleared to 0, and a boundary whose
 * neighbouring macroblock is intra is forced to strength 4 at all positions.
 *
 * \param pFunc        not used by this function
 * \param pCurMb       current macroblock
 * \param uiBS         output boundary strengths [direction][edge][position]
 * \param uiCurMbType  not used by this function
 * \param iMbStride    distance from pCurMb to the macroblock above, in macroblocks
 * \param iLeftFlag    non-zero when the left MB boundary is to be filtered
 * \param iTopFlag     non-zero when the top MB boundary is to be filtered
 */
void DeblockingBSCalc_AArch64_neon (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
                                    int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) {
  DeblockingBSCalcEnc_AArch64_neon (pCurMb->pNonZeroCount, pCurMb->sMv,
                                    (iLeftFlag ? LEFT_MB_POS : 0) | (iTopFlag ? TOP_MB_POS : 0), iMbStride, uiBS);

  // left boundary
  if (!iLeftFlag) {
    * (uint32_t*)uiBS[0][0] = 0;
  } else if (IS_INTRA ((pCurMb - 1)->uiMbType)) {
    * (uint32_t*)uiBS[0][0] = 0x04040404;
  }

  // top boundary
  if (!iTopFlag) {
    * (uint32_t*)uiBS[1][0] = 0;
  } else if (IS_INTRA ((pCurMb - iMbStride)->uiMbType)) {
    * (uint32_t*)uiBS[1][0] = 0x04040404;
  }
}
#endif
598
DeblockingBSCalc_c(SWelsFuncPtrList * pFunc,SMB * pCurMb,uint8_t uiBS[2][4][4],Mb_Type uiCurMbType,int32_t iMbStride,int32_t iLeftFlag,int32_t iTopFlag)599 void DeblockingBSCalc_c (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
600 int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) {
601 if (iLeftFlag) {
602 * (uint32_t*)uiBS[0][0] = IS_INTRA ((pCurMb - 1)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase (pCurMb,
603 pCurMb - 1, 0);
604 } else {
605 * (uint32_t*)uiBS[0][0] = 0;
606 }
607 if (iTopFlag) {
608 * (uint32_t*)uiBS[1][0] = IS_INTRA ((pCurMb - iMbStride)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase (
609 pCurMb, (pCurMb - iMbStride), 1);
610 } else {
611 * (uint32_t*)uiBS[1][0] = 0;
612 }
613 //SKIP MB_16x16 or others
614 if (uiCurMbType != MB_TYPE_SKIP) {
615 pFunc->pfSetNZCZero (pCurMb->pNonZeroCount); // set all none-zero nzc to 1; dbk can be opti!
616
617 if (uiCurMbType == MB_TYPE_16x16) {
618 DeblockingBSInsideMBAvsbase (pCurMb->pNonZeroCount, uiBS, 1);
619 } else {
620 DeblockingBSInsideMBNormal (pCurMb, uiBS, pCurMb->pNonZeroCount);
621 }
622 } else {
623 * (uint32_t*)uiBS[0][1] = * (uint32_t*)uiBS[0][2] = * (uint32_t*)uiBS[0][3] =
624 * (uint32_t*)uiBS[1][1] = * (uint32_t*)uiBS[1][2] = * (uint32_t*)uiBS[1][3] = 0;
625 }
626 }
627
DeblockingMbAvcbase(SWelsFuncPtrList * pFunc,SMB * pCurMb,SDeblockingFilter * pFilter)628 void DeblockingMbAvcbase (SWelsFuncPtrList* pFunc, SMB* pCurMb, SDeblockingFilter* pFilter) {
629 uint8_t uiBS[2][4][4] = {{{ 0 }}};
630
631 Mb_Type uiCurMbType = pCurMb->uiMbType;
632 int32_t iMbStride = pFilter->iMbStride;
633
634 int32_t iMbX = pCurMb->iMbX;
635 int32_t iMbY = pCurMb->iMbY;
636
637 bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0)&& (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc))};
638 bool bTopBsValid[2] = { (iMbY > 0), ((iMbY > 0)&& (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))};
639
640 int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc];
641 int32_t iTopFlag = bTopBsValid[pFilter->uiFilterIdc];
642
643 switch (uiCurMbType) {
644 case MB_TYPE_INTRA4x4:
645 case MB_TYPE_INTRA16x16:
646 case MB_TYPE_INTRA_PCM:
647 DeblockingIntraMb (&pFunc->pfDeblocking, pCurMb, pFilter);
648 break;
649 default:
650 pFunc->pfDeblocking.pfDeblockingBSCalc (pFunc, pCurMb, uiBS, uiCurMbType, iMbStride, iLeftFlag, iTopFlag);
651 DeblockingInterMb (&pFunc->pfDeblocking, pCurMb, pFilter, uiBS);
652 break;
653 }
654 }
655
DeblockingFilterFrameAvcbase(SDqLayer * pCurDq,SWelsFuncPtrList * pFunc)656 void DeblockingFilterFrameAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc) {
657 int32_t i, j;
658 const int32_t kiMbWidth = pCurDq->iMbWidth;
659 const int32_t kiMbHeight = pCurDq->iMbHeight;
660 SMB* pCurrentMbBlock = pCurDq->sMbDataP;
661 SSliceHeaderExt* sSliceHeaderExt = &pCurDq->ppSliceInLayer[0]->sSliceHeaderExt;
662 SDeblockingFilter pFilter;
663
664 /* Step1: parameters set */
665 if (sSliceHeaderExt->sSliceHeader.uiDisableDeblockingFilterIdc == 1)
666 return;
667
668 pFilter.uiFilterIdc = (sSliceHeaderExt->sSliceHeader.uiDisableDeblockingFilterIdc != 0);
669
670 pFilter.iCsStride[0] = pCurDq->pDecPic->iLineSize[0];
671 pFilter.iCsStride[1] = pCurDq->pDecPic->iLineSize[1];
672 pFilter.iCsStride[2] = pCurDq->pDecPic->iLineSize[2];
673
674 pFilter.iSliceAlphaC0Offset = sSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset;
675 pFilter.iSliceBetaOffset = sSliceHeaderExt->sSliceHeader.iSliceBetaOffset;
676
677 pFilter.iMbStride = kiMbWidth;
678
679 for (j = 0; j < kiMbHeight; ++j) {
680 pFilter.pCsData[0] = pCurDq->pDecPic->pData[0] + ((j * pFilter.iCsStride[0]) << 4);
681 pFilter.pCsData[1] = pCurDq->pDecPic->pData[1] + ((j * pFilter.iCsStride[1]) << 3);
682 pFilter.pCsData[2] = pCurDq->pDecPic->pData[2] + ((j * pFilter.iCsStride[2]) << 3);
683 for (i = 0; i < kiMbWidth; i++) {
684 DeblockingMbAvcbase (pFunc, pCurrentMbBlock, &pFilter);
685 ++pCurrentMbBlock;
686 pFilter.pCsData[0] += MB_WIDTH_LUMA;
687 pFilter.pCsData[1] += MB_WIDTH_CHROMA;
688 pFilter.pCsData[2] += MB_WIDTH_CHROMA;
689 }
690 }
691 }
692
DeblockingFilterSliceAvcbase(SDqLayer * pCurDq,SWelsFuncPtrList * pFunc,SSlice * pSlice)693 void DeblockingFilterSliceAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice) {
694 SMB* pMbList = pCurDq->sMbDataP;
695 SSliceHeaderExt* sSliceHeaderExt = &pSlice->sSliceHeaderExt;
696 SMB* pCurrentMbBlock;
697
698 const int32_t kiMbWidth = pCurDq->iMbWidth;
699 const int32_t kiMbHeight = pCurDq->iMbHeight;
700 const int32_t kiTotalNumMb = kiMbWidth * kiMbHeight;
701 int32_t iCurMbIdx = 0, iNextMbIdx = 0, iNumMbFiltered = 0;
702
703 /* Step1: parameters set */
704 if (sSliceHeaderExt->sSliceHeader.uiDisableDeblockingFilterIdc == 1)
705 return;
706
707 SDeblockingFilter pFilter;
708
709 pFilter.uiFilterIdc = (sSliceHeaderExt->sSliceHeader.uiDisableDeblockingFilterIdc != 0);
710 pFilter.iCsStride[0] = pCurDq->pDecPic->iLineSize[0];
711 pFilter.iCsStride[1] = pCurDq->pDecPic->iLineSize[1];
712 pFilter.iCsStride[2] = pCurDq->pDecPic->iLineSize[2];
713 pFilter.iSliceAlphaC0Offset = sSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset;
714 pFilter.iSliceBetaOffset = sSliceHeaderExt->sSliceHeader.iSliceBetaOffset;
715 pFilter.iMbStride = kiMbWidth;
716
717 iNextMbIdx = sSliceHeaderExt->sSliceHeader.iFirstMbInSlice;
718
719 for (; ;) {
720 iCurMbIdx = iNextMbIdx;
721 pCurrentMbBlock = &pMbList[ iCurMbIdx ];
722
723 pFilter.pCsData[0] = pCurDq->pDecPic->pData[0] + ((pCurrentMbBlock->iMbX + pCurrentMbBlock->iMbY * pFilter.iCsStride[0])
724 << 4);
725 pFilter.pCsData[1] = pCurDq->pDecPic->pData[1] + ((pCurrentMbBlock->iMbX + pCurrentMbBlock->iMbY * pFilter.iCsStride[1])
726 << 3);
727 pFilter.pCsData[2] = pCurDq->pDecPic->pData[2] + ((pCurrentMbBlock->iMbX + pCurrentMbBlock->iMbY * pFilter.iCsStride[2])
728 << 3);
729
730 DeblockingMbAvcbase (pFunc, pCurrentMbBlock, &pFilter);
731
732 ++iNumMbFiltered;
733 iNextMbIdx = WelsGetNextMbOfSlice (pCurDq, iCurMbIdx);
734 //whether all of MB in current slice filtered or not
735 if (iNextMbIdx == -1 || iNextMbIdx >= kiTotalNumMb || iNumMbFiltered >= kiTotalNumMb) {
736 break;
737 }
738 }
739 }
740
DeblockingFilterSliceAvcbaseNull(SDqLayer * pCurDq,SWelsFuncPtrList * pFunc,SSlice * pSlice)741 void DeblockingFilterSliceAvcbaseNull (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice) {
742 }
743
PerformDeblockingFilter(sWelsEncCtx * pEnc)744 void PerformDeblockingFilter (sWelsEncCtx* pEnc) {
745 SDqLayer* pCurLayer = pEnc->pCurDqLayer;
746 SSlice* pSlice = NULL;
747
748 if (pCurLayer->iLoopFilterDisableIdc == 0) {
749 DeblockingFilterFrameAvcbase (pCurLayer, pEnc->pFuncList);
750 } else if (pCurLayer->iLoopFilterDisableIdc == 2) {
751 int32_t iSliceCount = 0;
752 int32_t iSliceIdx = 0;
753
754 iSliceCount = GetCurrentSliceNum (pCurLayer);
755 do {
756 pSlice = pCurLayer->ppSliceInLayer[iSliceIdx];
757 assert (NULL != pSlice);
758 DeblockingFilterSliceAvcbase (pCurLayer, pEnc->pFuncList, pSlice);
759 ++ iSliceIdx;
760 } while (iSliceIdx < iSliceCount);
761 }
762 }
763
WelsBlockFuncInit(PSetNoneZeroCountZeroFunc * pfSetNZCZero,int32_t iCpu)764 void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero, int32_t iCpu) {
765 *pfSetNZCZero = WelsNonZeroCount_c;
766 #ifdef HAVE_NEON
767 if (iCpu & WELS_CPU_NEON) {
768 *pfSetNZCZero = WelsNonZeroCount_neon;
769 }
770 #endif
771 #ifdef HAVE_NEON_AARCH64
772 if (iCpu & WELS_CPU_NEON) {
773 *pfSetNZCZero = WelsNonZeroCount_AArch64_neon;
774 }
775 #endif
776 #if defined(X86_ASM)
777 if (iCpu & WELS_CPU_SSE2) {
778 *pfSetNZCZero = WelsNonZeroCount_sse2;
779 }
780 #endif
781 #if defined(HAVE_MMI)
782 if (iCpu & WELS_CPU_MMI) {
783 *pfSetNZCZero = WelsNonZeroCount_mmi;
784 }
785 #endif
786 #if defined(HAVE_MSA)
787 if (iCpu & WELS_CPU_MSA) {
788 *pfSetNZCZero = WelsNonZeroCount_msa;
789 }
790 #endif
791 }
792
DeblockingInit(DeblockingFunc * pFunc,int32_t iCpu)793 void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) {
794 pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_c;
795 pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_c;
796 pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_c;
797 pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_c;
798
799 pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_c;
800 pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_c;
801 pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_c;
802 pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_c;
803
804 pFunc->pfDeblockingBSCalc = DeblockingBSCalc_c;
805
806
807 #ifdef X86_ASM
808 if (iCpu & WELS_CPU_SSSE3) {
809 pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_ssse3;
810 pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_ssse3;
811 pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_ssse3;
812 pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_ssse3;
813 pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_ssse3;
814 pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_ssse3;
815 pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_ssse3;
816 pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_ssse3;
817 }
818 #endif
819
820 #if defined(HAVE_NEON)
821 if (iCpu & WELS_CPU_NEON) {
822 pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_neon;
823 pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_neon;
824 pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_neon;
825 pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_neon;
826
827 pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_neon;
828 pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_neon;
829 pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_neon;
830 pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_neon;
831
832 #if defined(SINGLE_REF_FRAME)
833 pFunc->pfDeblockingBSCalc = DeblockingBSCalc_neon;
834 #endif
835 }
836 #endif
837
838 #if defined(HAVE_NEON_AARCH64)
839 if (iCpu & WELS_CPU_NEON) {
840 pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_AArch64_neon;
841 pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_AArch64_neon;
842 pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_AArch64_neon;
843 pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_AArch64_neon;
844
845 pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_AArch64_neon;
846 pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_AArch64_neon;
847 pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_AArch64_neon;
848 pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_AArch64_neon;
849
850 #if defined(SINGLE_REF_FRAME)
851 pFunc->pfDeblockingBSCalc = DeblockingBSCalc_AArch64_neon;
852 #endif
853 }
854 #endif
855
856 #if defined(HAVE_MMI)
857 if (iCpu & WELS_CPU_MMI) {
858 pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_mmi;
859 pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_mmi;
860 pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_mmi;
861 pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_mmi;
862 pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_mmi;
863 pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_mmi;
864 pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_mmi;
865 pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_mmi;
866 }
867 #endif//HAVE_MMI
868
869 #if defined(HAVE_MSA)
870 if (iCpu & WELS_CPU_MSA) {
871 pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_msa;
872 pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_msa;
873 pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_msa;
874 pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_msa;
875 pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_msa;
876 pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_msa;
877 pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_msa;
878 pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_msa;
879 }
880 #endif//HAVE_MSA
881
882 #if defined(HAVE_LSX)
883 if (iCpu & WELS_CPU_LSX) {
884 pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_lsx;
885 pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_lsx;
886 pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_lsx;
887 pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_lsx;
888 pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_lsx;
889 pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_lsx;
890 pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_lsx;
891 }
892 #endif//HAVE_LSX
893 }
894
895
896 } // namespace WelsEnc
897
898