1 /*!
2 * \copy
3 * Copyright (c) 2010-2013, Cisco Systems
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 *
32 * \file deblocking.c
33 *
34 * \brief Interfaces introduced in frame deblocking filtering
35 *
36 * \date 08/02/2010
37 *
38 *************************************************************************************
39 */
40
41 #include "deblocking.h"
42 #include "deblocking_common.h"
43 #include "cpu_core.h"
44
45 namespace WelsDec {
46
// Sentinel filter index. NOTE(review): not referenced in the visible part of this file.
#define NO_SUPPORTED_FILTER_IDX (-1)
// Bit positions and matching masks of the left/top neighbour flags;
// DeblockingAvailableNoInterlayer() builds its return value with the *_BIT shifts.
#define LEFT_FLAG_BIT 0
#define TOP_FLAG_BIT 1
#define LEFT_FLAG_MASK 0x01
#define TOP_FLAG_MASK 0x02

// Selects which variant of SMB_EDGE_MV / IN_SMB_EDGE_MV is compiled (see the #if defined tests below).
#define SAME_MB_DIFF_REFIDX
// Function-like accessors that share the names of the lookup tables. Each one adds 12 to
// the index, so the 12 padding entries at the front of each 76-entry table absorb negative
// indices and the 12 at the back absorb indices above 51 (usable range: -12..63).
#define g_kuiAlphaTable(x) g_kuiAlphaTable[(x)+12]
#define g_kiBetaTable(x) g_kiBetaTable[(x)+12]
#define g_kiTc0Table(x) g_kiTc0Table[(x)+12]
57
// MB_BS_MV: motion test across a macroblock edge for a single reference list.
// Evaluates to true when pRefPic0 and pRefPic1 differ, or when either motion-vector
// component ([0] or [1]) of block iIndex in MB iMbXy and of block iNeighIndex in MB iMbBn
// differs by 4 or more (WELS_ABS of the difference >= 4); false otherwise.
// iMotionVector is indexed as [macroblock][4x4 block][component].
// Arguments are substituted textually and may be evaluated more than once.
#define MB_BS_MV(pRefPic0, pRefPic1, iMotionVector, iMbXy, iMbBn, iIndex, iNeighIndex) \
(\
( pRefPic0 != pRefPic1) ||\
( WELS_ABS( iMotionVector[iMbXy][iIndex][0] - iMotionVector[iMbBn][iNeighIndex][0] ) >= 4 ) ||\
( WELS_ABS( iMotionVector[iMbXy][iIndex][1] - iMotionVector[iMbBn][iNeighIndex][1] ) >= 4 )\
)
64
// ON_MB_BS_MV_DIFF: true when iMV_A[iMbXy][iIndex] and iMV_B[iMbBn][iNeighIndex] differ
// by 4 or more in either component. iMV_A and iMV_B may be different motion-vector
// arrays; ON_MB_BS passes the two lists in both straight and crossed pairings.
#define ON_MB_BS_MV_DIFF(iMV_A, iMV_B, iMbXy, iMbBn, iIndex, iNeighIndex) \
(\
(( WELS_ABS( iMV_A[iMbXy][iIndex][0] - iMV_B[iMbBn][iNeighIndex][0] ) >= 4 ) || \
( WELS_ABS( iMV_A[iMbXy][iIndex][1] - iMV_B[iMbBn][iNeighIndex][1] ) >= 4 ))\
)
70
// IN_MB_BS_MV_DIFF: same test as ON_MB_BS_MV_DIFF, but both blocks (iIndex and
// iNeighIndex) belong to the same macroblock iMbXy. True when iMV_A[iMbXy][iIndex] and
// iMV_B[iMbXy][iNeighIndex] differ by 4 or more in either component.
#define IN_MB_BS_MV_DIFF(iMV_A, iMV_B, iMbXy, iIndex, iNeighIndex) \
(\
(( WELS_ABS( iMV_A[iMbXy][iIndex][0] - iMV_B[iMbXy][iNeighIndex][0] ) >= 4 ) || \
( WELS_ABS( iMV_A[iMbXy][iIndex][1] - iMV_B[iMbXy][iNeighIndex][1] ) >= 4 )) \
)
76
//On MB Boundary strength
//Apply for B_SLICE
// p is block iIndex of MB iMbXy, q is block iNeighIndex of MB iMbBn; mv0 / mv1 are the
// motion-vector arrays that belong to ref_*0 / ref_*1.
//  - ref_p0 != ref_p1 and ref_p0 == ref_q0: compare mv0 with mv0 and mv1 with mv1.
//  - ref_p0 != ref_p1 and ref_p0 != ref_q0: compare crosswise (mv0 with mv1, mv1 with mv0).
//  - ref_p0 == ref_p1: true only when both the straight and the crosswise comparison
//    report a difference.
// The expansion never references ref_q1 and does not check that p and q use the same
// pair of pictures; the callers in this file perform that check before expanding it.
#define ON_MB_BS(ref_p0, ref_q0, ref_p1, ref_q1, mv0, mv1, iMbXy, iMbBn, iIndex, iNeighIndex) \
(\
(ref_p0 != ref_p1) ? \
((ref_p0 == ref_q0) ? \
(ON_MB_BS_MV_DIFF (mv0, mv0, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv1, iMbXy, iMbBn, iIndex, iNeighIndex)) : \
(ON_MB_BS_MV_DIFF (mv0, mv1, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv0, iMbXy, iMbBn, iIndex, iNeighIndex))) : \
((ON_MB_BS_MV_DIFF (mv0, mv0, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv1, iMbXy, iMbBn, iIndex, iNeighIndex)) && \
(ON_MB_BS_MV_DIFF (mv0, mv1, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv0, iMbXy, iMbBn, iIndex, iNeighIndex))) \
)
88
// SMB_EDGE_MV: motion test for an edge inside one macroblock (single reference list).
// iMotionVector is indexed [4x4 block][component]; pRefPics is indexed [4x4 block].
// With SAME_MB_DIFF_REFIDX defined, the result is true when the two blocks' pRefPics
// entries differ or when either MV component differs by 4 or more
// ("WELS_ABS(d) & ~3" is non-zero exactly when a non-negative absolute difference is >= 4).
// The #else variant ignores its first argument and tests the motion vectors only.
#if defined(SAME_MB_DIFF_REFIDX)
#define SMB_EDGE_MV(pRefPics, iMotionVector, iIndex, iNeighIndex) \
(\
( pRefPics[iIndex] != pRefPics[iNeighIndex] )||(\
( WELS_ABS( iMotionVector[iIndex][0] - iMotionVector[iNeighIndex][0] ) &(~3) ) |\
( WELS_ABS( iMotionVector[iIndex][1] - iMotionVector[iNeighIndex][1] ) &(~3) ))\
)
#else
#define SMB_EDGE_MV(iRefIndex, iMotionVector, iIndex, iNeighIndex) \
(\
!!(( WELS_ABS( iMotionVector[iIndex][0] - iMotionVector[iNeighIndex][0] ) &(~3) ) |( WELS_ABS( iMotionVector[iIndex][1] - iMotionVector[iNeighIndex][1] ) &(~3) ))\
)
#endif
102
// IN_SMB_EDGE_MV: B-slice counterpart of SMB_EDGE_MV for an edge inside macroblock iMbXy.
// refs is indexed [list][4x4 block]; mv is indexed [list][macroblock][4x4 block][component].
// When the two blocks do not use the same pair of reference pictures (in either list
// order) the result is 1. Otherwise the motion vectors are compared:
//  - the block's two pictures differ and its LIST_0 picture equals the neighbour's LIST_0
//    picture: compare LIST_0 with LIST_0 and LIST_1 with LIST_1;
//  - the block's two pictures differ otherwise: compare crosswise;
//  - the block's two pictures are equal: true only when both the straight and the
//    crosswise comparison report a difference.
// Both preprocessor branches expand to the same test; the #else branch only applies "!!"
// to the first condition.
#if defined(SAME_MB_DIFF_REFIDX)
#define IN_SMB_EDGE_MV(refs, mv, iMbXy, iIndex, iNeigborIndex) \
(\
(((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_1][iNeigborIndex])) || \
((refs[LIST_0][iIndex] == refs[LIST_1][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_0][iNeigborIndex]))) ? \
((refs[LIST_0][iIndex] != refs[LIST_1][iIndex]) ? \
((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) ? \
(IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) : \
(IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex))) : \
((IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) && \
(IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex)))) : 1 \
)
#else
#define IN_SMB_EDGE_MV(refs, mv, iMbXy, iIndex, iNeigborIndex) \
(\
!!(((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_1][iNeigborIndex])) || \
((refs[LIST_0][iIndex] == refs[LIST_1][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_0][iNeigborIndex]))) ? \
((refs[LIST_0][iIndex] != refs[LIST_1][iIndex]) ? \
((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) ? \
(IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) : \
(IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex))) : \
((IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) && \
(IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex)))) : 1 \
)
#endif
128
// BS_EDGE: boundary strength of an edge inside a macroblock, single reference list.
// bsx1 is the OR of the two blocks' non-zero-coefficient values. When bsx1 is zero the
// result is the 0/1 outcome of SMB_EDGE_MV; when it is non-zero, (bsx1 | motion test) is
// shifted left by one.
// bsx1 is expanded twice and without added parentheses (note "!!bsx1"), so pass a
// parenthesised, side-effect-free expression.
#define BS_EDGE(bsx1, pRefPics, iMotionVector, iIndex, iNeighIndex) \
( (bsx1|SMB_EDGE_MV(pRefPics, iMotionVector, iIndex, iNeighIndex))<<((uint8_t)(!!bsx1)))
131
//Inside MB Boundary strength
//Apply for B_SLICE
// Same shape as BS_EDGE, but the motion test is IN_SMB_EDGE_MV (two reference lists).
// When bsx1 is zero the result is the 0/1 outcome of the motion test; otherwise
// (bsx1 | motion test) is shifted left by one. bsx1 is expanded twice and without added
// parentheses, so pass a parenthesised, side-effect-free expression.
#define IN_BS_EDGE(bsx1, refs, mv, iMbXy, iIndex, iNeigborIndex) \
( (bsx1|IN_SMB_EDGE_MV(refs, mv, iMbXy, iIndex, iNeigborIndex))<<((uint8_t)(!!bsx1)))
136
// GET_ALPHA_BETA_FROM_QP: looks up the alpha and beta thresholds for a QP.
// Stores iQp + iAlphaOffset into iIndex (left set for the caller), reads iAlpha through
// the g_kuiAlphaTable(x) accessor with that index, and reads iBeta through
// g_kiBetaTable(x) with iQp + iBetaOffset.
// Expands to a brace-enclosed statement block, not an expression. Nothing here clamps
// the sums; the accessors cover indices -12..63 only.
#define GET_ALPHA_BETA_FROM_QP(iQp, iAlphaOffset, iBetaOffset, iIndex, iAlpha, iBeta) \
{\
iIndex = (iQp + iAlphaOffset);\
iAlpha = g_kuiAlphaTable(iIndex);\
iBeta = g_kiBetaTable((iQp + iBetaOffset));\
}
143
// Alpha threshold table: 12 leading padding entries (0), 52 entries for indices 0..51,
// then 12 trailing padding entries repeating the last value (255). Read through the
// g_kuiAlphaTable(x) accessor macro, which adds the offset of 12 to the index.
static const uint8_t g_kuiAlphaTable[52 + 24] = { //this table refers to Table 8-16 in H.264/AVC standard
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
80, 90, 101, 113, 127, 144, 162, 182, 203, 226,
255, 255
, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
};
154
// Beta threshold table: 12 leading padding entries (0), 52 entries for indices 0..51,
// then 12 trailing padding entries repeating the last value (18). Read through the
// g_kiBetaTable(x) accessor macro, which adds the offset of 12 to the index.
static const int8_t g_kiBetaTable[52 + 24] = { //this table refers to Table 8-16 in H.264/AVC standard
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
18, 18
, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18
};
165
// tc0 table: one 4-entry row per index; TC0_TBL_LOOKUP selects the column with
// (boundary strength & 3), and column 0 holds -1 in every row. Layout: 12 leading padding
// rows, 52 rows for indices 0..51, then 12 trailing padding rows repeating the last row.
// Read through the g_kiTc0Table(x) accessor macro, which adds the offset of 12.
static const int8_t g_kiTc0Table[52 + 24][4] = { //this table refers Table 8-17 in H.264/AVC standard
{ -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
{ -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
{ -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
{ -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
{ -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 1 },
{ -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 1, 1 }, { -1, 0, 1, 1 }, { -1, 1, 1, 1 },
{ -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 },
{ -1, 1, 1, 2 }, { -1, 1, 2, 3 }, { -1, 1, 2, 3 }, { -1, 2, 2, 3 }, { -1, 2, 2, 4 }, { -1, 2, 3, 4 },
{ -1, 2, 3, 4 }, { -1, 3, 3, 5 }, { -1, 3, 4, 6 }, { -1, 3, 4, 6 }, { -1, 4, 5, 7 }, { -1, 4, 5, 8 },
{ -1, 4, 6, 9 }, { -1, 5, 7, 10 }, { -1, 6, 8, 11 }, { -1, 6, 8, 13 }, { -1, 7, 10, 14 }, { -1, 8, 11, 16 },
{ -1, 9, 12, 18 }, { -1, 10, 13, 20 }, { -1, 11, 15, 23 }, { -1, 13, 17, 25 }
, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }
, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }
};
181
// 4x4 block indices on either side of a macroblock edge, one row per iEdge value.
// Entries [0..3] are the current macroblock's blocks along the edge and entries [4..7]
// are the facing blocks of the neighbouring macroblock (see the pBIdx / pBnIdx setup in
// the marginal-MB functions below).
// Row 0 pairs blocks 0,4,8,12 with 3,7,11,15; row 1 pairs blocks 0..3 with 12..15.
// NOTE(review): with raster-ordered 4x4 indices this makes row 0 the left edge and
// row 1 the top edge -- confirm against the callers.
static const uint8_t g_kuiTableBIdx[2][8] = {
{
0, 4, 8, 12,
3, 7, 11, 15
},

{
0, 1, 2, 3,
12, 13, 14, 15
},
};
193
// 4x4 block indices grouped by 8x8 block for edges where an 8x8 transform is involved,
// one row per iEdge value. Entries [0..7] describe the current macroblock and entries
// [8..15] the neighbouring one; each side is consumed in two groups of four indices
// (the marginal-MB functions below advance their pointers by 4 per 8x8 block).
// The trailing comments sketch the 4x4 block numbering inside a macroblock.
static const uint8_t g_kuiTableB8x8Idx[2][16] = {
{
0, 1, 4, 5, 8, 9, 12, 13, // 0 1 | 2 3
2, 3, 6, 7, 10, 11, 14, 15 // 4 5 | 6 7
}, // ------------
// 8 9 | 10 11
{
// 12 13 | 14 15
0, 1, 4, 5, 2, 3, 6, 7,
8, 9, 12, 13, 10, 11, 14, 15
},
};
//fix Bugzilla 1486223
// TC0_TBL_LOOKUP: fills tc[0..3] from the tc0 table row selected by iIndexA (through the
// g_kiTc0Table(x) accessor), one entry per boundary-strength byte pBS[0..3], and adds
// bChroma to each. Every pBS value is masked with 3 so the column index stays inside the
// 4-entry row. Expands to a brace-enclosed statement block, not an expression.
#define TC0_TBL_LOOKUP(tc, iIndexA, pBS, bChroma) \
{\
tc[0] = g_kiTc0Table(iIndexA)[pBS[0] & 3] + bChroma;\
tc[1] = g_kiTc0Table(iIndexA)[pBS[1] & 3] + bChroma;\
tc[2] = g_kiTc0Table(iIndexA)[pBS[2] & 3] + bChroma;\
tc[3] = g_kiTc0Table(iIndexA)[pBS[3] & 3] + bChroma;\
}
214
/*!
 * \brief   Fill the boundary strengths of the inner 4x4 edges of one macroblock using only
 *          its non-zero-coefficient table.
 *
 * \param   pNnzTab         16 per-4x4-block values, four per row; read both byte-wise and as
 *                          four 32-bit words
 * \param   nBS             output; nBS[0][edge][row] for edges 1..3 between horizontally
 *                          adjacent blocks, nBS[1][edge][0..3] for edges 1..3 between
 *                          vertically adjacent rows. Edge 0 of either direction is not written.
 * \param   iLShiftFactor   left shift applied to every OR-ed pair
 *
 * \note    The nBS[1] rows are stored as one 32-bit word each, so the shift is applied to
 *          the whole word rather than to each byte separately.
 */
void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) {
  // Load all four rows as words before anything is written.
  const uint32_t kuiRowWord[4] = {
    * (uint32_t*) (pNnzTab + 0),
    * (uint32_t*) (pNnzTab + 4),
    * (uint32_t*) (pNnzTab + 8),
    * (uint32_t*) (pNnzTab + 12)
  };

  for (int32_t iRow = 0; iRow < 4; ++iRow) {
    const int8_t* pRow = pNnzTab + (iRow << 2);

    // Edges between horizontally adjacent blocks of this row.
    for (int32_t iEdge = 1; iEdge < 4; ++iEdge) {
      nBS[0][iEdge][iRow] = (pRow[iEdge - 1] | pRow[iEdge]) << iLShiftFactor;
    }

    // Edge between this row and the one above it, four positions at once.
    if (iRow > 0) {
      * (uint32_t*)nBS[1][iRow] = (kuiRowWord[iRow - 1] | kuiRowWord[iRow]) << iLShiftFactor;
    }
  }
}
242
DeblockingBSInsideMBAvsbase8x8(int8_t * pNnzTab,uint8_t nBS[2][4][4],int32_t iLShiftFactor)243 void inline DeblockingBSInsideMBAvsbase8x8 (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) {
244 int8_t i8x8NnzTab[4];
245 for (int32_t i = 0; i < 4; i++) {
246 int32_t iBlkIdx = i << 2;
247 i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
248 pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
249 }
250
251 //vertical
252 nBS[0][2][0] = nBS[0][2][1] = (i8x8NnzTab[0] | i8x8NnzTab[1]) << iLShiftFactor;
253 nBS[0][2][2] = nBS[0][2][3] = (i8x8NnzTab[2] | i8x8NnzTab[3]) << iLShiftFactor;
254 //horizontal
255 nBS[1][2][0] = nBS[1][2][1] = (i8x8NnzTab[0] | i8x8NnzTab[2]) << iLShiftFactor;
256 nBS[1][2][2] = nBS[1][2][3] = (i8x8NnzTab[1] | i8x8NnzTab[3]) << iLShiftFactor;
257 }
258
DeblockingBSInsideMBNormal(PDeblockingFilter pFilter,PDqLayer pCurDqLayer,uint8_t nBS[2][4][4],int8_t * pNnzTab,int32_t iMbXy)259 void static inline DeblockingBSInsideMBNormal (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, uint8_t nBS[2][4][4],
260 int8_t* pNnzTab,
261 int32_t iMbXy) {
262 uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
263 int8_t* iRefIdx = pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy];
264 void* iRefs[MB_BLOCK4x4_NUM];
265 int i;
266 ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
267
268 int8_t i8x8NnzTab[4];
269
270 /* Look up each reference picture based on indices */
271 for (i = 0; i < MB_BLOCK4x4_NUM; i++) {
272 if (iRefIdx[i] > REF_NOT_IN_LIST)
273 iRefs[i] = pFilter->pRefPics[LIST_0][iRefIdx[i]];
274 else
275 iRefs[i] = NULL;
276 }
277
278 if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
279 for (int32_t i = 0; i < 4; i++) {
280 int32_t iBlkIdx = i << 2;
281 i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
282 pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
283 }
284 //vertical
285 nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
286 g_kuiMbCountScan4Idx[1 << 2], g_kuiMbCountScan4Idx[0]);
287 nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
288 g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[2 << 2]);
289
290 //horizontal
291 nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
292 g_kuiMbCountScan4Idx[2 << 2], g_kuiMbCountScan4Idx[0]);
293 nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
294 g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[1 << 2]);
295 } else {
296 uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
297 uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
298 uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
299 uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
300
301 for (int i = 0; i < 3; i++)
302 uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
303 nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 1, 0);
304 nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 2, 1);
305 nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 3, 2);
306
307 for (int i = 0; i < 3; i++)
308 uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
309 nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 5, 4);
310 nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 6, 5);
311 nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 7, 6);
312
313 for (int i = 0; i < 3; i++)
314 uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
315 nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 9, 8);
316 nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 10, 9);
317 nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 11, 10);
318
319 for (int i = 0; i < 3; i++)
320 uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
321 nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 13, 12);
322 nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 14, 13);
323 nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 15, 14);
324
325 // horizontal
326 * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
327 nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 4, 0);
328 nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 5, 1);
329 nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 6, 2);
330 nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 7, 3);
331
332 * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
333 nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 8, 4);
334 nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 9, 5);
335 nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 10, 6);
336 nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 11, 7);
337
338 * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
339 nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 12, 8);
340 nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 13, 9);
341 nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 14, 10);
342 nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 15, 11);
343 }
344 }
345
DeblockingBSliceBSInsideMBNormal(PDeblockingFilter pFilter,PDqLayer pCurDqLayer,uint8_t nBS[2][4][4],int8_t * pNnzTab,int32_t iMbXy)346 void static inline DeblockingBSliceBSInsideMBNormal (PDeblockingFilter pFilter, PDqLayer pCurDqLayer,
347 uint8_t nBS[2][4][4], int8_t* pNnzTab,
348 int32_t iMbXy) {
349 uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
350 void* iRefs[LIST_A][MB_BLOCK4x4_NUM];
351
352 ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
353 int8_t i8x8NnzTab[4];
354 int l;
355
356 for (l = 0; l < LIST_A; l++) {
357 int8_t* iRefIdx = pCurDqLayer->pDec->pRefIndex[l][iMbXy];
358 int i;
359 /* Look up each reference picture based on indices */
360 for (i = 0; i < MB_BLOCK4x4_NUM; i++) {
361 if (iRefIdx[i] > REF_NOT_IN_LIST)
362 iRefs[l][i] = pFilter->pRefPics[l][iRefIdx[i]];
363 else
364 iRefs[l][i] = NULL;
365 }
366 }
367
368 if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
369 for (int32_t i = 0; i < 4; i++) {
370 int32_t iBlkIdx = i << 2;
371 i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
372 pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
373 }
374 //vertical
375 int8_t iIndex = g_kuiMbCountScan4Idx[1 << 2];
376 int8_t iNeigborIndex = g_kuiMbCountScan4Idx[0];
377 nBS[0][2][0] = nBS[0][2][1] = IN_BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs, pCurDqLayer->pDec->pMv, iMbXy,
378 iIndex, iNeigborIndex);
379 iIndex = g_kuiMbCountScan4Idx[3 << 2];
380 iNeigborIndex = g_kuiMbCountScan4Idx[2 << 2];
381 nBS[0][2][2] = nBS[0][2][3] = IN_BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv, iMbXy,
382 iIndex, iNeigborIndex);
383
384 //horizontal
385 iIndex = g_kuiMbCountScan4Idx[2 << 2];
386 iNeigborIndex = g_kuiMbCountScan4Idx[0];
387 nBS[1][2][0] = nBS[1][2][1] = IN_BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs, pCurDqLayer->pDec->pMv, iMbXy,
388 iIndex, iNeigborIndex);
389
390 iIndex = g_kuiMbCountScan4Idx[3 << 2];
391 iNeigborIndex = g_kuiMbCountScan4Idx[1 << 2];
392 nBS[1][2][2] = nBS[1][2][3] = IN_BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv, iMbXy,
393 iIndex, iNeigborIndex);
394 } else {
395 uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
396 uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
397 uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
398 uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
399
400 for (int i = 0; i < 3; i++)
401 uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
402 nBS[0][1][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 1, 0);
403 nBS[0][2][0] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 2, 1);
404 nBS[0][3][0] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 3, 2);
405
406 for (int i = 0; i < 3; i++)
407 uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
408 nBS[0][1][1] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 5, 4);
409 nBS[0][2][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 6, 5);
410 nBS[0][3][1] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 7, 6);
411
412 for (int i = 0; i < 3; i++)
413 uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
414 nBS[0][1][2] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 9, 8);
415 nBS[0][2][2] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 10, 9);
416 nBS[0][3][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 11, 10);
417
418 for (int i = 0; i < 3; i++)
419 uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
420 nBS[0][1][3] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 13, 12);
421 nBS[0][2][3] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 14, 13);
422 nBS[0][3][3] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 15, 14);
423
424 // horizontal
425 * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
426 nBS[1][1][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 4, 0);
427 nBS[1][1][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 5, 1);
428 nBS[1][1][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 6, 2);
429 nBS[1][1][3] = IN_BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 7, 3);
430
431 * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
432 nBS[1][2][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 8, 4);
433 nBS[1][2][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 9, 5);
434 nBS[1][2][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 10, 6);
435 nBS[1][2][3] = IN_BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 11, 7);
436
437 * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
438 nBS[1][3][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 12, 8);
439 nBS[1][3][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 13, 9);
440 nBS[1][3][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 14, 10);
441 nBS[1][3][3] = IN_BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 15, 11);
442 for (int ii = 0; ii < 2; ii++)
443 for (int jj = 1; jj < 4; jj++)
444 for (int kk = 0; kk < 4; kk++)
445 if (nBS[ii][jj][kk] > 1)
446 nBS[ii][jj][kk] = nBS[ii][jj][kk];
447 }
448 }
449
450
DeblockingBsMarginalMBAvcbase(PDeblockingFilter pFilter,PDqLayer pCurDqLayer,int32_t iEdge,int32_t iNeighMb,int32_t iMbXy)451 uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge,
452 int32_t iNeighMb, int32_t iMbXy) {
453 int32_t i, j;
454 uint32_t uiBSx4;
455 uint8_t* pBS = (uint8_t*) (&uiBSx4);
456 const uint8_t* pBIdx = &g_kuiTableBIdx[iEdge][0];
457 const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4];
458 const uint8_t* pB8x8Idx = &g_kuiTableB8x8Idx[iEdge][0];
459 const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8];
460 int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pRefIndex[LIST_0] :
461 pCurDqLayer->pRefIndex[LIST_0];
462
463 if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
464 for (i = 0; i < 2; i++) {
465 uint8_t uiNzc = 0;
466 for (j = 0; uiNzc == 0 && j < 4; j++) {
467 uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]);
468 }
469 if (uiNzc) {
470 pBS[i << 1] = pBS[1 + (i << 1)] = 2;
471 } else {
472 PPicture ref0, ref1;
473 ref0 = (iRefIdx[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pB8x8Idx]] : NULL;
474 ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] :
475 NULL;
476 pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pDec->pMv[LIST_0], iMbXy, iNeighMb,
477 *pB8x8Idx, *pBn8x8Idx);
478 }
479 pB8x8Idx += 4;
480 pBn8x8Idx += 4;
481 }
482 } else if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
483 for (i = 0; i < 2; i++) {
484 uint8_t uiNzc = 0;
485 for (j = 0; uiNzc == 0 && j < 4; j++) {
486 uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)];
487 }
488 for (j = 0; j < 2; j++) {
489 if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
490 pBS[j + (i << 1)] = 2;
491 } else {
492 PPicture ref0, ref1;
493 ref0 = (iRefIdx[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pB8x8Idx]] : NULL;
494 ref1 = (iRefIdx[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBnIdx]] : NULL;
495 pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1,
496 (pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), iMbXy, iNeighMb, *pB8x8Idx,
497 *pBnIdx);
498 }
499 pBnIdx++;
500 }
501 pB8x8Idx += 4;
502 }
503 } else if (pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
504 for (i = 0; i < 2; i++) {
505 uint8_t uiNzc = 0;
506 for (j = 0; uiNzc == 0 && j < 4; j++) {
507 uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)];
508 }
509 for (j = 0; j < 2; j++) {
510 if (uiNzc | GetPNzc (pCurDqLayer, iMbXy)[*pBIdx]) {
511 pBS[j + (i << 1)] = 2;
512 } else {
513 PPicture ref0, ref1;
514 ref0 = (iRefIdx[iMbXy][*pBIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pBIdx]] : NULL;
515 ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] :
516 NULL;
517 pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1,
518 (pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), iMbXy, iNeighMb, *pBIdx,
519 *pBn8x8Idx);
520 }
521 pBIdx++;
522 }
523 pBn8x8Idx += 4;
524 }
525 } else {
526 // only 4x4 transform
527 for (i = 0; i < 4; i++) {
528 if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
529 pBS[i] = 2;
530 } else {
531 PPicture ref0, ref1;
532 ref0 = (iRefIdx[iMbXy][*pBIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pBIdx]] : NULL;
533 ref1 = (iRefIdx[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBnIdx]] : NULL;
534 pBS[i] = MB_BS_MV (ref0, ref1, (pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]),
535 iMbXy, iNeighMb, *pBIdx, *pBnIdx);
536 }
537 pBIdx++;
538 pBnIdx++;
539 }
540 }
541
542 return uiBSx4;
543 }
DeblockingBSliceBsMarginalMBAvcbase(PDeblockingFilter pFilter,PDqLayer pCurDqLayer,int32_t iEdge,int32_t iNeighMb,int32_t iMbXy)544 uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge,
545 int32_t iNeighMb, int32_t iMbXy) {
546 int32_t i, j;
547 uint32_t uiBSx4;
548 uint8_t* pBS = (uint8_t*) (&uiBSx4);
549 const uint8_t* pBIdx = &g_kuiTableBIdx[iEdge][0];
550 const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4];
551 const uint8_t* pB8x8Idx = &g_kuiTableB8x8Idx[iEdge][0];
552 const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8];
553 PPicture ref_p0, ref_p1, ref_q0, ref_q1;
554 int8_t (*iRefIdx0)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[LIST_0];
555 int8_t (*iRefIdx1)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[LIST_1];
556
557 if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
558 for (i = 0; i < 2; i++) {
559 uint8_t uiNzc = 0;
560 for (j = 0; uiNzc == 0 && j < 4; j++) {
561 uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]);
562 }
563 if (uiNzc) {
564 pBS[i << 1] = pBS[1 + (i << 1)] = 2;
565 } else {
566 pBS[i << 1] = pBS[1 + (i << 1)] = 1;
567 ref_p0 = iRefIdx0[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pB8x8Idx]] : NULL;
568 ref_q0 = iRefIdx0[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBn8x8Idx]] :
569 NULL;
570 ref_p1 = iRefIdx1[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pB8x8Idx]] : NULL;
571 ref_q1 = iRefIdx1[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBn8x8Idx]] :
572 NULL;
573 if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) {
574 int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] :
575 pCurDqLayer->pMv[LIST_0];
576 int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] :
577 pCurDqLayer->pMv[LIST_1];
578 pBS[i << 1] = pBS[1 + (i << 1)] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pB8x8Idx,
579 *pBn8x8Idx);
580 }
581 }
582 pB8x8Idx += 4;
583 pBn8x8Idx += 4;
584 }
585 } else if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
586 for (i = 0; i < 2; i++) {
587 uint8_t uiNzc = 0;
588 for (j = 0; uiNzc == 0 && j < 4; j++) {
589 uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)];
590 }
591 for (j = 0; j < 2; j++) {
592 if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
593 pBS[j + (i << 1)] = 2;
594 } else {
595 pBS[j + (i << 1)] = 1;
596 ref_p0 = iRefIdx0[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pB8x8Idx]] : NULL;
597 ref_q0 = iRefIdx0[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBnIdx]] :
598 NULL;
599 ref_p1 = iRefIdx1[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pB8x8Idx]] : NULL;
600 ref_q1 = iRefIdx1[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBnIdx]] :
601 NULL;
602 if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) {
603 int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] :
604 pCurDqLayer->pMv[LIST_0];
605 int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] :
606 pCurDqLayer->pMv[LIST_1];
607 pBS[j + (i << 1)] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pB8x8Idx, *pBnIdx);
608 }
609 }
610 pBnIdx++;
611 }
612 pB8x8Idx += 4;
613 }
614 } else if (pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
615 for (i = 0; i < 2; i++) {
616 uint8_t uiNzc = 0;
617 for (j = 0; uiNzc == 0 && j < 4; j++) {
618 uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)];
619 }
620 for (j = 0; j < 2; j++) {
621 if (uiNzc | GetPNzc (pCurDqLayer, iMbXy)[*pBIdx]) {
622 pBS[j + (i << 1)] = 2;
623 } else {
624 pBS[j + (i << 1)] = 1;
625 ref_p0 = iRefIdx0[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pBIdx]] : NULL;
626 ref_q0 = iRefIdx0[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBn8x8Idx]] :
627 NULL;
628 ref_p1 = iRefIdx1[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pBIdx]] : NULL;
629 ref_q1 = iRefIdx1[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBn8x8Idx]] :
630 NULL;
631 if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) {
632 int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] :
633 pCurDqLayer->pMv[LIST_0];
634 int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] :
635 pCurDqLayer->pMv[LIST_1];
636 pBS[j + (i << 1)] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pBIdx, *pBn8x8Idx);
637 }
638 }
639 pBIdx++;
640 }
641 pBn8x8Idx += 4;
642 }
643 } else {
644 // only 4x4 transform
645 for (i = 0; i < 4; i++) {
646 if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
647 pBS[i] = 2;
648 } else {
649 pBS[i] = 1;
650 ref_p0 = iRefIdx0[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pBIdx]] : NULL;
651 ref_q0 = iRefIdx0[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBnIdx]] :
652 NULL;
653 ref_p1 = iRefIdx1[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pBIdx]] : NULL;
654 ref_q1 = iRefIdx1[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBnIdx]] :
655 NULL;
656 if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) {
657 int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] :
658 pCurDqLayer->pMv[LIST_0];
659 int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] :
660 pCurDqLayer->pMv[LIST_1];
661 pBS[i] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pBIdx, *pBnIdx);
662 }
663 }
664 pBIdx++;
665 pBnIdx++;
666 }
667 }
668
669 return uiBSx4;
670 }
DeblockingAvailableNoInterlayer(PDqLayer pCurDqLayer,int32_t iFilterIdc)671 int32_t DeblockingAvailableNoInterlayer (PDqLayer pCurDqLayer, int32_t iFilterIdc) {
672 int32_t iMbY = pCurDqLayer->iMbY;
673 int32_t iMbX = pCurDqLayer->iMbX;
674 int32_t iMbXy = pCurDqLayer->iMbXyIndex;
675 bool bLeftFlag = false;
676 bool bTopFlag = false;
677
678 if (2 == iFilterIdc) {
679 bLeftFlag = (iMbX > 0) && (pCurDqLayer->pSliceIdc[iMbXy] == pCurDqLayer->pSliceIdc[iMbXy - 1]);
680 bTopFlag = (iMbY > 0) && (pCurDqLayer->pSliceIdc[iMbXy] == pCurDqLayer->pSliceIdc[iMbXy - pCurDqLayer->iMbWidth]);
681 } else { //if ( 0 == iFilterIdc )
682 bLeftFlag = (iMbX > 0);
683 bTopFlag = (iMbY > 0);
684 }
685 return (bLeftFlag << LEFT_FLAG_BIT) | (bTopFlag << TOP_FLAG_BIT);
686 }
687
FilteringEdgeLumaH(SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)688 void FilteringEdgeLumaH (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
689 int32_t iIndexA;
690 int32_t iAlpha;
691 int32_t iBeta;
692 ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
693
694 GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
695 iBeta);
696
697 if (iAlpha | iBeta) {
698 TC0_TBL_LOOKUP (tc, iIndexA, pBS, 0);
699 pFilter->pLoopf->pfLumaDeblockingLT4Ver (pPix, iStride, iAlpha, iBeta, tc);
700 }
701 return;
702 }
703
704
FilteringEdgeLumaV(SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)705 void FilteringEdgeLumaV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
706 int32_t iIndexA;
707 int32_t iAlpha;
708 int32_t iBeta;
709 ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
710
711 GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
712 iBeta);
713
714 if (iAlpha | iBeta) {
715 TC0_TBL_LOOKUP (tc, iIndexA, pBS, 0);
716 pFilter->pLoopf->pfLumaDeblockingLT4Hor (pPix, iStride, iAlpha, iBeta, tc);
717 }
718 return;
719 }
720
721
FilteringEdgeLumaIntraH(SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)722 void FilteringEdgeLumaIntraH (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
723 int32_t iIndexA;
724 int32_t iAlpha;
725 int32_t iBeta;
726
727 GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
728 iBeta);
729
730 if (iAlpha | iBeta) {
731 pFilter->pLoopf->pfLumaDeblockingEQ4Ver (pPix, iStride, iAlpha, iBeta);
732 }
733 return;
734 }
735
FilteringEdgeLumaIntraV(SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)736 void FilteringEdgeLumaIntraV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
737 int32_t iIndexA;
738 int32_t iAlpha;
739 int32_t iBeta;
740
741 GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
742 iBeta);
743
744 if (iAlpha | iBeta) {
745 pFilter->pLoopf->pfLumaDeblockingEQ4Hor (pPix, iStride, iAlpha, iBeta);
746 }
747 return;
748 }
FilteringEdgeChromaH(SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)749 void FilteringEdgeChromaH (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
750 uint8_t* pBS) {
751 int32_t iIndexA;
752 int32_t iAlpha;
753 int32_t iBeta;
754 ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
755 if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
756
757 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
758 iBeta);
759
760 if (iAlpha | iBeta) {
761 TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
762 pFilter->pLoopf->pfChromaDeblockingLT4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta, tc);
763 }
764 } else {
765
766 for (int i = 0; i < 2; i++) {
767
768
769 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
770 iBeta);
771
772 if (iAlpha | iBeta) {
773 uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr;
774 TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
775 pFilter->pLoopf->pfChromaDeblockingLT4Ver2 (pPixCbCr, iStride, iAlpha, iBeta, tc);
776 }
777
778
779
780 }
781
782 }
783 return;
784 }
FilteringEdgeChromaV(SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)785 void FilteringEdgeChromaV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
786 uint8_t* pBS) {
787 int32_t iIndexA;
788 int32_t iAlpha;
789 int32_t iBeta;
790 ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
791 if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
792
793
794 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
795 iBeta);
796
797 if (iAlpha | iBeta) {
798 TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
799 pFilter->pLoopf->pfChromaDeblockingLT4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta, tc);
800 }
801
802
803 } else {
804
805 for (int i = 0; i < 2; i++) {
806
807 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
808 iBeta);
809
810 if (iAlpha | iBeta) {
811 uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr;
812 TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
813 pFilter->pLoopf->pfChromaDeblockingLT4Hor2 (pPixCbCr, iStride, iAlpha, iBeta, tc);
814 }
815
816
817 }
818 }
819 return;
820 }
821
FilteringEdgeChromaIntraH(SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)822 void FilteringEdgeChromaIntraH (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
823 uint8_t* pBS) {
824 int32_t iIndexA;
825 int32_t iAlpha;
826 int32_t iBeta;
827 if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
828
829 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
830 iBeta);
831
832 if (iAlpha | iBeta) {
833 pFilter->pLoopf->pfChromaDeblockingEQ4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta);
834 }
835 } else {
836
837 for (int i = 0; i < 2; i++) {
838
839 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
840 iBeta);
841
842 if (iAlpha | iBeta) {
843 uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr;
844 pFilter->pLoopf->pfChromaDeblockingEQ4Ver2 (pPixCbCr, iStride, iAlpha, iBeta);
845 }
846
847 }
848 }
849 return;
850 }
851
FilteringEdgeChromaIntraV(SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)852 void FilteringEdgeChromaIntraV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
853 uint8_t* pBS) {
854 int32_t iIndexA;
855 int32_t iAlpha;
856 int32_t iBeta;
857 if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) { // QP of cb and cr are the same
858
859
860
861
862 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
863 iBeta);
864 if (iAlpha | iBeta) {
865 pFilter->pLoopf->pfChromaDeblockingEQ4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta);
866 }
867 } else {
868
869 for (int i = 0; i < 2; i++) {
870
871
872 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
873 iBeta);
874 if (iAlpha | iBeta) {
875 uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr;
876 pFilter->pLoopf->pfChromaDeblockingEQ4Hor2 (pPixCbCr, iStride, iAlpha, iBeta);
877 }
878 }
879
880 }
881 return;
882 }
883
884
DeblockingInterMb(PDqLayer pCurDqLayer,PDeblockingFilter pFilter,uint8_t nBS[2][4][4],int32_t iBoundryFlag)885 static void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_t nBS[2][4][4],
886 int32_t iBoundryFlag) {
887 int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
888 int32_t iMbX = pCurDqLayer->iMbX;
889 int32_t iMbY = pCurDqLayer->iMbY;
890
891 int32_t iCurLumaQp = pCurDqLayer->pLumaQp[iMbXyIndex];
892 //int32_t* iCurChromaQp = pCurDqLayer->pChromaQp[iMbXyIndex];
893 int8_t* pCurChromaQp = pCurDqLayer->pChromaQp[iMbXyIndex];
894 int32_t iLineSize = pFilter->iCsStride[0];
895 int32_t iLineSizeUV = pFilter->iCsStride[1];
896
897 uint8_t* pDestY, * pDestCb, * pDestCr;
898 pDestY = pFilter->pCsData[0] + ((iMbY * iLineSize + iMbX) << 4);
899 pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSizeUV + iMbX) << 3);
900 pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSizeUV + iMbX) << 3);
901
902 //Vertical margin
903 if (iBoundryFlag & LEFT_FLAG_MASK) {
904 int32_t iLeftXyIndex = iMbXyIndex - 1;
905 pFilter->iLumaQP = (iCurLumaQp + pCurDqLayer->pLumaQp[iLeftXyIndex] + 1) >> 1;
906 for (int i = 0; i < 2; i++) {
907 pFilter->iChromaQP[i] = (pCurChromaQp[i] + pCurDqLayer->pChromaQp[iLeftXyIndex][i] + 1) >> 1;
908 }
909 if (nBS[0][0][0] == 0x04) {
910 FilteringEdgeLumaIntraV (pFilter, pDestY, iLineSize, NULL);
911 FilteringEdgeChromaIntraV (pFilter, pDestCb, pDestCr, iLineSizeUV, NULL);
912 } else {
913 if (* (uint32_t*)nBS[0][0] != 0) {
914 FilteringEdgeLumaV (pFilter, pDestY, iLineSize, nBS[0][0]);
915 FilteringEdgeChromaV (pFilter, pDestCb, pDestCr, iLineSizeUV, nBS[0][0]);
916 }
917 }
918 }
919
920 pFilter->iLumaQP = iCurLumaQp;
921 pFilter->iChromaQP[0] = pCurChromaQp[0];
922 pFilter->iChromaQP[1] = pCurChromaQp[1];
923
924 if (* (uint32_t*)nBS[0][1] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
925 FilteringEdgeLumaV (pFilter, &pDestY[1 << 2], iLineSize, nBS[0][1]);
926 }
927
928 if (* (uint32_t*)nBS[0][2] != 0) {
929 FilteringEdgeLumaV (pFilter, &pDestY[2 << 2], iLineSize, nBS[0][2]);
930 FilteringEdgeChromaV (pFilter, &pDestCb[2 << 1], &pDestCr[2 << 1], iLineSizeUV, nBS[0][2]);
931 }
932
933 if (* (uint32_t*)nBS[0][3] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
934 FilteringEdgeLumaV (pFilter, &pDestY[3 << 2], iLineSize, nBS[0][3]);
935 }
936
937 if (iBoundryFlag & TOP_FLAG_MASK) {
938 int32_t iTopXyIndex = iMbXyIndex - pCurDqLayer->iMbWidth;
939 pFilter->iLumaQP = (iCurLumaQp + pCurDqLayer->pLumaQp[iTopXyIndex] + 1) >> 1;
940 for (int i = 0; i < 2; i++) {
941 pFilter->iChromaQP[i] = (pCurChromaQp[i] + pCurDqLayer->pChromaQp[iTopXyIndex][i] + 1) >> 1;
942 }
943
944 if (nBS[1][0][0] == 0x04) {
945 FilteringEdgeLumaIntraH (pFilter, pDestY, iLineSize, NULL);
946 FilteringEdgeChromaIntraH (pFilter, pDestCb, pDestCr, iLineSizeUV, NULL);
947 } else {
948 if (* (uint32_t*)nBS[1][0] != 0) {
949 FilteringEdgeLumaH (pFilter, pDestY, iLineSize, nBS[1][0]);
950 FilteringEdgeChromaH (pFilter, pDestCb, pDestCr, iLineSizeUV, nBS[1][0]);
951 }
952 }
953 }
954
955 pFilter->iLumaQP = iCurLumaQp;
956 pFilter->iChromaQP[0] = pCurChromaQp[0];
957 pFilter->iChromaQP[1] = pCurChromaQp[1];
958
959 if (* (uint32_t*)nBS[1][1] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
960 FilteringEdgeLumaH (pFilter, &pDestY[ (1 << 2)*iLineSize], iLineSize, nBS[1][1]);
961 }
962
963 if (* (uint32_t*)nBS[1][2] != 0) {
964 FilteringEdgeLumaH (pFilter, &pDestY[ (2 << 2)*iLineSize], iLineSize, nBS[1][2]);
965 FilteringEdgeChromaH (pFilter, &pDestCb[ (2 << 1)*iLineSizeUV], &pDestCr[ (2 << 1)*iLineSizeUV], iLineSizeUV,
966 nBS[1][2]);
967 }
968
969 if (* (uint32_t*)nBS[1][3] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
970 FilteringEdgeLumaH (pFilter, &pDestY[ (3 << 2)*iLineSize], iLineSize, nBS[1][3]);
971 }
972 }
973
FilteringEdgeLumaHV(PDqLayer pCurDqLayer,PDeblockingFilter pFilter,int32_t iBoundryFlag)974 void FilteringEdgeLumaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) {
975 int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
976 int32_t iMbX = pCurDqLayer->iMbX;
977 int32_t iMbY = pCurDqLayer->iMbY;
978 int32_t iMbWidth = pCurDqLayer->iMbWidth;
979 int32_t iLineSize = pFilter->iCsStride[0];
980
981 uint8_t* pDestY;
982 int32_t iCurQp;
983 int32_t iIndexA, iAlpha, iBeta;
984
985 ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
986 ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
987
988 pDestY = pFilter->pCsData[0] + ((iMbY * iLineSize + iMbX) << 4);
989 iCurQp = pCurDqLayer->pLumaQp[iMbXyIndex];
990
991 * (uint32_t*)uiBSx4 = 0x03030303;
992
993 // luma v
994 if (iBoundryFlag & LEFT_FLAG_MASK) {
995 pFilter->iLumaQP = (iCurQp + pCurDqLayer->pLumaQp[iMbXyIndex - 1] + 1) >> 1;
996 FilteringEdgeLumaIntraV (pFilter, pDestY, iLineSize, NULL);
997 }
998
999 pFilter->iLumaQP = iCurQp;
1000 GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
1001 iBeta);
1002 if (iAlpha | iBeta) {
1003 TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 0);
1004
1005 if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
1006 pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc);
1007 }
1008
1009 pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[2 << 2], iLineSize, iAlpha, iBeta, iTc);
1010
1011 if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
1012 pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc);
1013 }
1014 }
1015
1016 // luma h
1017 if (iBoundryFlag & TOP_FLAG_MASK) {
1018 pFilter->iLumaQP = (iCurQp + pCurDqLayer->pLumaQp[iMbXyIndex - iMbWidth] + 1) >> 1;
1019 FilteringEdgeLumaIntraH (pFilter, pDestY, iLineSize, NULL);
1020 }
1021
1022 pFilter->iLumaQP = iCurQp;
1023 if (iAlpha | iBeta) {
1024 if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
1025 pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
1026 }
1027
1028 pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (2 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
1029
1030 if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
1031 pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
1032 }
1033 }
1034 }
FilteringEdgeChromaHV(PDqLayer pCurDqLayer,PDeblockingFilter pFilter,int32_t iBoundryFlag)1035 void FilteringEdgeChromaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) {
1036 int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
1037 int32_t iMbX = pCurDqLayer->iMbX;
1038 int32_t iMbY = pCurDqLayer->iMbY;
1039 int32_t iMbWidth = pCurDqLayer->iMbWidth;
1040 int32_t iLineSize = pFilter->iCsStride[1];
1041
1042 uint8_t* pDestCb;
1043 uint8_t* pDestCr;
1044 //int32_t iCurQp;
1045 int8_t* pCurQp;
1046 int32_t iIndexA, iAlpha, iBeta;
1047
1048 ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
1049 ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
1050
1051 pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSize + iMbX) << 3);
1052 pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSize + iMbX) << 3);
1053 pCurQp = pCurDqLayer->pChromaQp[iMbXyIndex];
1054
1055 * (uint32_t*)uiBSx4 = 0x03030303;
1056
1057
1058 // chroma v
1059 if (iBoundryFlag & LEFT_FLAG_MASK) {
1060
1061 for (int i = 0; i < 2; i++) {
1062 pFilter->iChromaQP[i] = (pCurQp[i] + pCurDqLayer->pChromaQp[iMbXyIndex - 1][i] + 1) >> 1;
1063
1064 }
1065 FilteringEdgeChromaIntraV (pFilter, pDestCb, pDestCr, iLineSize, NULL);
1066 }
1067
1068 pFilter->iChromaQP[0] = pCurQp[0];
1069 pFilter->iChromaQP[1] = pCurQp[1];
1070 if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
1071 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
1072 iBeta);
1073 if (iAlpha | iBeta) {
1074 TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
1075 pFilter->pLoopf->pfChromaDeblockingLT4Hor (&pDestCb[2 << 1], &pDestCr[2 << 1], iLineSize, iAlpha, iBeta, iTc);
1076 }
1077 } else {
1078
1079 for (int i = 0; i < 2; i++) {
1080 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
1081 iBeta);
1082 if (iAlpha | iBeta) {
1083 uint8_t* pDestCbCr = (i == 0) ? &pDestCb[2 << 1] : &pDestCr[2 << 1];
1084 TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
1085 pFilter->pLoopf->pfChromaDeblockingLT4Hor2 (pDestCbCr, iLineSize, iAlpha, iBeta, iTc);
1086 }
1087
1088 }
1089 }
1090
1091 // chroma h
1092
1093 if (iBoundryFlag & TOP_FLAG_MASK) {
1094 for (int i = 0; i < 2; i++) {
1095 pFilter->iChromaQP[i] = (pCurQp[i] + pCurDqLayer->pChromaQp[iMbXyIndex - iMbWidth][i] + 1) >> 1;
1096 }
1097 FilteringEdgeChromaIntraH (pFilter, pDestCb, pDestCr, iLineSize, NULL);
1098 }
1099
1100 pFilter->iChromaQP[0] = pCurQp[0];
1101 pFilter->iChromaQP[1] = pCurQp[1];
1102
1103 if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
1104 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
1105 iBeta);
1106 if (iAlpha | iBeta) {
1107 TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
1108 pFilter->pLoopf->pfChromaDeblockingLT4Ver (&pDestCb[ (2 << 1)*iLineSize], &pDestCr[ (2 << 1)*iLineSize], iLineSize,
1109 iAlpha, iBeta, iTc);
1110 }
1111 } else {
1112 for (int i = 0; i < 2; i++) {
1113
1114 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
1115 iBeta);
1116 if (iAlpha | iBeta) {
1117 TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
1118 uint8_t* pDestCbCr = (i == 0) ? &pDestCb[ (2 << 1) * iLineSize] : &pDestCr[ (2 << 1) * iLineSize];
1119 pFilter->pLoopf->pfChromaDeblockingLT4Ver2 (pDestCbCr, iLineSize,
1120 iAlpha, iBeta, iTc);
1121 }
1122 }
1123
1124
1125 }
1126 }
1127
1128 // merge h&v lookup table operation to save performance
DeblockingIntraMb(PDqLayer pCurDqLayer,PDeblockingFilter pFilter,int32_t iBoundryFlag)1129 static void DeblockingIntraMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) {
1130 FilteringEdgeLumaHV (pCurDqLayer, pFilter, iBoundryFlag);
1131 FilteringEdgeChromaHV (pCurDqLayer, pFilter, iBoundryFlag);
1132 }
1133
WelsDeblockingMb(PDqLayer pCurDqLayer,PDeblockingFilter pFilter,int32_t iBoundryFlag)1134 void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) {
1135 uint8_t nBS[2][4][4] = {{{ 0 }}};
1136
1137 int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
1138 uint32_t iCurMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbXyIndex] :
1139 pCurDqLayer->pMbType[iMbXyIndex];
1140 int32_t iMbNb;
1141
1142 PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
1143 PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
1144 bool bBSlice = pSliceHeader->eSliceType == B_SLICE;
1145
1146 switch (iCurMbType) {
1147 case MB_TYPE_INTRA4x4:
1148 case MB_TYPE_INTRA8x8:
1149 case MB_TYPE_INTRA16x16:
1150 case MB_TYPE_INTRA_PCM:
1151 DeblockingIntraMb (pCurDqLayer, pFilter, iBoundryFlag);
1152 break;
1153 default:
1154
1155 if (iBoundryFlag & LEFT_FLAG_MASK) {
1156 iMbNb = iMbXyIndex - 1;
1157 uint32_t uiMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbNb] : pCurDqLayer->pMbType[iMbNb];
1158 if (bBSlice) {
1159 * (uint32_t*)nBS[0][0] = IS_INTRA (uiMbType) ? 0x04040404 :
1160 DeblockingBSliceBsMarginalMBAvcbase (
1161 pFilter, pCurDqLayer, 0, iMbNb, iMbXyIndex);
1162 } else {
1163 * (uint32_t*)nBS[0][0] = IS_INTRA (uiMbType) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
1164 pFilter, pCurDqLayer, 0, iMbNb, iMbXyIndex);
1165 }
1166 } else {
1167 * (uint32_t*)nBS[0][0] = 0;
1168 }
1169 if (iBoundryFlag & TOP_FLAG_MASK) {
1170 iMbNb = iMbXyIndex - pCurDqLayer->iMbWidth;
1171 uint32_t uiMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbNb] : pCurDqLayer->pMbType[iMbNb];
1172 if (bBSlice) {
1173 * (uint32_t*)nBS[1][0] = IS_INTRA (uiMbType) ? 0x04040404 :
1174 DeblockingBSliceBsMarginalMBAvcbase (
1175 pFilter, pCurDqLayer, 1, iMbNb, iMbXyIndex);
1176 } else {
1177 * (uint32_t*)nBS[1][0] = IS_INTRA (uiMbType) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
1178 pFilter, pCurDqLayer, 1, iMbNb, iMbXyIndex);
1179 }
1180 } else {
1181 * (uint32_t*)nBS[1][0] = 0;
1182 }
1183 //SKIP MB_16x16 or others
1184 if (IS_SKIP (iCurMbType)) {
1185 * (uint32_t*)nBS[0][1] = * (uint32_t*)nBS[0][2] = * (uint32_t*)nBS[0][3] =
1186 * (uint32_t*)nBS[1][1] = * (uint32_t*)nBS[1][2] = * (uint32_t*)nBS[1][3] = 0;
1187 } else {
1188 if (IS_INTER_16x16 (iCurMbType)) {
1189 if (!pCurDqLayer->pTransformSize8x8Flag[pCurDqLayer->iMbXyIndex]) {
1190 DeblockingBSInsideMBAvsbase (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1);
1191 } else {
1192 DeblockingBSInsideMBAvsbase8x8 (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1);
1193 }
1194 } else {
1195
1196 if (bBSlice) {
1197 DeblockingBSliceBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex);
1198 } else {
1199 DeblockingBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex);
1200 }
1201 }
1202 }
1203 DeblockingInterMb (pCurDqLayer, pFilter, nBS, iBoundryFlag);
1204 break;
1205 }
1206 }
1207
1208 /*!
1209 * \brief AVC slice deblocking filtering target layer
1210 *
1211 * \param dec Wels avc decoder context
1212 *
1213 * \return NONE
1214 */
WelsDeblockingFilterSlice(PWelsDecoderContext pCtx,PDeblockingFilterMbFunc pDeblockMb)1215 void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFunc pDeblockMb) {
1216 PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
1217 PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
1218 int32_t iMbWidth = pCurDqLayer->iMbWidth;
1219 int32_t iTotalMbCount = pSliceHeaderExt->sSliceHeader.pSps->uiTotalMbCount;
1220
1221 SDeblockingFilter pFilter;
1222 memset (&pFilter, 0, sizeof (pFilter));
1223 PFmo pFmo = pCtx->pFmo;
1224 int32_t iNextMbXyIndex = 0;
1225 int32_t iTotalNumMb = pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice;
1226 int32_t iCountNumMb = 0;
1227 int32_t iBoundryFlag;
1228 int32_t iFilterIdc = pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc;
1229
1230 /* Step1: parameters set */
1231 pFilter.pCsData[0] = pCtx->pDec->pData[0];
1232 pFilter.pCsData[1] = pCtx->pDec->pData[1];
1233 pFilter.pCsData[2] = pCtx->pDec->pData[2];
1234
1235 pFilter.iCsStride[0] = pCtx->pDec->iLinesize[0];
1236 pFilter.iCsStride[1] = pCtx->pDec->iLinesize[1];
1237
1238 pFilter.eSliceType = (EWelsSliceType) pCurDqLayer->sLayerInfo.sSliceInLayer.eSliceType;
1239
1240 pFilter.iSliceAlphaC0Offset = pSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset;
1241 pFilter.iSliceBetaOffset = pSliceHeaderExt->sSliceHeader.iSliceBetaOffset;
1242
1243 pFilter.pLoopf = &pCtx->sDeblockingFunc;
1244 pFilter.pRefPics[0] = pCtx->sRefPic.pRefList[0];
1245 pFilter.pRefPics[1] = pCtx->sRefPic.pRefList[1];
1246
1247 /* Step2: macroblock deblocking */
1248 if (0 == iFilterIdc || 2 == iFilterIdc) {
1249 iNextMbXyIndex = pSliceHeaderExt->sSliceHeader.iFirstMbInSlice;
1250 pCurDqLayer->iMbX = iNextMbXyIndex % iMbWidth;
1251 pCurDqLayer->iMbY = iNextMbXyIndex / iMbWidth;
1252 pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
1253
1254 do {
1255 iBoundryFlag = DeblockingAvailableNoInterlayer (pCurDqLayer, iFilterIdc);
1256
1257 pDeblockMb (pCurDqLayer, &pFilter, iBoundryFlag);
1258
1259 ++iCountNumMb;
1260 if (iCountNumMb >= iTotalNumMb) {
1261 break;
1262 }
1263
1264 if (pSliceHeaderExt->sSliceHeader.pPps->uiNumSliceGroups > 1) {
1265 iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
1266 } else {
1267 ++iNextMbXyIndex;
1268 }
1269 if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbCount) { // slice group boundary or end of a frame
1270 break;
1271 }
1272
1273 pCurDqLayer->iMbX = iNextMbXyIndex % iMbWidth;
1274 pCurDqLayer->iMbY = iNextMbXyIndex / iMbWidth;
1275 pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
1276 } while (1);
1277 }
1278 }
1279
1280 /*!
1281 * \brief AVC slice init deblocking filtering target layer
1282 *
1283 * \in and out param SDeblockingFilter
1284 * \in and out param iFilterIdc
1285 *
1286 * \return NONE
1287 */
WelsDeblockingInitFilter(PWelsDecoderContext pCtx,SDeblockingFilter & pFilter,int32_t & iFilterIdc)1288 void WelsDeblockingInitFilter (PWelsDecoderContext pCtx, SDeblockingFilter& pFilter, int32_t& iFilterIdc) {
1289 PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
1290 PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
1291
1292 memset (&pFilter, 0, sizeof (pFilter));
1293
1294 iFilterIdc = pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc;
1295
1296 /* Step1: parameters set */
1297 pFilter.pCsData[0] = pCtx->pDec->pData[0];
1298 pFilter.pCsData[1] = pCtx->pDec->pData[1];
1299 pFilter.pCsData[2] = pCtx->pDec->pData[2];
1300
1301 pFilter.iCsStride[0] = pCtx->pDec->iLinesize[0];
1302 pFilter.iCsStride[1] = pCtx->pDec->iLinesize[1];
1303
1304 pFilter.eSliceType = (EWelsSliceType)pCurDqLayer->sLayerInfo.sSliceInLayer.eSliceType;
1305
1306 pFilter.iSliceAlphaC0Offset = pSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset;
1307 pFilter.iSliceBetaOffset = pSliceHeaderExt->sSliceHeader.iSliceBetaOffset;
1308
1309 pFilter.pLoopf = &pCtx->sDeblockingFunc;
1310 pFilter.pRefPics[0] = pCtx->sRefPic.pRefList[0];
1311 pFilter.pRefPics[1] = pCtx->sRefPic.pRefList[1];
1312 }
1313
1314 /*!
1315 * \brief AVC MB deblocking filtering target layer
1316 *
1317 * \param DqLayer which has the current location of MB to be deblocked.
1318 *
1319 * \return NONE
1320 */
WelsDeblockingFilterMB(PDqLayer pCurDqLayer,SDeblockingFilter & pFilter,int32_t & iFilterIdc,PDeblockingFilterMbFunc pDeblockMb)1321 void WelsDeblockingFilterMB (PDqLayer pCurDqLayer, SDeblockingFilter& pFilter, int32_t& iFilterIdc,
1322 PDeblockingFilterMbFunc pDeblockMb) {
1323 /* macroblock deblocking */
1324 if (0 == iFilterIdc || 2 == iFilterIdc) {
1325 int32_t iBoundryFlag = DeblockingAvailableNoInterlayer (pCurDqLayer, iFilterIdc);
1326 pDeblockMb (pCurDqLayer, &pFilter, iBoundryFlag);
1327 }
1328 }
1329 /*!
1330 * \brief deblocking module initialize
1331 *
1332 * \param pf
1333 * cpu
1334 *
1335 * \return NONE
1336 */
1337
DeblockingInit(SDeblockingFunc * pFunc,int32_t iCpu)1338 void DeblockingInit (SDeblockingFunc* pFunc, int32_t iCpu) {
1339 pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_c;
1340 pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_c;
1341 pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_c;
1342 pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_c;
1343
1344 pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_c;
1345 pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_c;
1346 pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_c;
1347 pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_c;
1348
1349 pFunc->pfChromaDeblockingLT4Ver2 = DeblockChromaLt4V2_c;
1350 pFunc->pfChromaDeblockingEQ4Ver2 = DeblockChromaEq4V2_c;
1351 pFunc->pfChromaDeblockingLT4Hor2 = DeblockChromaLt4H2_c;
1352 pFunc->pfChromaDeblockingEQ4Hor2 = DeblockChromaEq4H2_c;
1353
1354 #ifdef X86_ASM
1355 if (iCpu & WELS_CPU_SSSE3) {
1356 pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_ssse3;
1357 pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_ssse3;
1358 pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_ssse3;
1359 pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_ssse3;
1360 pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_ssse3;
1361 pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_ssse3;
1362 pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_ssse3;
1363 pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_ssse3;
1364 }
1365 #endif
1366
1367 #if defined(HAVE_NEON)
1368 if (iCpu & WELS_CPU_NEON) {
1369 pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_neon;
1370 pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_neon;
1371 pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_neon;
1372 pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_neon;
1373
1374 pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_neon;
1375 pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_neon;
1376 pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_neon;
1377 pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_neon;
1378 }
1379 #endif
1380
1381 #if defined(HAVE_NEON_AARCH64)
1382 if (iCpu & WELS_CPU_NEON) {
1383 pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_AArch64_neon;
1384 pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_AArch64_neon;
1385 pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_AArch64_neon;
1386 pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_AArch64_neon;
1387
1388 pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_AArch64_neon;
1389 pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_AArch64_neon;
1390 pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_AArch64_neon;
1391 pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_AArch64_neon;
1392 }
1393 #endif
1394
1395 #if defined(HAVE_MMI)
1396 if (iCpu & WELS_CPU_MMI) {
1397 pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_mmi;
1398 pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_mmi;
1399 pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_mmi;
1400 pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_mmi;
1401 pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_mmi;
1402 pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_mmi;
1403 pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_mmi;
1404 pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_mmi;
1405 }
1406 #endif//HAVE_MMI
1407
1408 #if defined(HAVE_MSA)
1409 if (iCpu & WELS_CPU_MSA) {
1410 pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_msa;
1411 pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_msa;
1412 pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_msa;
1413 pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_msa;
1414 pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_msa;
1415 pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_msa;
1416 pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_msa;
1417 pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_msa;
1418 }
1419 #endif//HAVE_MSA
1420 }
1421
1422 } // namespace WelsDec
1423