• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2  * \copy
3  *     Copyright (c)  2010-2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  *
32  * \file    deblocking.c
33  *
34  * \brief   Interfaces introduced in frame deblocking filtering
35  *
36  * \date    08/02/2010
37  *
38  *************************************************************************************
39  */
40 
41 #include "deblocking.h"
42 #include "deblocking_common.h"
43 #include "cpu_core.h"
44 
45 namespace WelsDec {
46 
// Sentinel index value (-1). Not referenced in the visible part of this file.
47 #define NO_SUPPORTED_FILTER_IDX     (-1)
// Bit positions and the matching single-bit masks (mask == 1 << bit) for the
// "left" and "top" flags. NOTE(review): presumably these select which MB border
// gets filtered; the consumers are outside the visible part of the file.
48 #define LEFT_FLAG_BIT 0
49 #define TOP_FLAG_BIT 1
50 #define LEFT_FLAG_MASK 0x01
51 #define TOP_FLAG_MASK 0x02
52 
// When defined, SMB_EDGE_MV also compares the reference pictures of the two
// blocks (see the #if defined(SAME_MB_DIFF_REFIDX) variants further down).
53 #define SAME_MB_DIFF_REFIDX
// Function-like accessors for the three filter tables of the same name.
// Each table is declared with 52 + 24 entries, i.e. 12 padding entries on
// either side of the 52 nominal ones, so the accessors bias the index by +12
// and accept arguments in the range [-12, 63] without going out of bounds.
// The object-like uses "g_kuiAlphaTable[...]" are unaffected because a
// function-like macro only expands when followed by '('.
54 #define g_kuiAlphaTable(x) g_kuiAlphaTable[(x)+12]
55 #define g_kiBetaTable(x)  g_kiBetaTable[(x)+12]
56 #define g_kiTc0Table(x)   g_kiTc0Table[(x)+12]
57 
// Motion/reference test across a macroblock border for single-list prediction.
//   pRefPic0, pRefPic1 : reference pictures of the current and the neighbouring block
//   iMotionVector      : MV array indexed as [mb][4x4 block][component]
//   iMbXy, iMbBn       : current / neighbouring macroblock index
//   iIndex, iNeighIndex: 4x4 block index inside the current / neighbouring MB
// Evaluates to true when the reference pictures differ or when either MV
// component differs by 4 or more (presumably quarter-sample units, i.e. one
// full sample -- confirm against the MV storage format).
// Pointer/array arguments are parenthesized so that callers may pass a
// conditional expression without having to add their own parentheses.
#define MB_BS_MV(pRefPic0, pRefPic1, iMotionVector, iMbXy, iMbBn, iIndex, iNeighIndex) \
(\
    ( (pRefPic0) != (pRefPic1) ) ||\
    ( WELS_ABS( (iMotionVector)[iMbXy][iIndex][0] - (iMotionVector)[iMbBn][iNeighIndex][0] ) >= 4 ) ||\
    ( WELS_ABS( (iMotionVector)[iMbXy][iIndex][1] - (iMotionVector)[iMbBn][iNeighIndex][1] ) >= 4 )\
)
64 
// True when the MV of block iIndex in macroblock iMbXy (taken from iMV_A) and
// the MV of block iNeighIndex in macroblock iMbBn (taken from iMV_B) differ by
// 4 or more in either component. iMV_A and iMV_B may be the same list or two
// different lists. Array arguments are parenthesized so any pointer-valued
// expression can be passed safely.
#define ON_MB_BS_MV_DIFF(iMV_A, iMV_B, iMbXy, iMbBn, iIndex, iNeighIndex) \
(\
    (( WELS_ABS( (iMV_A)[iMbXy][iIndex][0] - (iMV_B)[iMbBn][iNeighIndex][0] ) >= 4 ) || \
    ( WELS_ABS( (iMV_A)[iMbXy][iIndex][1] - (iMV_B)[iMbBn][iNeighIndex][1] ) >= 4 ))\
)
70 
// Same test as ON_MB_BS_MV_DIFF, but both blocks (iIndex and iNeighIndex)
// belong to the same macroblock iMbXy. Array arguments are parenthesized so
// any pointer-valued expression can be passed safely.
#define IN_MB_BS_MV_DIFF(iMV_A, iMV_B, iMbXy, iIndex, iNeighIndex) \
(\
    (( WELS_ABS( (iMV_A)[iMbXy][iIndex][0] - (iMV_B)[iMbXy][iNeighIndex][0] ) >= 4 ) || \
    ( WELS_ABS( (iMV_A)[iMbXy][iIndex][1] - (iMV_B)[iMbXy][iNeighIndex][1] )  >= 4 )) \
)
76 
77 //On MB Boundary strength
78 //Apply for B_SLICE
// Motion-vector part of the boundary-strength decision across a macroblock
// border when two reference lists are in use.
//   ref_p0 / ref_p1 : LIST_0 / LIST_1 reference picture of the current block
//   ref_q0 / ref_q1 : LIST_0 / LIST_1 reference picture of the neighbouring block
//   mv0 / mv1       : LIST_0 / LIST_1 MV arrays, indexed [mb][4x4 block][component]
// The visible callers only invoke this after checking that both blocks use the
// same set of reference pictures, so only the MVs are compared here:
//   * two different pictures (ref_p0 != ref_p1):
//       - ref_p0 == ref_q0 -> compare list0 with list0 and list1 with list1
//       - otherwise        -> compare crosswise (list0 with list1, list1 with list0)
//   * both predictions from the same picture: true only when BOTH the straight
//     and the crosswise comparison report a difference.
// Note: ref_q1 is accepted for symmetry but never used in the expansion.
79 #define ON_MB_BS(ref_p0, ref_q0, ref_p1, ref_q1, mv0, mv1, iMbXy, iMbBn, iIndex, iNeighIndex) \
80 (\
81   (ref_p0 != ref_p1) ? \
82   ((ref_p0 == ref_q0) ? \
83   (ON_MB_BS_MV_DIFF (mv0, mv0, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv1, iMbXy, iMbBn, iIndex, iNeighIndex)) : \
84   (ON_MB_BS_MV_DIFF (mv0, mv1, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv0, iMbXy, iMbBn, iIndex, iNeighIndex))) : \
85   ((ON_MB_BS_MV_DIFF (mv0, mv0, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv1, iMbXy, iMbBn, iIndex, iNeighIndex)) && \
86   (ON_MB_BS_MV_DIFF (mv0, mv1, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv0, iMbXy, iMbBn, iIndex, iNeighIndex))) \
87 )
88 
// Motion test between two 4x4 blocks (iIndex, iNeighIndex) of the SAME
// macroblock, single reference list.
//   iMotionVector is indexed [4x4 block][component].
// "WELS_ABS(d) & ~3" is non-zero exactly when the absolute difference is >= 4.
// With SAME_MB_DIFF_REFIDX defined the result is also true when the two blocks
// point at different reference pictures (pRefPics[] holds one picture pointer
// per 4x4 block); without it the first argument is ignored and only the MVs
// are compared. Both variants evaluate to 0 or 1.
89 #if defined(SAME_MB_DIFF_REFIDX)
90 #define SMB_EDGE_MV(pRefPics, iMotionVector, iIndex, iNeighIndex) \
91 (\
92     ( pRefPics[iIndex] != pRefPics[iNeighIndex] )||(\
93     ( WELS_ABS( iMotionVector[iIndex][0] - iMotionVector[iNeighIndex][0] ) &(~3) ) |\
94     ( WELS_ABS( iMotionVector[iIndex][1] - iMotionVector[iNeighIndex][1] ) &(~3) ))\
95 )
96 #else
97 #define SMB_EDGE_MV(iRefIndex, iMotionVector, iIndex, iNeighIndex) \
98 (\
99     !!(( WELS_ABS( iMotionVector[iIndex][0] - iMotionVector[iNeighIndex][0] ) &(~3) ) |( WELS_ABS( iMotionVector[iIndex][1] - iMotionVector[iNeighIndex][1] ) &(~3) ))\
100 )
101 #endif
102 
// Two-list counterpart of SMB_EDGE_MV: motion/reference test between two 4x4
// blocks (iIndex, iNeigborIndex) of the same macroblock iMbXy.
//   refs : per-list, per-4x4-block reference picture pointers, refs[list][block]
//   mv   : per-list MV arrays, mv[list][mb][block][component]
// Evaluates to 1 when the two blocks do not use the same set of reference
// pictures (neither list-for-list nor crosswise). Otherwise the MVs decide:
//   * block uses two different pictures: compare list-for-list when the
//     LIST_0 pictures match, crosswise otherwise;
//   * block uses the same picture in both lists: true only when both the
//     list-for-list and the crosswise comparison report a difference.
// The two preprocessor branches differ only by a redundant "!!" in front of
// the (already boolean) outer condition, so they behave identically.
103 #if defined(SAME_MB_DIFF_REFIDX)
104 #define IN_SMB_EDGE_MV(refs, mv, iMbXy, iIndex, iNeigborIndex) \
105 (\
106   (((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_1][iNeigborIndex])) || \
107   ((refs[LIST_0][iIndex] == refs[LIST_1][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_0][iNeigborIndex]))) ? \
108   ((refs[LIST_0][iIndex] != refs[LIST_1][iIndex]) ? \
109   ((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) ? \
110   (IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) : \
111   (IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex))) : \
112   ((IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) && \
113   (IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex)))) : 1 \
114 )
115 #else
116 #define IN_SMB_EDGE_MV(refs, mv, iMbXy, iIndex, iNeigborIndex) \
117 (\
118   !!(((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_1][iNeigborIndex])) || \
119   ((refs[LIST_0][iIndex] == refs[LIST_1][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_0][iNeigborIndex]))) ? \
120   ((refs[LIST_0][iIndex] != refs[LIST_1][iIndex]) ? \
121   ((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) ? \
122   (IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) : \
123   (IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex))) : \
124   ((IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) && \
125   (IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex)))) : 1 \
126 )
127 #endif
128 
// Boundary strength of an edge between two 4x4 blocks inside one macroblock
// (single reference list).
//   bsx1 : OR of the two blocks' non-zero-coefficient entries
// Result: (bsx1 | SMB_EDGE_MV(...)) shifted left by one when bsx1 is non-zero,
// i.e. the motion test alone (0 or 1) when there are no coefficients, and an
// even value >= 2 otherwise.
// bsx1 is expanded twice, so it must be free of side effects; it is
// parenthesized here so that an argument such as "a | b" binds correctly
// under the "!!" operator.
#define BS_EDGE(bsx1, pRefPics, iMotionVector, iIndex, iNeighIndex) \
( ((bsx1)|SMB_EDGE_MV(pRefPics, iMotionVector, iIndex, iNeighIndex))<<((uint8_t)(!!(bsx1))))
131 
132 //Inside MB Boundary strength
133 //Apply for B_SLICE
// Two-list variant of BS_EDGE: (bsx1 | IN_SMB_EDGE_MV(...)) shifted left by
// one when bsx1 (OR of the two blocks' non-zero-coefficient entries) is
// non-zero. bsx1 is expanded twice, so it must be free of side effects; it is
// parenthesized here so that an argument such as "a | b" binds correctly
// under the "!!" operator.
#define IN_BS_EDGE(bsx1, refs, mv, iMbXy, iIndex, iNeigborIndex) \
( ((bsx1)|IN_SMB_EDGE_MV(refs, mv, iMbXy, iIndex, iNeigborIndex))<<((uint8_t)(!!(bsx1))))
136 
// Looks up the alpha and beta filter thresholds for a QP.
//   iQp, iAlphaOffset, iBetaOffset : inputs (parenthesized, so arbitrary
//                                    expressions may be passed)
//   iIndex : output lvalue, receives iQp + iAlphaOffset
//   iAlpha : output lvalue, receives g_kuiAlphaTable(iIndex)
//   iBeta  : output lvalue, receives g_kiBetaTable(iQp + iBetaOffset)
// The table accessors tolerate indices in [-12, 63]; no clamping is done here.
// The macro expands to a brace block (not do/while), so avoid using it as the
// sole body of an if that has an else.
#define GET_ALPHA_BETA_FROM_QP(iQp, iAlphaOffset, iBetaOffset, iIndex, iAlpha, iBeta) \
{\
  iIndex = ((iQp) + (iAlphaOffset));\
  iAlpha = g_kuiAlphaTable(iIndex);\
  iBeta  = g_kiBetaTable(((iQp) + (iBetaOffset)));\
}
143 
// Alpha threshold table. Layout: 12 leading padding entries (0), the 52
// nominal entries for index 0..51, then 12 trailing padding entries that
// repeat the last value (255). Access through the g_kuiAlphaTable(x) macro,
// which adds the +12 bias, so x may range over [-12, 63].
144 static const uint8_t g_kuiAlphaTable[52 + 24] = { //this table refers to Table 8-16 in H.264/AVC standard
145   0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0,  0,
146   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
147   0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
148   7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
149   25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
150   80, 90, 101, 113, 127, 144, 162, 182, 203, 226,
151   255, 255
152   , 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
153 };
154 
// Beta threshold table. Same layout as the alpha table: 12 leading padding
// entries (0), 52 nominal entries for index 0..51, 12 trailing padding entries
// repeating the last value (18). Access through the g_kiBetaTable(x) macro,
// which adds the +12 bias.
155 static const int8_t g_kiBetaTable[52 + 24] = { //this table refers to Table 8-16 in H.264/AVC standard
156   0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0,  0,
157   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
158   0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
159   3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
160   8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
161   13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
162   18, 18
163   , 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18
164 };
165 
// tc0 clipping table: one row per index, one column per boundary-strength
// value 0..3 (column 0 is a -1 placeholder in every row). Layout matches the
// alpha/beta tables: 12 leading padding rows, 52 nominal rows for index 0..51,
// 12 trailing padding rows repeating the last row. Access through the
// g_kiTc0Table(x) macro, which adds the +12 bias.
166 static const int8_t g_kiTc0Table[52 + 24][4] = { //this table refers Table 8-17 in H.264/AVC standard
167   { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
168   { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
169   { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
170   { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
171   { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 1 },
172   { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 1, 1 }, { -1, 0, 1, 1 }, { -1, 1, 1, 1 },
173   { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 },
174   { -1, 1, 1, 2 }, { -1, 1, 2, 3 }, { -1, 1, 2, 3 }, { -1, 2, 2, 3 }, { -1, 2, 2, 4 }, { -1, 2, 3, 4 },
175   { -1, 2, 3, 4 }, { -1, 3, 3, 5 }, { -1, 3, 4, 6 }, { -1, 3, 4, 6 }, { -1, 4, 5, 7 }, { -1, 4, 5, 8 },
176   { -1, 4, 6, 9 }, { -1, 5, 7, 10 }, { -1, 6, 8, 11 }, { -1, 6, 8, 13 }, { -1, 7, 10, 14 }, { -1, 8, 11, 16 },
177   { -1, 9, 12, 18 }, { -1, 10, 13, 20 }, { -1, 11, 15, 23 }, { -1, 13, 17, 25 }
178   , { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }
179   , { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }
180 };
181 
// 4x4 block indices that face each other across a macroblock border, selected
// by edge type (first dimension). For each edge type, entries [0..3] index the
// current macroblock and entries [4..7] the neighbouring macroblock, in
// matching order.
//   edge 0: current blocks 0,4,8,12 against neighbour blocks 3,7,11,15
//   edge 1: current blocks 0,1,2,3  against neighbour blocks 12,13,14,15
// NOTE(review): with raster-ordered 4x4 indices this reads as edge 0 = left MB
// border and edge 1 = top MB border -- confirm against the callers.
182 static const uint8_t g_kuiTableBIdx[2][8] = {
183   {
184     0,  4,  8,  12,
185     3,  7,  11, 15
186   },
187 
188   {
189     0,  1,  2,  3,
190     12, 13, 14, 15
191   },
192 };
193 
// 8x8-transform counterpart of g_kuiTableBIdx. For each edge type, entries
// [0..7] list the 4x4 blocks of the two 8x8 blocks of the current macroblock
// that touch the border (four 4x4 indices per 8x8 block), and entries [8..15]
// list the same for the two facing 8x8 blocks of the neighbouring macroblock.
// The trailing comments (split across several lines by the code formatter)
// sketch the 4x4 block numbering of a macroblock:
//    0  1 |  2  3
//    4  5 |  6  7
//   ------------
//    8  9 | 10 11
//   12 13 | 14 15
194 static const uint8_t g_kuiTableB8x8Idx[2][16] = {
195   {
196     0,  1,  4,  5,  8,  9,  12, 13,   // 0   1 |  2  3
197     2,  3,  6,  7, 10, 11,  14, 15    // 4   5 |  6  7
198   },                                  // ------------
199   // 8   9 | 10 11
200   {
201     // 12 13 | 14 15
202     0,  1,  4,  5,  2,  3,  6,  7,
203     8,  9,  12, 13, 10, 11, 14, 15
204   },
205 };
206 //fix Bugzilla 1486223
// Fills tc[0..3] with the tc0 clipping values for the four boundary-strength
// entries pBS[0..3] at table index iIndexA, adding bChroma to each.
// "& 3" keeps the column index inside the 4-entry table row even when a
// boundary-strength byte carries a value above 3.
// tc, pBS and bChroma are parenthesized so pointer expressions such as
// "pBS + 4" or compound flag expressions can be passed safely.
#define TC0_TBL_LOOKUP(tc, iIndexA, pBS, bChroma) \
{\
  (tc)[0] = g_kiTc0Table(iIndexA)[(pBS)[0] & 3] + (bChroma);\
  (tc)[1] = g_kiTc0Table(iIndexA)[(pBS)[1] & 3] + (bChroma);\
  (tc)[2] = g_kiTc0Table(iIndexA)[(pBS)[2] & 3] + (bChroma);\
  (tc)[3] = g_kiTc0Table(iIndexA)[(pBS)[3] & 3] + (bChroma);\
}
214 
/*!
 * \brief   Derive the boundary strengths of the inner 4x4 edges of one macroblock
 *          from its non-zero-coefficient table only (no motion information).
 *
 * \param   pNnzTab        16 entries, one per 4x4 block, indexed as 4 * row + column
 * \param   nBS            output; nBS[0][col][row] receives the edge between block
 *                         (row, col - 1) and (row, col), nBS[1][row][col] the edge
 *                         between block (row - 1, col) and (row, col), for
 *                         row/col in 1..3. The [.][0][.] entries are not written.
 * \param   iLShiftFactor  left shift applied to the OR of the two table entries
 *
 * Every edge is computed in its own 8-bit lane. Earlier revisions handled a
 * whole row of four edges through uint32_t pointer casts, which violated the
 * aliasing/alignment rules and let a shift overflow of one edge leak into the
 * adjacent one; for table values that fit into 8 bits after the shift the
 * results are unchanged.
 */
void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) {
  for (int32_t iRow = 0; iRow < 4; iRow++) {
    const int8_t* pRowNnz = pNnzTab + (iRow << 2);

    // edges between horizontally adjacent blocks of this row
    for (int32_t iCol = 1; iCol < 4; iCol++) {
      const uint8_t uiNnz = (uint8_t) (pRowNnz[iCol - 1] | pRowNnz[iCol]);
      nBS[0][iCol][iRow] = (uint8_t) (uiNnz << iLShiftFactor);
    }

    // edges between this row and the row above it (row 0 has no inner edge above)
    if (iRow > 0) {
      for (int32_t iCol = 0; iCol < 4; iCol++) {
        const uint8_t uiNnz = (uint8_t) (pRowNnz[iCol - 4] | pRowNnz[iCol]);
        nBS[1][iRow][iCol] = (uint8_t) (uiNnz << iLShiftFactor);
      }
    }
  }
}
242 
DeblockingBSInsideMBAvsbase8x8(int8_t * pNnzTab,uint8_t nBS[2][4][4],int32_t iLShiftFactor)243 void inline DeblockingBSInsideMBAvsbase8x8 (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) {
244   int8_t i8x8NnzTab[4];
245   for (int32_t i = 0; i < 4; i++) {
246     int32_t iBlkIdx = i << 2;
247     i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
248                      pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
249   }
250 
251   //vertical
252   nBS[0][2][0] = nBS[0][2][1] = (i8x8NnzTab[0] | i8x8NnzTab[1]) << iLShiftFactor;
253   nBS[0][2][2] = nBS[0][2][3] = (i8x8NnzTab[2] | i8x8NnzTab[3]) << iLShiftFactor;
254   //horizontal
255   nBS[1][2][0] = nBS[1][2][1] = (i8x8NnzTab[0] | i8x8NnzTab[2]) << iLShiftFactor;
256   nBS[1][2][2] = nBS[1][2][3] = (i8x8NnzTab[1] | i8x8NnzTab[3]) << iLShiftFactor;
257 }
258 
DeblockingBSInsideMBNormal(PDeblockingFilter pFilter,PDqLayer pCurDqLayer,uint8_t nBS[2][4][4],int8_t * pNnzTab,int32_t iMbXy)259 void static inline DeblockingBSInsideMBNormal (PDeblockingFilter  pFilter, PDqLayer pCurDqLayer, uint8_t nBS[2][4][4],
260     int8_t* pNnzTab,
261     int32_t iMbXy) {
262   uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
263   int8_t* iRefIdx = pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy];
264   void* iRefs[MB_BLOCK4x4_NUM];
265   int i;
266   ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
267 
268   int8_t i8x8NnzTab[4];
269 
270   /* Look up each reference picture based on indices */
271   for (i = 0; i < MB_BLOCK4x4_NUM; i++) {
272     if (iRefIdx[i] > REF_NOT_IN_LIST)
273       iRefs[i] = pFilter->pRefPics[LIST_0][iRefIdx[i]];
274     else
275       iRefs[i] = NULL;
276   }
277 
278   if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
279     for (int32_t i = 0; i < 4; i++) {
280       int32_t iBlkIdx = i << 2;
281       i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
282                        pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
283     }
284     //vertical
285     nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
286                                            g_kuiMbCountScan4Idx[1 << 2], g_kuiMbCountScan4Idx[0]);
287     nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
288                                            g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[2 << 2]);
289 
290     //horizontal
291     nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
292                                            g_kuiMbCountScan4Idx[2 << 2], g_kuiMbCountScan4Idx[0]);
293     nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
294                                            g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[1 << 2]);
295   } else {
296     uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
297     uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
298     uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
299     uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
300 
301     for (int i = 0; i < 3; i++)
302       uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
303     nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 1, 0);
304     nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 2, 1);
305     nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 3, 2);
306 
307     for (int i = 0; i < 3; i++)
308       uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
309     nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 5, 4);
310     nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 6, 5);
311     nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 7, 6);
312 
313     for (int i = 0; i < 3; i++)
314       uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
315     nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 9, 8);
316     nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 10, 9);
317     nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 11, 10);
318 
319     for (int i = 0; i < 3; i++)
320       uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
321     nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 13, 12);
322     nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 14, 13);
323     nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 15, 14);
324 
325     // horizontal
326     * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
327     nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 4, 0);
328     nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 5, 1);
329     nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 6, 2);
330     nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 7, 3);
331 
332     * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
333     nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 8, 4);
334     nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 9, 5);
335     nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 10, 6);
336     nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 11, 7);
337 
338     * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
339     nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 12, 8);
340     nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 13, 9);
341     nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 14, 10);
342     nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 15, 11);
343   }
344 }
345 
DeblockingBSliceBSInsideMBNormal(PDeblockingFilter pFilter,PDqLayer pCurDqLayer,uint8_t nBS[2][4][4],int8_t * pNnzTab,int32_t iMbXy)346 void static inline DeblockingBSliceBSInsideMBNormal (PDeblockingFilter  pFilter, PDqLayer pCurDqLayer,
347     uint8_t nBS[2][4][4], int8_t* pNnzTab,
348     int32_t iMbXy) {
349   uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
350   void* iRefs[LIST_A][MB_BLOCK4x4_NUM];
351 
352   ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
353   int8_t i8x8NnzTab[4];
354   int l;
355 
356   for (l = 0; l < LIST_A; l++) {
357     int8_t* iRefIdx = pCurDqLayer->pDec->pRefIndex[l][iMbXy];
358     int i;
359     /* Look up each reference picture based on indices */
360     for (i = 0; i < MB_BLOCK4x4_NUM; i++) {
361       if (iRefIdx[i] > REF_NOT_IN_LIST)
362         iRefs[l][i] = pFilter->pRefPics[l][iRefIdx[i]];
363       else
364         iRefs[l][i] = NULL;
365     }
366   }
367 
368   if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
369     for (int32_t i = 0; i < 4; i++) {
370       int32_t iBlkIdx = i << 2;
371       i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
372                        pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
373     }
374     //vertical
375     int8_t iIndex = g_kuiMbCountScan4Idx[1 << 2];
376     int8_t iNeigborIndex = g_kuiMbCountScan4Idx[0];
377     nBS[0][2][0] = nBS[0][2][1] = IN_BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs, pCurDqLayer->pDec->pMv, iMbXy,
378                                   iIndex, iNeigborIndex);
379     iIndex = g_kuiMbCountScan4Idx[3 << 2];
380     iNeigborIndex = g_kuiMbCountScan4Idx[2 << 2];
381     nBS[0][2][2] = nBS[0][2][3] = IN_BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv, iMbXy,
382                                   iIndex, iNeigborIndex);
383 
384     //horizontal
385     iIndex = g_kuiMbCountScan4Idx[2 << 2];
386     iNeigborIndex = g_kuiMbCountScan4Idx[0];
387     nBS[1][2][0] = nBS[1][2][1] = IN_BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs, pCurDqLayer->pDec->pMv, iMbXy,
388                                   iIndex, iNeigborIndex);
389 
390     iIndex = g_kuiMbCountScan4Idx[3 << 2];
391     iNeigborIndex = g_kuiMbCountScan4Idx[1 << 2];
392     nBS[1][2][2] = nBS[1][2][3] = IN_BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv, iMbXy,
393                                   iIndex, iNeigborIndex);
394   } else {
395     uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
396     uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
397     uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
398     uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
399 
400     for (int i = 0; i < 3; i++)
401       uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
402     nBS[0][1][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 1, 0);
403     nBS[0][2][0] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 2, 1);
404     nBS[0][3][0] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 3, 2);
405 
406     for (int i = 0; i < 3; i++)
407       uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
408     nBS[0][1][1] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 5, 4);
409     nBS[0][2][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 6, 5);
410     nBS[0][3][1] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 7, 6);
411 
412     for (int i = 0; i < 3; i++)
413       uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
414     nBS[0][1][2] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 9, 8);
415     nBS[0][2][2] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 10, 9);
416     nBS[0][3][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 11, 10);
417 
418     for (int i = 0; i < 3; i++)
419       uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
420     nBS[0][1][3] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 13, 12);
421     nBS[0][2][3] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 14, 13);
422     nBS[0][3][3] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 15, 14);
423 
424     // horizontal
425     * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
426     nBS[1][1][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 4, 0);
427     nBS[1][1][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 5, 1);
428     nBS[1][1][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 6, 2);
429     nBS[1][1][3] = IN_BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 7, 3);
430 
431     * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
432     nBS[1][2][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 8, 4);
433     nBS[1][2][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 9, 5);
434     nBS[1][2][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 10, 6);
435     nBS[1][2][3] = IN_BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 11, 7);
436 
437     * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
438     nBS[1][3][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 12, 8);
439     nBS[1][3][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 13, 9);
440     nBS[1][3][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 14, 10);
441     nBS[1][3][3] = IN_BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 15, 11);
442     for (int ii = 0; ii < 2; ii++)
443       for (int jj = 1; jj < 4; jj++)
444         for (int kk = 0; kk < 4; kk++)
445           if (nBS[ii][jj][kk] > 1)
446             nBS[ii][jj][kk] = nBS[ii][jj][kk];
447   }
448 }
449 
450 
DeblockingBsMarginalMBAvcbase(PDeblockingFilter pFilter,PDqLayer pCurDqLayer,int32_t iEdge,int32_t iNeighMb,int32_t iMbXy)451 uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter  pFilter, PDqLayer pCurDqLayer, int32_t iEdge,
452                                         int32_t iNeighMb, int32_t iMbXy) {
453   int32_t i, j;
454   uint32_t uiBSx4;
455   uint8_t* pBS = (uint8_t*) (&uiBSx4);
456   const uint8_t* pBIdx      = &g_kuiTableBIdx[iEdge][0];
457   const uint8_t* pBnIdx     = &g_kuiTableBIdx[iEdge][4];
458   const uint8_t* pB8x8Idx   = &g_kuiTableB8x8Idx[iEdge][0];
459   const uint8_t* pBn8x8Idx  = &g_kuiTableB8x8Idx[iEdge][8];
460   int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pRefIndex[LIST_0] :
461                                        pCurDqLayer->pRefIndex[LIST_0];
462 
463   if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
464     for (i = 0; i < 2; i++) {
465       uint8_t uiNzc = 0;
466       for (j = 0; uiNzc == 0 && j < 4; j++) {
467         uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]);
468       }
469       if (uiNzc) {
470         pBS[i << 1] = pBS[1 + (i << 1)] = 2;
471       } else {
472         PPicture ref0, ref1;
473         ref0 = (iRefIdx[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pB8x8Idx]] : NULL;
474         ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] :
475                NULL;
476         pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pDec->pMv[LIST_0], iMbXy, iNeighMb,
477                                           *pB8x8Idx, *pBn8x8Idx);
478       }
479       pB8x8Idx += 4;
480       pBn8x8Idx += 4;
481     }
482   } else if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
483     for (i = 0; i < 2; i++) {
484       uint8_t uiNzc = 0;
485       for (j = 0; uiNzc == 0 && j < 4; j++) {
486         uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)];
487       }
488       for (j = 0; j < 2; j++) {
489         if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
490           pBS[j + (i << 1)] = 2;
491         } else {
492           PPicture ref0, ref1;
493           ref0 = (iRefIdx[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pB8x8Idx]] : NULL;
494           ref1 = (iRefIdx[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBnIdx]] : NULL;
495           pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1,
496                                         (pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), iMbXy, iNeighMb, *pB8x8Idx,
497                                         *pBnIdx);
498         }
499         pBnIdx++;
500       }
501       pB8x8Idx += 4;
502     }
503   } else if (pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
504     for (i = 0; i < 2; i++) {
505       uint8_t uiNzc = 0;
506       for (j = 0; uiNzc == 0 && j < 4; j++) {
507         uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)];
508       }
509       for (j = 0; j < 2; j++) {
510         if (uiNzc | GetPNzc (pCurDqLayer, iMbXy)[*pBIdx]) {
511           pBS[j + (i << 1)] = 2;
512         } else {
513           PPicture ref0, ref1;
514           ref0 = (iRefIdx[iMbXy][*pBIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pBIdx]] : NULL;
515           ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] :
516                  NULL;
517           pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1,
518                                         (pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), iMbXy, iNeighMb, *pBIdx,
519                                         *pBn8x8Idx);
520         }
521         pBIdx++;
522       }
523       pBn8x8Idx += 4;
524     }
525   } else {
526     // only 4x4 transform
527     for (i = 0; i < 4; i++) {
528       if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
529         pBS[i] = 2;
530       } else {
531         PPicture ref0, ref1;
532         ref0 = (iRefIdx[iMbXy][*pBIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pBIdx]] : NULL;
533         ref1 = (iRefIdx[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBnIdx]] : NULL;
534         pBS[i] = MB_BS_MV (ref0, ref1, (pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]),
535                            iMbXy, iNeighMb, *pBIdx, *pBnIdx);
536       }
537       pBIdx++;
538       pBnIdx++;
539     }
540   }
541 
542   return uiBSx4;
543 }
DeblockingBSliceBsMarginalMBAvcbase(PDeblockingFilter pFilter,PDqLayer pCurDqLayer,int32_t iEdge,int32_t iNeighMb,int32_t iMbXy)544 uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDeblockingFilter  pFilter, PDqLayer pCurDqLayer, int32_t iEdge,
545     int32_t iNeighMb, int32_t iMbXy) {
546   int32_t i, j;
547   uint32_t uiBSx4;
548   uint8_t* pBS = (uint8_t*) (&uiBSx4);
549   const uint8_t* pBIdx = &g_kuiTableBIdx[iEdge][0];
550   const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4];
551   const uint8_t* pB8x8Idx = &g_kuiTableB8x8Idx[iEdge][0];
552   const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8];
553   PPicture ref_p0, ref_p1, ref_q0, ref_q1;
554   int8_t (*iRefIdx0)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[LIST_0];
555   int8_t (*iRefIdx1)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[LIST_1];
556 
557   if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
558     for (i = 0; i < 2; i++) {
559       uint8_t uiNzc = 0;
560       for (j = 0; uiNzc == 0 && j < 4; j++) {
561         uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]);
562       }
563       if (uiNzc) {
564         pBS[i << 1] = pBS[1 + (i << 1)] = 2;
565       } else {
566         pBS[i << 1] = pBS[1 + (i << 1)] = 1;
567         ref_p0 = iRefIdx0[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pB8x8Idx]] : NULL;
568         ref_q0 = iRefIdx0[iNeighMb][*pBn8x8Idx]  > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBn8x8Idx]] :
569                  NULL;
570         ref_p1 = iRefIdx1[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pB8x8Idx]] : NULL;
571         ref_q1 = iRefIdx1[iNeighMb][*pBn8x8Idx]  > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBn8x8Idx]] :
572                  NULL;
573         if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) {
574           int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] :
575               pCurDqLayer->pMv[LIST_0];
576           int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] :
577               pCurDqLayer->pMv[LIST_1];
578           pBS[i << 1] = pBS[1 + (i << 1)] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pB8x8Idx,
579                                             *pBn8x8Idx);
580         }
581       }
582       pB8x8Idx += 4;
583       pBn8x8Idx += 4;
584     }
585   } else if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
586     for (i = 0; i < 2; i++) {
587       uint8_t uiNzc = 0;
588       for (j = 0; uiNzc == 0 && j < 4; j++) {
589         uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)];
590       }
591       for (j = 0; j < 2; j++) {
592         if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
593           pBS[j + (i << 1)] = 2;
594         } else {
595           pBS[j + (i << 1)] = 1;
596           ref_p0 = iRefIdx0[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pB8x8Idx]] : NULL;
597           ref_q0 = iRefIdx0[iNeighMb][*pBnIdx]  > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBnIdx]] :
598                    NULL;
599           ref_p1 = iRefIdx1[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pB8x8Idx]] : NULL;
600           ref_q1 = iRefIdx1[iNeighMb][*pBnIdx]  > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBnIdx]] :
601                    NULL;
602           if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) {
603             int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] :
604                 pCurDqLayer->pMv[LIST_0];
605             int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] :
606                 pCurDqLayer->pMv[LIST_1];
607             pBS[j + (i << 1)] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pB8x8Idx, *pBnIdx);
608           }
609         }
610         pBnIdx++;
611       }
612       pB8x8Idx += 4;
613     }
614   } else if (pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
615     for (i = 0; i < 2; i++) {
616       uint8_t uiNzc = 0;
617       for (j = 0; uiNzc == 0 && j < 4; j++) {
618         uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)];
619       }
620       for (j = 0; j < 2; j++) {
621         if (uiNzc | GetPNzc (pCurDqLayer, iMbXy)[*pBIdx]) {
622           pBS[j + (i << 1)] = 2;
623         } else {
624           pBS[j + (i << 1)] = 1;
625           ref_p0 = iRefIdx0[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pBIdx]] : NULL;
626           ref_q0 = iRefIdx0[iNeighMb][*pBn8x8Idx]  > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBn8x8Idx]] :
627                    NULL;
628           ref_p1 = iRefIdx1[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pBIdx]] : NULL;
629           ref_q1 = iRefIdx1[iNeighMb][*pBn8x8Idx]  > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBn8x8Idx]] :
630                    NULL;
631           if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) {
632             int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] :
633                 pCurDqLayer->pMv[LIST_0];
634             int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] :
635                 pCurDqLayer->pMv[LIST_1];
636             pBS[j + (i << 1)] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pBIdx, *pBn8x8Idx);
637           }
638         }
639         pBIdx++;
640       }
641       pBn8x8Idx += 4;
642     }
643   } else {
644     // only 4x4 transform
645     for (i = 0; i < 4; i++) {
646       if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
647         pBS[i] = 2;
648       } else {
649         pBS[i] = 1;
650         ref_p0 = iRefIdx0[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pBIdx]] : NULL;
651         ref_q0 = iRefIdx0[iNeighMb][*pBnIdx]  > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBnIdx]] :
652                  NULL;
653         ref_p1 = iRefIdx1[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pBIdx]] : NULL;
654         ref_q1 = iRefIdx1[iNeighMb][*pBnIdx]  > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBnIdx]] :
655                  NULL;
656         if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) {
657           int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] :
658               pCurDqLayer->pMv[LIST_0];
659           int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] :
660               pCurDqLayer->pMv[LIST_1];
661           pBS[i] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pBIdx, *pBnIdx);
662         }
663       }
664       pBIdx++;
665       pBnIdx++;
666     }
667   }
668 
669   return uiBSx4;
670 }
DeblockingAvailableNoInterlayer(PDqLayer pCurDqLayer,int32_t iFilterIdc)671 int32_t DeblockingAvailableNoInterlayer (PDqLayer pCurDqLayer, int32_t iFilterIdc) {
672   int32_t iMbY = pCurDqLayer->iMbY;
673   int32_t iMbX = pCurDqLayer->iMbX;
674   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
675   bool bLeftFlag = false;
676   bool bTopFlag  = false;
677 
678   if (2 == iFilterIdc) {
679     bLeftFlag = (iMbX > 0) && (pCurDqLayer->pSliceIdc[iMbXy] == pCurDqLayer->pSliceIdc[iMbXy - 1]);
680     bTopFlag  = (iMbY > 0) && (pCurDqLayer->pSliceIdc[iMbXy] == pCurDqLayer->pSliceIdc[iMbXy - pCurDqLayer->iMbWidth]);
681   } else { //if ( 0 == iFilterIdc )
682     bLeftFlag = (iMbX > 0);
683     bTopFlag  = (iMbY > 0);
684   }
685   return (bLeftFlag << LEFT_FLAG_BIT) | (bTopFlag << TOP_FLAG_BIT);
686 }
687 
FilteringEdgeLumaH(SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)688 void FilteringEdgeLumaH (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
689   int32_t iIndexA;
690   int32_t iAlpha;
691   int32_t iBeta;
692   ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
693 
694   GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
695                           iBeta);
696 
697   if (iAlpha | iBeta) {
698     TC0_TBL_LOOKUP (tc, iIndexA, pBS, 0);
699     pFilter->pLoopf->pfLumaDeblockingLT4Ver (pPix, iStride, iAlpha, iBeta, tc);
700   }
701   return;
702 }
703 
704 
FilteringEdgeLumaV(SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)705 void FilteringEdgeLumaV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
706   int32_t  iIndexA;
707   int32_t  iAlpha;
708   int32_t  iBeta;
709   ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
710 
711   GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
712                           iBeta);
713 
714   if (iAlpha | iBeta) {
715     TC0_TBL_LOOKUP (tc, iIndexA, pBS, 0);
716     pFilter->pLoopf->pfLumaDeblockingLT4Hor (pPix, iStride, iAlpha, iBeta, tc);
717   }
718   return;
719 }
720 
721 
FilteringEdgeLumaIntraH(SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)722 void FilteringEdgeLumaIntraH (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
723   int32_t iIndexA;
724   int32_t iAlpha;
725   int32_t iBeta;
726 
727   GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
728                           iBeta);
729 
730   if (iAlpha | iBeta) {
731     pFilter->pLoopf->pfLumaDeblockingEQ4Ver (pPix, iStride, iAlpha, iBeta);
732   }
733   return;
734 }
735 
FilteringEdgeLumaIntraV(SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)736 void FilteringEdgeLumaIntraV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
737   int32_t iIndexA;
738   int32_t iAlpha;
739   int32_t iBeta;
740 
741   GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
742                           iBeta);
743 
744   if (iAlpha | iBeta) {
745     pFilter->pLoopf->pfLumaDeblockingEQ4Hor (pPix, iStride, iAlpha, iBeta);
746   }
747   return;
748 }
FilteringEdgeChromaH(SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)749 void FilteringEdgeChromaH (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
750                            uint8_t* pBS) {
751   int32_t iIndexA;
752   int32_t iAlpha;
753   int32_t iBeta;
754   ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
755   if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
756 
757     GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
758                             iBeta);
759 
760     if (iAlpha | iBeta) {
761       TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
762       pFilter->pLoopf->pfChromaDeblockingLT4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta, tc);
763     }
764   } else {
765 
766     for (int i = 0; i < 2; i++) {
767 
768 
769       GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
770                               iBeta);
771 
772       if (iAlpha | iBeta) {
773         uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr;
774         TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
775         pFilter->pLoopf->pfChromaDeblockingLT4Ver2 (pPixCbCr, iStride, iAlpha, iBeta, tc);
776       }
777 
778 
779 
780     }
781 
782   }
783   return;
784 }
FilteringEdgeChromaV(SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)785 void FilteringEdgeChromaV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
786                            uint8_t* pBS) {
787   int32_t iIndexA;
788   int32_t iAlpha;
789   int32_t iBeta;
790   ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
791   if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
792 
793 
794     GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
795                             iBeta);
796 
797     if (iAlpha | iBeta) {
798       TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
799       pFilter->pLoopf->pfChromaDeblockingLT4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta, tc);
800     }
801 
802 
803   } else {
804 
805     for (int i = 0; i < 2; i++) {
806 
807       GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
808                               iBeta);
809 
810       if (iAlpha | iBeta) {
811         uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr;
812         TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
813         pFilter->pLoopf->pfChromaDeblockingLT4Hor2 (pPixCbCr, iStride, iAlpha, iBeta, tc);
814       }
815 
816 
817     }
818   }
819   return;
820 }
821 
FilteringEdgeChromaIntraH(SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)822 void FilteringEdgeChromaIntraH (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
823                                 uint8_t* pBS) {
824   int32_t iIndexA;
825   int32_t iAlpha;
826   int32_t iBeta;
827   if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
828 
829     GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
830                             iBeta);
831 
832     if (iAlpha | iBeta) {
833       pFilter->pLoopf->pfChromaDeblockingEQ4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta);
834     }
835   } else {
836 
837     for (int i = 0; i < 2; i++) {
838 
839       GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
840                               iBeta);
841 
842       if (iAlpha | iBeta) {
843         uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr;
844         pFilter->pLoopf->pfChromaDeblockingEQ4Ver2 (pPixCbCr, iStride, iAlpha, iBeta);
845       }
846 
847     }
848   }
849   return;
850 }
851 
FilteringEdgeChromaIntraV(SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)852 void FilteringEdgeChromaIntraV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
853                                 uint8_t* pBS) {
854   int32_t iIndexA;
855   int32_t iAlpha;
856   int32_t iBeta;
857   if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) { // QP of cb and cr are the same
858 
859 
860 
861 
862     GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
863                             iBeta);
864     if (iAlpha | iBeta) {
865       pFilter->pLoopf->pfChromaDeblockingEQ4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta);
866     }
867   } else {
868 
869     for (int i = 0; i < 2; i++) {
870 
871 
872       GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
873                               iBeta);
874       if (iAlpha | iBeta) {
875         uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr;
876         pFilter->pLoopf->pfChromaDeblockingEQ4Hor2 (pPixCbCr, iStride, iAlpha, iBeta);
877       }
878     }
879 
880   }
881   return;
882 }
883 
884 
DeblockingInterMb(PDqLayer pCurDqLayer,PDeblockingFilter pFilter,uint8_t nBS[2][4][4],int32_t iBoundryFlag)885 static void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, uint8_t nBS[2][4][4],
886                                int32_t iBoundryFlag) {
887   int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
888   int32_t iMbX = pCurDqLayer->iMbX;
889   int32_t iMbY = pCurDqLayer->iMbY;
890 
891   int32_t iCurLumaQp = pCurDqLayer->pLumaQp[iMbXyIndex];
892   //int32_t* iCurChromaQp = pCurDqLayer->pChromaQp[iMbXyIndex];
893   int8_t* pCurChromaQp = pCurDqLayer->pChromaQp[iMbXyIndex];
894   int32_t iLineSize   = pFilter->iCsStride[0];
895   int32_t iLineSizeUV = pFilter->iCsStride[1];
896 
897   uint8_t* pDestY, * pDestCb, * pDestCr;
898   pDestY  = pFilter->pCsData[0] + ((iMbY * iLineSize + iMbX) << 4);
899   pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSizeUV + iMbX) << 3);
900   pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSizeUV + iMbX) << 3);
901 
902   //Vertical margin
903   if (iBoundryFlag & LEFT_FLAG_MASK) {
904     int32_t iLeftXyIndex = iMbXyIndex - 1;
905     pFilter->iLumaQP   = (iCurLumaQp + pCurDqLayer->pLumaQp[iLeftXyIndex] + 1) >> 1;
906     for (int i = 0; i < 2; i++) {
907       pFilter->iChromaQP[i] = (pCurChromaQp[i] + pCurDqLayer->pChromaQp[iLeftXyIndex][i] + 1) >> 1;
908     }
909     if (nBS[0][0][0] == 0x04) {
910       FilteringEdgeLumaIntraV (pFilter, pDestY, iLineSize, NULL);
911       FilteringEdgeChromaIntraV (pFilter, pDestCb, pDestCr, iLineSizeUV, NULL);
912     } else {
913       if (* (uint32_t*)nBS[0][0] != 0) {
914         FilteringEdgeLumaV (pFilter, pDestY, iLineSize, nBS[0][0]);
915         FilteringEdgeChromaV (pFilter, pDestCb, pDestCr, iLineSizeUV, nBS[0][0]);
916       }
917     }
918   }
919 
920   pFilter->iLumaQP = iCurLumaQp;
921   pFilter->iChromaQP[0] = pCurChromaQp[0];
922   pFilter->iChromaQP[1] = pCurChromaQp[1];
923 
924   if (* (uint32_t*)nBS[0][1] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
925     FilteringEdgeLumaV (pFilter, &pDestY[1 << 2], iLineSize, nBS[0][1]);
926   }
927 
928   if (* (uint32_t*)nBS[0][2] != 0) {
929     FilteringEdgeLumaV (pFilter, &pDestY[2 << 2], iLineSize, nBS[0][2]);
930     FilteringEdgeChromaV (pFilter, &pDestCb[2 << 1], &pDestCr[2 << 1], iLineSizeUV, nBS[0][2]);
931   }
932 
933   if (* (uint32_t*)nBS[0][3] != 0  && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
934     FilteringEdgeLumaV (pFilter, &pDestY[3 << 2], iLineSize, nBS[0][3]);
935   }
936 
937   if (iBoundryFlag & TOP_FLAG_MASK) {
938     int32_t iTopXyIndex = iMbXyIndex - pCurDqLayer->iMbWidth;
939     pFilter->iLumaQP = (iCurLumaQp + pCurDqLayer->pLumaQp[iTopXyIndex] + 1) >> 1;
940     for (int i = 0; i < 2; i++) {
941       pFilter->iChromaQP[i] = (pCurChromaQp[i] + pCurDqLayer->pChromaQp[iTopXyIndex][i] + 1) >> 1;
942     }
943 
944     if (nBS[1][0][0] == 0x04) {
945       FilteringEdgeLumaIntraH (pFilter, pDestY, iLineSize, NULL);
946       FilteringEdgeChromaIntraH (pFilter, pDestCb, pDestCr, iLineSizeUV, NULL);
947     } else {
948       if (* (uint32_t*)nBS[1][0] != 0) {
949         FilteringEdgeLumaH (pFilter, pDestY, iLineSize, nBS[1][0]);
950         FilteringEdgeChromaH (pFilter, pDestCb, pDestCr, iLineSizeUV, nBS[1][0]);
951       }
952     }
953   }
954 
955   pFilter->iLumaQP = iCurLumaQp;
956   pFilter->iChromaQP[0] = pCurChromaQp[0];
957   pFilter->iChromaQP[1] = pCurChromaQp[1];
958 
959   if (* (uint32_t*)nBS[1][1] != 0  && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
960     FilteringEdgeLumaH (pFilter, &pDestY[ (1 << 2)*iLineSize], iLineSize, nBS[1][1]);
961   }
962 
963   if (* (uint32_t*)nBS[1][2] != 0) {
964     FilteringEdgeLumaH (pFilter, &pDestY[ (2 << 2)*iLineSize], iLineSize, nBS[1][2]);
965     FilteringEdgeChromaH (pFilter, &pDestCb[ (2 << 1)*iLineSizeUV], &pDestCr[ (2 << 1)*iLineSizeUV], iLineSizeUV,
966                           nBS[1][2]);
967   }
968 
969   if (* (uint32_t*)nBS[1][3] != 0  && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
970     FilteringEdgeLumaH (pFilter, &pDestY[ (3 << 2)*iLineSize], iLineSize, nBS[1][3]);
971   }
972 }
973 
FilteringEdgeLumaHV(PDqLayer pCurDqLayer,PDeblockingFilter pFilter,int32_t iBoundryFlag)974 void FilteringEdgeLumaHV (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag) {
975   int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
976   int32_t iMbX      = pCurDqLayer->iMbX;
977   int32_t iMbY      = pCurDqLayer->iMbY;
978   int32_t iMbWidth  = pCurDqLayer->iMbWidth;
979   int32_t iLineSize  = pFilter->iCsStride[0];
980 
981   uint8_t*  pDestY;
982   int32_t  iCurQp;
983   int32_t  iIndexA, iAlpha, iBeta;
984 
985   ENFORCE_STACK_ALIGN_1D (int8_t,  iTc,   4, 16);
986   ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
987 
988   pDestY  = pFilter->pCsData[0] + ((iMbY * iLineSize + iMbX) << 4);
989   iCurQp  = pCurDqLayer->pLumaQp[iMbXyIndex];
990 
991   * (uint32_t*)uiBSx4 = 0x03030303;
992 
993   // luma v
994   if (iBoundryFlag & LEFT_FLAG_MASK) {
995     pFilter->iLumaQP   = (iCurQp   + pCurDqLayer->pLumaQp[iMbXyIndex - 1] + 1) >> 1;
996     FilteringEdgeLumaIntraV (pFilter, pDestY, iLineSize, NULL);
997   }
998 
999   pFilter->iLumaQP   = iCurQp;
1000   GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
1001                           iBeta);
1002   if (iAlpha | iBeta) {
1003     TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 0);
1004 
1005     if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
1006       pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc);
1007     }
1008 
1009     pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[2 << 2], iLineSize, iAlpha, iBeta, iTc);
1010 
1011     if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
1012       pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc);
1013     }
1014   }
1015 
1016   // luma h
1017   if (iBoundryFlag & TOP_FLAG_MASK) {
1018     pFilter->iLumaQP   = (iCurQp   + pCurDqLayer->pLumaQp[iMbXyIndex - iMbWidth] + 1) >> 1;
1019     FilteringEdgeLumaIntraH (pFilter, pDestY, iLineSize, NULL);
1020   }
1021 
1022   pFilter->iLumaQP   = iCurQp;
1023   if (iAlpha | iBeta) {
1024     if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
1025       pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
1026     }
1027 
1028     pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (2 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
1029 
1030     if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
1031       pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
1032     }
1033   }
1034 }
FilteringEdgeChromaHV(PDqLayer pCurDqLayer,PDeblockingFilter pFilter,int32_t iBoundryFlag)1035 void FilteringEdgeChromaHV (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag) {
1036   int32_t iMbXyIndex     = pCurDqLayer->iMbXyIndex;
1037   int32_t iMbX      = pCurDqLayer->iMbX;
1038   int32_t iMbY      = pCurDqLayer->iMbY;
1039   int32_t iMbWidth  = pCurDqLayer->iMbWidth;
1040   int32_t iLineSize  = pFilter->iCsStride[1];
1041 
1042   uint8_t* pDestCb;
1043   uint8_t* pDestCr;
1044   //int32_t  iCurQp;
1045   int8_t* pCurQp;
1046   int32_t  iIndexA, iAlpha, iBeta;
1047 
1048   ENFORCE_STACK_ALIGN_1D (int8_t,  iTc,   4, 16);
1049   ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
1050 
1051   pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSize + iMbX) << 3);
1052   pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSize + iMbX) << 3);
1053   pCurQp  = pCurDqLayer->pChromaQp[iMbXyIndex];
1054 
1055   * (uint32_t*)uiBSx4 = 0x03030303;
1056 
1057 
1058 // chroma v
1059   if (iBoundryFlag & LEFT_FLAG_MASK) {
1060 
1061     for (int i = 0; i < 2; i++) {
1062       pFilter->iChromaQP[i] = (pCurQp[i] + pCurDqLayer->pChromaQp[iMbXyIndex - 1][i] + 1) >> 1;
1063 
1064     }
1065     FilteringEdgeChromaIntraV (pFilter, pDestCb, pDestCr, iLineSize, NULL);
1066   }
1067 
1068   pFilter->iChromaQP[0]   = pCurQp[0];
1069   pFilter->iChromaQP[1]   = pCurQp[1];
1070   if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
1071     GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
1072                             iBeta);
1073     if (iAlpha | iBeta) {
1074       TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
1075       pFilter->pLoopf->pfChromaDeblockingLT4Hor (&pDestCb[2 << 1], &pDestCr[2 << 1], iLineSize, iAlpha, iBeta, iTc);
1076     }
1077   } else {
1078 
1079     for (int i = 0; i < 2; i++) {
1080       GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
1081                               iBeta);
1082       if (iAlpha | iBeta) {
1083         uint8_t* pDestCbCr = (i == 0) ? &pDestCb[2 << 1] : &pDestCr[2 << 1];
1084         TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
1085         pFilter->pLoopf->pfChromaDeblockingLT4Hor2 (pDestCbCr, iLineSize, iAlpha, iBeta, iTc);
1086       }
1087 
1088     }
1089   }
1090 
1091   // chroma h
1092 
1093   if (iBoundryFlag & TOP_FLAG_MASK) {
1094     for (int i = 0; i < 2; i++) {
1095       pFilter->iChromaQP[i] = (pCurQp[i] + pCurDqLayer->pChromaQp[iMbXyIndex - iMbWidth][i] + 1) >> 1;
1096     }
1097     FilteringEdgeChromaIntraH (pFilter, pDestCb, pDestCr, iLineSize, NULL);
1098   }
1099 
1100   pFilter->iChromaQP[0]   = pCurQp[0];
1101   pFilter->iChromaQP[1]   = pCurQp[1];
1102 
1103   if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
1104     GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
1105                             iBeta);
1106     if (iAlpha | iBeta) {
1107       TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
1108       pFilter->pLoopf->pfChromaDeblockingLT4Ver (&pDestCb[ (2 << 1)*iLineSize], &pDestCr[ (2 << 1)*iLineSize], iLineSize,
1109           iAlpha, iBeta, iTc);
1110     }
1111   } else {
1112     for (int i = 0; i < 2; i++) {
1113 
1114       GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
1115                               iBeta);
1116       if (iAlpha | iBeta) {
1117         TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
1118         uint8_t* pDestCbCr = (i == 0) ? &pDestCb[ (2 << 1) * iLineSize] : &pDestCr[ (2 << 1) * iLineSize];
1119         pFilter->pLoopf->pfChromaDeblockingLT4Ver2 (pDestCbCr, iLineSize,
1120             iAlpha, iBeta, iTc);
1121       }
1122     }
1123 
1124 
1125   }
1126 }
1127 
1128 // merge h&v lookup table operation to save performance
DeblockingIntraMb(PDqLayer pCurDqLayer,PDeblockingFilter pFilter,int32_t iBoundryFlag)1129 static void DeblockingIntraMb (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag) {
1130   FilteringEdgeLumaHV (pCurDqLayer, pFilter, iBoundryFlag);
1131   FilteringEdgeChromaHV (pCurDqLayer, pFilter, iBoundryFlag);
1132 }
1133 
WelsDeblockingMb(PDqLayer pCurDqLayer,PDeblockingFilter pFilter,int32_t iBoundryFlag)1134 void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag) {
1135   uint8_t nBS[2][4][4] = {{{ 0 }}};
1136 
1137   int32_t iMbXyIndex  = pCurDqLayer->iMbXyIndex;
1138   uint32_t iCurMbType  = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbXyIndex] :
1139                          pCurDqLayer->pMbType[iMbXyIndex];
1140   int32_t iMbNb;
1141 
1142   PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
1143   PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
1144   bool bBSlice = pSliceHeader->eSliceType == B_SLICE;
1145 
1146   switch (iCurMbType) {
1147   case MB_TYPE_INTRA4x4:
1148   case MB_TYPE_INTRA8x8:
1149   case MB_TYPE_INTRA16x16:
1150   case MB_TYPE_INTRA_PCM:
1151     DeblockingIntraMb (pCurDqLayer, pFilter, iBoundryFlag);
1152     break;
1153   default:
1154 
1155     if (iBoundryFlag & LEFT_FLAG_MASK) {
1156       iMbNb = iMbXyIndex - 1;
1157       uint32_t uiMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbNb] : pCurDqLayer->pMbType[iMbNb];
1158       if (bBSlice) {
1159         * (uint32_t*)nBS[0][0] = IS_INTRA (uiMbType) ? 0x04040404 :
1160                                  DeblockingBSliceBsMarginalMBAvcbase (
1161                                    pFilter, pCurDqLayer, 0, iMbNb, iMbXyIndex);
1162       } else {
1163         * (uint32_t*)nBS[0][0] = IS_INTRA (uiMbType) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
1164                                    pFilter, pCurDqLayer, 0, iMbNb, iMbXyIndex);
1165       }
1166     } else {
1167       * (uint32_t*)nBS[0][0] = 0;
1168     }
1169     if (iBoundryFlag & TOP_FLAG_MASK) {
1170       iMbNb = iMbXyIndex - pCurDqLayer->iMbWidth;
1171       uint32_t uiMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbNb] : pCurDqLayer->pMbType[iMbNb];
1172       if (bBSlice) {
1173         * (uint32_t*)nBS[1][0] = IS_INTRA (uiMbType) ? 0x04040404 :
1174                                  DeblockingBSliceBsMarginalMBAvcbase (
1175                                    pFilter, pCurDqLayer, 1, iMbNb, iMbXyIndex);
1176       } else {
1177         * (uint32_t*)nBS[1][0] = IS_INTRA (uiMbType) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
1178                                    pFilter, pCurDqLayer, 1, iMbNb, iMbXyIndex);
1179       }
1180     } else {
1181       * (uint32_t*)nBS[1][0] = 0;
1182     }
1183     //SKIP MB_16x16 or others
1184     if (IS_SKIP (iCurMbType)) {
1185       * (uint32_t*)nBS[0][1] = * (uint32_t*)nBS[0][2] = * (uint32_t*)nBS[0][3] =
1186                                  * (uint32_t*)nBS[1][1] = * (uint32_t*)nBS[1][2] = * (uint32_t*)nBS[1][3] = 0;
1187     } else {
1188       if (IS_INTER_16x16 (iCurMbType)) {
1189         if (!pCurDqLayer->pTransformSize8x8Flag[pCurDqLayer->iMbXyIndex]) {
1190           DeblockingBSInsideMBAvsbase (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1);
1191         } else {
1192           DeblockingBSInsideMBAvsbase8x8 (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1);
1193         }
1194       } else {
1195 
1196         if (bBSlice) {
1197           DeblockingBSliceBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex);
1198         } else {
1199           DeblockingBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex);
1200         }
1201       }
1202     }
1203     DeblockingInterMb (pCurDqLayer, pFilter, nBS, iBoundryFlag);
1204     break;
1205   }
1206 }
1207 
1208 /*!
1209  * \brief   AVC slice deblocking filtering target layer
1210  *
1211  * \param   dec         Wels avc decoder context
1212  *
1213  * \return  NONE
1214  */
WelsDeblockingFilterSlice(PWelsDecoderContext pCtx,PDeblockingFilterMbFunc pDeblockMb)1215 void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFunc pDeblockMb) {
1216   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
1217   PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
1218   int32_t iMbWidth  = pCurDqLayer->iMbWidth;
1219   int32_t iTotalMbCount = pSliceHeaderExt->sSliceHeader.pSps->uiTotalMbCount;
1220 
1221   SDeblockingFilter pFilter;
1222   memset (&pFilter, 0, sizeof (pFilter));
1223   PFmo pFmo = pCtx->pFmo;
1224   int32_t iNextMbXyIndex = 0;
1225   int32_t iTotalNumMb = pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice;
1226   int32_t iCountNumMb = 0;
1227   int32_t iBoundryFlag;
1228   int32_t iFilterIdc = pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc;
1229 
1230   /* Step1: parameters set */
1231   pFilter.pCsData[0] = pCtx->pDec->pData[0];
1232   pFilter.pCsData[1] = pCtx->pDec->pData[1];
1233   pFilter.pCsData[2] = pCtx->pDec->pData[2];
1234 
1235   pFilter.iCsStride[0] = pCtx->pDec->iLinesize[0];
1236   pFilter.iCsStride[1] = pCtx->pDec->iLinesize[1];
1237 
1238   pFilter.eSliceType = (EWelsSliceType) pCurDqLayer->sLayerInfo.sSliceInLayer.eSliceType;
1239 
1240   pFilter.iSliceAlphaC0Offset = pSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset;
1241   pFilter.iSliceBetaOffset     = pSliceHeaderExt->sSliceHeader.iSliceBetaOffset;
1242 
1243   pFilter.pLoopf = &pCtx->sDeblockingFunc;
1244   pFilter.pRefPics[0] = pCtx->sRefPic.pRefList[0];
1245   pFilter.pRefPics[1] = pCtx->sRefPic.pRefList[1];
1246 
1247   /* Step2: macroblock deblocking */
1248   if (0 == iFilterIdc || 2 == iFilterIdc) {
1249     iNextMbXyIndex = pSliceHeaderExt->sSliceHeader.iFirstMbInSlice;
1250     pCurDqLayer->iMbX  = iNextMbXyIndex % iMbWidth;
1251     pCurDqLayer->iMbY  = iNextMbXyIndex / iMbWidth;
1252     pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
1253 
1254     do {
1255       iBoundryFlag = DeblockingAvailableNoInterlayer (pCurDqLayer, iFilterIdc);
1256 
1257       pDeblockMb (pCurDqLayer, &pFilter, iBoundryFlag);
1258 
1259       ++iCountNumMb;
1260       if (iCountNumMb >= iTotalNumMb) {
1261         break;
1262       }
1263 
1264       if (pSliceHeaderExt->sSliceHeader.pPps->uiNumSliceGroups > 1) {
1265         iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
1266       } else {
1267         ++iNextMbXyIndex;
1268       }
1269       if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbCount) { // slice group boundary or end of a frame
1270         break;
1271       }
1272 
1273       pCurDqLayer->iMbX  = iNextMbXyIndex % iMbWidth;
1274       pCurDqLayer->iMbY  = iNextMbXyIndex / iMbWidth;
1275       pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
1276     } while (1);
1277   }
1278 }
1279 
1280 /*!
1281 * \brief   AVC slice init deblocking filtering target layer
1282 *
1283 * \in and out param   SDeblockingFilter
1284 * \in and out param   iFilterIdc
1285 *
1286 * \return  NONE
1287 */
WelsDeblockingInitFilter(PWelsDecoderContext pCtx,SDeblockingFilter & pFilter,int32_t & iFilterIdc)1288 void WelsDeblockingInitFilter (PWelsDecoderContext pCtx, SDeblockingFilter& pFilter, int32_t& iFilterIdc) {
1289   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
1290   PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
1291 
1292   memset (&pFilter, 0, sizeof (pFilter));
1293 
1294   iFilterIdc = pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc;
1295 
1296   /* Step1: parameters set */
1297   pFilter.pCsData[0] = pCtx->pDec->pData[0];
1298   pFilter.pCsData[1] = pCtx->pDec->pData[1];
1299   pFilter.pCsData[2] = pCtx->pDec->pData[2];
1300 
1301   pFilter.iCsStride[0] = pCtx->pDec->iLinesize[0];
1302   pFilter.iCsStride[1] = pCtx->pDec->iLinesize[1];
1303 
1304   pFilter.eSliceType = (EWelsSliceType)pCurDqLayer->sLayerInfo.sSliceInLayer.eSliceType;
1305 
1306   pFilter.iSliceAlphaC0Offset = pSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset;
1307   pFilter.iSliceBetaOffset = pSliceHeaderExt->sSliceHeader.iSliceBetaOffset;
1308 
1309   pFilter.pLoopf = &pCtx->sDeblockingFunc;
1310   pFilter.pRefPics[0] = pCtx->sRefPic.pRefList[0];
1311   pFilter.pRefPics[1] = pCtx->sRefPic.pRefList[1];
1312 }
1313 
1314 /*!
1315 * \brief   AVC MB deblocking filtering target layer
1316 *
1317 * \param   DqLayer which has the current location of MB to be deblocked.
1318 *
1319 * \return  NONE
1320 */
WelsDeblockingFilterMB(PDqLayer pCurDqLayer,SDeblockingFilter & pFilter,int32_t & iFilterIdc,PDeblockingFilterMbFunc pDeblockMb)1321 void WelsDeblockingFilterMB (PDqLayer pCurDqLayer, SDeblockingFilter& pFilter, int32_t& iFilterIdc,
1322                              PDeblockingFilterMbFunc pDeblockMb) {
1323   /* macroblock deblocking */
1324   if (0 == iFilterIdc || 2 == iFilterIdc) {
1325     int32_t iBoundryFlag = DeblockingAvailableNoInterlayer (pCurDqLayer, iFilterIdc);
1326     pDeblockMb (pCurDqLayer, &pFilter, iBoundryFlag);
1327   }
1328 }
1329 /*!
1330  * \brief   deblocking module initialize
1331  *
1332  * \param   pf
1333  *          cpu
1334  *
1335  * \return  NONE
1336  */
1337 
DeblockingInit(SDeblockingFunc * pFunc,int32_t iCpu)1338 void  DeblockingInit (SDeblockingFunc*  pFunc,  int32_t iCpu) {
1339   pFunc->pfLumaDeblockingLT4Ver     = DeblockLumaLt4V_c;
1340   pFunc->pfLumaDeblockingEQ4Ver     = DeblockLumaEq4V_c;
1341   pFunc->pfLumaDeblockingLT4Hor     = DeblockLumaLt4H_c;
1342   pFunc->pfLumaDeblockingEQ4Hor     = DeblockLumaEq4H_c;
1343 
1344   pFunc->pfChromaDeblockingLT4Ver   = DeblockChromaLt4V_c;
1345   pFunc->pfChromaDeblockingEQ4Ver   = DeblockChromaEq4V_c;
1346   pFunc->pfChromaDeblockingLT4Hor   = DeblockChromaLt4H_c;
1347   pFunc->pfChromaDeblockingEQ4Hor   = DeblockChromaEq4H_c;
1348 
1349   pFunc->pfChromaDeblockingLT4Ver2  = DeblockChromaLt4V2_c;
1350   pFunc->pfChromaDeblockingEQ4Ver2  = DeblockChromaEq4V2_c;
1351   pFunc->pfChromaDeblockingLT4Hor2  = DeblockChromaLt4H2_c;
1352   pFunc->pfChromaDeblockingEQ4Hor2  = DeblockChromaEq4H2_c;
1353 
1354 #ifdef X86_ASM
1355   if (iCpu & WELS_CPU_SSSE3) {
1356     pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_ssse3;
1357     pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_ssse3;
1358     pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_ssse3;
1359     pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_ssse3;
1360     pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_ssse3;
1361     pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_ssse3;
1362     pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_ssse3;
1363     pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_ssse3;
1364   }
1365 #endif
1366 
1367 #if defined(HAVE_NEON)
1368   if (iCpu & WELS_CPU_NEON) {
1369     pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_neon;
1370     pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_neon;
1371     pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_neon;
1372     pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_neon;
1373 
1374     pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_neon;
1375     pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_neon;
1376     pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_neon;
1377     pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_neon;
1378   }
1379 #endif
1380 
1381 #if defined(HAVE_NEON_AARCH64)
1382   if (iCpu & WELS_CPU_NEON) {
1383     pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_AArch64_neon;
1384     pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_AArch64_neon;
1385     pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_AArch64_neon;
1386     pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_AArch64_neon;
1387 
1388     pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_AArch64_neon;
1389     pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_AArch64_neon;
1390     pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_AArch64_neon;
1391     pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_AArch64_neon;
1392   }
1393 #endif
1394 
1395 #if defined(HAVE_MMI)
1396   if (iCpu & WELS_CPU_MMI) {
1397     pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_mmi;
1398     pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_mmi;
1399     pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_mmi;
1400     pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_mmi;
1401     pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_mmi;
1402     pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_mmi;
1403     pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_mmi;
1404     pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_mmi;
1405   }
1406 #endif//HAVE_MMI
1407 
1408 #if defined(HAVE_MSA)
1409   if (iCpu & WELS_CPU_MSA) {
1410     pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_msa;
1411     pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_msa;
1412     pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_msa;
1413     pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_msa;
1414     pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_msa;
1415     pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_msa;
1416     pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_msa;
1417     pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_msa;
1418   }
1419 #endif//HAVE_MSA
1420 }
1421 
1422 } // namespace WelsDec
1423