1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevc_deblk.c
22 *
23 * @brief
24 * Contains definition for the ctb level deblk function
25 *
26 * @author
27 * Srinivas T
28 *
29 * @par List of Functions:
 * - ihevcd_deblk_ctb()
31 *
32 * @remarks
33 * None
34 *
35 *******************************************************************************
36 */
37
38 #include <stdio.h>
39 #include <stddef.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <assert.h>
43
44 #include "ihevc_typedefs.h"
45 #include "iv.h"
46 #include "ivd.h"
47 #include "ihevcd_cxa.h"
48 #include "ithread.h"
49
50 #include "ihevc_defs.h"
51 #include "ihevc_debug.h"
52 #include "ihevc_defs.h"
53 #include "ihevc_structs.h"
54 #include "ihevc_macros.h"
55 #include "ihevc_platform_macros.h"
56 #include "ihevc_cabac_tables.h"
57
58 #include "ihevc_error.h"
59 #include "ihevc_common_tables.h"
60
61 #include "ihevcd_trace.h"
62 #include "ihevcd_defs.h"
63 #include "ihevcd_function_selector.h"
64 #include "ihevcd_structs.h"
65 #include "ihevcd_error.h"
66 #include "ihevcd_nal.h"
67 #include "ihevcd_bitstream.h"
68 #include "ihevcd_job_queue.h"
69 #include "ihevcd_utils.h"
70 #include "ihevcd_debug.h"
71
72 #include "ihevc_deblk.h"
73 #include "ihevc_deblk_tables.h"
74 #include "ihevcd_profile.h"
75 /**
76 *******************************************************************************
77 *
78 * @brief
79 * Deblock CTB level function.
80 *
81 * @par Description:
82 * For a given CTB, deblocking on both vertical and
83 * horizontal edges is done. Both the luma and chroma
84 * blocks are processed
85 *
86 * @param[in] ps_deblk
87 * Pointer to the deblock context
88 *
89 * @returns
90 *
91 * @remarks
92 * None
93 *
94 *******************************************************************************
95 */
96
ihevcd_deblk_ctb(deblk_ctxt_t * ps_deblk,WORD32 i4_is_last_ctb_x,WORD32 i4_is_last_ctb_y)97 void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk,
98 WORD32 i4_is_last_ctb_x,
99 WORD32 i4_is_last_ctb_y)
100 {
101 WORD32 ctb_size;
102 WORD32 log2_ctb_size;
103 UWORD32 u4_bs;
104 WORD32 bs_tz; /*Leading zeros in boundary strength*/
105 WORD32 qp_p, qp_q;
106
107 WORD32 filter_p, filter_q;
108
109 UWORD8 *pu1_src;
110 WORD32 qp_strd;
111 UWORD32 *pu4_vert_bs, *pu4_horz_bs;
112 UWORD32 *pu4_ctb_vert_bs, *pu4_ctb_horz_bs;
113 WORD32 bs_strd;
114 WORD32 src_strd;
115 UWORD8 *pu1_qp;
116 UWORD16 *pu2_ctb_no_loop_filter_flag;
117 UWORD16 au2_ctb_no_loop_filter_flag[9];
118
119 WORD32 col, row;
120
121 /* Flag to indicate if QP is constant in CTB
122 * 0 - top_left, 1 - top, 2 - left, 3 - current */
123 UWORD32 u4_qp_const_in_ctb[4] = { 0, 0, 0, 0 };
124 WORD32 ctb_indx;
125 WORD32 chroma_yuv420sp_vu = ps_deblk->is_chroma_yuv420sp_vu;
126 sps_t *ps_sps;
127 pps_t *ps_pps;
128 codec_t *ps_codec;
129 slice_header_t *ps_slice_hdr;
130
131 PROFILE_DISABLE_DEBLK();
132
133 ps_sps = ps_deblk->ps_sps;
134 ps_pps = ps_deblk->ps_pps;
135 ps_codec = ps_deblk->ps_codec;
136 ps_slice_hdr = ps_deblk->ps_slice_hdr;
137
138 log2_ctb_size = ps_sps->i1_log2_ctb_size;
139 ctb_size = (1 << ps_sps->i1_log2_ctb_size);
140
141 /* strides are in units of number of bytes */
142 /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
143 bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
144
145 pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_vert_bs +
146 (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
147 ps_deblk->i4_ctb_y * bs_strd);
148 pu4_ctb_vert_bs = pu4_vert_bs;
149
150 pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_horz_bs +
151 (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
152 ps_deblk->i4_ctb_y * bs_strd);
153 pu4_ctb_horz_bs = pu4_horz_bs;
154
155 qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
156 pu1_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
157
158 pu2_ctb_no_loop_filter_flag = ps_deblk->au2_ctb_no_loop_filter_flag;
159
160 ctb_indx = ps_deblk->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_deblk->i4_ctb_y;
161 if(i4_is_last_ctb_y)
162 {
163 pu4_vert_bs = (UWORD32 *)((UWORD8 *)pu4_vert_bs + bs_strd);
164 pu4_ctb_vert_bs = pu4_vert_bs;
165 /* ctb_size/8 is the number of edges per CTB
166 * ctb_size/4 is the number of BS values needed per edge
167 * divided by 8 for the number of bytes
168 * 2 is the number of bits needed for each BS value */
169 memset(pu4_vert_bs, 0, 1 << (2 * log2_ctb_size - 7));
170
171 pu1_qp += (qp_strd << (log2_ctb_size - 3));
172 pu2_ctb_no_loop_filter_flag += (ctb_size >> 3);
173 ctb_indx += ps_sps->i2_pic_wd_in_ctb;
174 }
175
176 if(i4_is_last_ctb_x)
177 {
178 pu4_horz_bs = (UWORD32 *)((UWORD8 *)pu4_horz_bs + (1 << (2 * log2_ctb_size - 7)));
179 pu4_ctb_horz_bs = pu4_horz_bs;
180 memset(pu4_horz_bs, 0, 1 << (2 * log2_ctb_size - 7));
181
182 pu1_qp += (ctb_size >> 3);
183
184 for(row = 0; row < (ctb_size >> 3) + 1; row++)
185 au2_ctb_no_loop_filter_flag[row] = ps_deblk->au2_ctb_no_loop_filter_flag[row] >> (ctb_size >> 3);
186 pu2_ctb_no_loop_filter_flag = au2_ctb_no_loop_filter_flag;
187 ctb_indx += 1;
188 }
189
190 u4_qp_const_in_ctb[3] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx) >> 3] & (1 << (ctb_indx & 7));
191
192 if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
193 {
194 u4_qp_const_in_ctb[2] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - 1) >> 3] & (1 << ((ctb_indx - 1) & 7));
195 }
196
197 if((ps_deblk->i4_ctb_x || i4_is_last_ctb_x) && (ps_deblk->i4_ctb_y || i4_is_last_ctb_y))
198 {
199 u4_qp_const_in_ctb[0] =
200 ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) >> 3] &
201 (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) & 7));
202 }
203
204
205
206 if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
207 {
208 u4_qp_const_in_ctb[1] =
209 ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb) >> 3] &
210 (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb) & 7));
211 }
212
213 src_strd = ps_codec->i4_strd;
214
215 /* Luma Vertical Edge */
216
217 if(0 == i4_is_last_ctb_x)
218 {
219 /* Top CTB's slice header */
220 slice_header_t *ps_slice_hdr_top;
221 {
222 WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
223 if(i4_is_last_ctb_y)
224 cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
225 ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
226 }
227
228 pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << (log2_ctb_size));
229 pu1_src += i4_is_last_ctb_y ? ps_deblk->ps_codec->i4_strd << log2_ctb_size : 0;
230
231 /** Deblocking is done on a shifted CTB -
232 * Vertical edge processing is done by shifting the CTB up by four pixels */
233 pu1_src -= 4 * src_strd;
234
235 for(col = 0; col < ctb_size / 8; col++)
236 {
237 WORD32 shift = 0;
238
239 /* downshift vert_bs by ctb_size/2 for each column
240 * shift = (col & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1);
241 * which will reduce to the following assuming ctb size is one of 16, 32 and 64
242 * and deblocking is done on 8x8 grid
243 */
244 if(6 != log2_ctb_size)
245 shift = (col & 1) << (log2_ctb_size - 1);
246
247 /* BS for the column - Last row is excluded and the top row is included*/
248 u4_bs = (pu4_vert_bs[0] >> shift) << 2;
249
250 if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
251 {
252 /* Picking the last BS of the previous CTB corresponding to the same column */
253 UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
254 UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
255 u4_bs |= u4_top_bs & 3;
256 }
257
258 for(row = 0; row < ctb_size / 4;)
259 {
260 WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
261 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
262
263 /* Trailing zeros are computed and the corresponding rows are not processed */
264 bs_tz = CTZ(u4_bs) >> 1;
265 if(0 != bs_tz)
266 {
267 u4_bs = u4_bs >> (bs_tz << 1);
268 if((row + bs_tz) >= (ctb_size / 4))
269 pu1_src += 4 * (ctb_size / 4 - row) * src_strd;
270 else
271 pu1_src += 4 * bs_tz * src_strd;
272
273 row += bs_tz;
274 continue;
275 }
276
277 if(0 == row)
278 {
279 i1_beta_offset_div2 = ps_slice_hdr_top->i1_beta_offset_div2;
280 i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
281
282 if(0 == col)
283 {
284 qp_p = u4_qp_const_in_ctb[0] ?
285 pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
286 pu1_qp[-qp_strd - 1];
287 }
288 else
289 {
290 qp_p = u4_qp_const_in_ctb[1] ?
291 pu1_qp[-ctb_size / 8 * qp_strd] :
292 pu1_qp[col - 1 - qp_strd];
293 }
294
295 qp_q = u4_qp_const_in_ctb[1] ?
296 pu1_qp[-ctb_size / 8 * qp_strd] :
297 pu1_qp[col - qp_strd];
298 }
299 else
300 {
301 if(0 == col)
302 {
303 qp_p = u4_qp_const_in_ctb[2] ?
304 pu1_qp[-ctb_size / 8] :
305 pu1_qp[((row - 1) >> 1) * qp_strd - 1];
306 }
307 else
308 {
309 qp_p = u4_qp_const_in_ctb[3] ?
310 pu1_qp[0] :
311 pu1_qp[((row - 1) >> 1) * qp_strd + col - 1];
312 }
313
314 qp_q = u4_qp_const_in_ctb[3] ?
315 pu1_qp[0] :
316 pu1_qp[((row - 1) >> 1) * qp_strd + col];
317 }
318
319 filter_p = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 1;
320 filter_q = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 2;
321 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
322 filter_p = !filter_p;
323 filter_q = !filter_q;
324
325 if(filter_p || filter_q)
326 {
327 DUMP_DEBLK_LUMA_VERT(pu1_src, src_strd,
328 u4_bs & 3, qp_p, qp_q,
329 ps_slice_hdr->i1_beta_offset_div2,
330 ps_slice_hdr->i1_tc_offset_div2,
331 filter_p, filter_q);
332 ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr(pu1_src, src_strd,
333 u4_bs & 3, qp_p, qp_q,
334 i1_beta_offset_div2,
335 i1_tc_offset_div2,
336 filter_p, filter_q);
337 }
338
339 pu1_src += 4 * src_strd;
340 u4_bs = u4_bs >> 2;
341 row++;
342 }
343
344 if((64 == ctb_size) ||
345 ((32 == ctb_size) && (col & 1)))
346 {
347 pu4_vert_bs++;
348 }
349 pu1_src -= (src_strd << log2_ctb_size);
350 pu1_src += 8;
351 }
352 pu4_vert_bs = pu4_ctb_vert_bs;
353 }
354
355
356 /* Luma Horizontal Edge */
357
358 if(0 == i4_is_last_ctb_y)
359 {
360
361 /* Left CTB's slice header */
362 slice_header_t *ps_slice_hdr_left;
363 {
364 WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
365 if(i4_is_last_ctb_x)
366 cur_ctb_indx += 1;
367 ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
368 }
369 pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << log2_ctb_size);
370 pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
371
372 /** Deblocking is done on a shifted CTB -
373 * Horizontal edge processing is done by shifting the CTB left by four pixels */
374 pu1_src -= 4;
375 for(row = 0; row < ctb_size / 8; row++)
376 {
377 WORD32 shift = 0;
378
379 /* downshift vert_bs by ctb_size/2 for each column
380 * shift = (row & (MAX_CTB_SIZE / ctb_size - 1)) * ctb_size / 2;
381 * which will reduce to the following assuming ctb size is one of 16, 32 and 64
382 * and deblocking is done on 8x8 grid
383 */
384 if(6 != log2_ctb_size)
385 shift = (row & 1) << (log2_ctb_size - 1);
386
387 /* BS for the row - Last column is excluded and the left column is included*/
388 u4_bs = (pu4_horz_bs[0] >> shift) << 2;
389
390 if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
391 {
392 /** Picking the last BS of the previous CTB corresponding to the same row
393 * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
394 */
395 UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
396 UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
397 u4_bs |= u4_left_bs & 3;
398 }
399
400 for(col = 0; col < ctb_size / 4;)
401 {
402 WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
403 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
404
405 bs_tz = CTZ(u4_bs) >> 1;
406 if(0 != bs_tz)
407 {
408 u4_bs = u4_bs >> (bs_tz << 1);
409
410 if((col + bs_tz) >= (ctb_size / 4))
411 pu1_src += 4 * (ctb_size / 4 - col);
412 else
413 pu1_src += 4 * bs_tz;
414
415 col += bs_tz;
416 continue;
417 }
418
419 if(0 == col)
420 {
421 i1_beta_offset_div2 = ps_slice_hdr_left->i1_beta_offset_div2;
422 i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
423
424 if(0 == row)
425 {
426 qp_p = u4_qp_const_in_ctb[0] ?
427 pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
428 pu1_qp[-qp_strd - 1];
429 }
430 else
431 {
432 qp_p = u4_qp_const_in_ctb[2] ?
433 pu1_qp[-ctb_size / 8] :
434 pu1_qp[(row - 1) * qp_strd - 1];
435 }
436
437 qp_q = u4_qp_const_in_ctb[2] ?
438 pu1_qp[-ctb_size / 8] :
439 pu1_qp[row * qp_strd - 1];
440 }
441 else
442 {
443 if(0 == row)
444 {
445 qp_p = u4_qp_const_in_ctb[1] ?
446 pu1_qp[-ctb_size / 8 * qp_strd] :
447 pu1_qp[((col - 1) >> 1) - qp_strd];
448 }
449 else
450 {
451 qp_p = u4_qp_const_in_ctb[3] ?
452 pu1_qp[0] :
453 pu1_qp[((col - 1) >> 1) + (row - 1) * qp_strd];
454 }
455
456 qp_q = u4_qp_const_in_ctb[3] ?
457 pu1_qp[0] :
458 pu1_qp[((col - 1) >> 1) + row * qp_strd];
459 }
460
461 filter_p = (pu2_ctb_no_loop_filter_flag[row] >> ((col + 1) >> 1)) & 1;
462 filter_q = (pu2_ctb_no_loop_filter_flag[row + 1] >> ((col + 1) >> 1)) & 1;
463 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
464 filter_p = !filter_p;
465 filter_q = !filter_q;
466
467 if(filter_p || filter_q)
468 {
469 DUMP_DEBLK_LUMA_HORZ(pu1_src, src_strd,
470 u4_bs & 3, qp_p, qp_q,
471 ps_slice_hdr->i1_beta_offset_div2,
472 ps_slice_hdr->i1_tc_offset_div2,
473 filter_p, filter_q);
474 ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr(pu1_src, src_strd,
475 u4_bs & 3, qp_p, qp_q,
476 i1_beta_offset_div2,
477 i1_tc_offset_div2, filter_p, filter_q);
478 }
479
480 pu1_src += 4;
481 u4_bs = u4_bs >> 2;
482 col++;
483 }
484
485 if((64 == ctb_size) ||
486 ((32 == ctb_size) && (row & 1)))
487 {
488 pu4_horz_bs++;
489 }
490 pu1_src -= ctb_size;
491 pu1_src += (src_strd << 3);
492 }
493 pu4_horz_bs = pu4_ctb_horz_bs;
494 }
495
496
497 /* Chroma Veritcal Edge */
498
499 if(0 == i4_is_last_ctb_x)
500 {
501
502 /* Top CTB's slice header */
503 slice_header_t *ps_slice_hdr_top;
504 {
505 WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
506 if(i4_is_last_ctb_y)
507 cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
508 ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
509 }
510
511 pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
512 pu1_src += i4_is_last_ctb_y ? (ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size : 0;
513
514 /** Deblocking is done on a shifted CTB -
515 * Vertical edge processing is done by shifting the CTB up by four pixels */
516 pu1_src -= 4 * src_strd;
517
518 for(col = 0; col < ctb_size / 16; col++)
519 {
520
521 /* BS for the column - Last row is excluded and the top row is included*/
522 u4_bs = pu4_vert_bs[0] << 2;
523
524 if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
525 {
526 /* Picking the last BS of the previous CTB corresponding to the same column */
527 UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
528 UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> ((1 << (log2_ctb_size - 1)) - 2);
529 u4_bs |= u4_top_bs & 3;
530 }
531
532 /* Every alternate boundary strength value is used for chroma */
533 u4_bs &= 0x22222222;
534
535 for(row = 0; row < ctb_size / 8;)
536 {
537 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
538
539 bs_tz = CTZ(u4_bs) >> 2;
540 if(0 != bs_tz)
541 {
542 if((row + bs_tz) >= (ctb_size / 8))
543 pu1_src += 4 * (ctb_size / 8 - row) * src_strd;
544 else
545 pu1_src += 4 * bs_tz * src_strd;
546 row += bs_tz;
547 u4_bs = u4_bs >> (bs_tz << 2);
548 continue;
549 }
550
551 if(0 == row)
552 {
553 i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
554
555 if(0 == col)
556 {
557 qp_p = u4_qp_const_in_ctb[0] ?
558 pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
559 pu1_qp[-qp_strd - 1];
560 }
561 else
562 {
563 qp_p = u4_qp_const_in_ctb[1] ?
564 pu1_qp[-ctb_size / 8 * qp_strd] :
565 pu1_qp[2 * col - 1 - qp_strd];
566 }
567
568 qp_q = u4_qp_const_in_ctb[1] ?
569 pu1_qp[-ctb_size / 8 * qp_strd] :
570 pu1_qp[2 * col - qp_strd];
571 }
572 else
573 {
574 if(0 == col)
575 {
576 qp_p = u4_qp_const_in_ctb[2] ?
577 pu1_qp[-ctb_size / 8] :
578 pu1_qp[(row - 1) * qp_strd - 1];
579 }
580 else
581 {
582 qp_p = u4_qp_const_in_ctb[3] ?
583 pu1_qp[0] :
584 pu1_qp[(row - 1) * qp_strd + 2 * col - 1];
585 }
586
587 qp_q = u4_qp_const_in_ctb[3] ?
588 pu1_qp[0] :
589 pu1_qp[(row - 1) * qp_strd + 2 * col];
590 }
591
592 filter_p = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 1;
593 filter_q = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 2;
594 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
595 filter_p = !filter_p;
596 filter_q = !filter_q;
597
598 if(filter_p || filter_q)
599 {
600 ASSERT(1 == ((u4_bs & 3) >> 1));
601 DUMP_DEBLK_CHROMA_VERT(pu1_src, src_strd,
602 u4_bs & 3, qp_p, qp_q,
603 ps_pps->i1_pic_cb_qp_offset,
604 ps_pps->i1_pic_cr_qp_offset,
605 ps_slice_hdr->i1_tc_offset_div2,
606 filter_p, filter_q);
607 if(chroma_yuv420sp_vu)
608 {
609 ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
610 src_strd,
611 qp_q,
612 qp_p,
613 ps_pps->i1_pic_cr_qp_offset,
614 ps_pps->i1_pic_cb_qp_offset,
615 i1_tc_offset_div2,
616 filter_q,
617 filter_p);
618 }
619 else
620 {
621 ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
622 src_strd,
623 qp_p,
624 qp_q,
625 ps_pps->i1_pic_cb_qp_offset,
626 ps_pps->i1_pic_cr_qp_offset,
627 i1_tc_offset_div2,
628 filter_p,
629 filter_q);
630 }
631 }
632
633 pu1_src += 4 * src_strd;
634 u4_bs = u4_bs >> 4;
635 row++;
636 }
637
638 pu4_vert_bs += (64 == ctb_size) ? 2 : 1;
639 pu1_src -= ((src_strd / 2) << log2_ctb_size);
640 pu1_src += 16;
641 }
642 }
643
644 /* Chroma Horizontal Edge */
645
646 if(0 == i4_is_last_ctb_y)
647 {
648
649 /* Left CTB's slice header */
650 slice_header_t *ps_slice_hdr_left;
651 {
652 WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
653 if(i4_is_last_ctb_x)
654 cur_ctb_indx += 1;
655 ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
656 }
657
658 pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
659 pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
660
661 /** Deblocking is done on a shifted CTB -
662 * Vertical edge processing is done by shifting the CTB up by four pixels (8 here beacuse UV are interleaved) */
663 pu1_src -= 8;
664 for(row = 0; row < ctb_size / 16; row++)
665 {
666 /* BS for the row - Last column is excluded and the left column is included*/
667 u4_bs = pu4_horz_bs[0] << 2;
668
669 if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
670 {
671 /** Picking the last BS of the previous CTB corresponding to the same row
672 * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
673 */
674 UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
675 UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> ((1 << (log2_ctb_size - 1)) - 2);
676 u4_bs |= u4_left_bs & 3;
677 }
678
679 /* Every alternate boundary strength value is used for chroma */
680 u4_bs &= 0x22222222;
681
682 for(col = 0; col < ctb_size / 8;)
683 {
684 WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
685
686 bs_tz = CTZ(u4_bs) >> 2;
687 if(0 != bs_tz)
688 {
689 u4_bs = u4_bs >> (bs_tz << 2);
690
691 if((col + bs_tz) >= (ctb_size / 8))
692 pu1_src += 8 * (ctb_size / 8 - col);
693 else
694 pu1_src += 8 * bs_tz;
695
696 col += bs_tz;
697 continue;
698 }
699
700 if(0 == col)
701 {
702 i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
703
704 if(0 == row)
705 {
706 qp_p = u4_qp_const_in_ctb[0] ?
707 pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
708 pu1_qp[-qp_strd - 1];
709 }
710 else
711 {
712 qp_p = u4_qp_const_in_ctb[2] ?
713 pu1_qp[-ctb_size / 8] :
714 pu1_qp[(2 * row - 1) * qp_strd - 1];
715 }
716
717 qp_q = u4_qp_const_in_ctb[2] ?
718 pu1_qp[-ctb_size / 8] :
719 pu1_qp[(2 * row) * qp_strd - 1];
720 }
721 else
722 {
723 if(0 == row)
724 {
725 qp_p = u4_qp_const_in_ctb[1] ?
726 pu1_qp[-ctb_size / 8 * qp_strd] :
727 pu1_qp[col - 1 - qp_strd];
728 }
729 else
730 {
731 qp_p = u4_qp_const_in_ctb[3] ?
732 pu1_qp[0] :
733 pu1_qp[(col - 1) + (2 * row - 1) * qp_strd];
734 }
735
736 qp_q = u4_qp_const_in_ctb[3] ?
737 pu1_qp[0] :
738 pu1_qp[(col - 1) + 2 * row * qp_strd];
739 }
740
741 filter_p = (pu2_ctb_no_loop_filter_flag[row << 1] >> col) & 1;
742 filter_q = (pu2_ctb_no_loop_filter_flag[(row << 1) + 1] >> col) & 1;
743 /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
744 filter_p = !filter_p;
745 filter_q = !filter_q;
746
747 if(filter_p || filter_q)
748 {
749 ASSERT(1 == ((u4_bs & 3) >> 1));
750 DUMP_DEBLK_CHROMA_HORZ(pu1_src, src_strd,
751 u4_bs & 3, qp_p, qp_q,
752 ps_pps->i1_pic_cb_qp_offset,
753 ps_pps->i1_pic_cr_qp_offset,
754 ps_slice_hdr->i1_tc_offset_div2,
755 filter_p, filter_q);
756 if(chroma_yuv420sp_vu)
757 {
758 ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
759 src_strd,
760 qp_q,
761 qp_p,
762 ps_pps->i1_pic_cr_qp_offset,
763 ps_pps->i1_pic_cb_qp_offset,
764 i1_tc_offset_div2,
765 filter_q,
766 filter_p);
767 }
768 else
769 {
770 ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
771 src_strd,
772 qp_p,
773 qp_q,
774 ps_pps->i1_pic_cb_qp_offset,
775 ps_pps->i1_pic_cr_qp_offset,
776 i1_tc_offset_div2,
777 filter_p,
778 filter_q);
779 }
780 }
781
782 pu1_src += 8;
783 u4_bs = u4_bs >> 4;
784 col++;
785 }
786
787 pu4_horz_bs += (64 == ctb_size) ? 2 : 1;
788 pu1_src -= ctb_size;
789 pu1_src += 8 * src_strd;
790
791 }
792 }
793 }
794