1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevc_boundary_strength.c
22 *
23 * @brief
24 * Contains functions for computing boundary strength
25 *
26 * @author
27 * Harish
28 *
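* @par List of Functions:
*  - ihevcd_pu_boundary_strength()
*  - ihevcd_ctb_boundary_strength_islice()
*  - ihevcd_ctb_boundary_strength_pbslice()
*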
31 * @remarks
32 * None
33 *
34 *******************************************************************************
35 */
36 /*****************************************************************************/
37 /* File Includes */
38 /*****************************************************************************/
39 #include <stdio.h>
40 #include <stddef.h>
41 #include <stdlib.h>
42 #include <string.h>
43
44 #include "ihevc_typedefs.h"
45 #include "iv.h"
46 #include "ivd.h"
47 #include "ihevcd_cxa.h"
48 #include "ithread.h"
49
50 #include "ihevc_defs.h"
51 #include "ihevc_debug.h"
52 #include "ihevc_defs.h"
53 #include "ihevc_structs.h"
54 #include "ihevc_macros.h"
55 #include "ihevc_platform_macros.h"
56 #include "ihevc_cabac_tables.h"
57
58 #include "ihevc_error.h"
59 #include "ihevc_common_tables.h"
60
61 #include "ihevcd_trace.h"
62 #include "ihevcd_defs.h"
63 #include "ihevcd_function_selector.h"
64 #include "ihevcd_structs.h"
65 #include "ihevcd_error.h"
66 #include "ihevcd_nal.h"
67 #include "ihevcd_bitstream.h"
68 #include "ihevcd_job_queue.h"
69 #include "ihevcd_utils.h"
70 #include "ihevcd_profile.h"
71
72 /*****************************************************************************/
73 /* Function Prototypes */
74 /*****************************************************************************/
75
76
/*
 * Neighbour availability bit-field helpers.
 *
 * Bit layout of `avail` as established by the masks below:
 *   0x10 bottom-left, 0x08 left, 0x04 top-left, 0x02 top, 0x01 top-right.
 *
 * `avail` must be a modifiable lvalue. The argument is parenthesised so that
 * an expression such as *ptr or arr[i] is always treated as one operand.
 * The trailing semicolon inside each expansion is kept on purpose: existing
 * call sites may be written with or without their own semicolon.
 */
#define SET_NGBHR_ALL_AVAIL(avail)          (avail) = 0x1F;

#define SET_NGBHR_BOTLEFT_NOTAVAIL(avail)   (avail) &= ~0x10;
#define SET_NGBHR_LEFT_NOTAVAIL(avail)      (avail) &= ~0x8;
#define SET_NGBHR_TOPLEFT_NOTAVAIL(avail)   (avail) &= ~0x4;
#define SET_NGBHR_TOP_NOTAVAIL(avail)       (avail) &= ~0x2;
#define SET_NGBHR_TOPRIGHT_NOTAVAIL(avail)  (avail) &= ~0x1;
84
ihevcd_pu_boundary_strength(pu_t * ps_pu,pu_t * ps_ngbr_pu)85 WORD32 ihevcd_pu_boundary_strength(pu_t *ps_pu,
86 pu_t *ps_ngbr_pu)
87 {
88 WORD32 i4_bs;
89 UWORD32 l0_ref_pic_buf_id, l1_ref_pic_buf_id;
90 UWORD32 ngbr_l0_ref_pic_buf_id, ngbr_l1_ref_pic_buf_id;
91
92 WORD16 i2_mv_x0, i2_mv_y0, i2_mv_x1, i2_mv_y1;
93 WORD16 i2_ngbr_mv_x0, i2_ngbr_mv_y0, i2_ngbr_mv_x1, i2_ngbr_mv_y1;
94
95 WORD32 num_mv, ngbr_num_mv;
96
97 num_mv = (PRED_BI == ps_pu->b2_pred_mode) ? 2 : 1;
98 ngbr_num_mv = (PRED_BI == ps_ngbr_pu->b2_pred_mode) ? 2 : 1;
99
100 l0_ref_pic_buf_id = ps_pu->mv.i1_l0_ref_pic_buf_id;
101 l1_ref_pic_buf_id = ps_pu->mv.i1_l1_ref_pic_buf_id;
102 ngbr_l0_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l0_ref_pic_buf_id;
103 ngbr_l1_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l1_ref_pic_buf_id;
104
105
106 i2_mv_x0 = ps_pu->mv.s_l0_mv.i2_mvx;
107 i2_mv_y0 = ps_pu->mv.s_l0_mv.i2_mvy;
108 i2_mv_x1 = ps_pu->mv.s_l1_mv.i2_mvx;
109 i2_mv_y1 = ps_pu->mv.s_l1_mv.i2_mvy;
110
111 i2_ngbr_mv_x0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvx;
112 i2_ngbr_mv_y0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvy;
113 i2_ngbr_mv_x1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvx;
114 i2_ngbr_mv_y1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvy;
115
116
117 /* If two motion vectors are used */
118 if((2 == num_mv) &&
119 (2 == ngbr_num_mv))
120 {
121 if((l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id) ||
122 (l0_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id))
123 {
124 if(l0_ref_pic_buf_id != l1_ref_pic_buf_id) /* Different L0 and L1 */
125 {
126 if(l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id)
127 {
128 i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x0) < 4) &&
129 (ABS(i2_mv_y0 - i2_ngbr_mv_y0) < 4) &&
130 (ABS(i2_mv_x1 - i2_ngbr_mv_x1) < 4) &&
131 (ABS(i2_mv_y1 - i2_ngbr_mv_y1) < 4) ? 0 : 1;
132 }
133 else
134 {
135 i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x1) < 4) &&
136 (ABS(i2_mv_y0 - i2_ngbr_mv_y1) < 4) &&
137 (ABS(i2_mv_x1 - i2_ngbr_mv_x0) < 4) &&
138 (ABS(i2_mv_y1 - i2_ngbr_mv_y0) < 4) ? 0 : 1;
139 }
140 }
141 else /* Same L0 and L1 */
142 {
143 i4_bs = ((ABS(i2_mv_x0 - i2_ngbr_mv_x0) >= 4) ||
144 (ABS(i2_mv_y0 - i2_ngbr_mv_y0) >= 4) ||
145 (ABS(i2_mv_x1 - i2_ngbr_mv_x1) >= 4) ||
146 (ABS(i2_mv_y1 - i2_ngbr_mv_y1) >= 4)) &&
147 ((ABS(i2_mv_x0 - i2_ngbr_mv_x1) >= 4) ||
148 (ABS(i2_mv_y0 - i2_ngbr_mv_y1) >= 4) ||
149 (ABS(i2_mv_x1 - i2_ngbr_mv_x0) >= 4) ||
150 (ABS(i2_mv_y1 - i2_ngbr_mv_y0) >= 4)) ? 1 : 0;
151 }
152 }
153 else /* If the reference pictures used are different */
154 {
155 i4_bs = 1;
156 }
157 }
158
159 /* If one motion vector is used in both PUs */
160 else if((1 == num_mv) &&
161 (1 == ngbr_num_mv))
162 {
163 WORD16 i2_mv_x, i2_mv_y;
164 WORD16 i2_ngbr_mv_x, i2_ngbr_mv_y;
165 UWORD32 ref_pic_buf_id, ngbr_ref_pic_buf_id;
166
167 if(PRED_L0 == ps_pu->b2_pred_mode)
168 {
169 i2_mv_x = i2_mv_x0;
170 i2_mv_y = i2_mv_y0;
171 ref_pic_buf_id = l0_ref_pic_buf_id;
172 }
173 else
174 {
175 i2_mv_x = i2_mv_x1;
176 i2_mv_y = i2_mv_y1;
177 ref_pic_buf_id = l1_ref_pic_buf_id;
178 }
179
180 if(PRED_L0 == ps_ngbr_pu->b2_pred_mode)
181 {
182 i2_ngbr_mv_x = i2_ngbr_mv_x0;
183 i2_ngbr_mv_y = i2_ngbr_mv_y0;
184 ngbr_ref_pic_buf_id = ngbr_l0_ref_pic_buf_id;
185 }
186 else
187 {
188 i2_ngbr_mv_x = i2_ngbr_mv_x1;
189 i2_ngbr_mv_y = i2_ngbr_mv_y1;
190 ngbr_ref_pic_buf_id = ngbr_l1_ref_pic_buf_id;
191 }
192
193 i4_bs = (ref_pic_buf_id == ngbr_ref_pic_buf_id) &&
194 (ABS(i2_mv_x - i2_ngbr_mv_x) < 4) &&
195 (ABS(i2_mv_y - i2_ngbr_mv_y) < 4) ? 0 : 1;
196 }
197
198 /* If the no. of motion vectors is not the same */
199 else
200 {
201 i4_bs = 1;
202 }
203
204
205 return i4_bs;
206 }
207
208 /* QP is also populated in the same function */
ihevcd_ctb_boundary_strength_islice(bs_ctxt_t * ps_bs_ctxt)209 WORD32 ihevcd_ctb_boundary_strength_islice(bs_ctxt_t *ps_bs_ctxt)
210 {
211 pps_t *ps_pps;
212 sps_t *ps_sps;
213 tu_t *ps_tu;
214 UWORD32 *pu4_vert_bs;
215 UWORD32 *pu4_horz_bs;
216 WORD32 bs_strd;
217 WORD32 vert_bs0_tmp;
218 WORD32 horz_bs0_tmp;
219 UWORD8 *pu1_qp;
220 WORD32 qp_strd;
221 UWORD32 u4_qp_const_in_ctb;
222 WORD32 ctb_indx;
223 WORD32 i4_tu_cnt;
224 WORD32 log2_ctb_size;
225 WORD32 ctb_size;
226
227 WORD8 i1_loop_filter_across_tiles_enabled_flag;
228 WORD8 i1_loop_filter_across_slices_enabled_flag;
229
230 WORD32 i;
231
232 PROFILE_DISABLE_BOUNDARY_STRENGTH();
233
234 ps_pps = ps_bs_ctxt->ps_pps;
235 ps_sps = ps_bs_ctxt->ps_sps;
236 i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
237 i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
238 i4_tu_cnt = ps_bs_ctxt->i4_ctb_tu_cnt;
239
240 log2_ctb_size = ps_sps->i1_log2_ctb_size;
241 ctb_size = (1 << log2_ctb_size);
242
243 /* strides are in units of number of bytes */
244 /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
245 bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
246
247 pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
248 (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
249 ps_bs_ctxt->i4_ctb_y * bs_strd);
250 pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
251 (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
252 ps_bs_ctxt->i4_ctb_y * bs_strd);
253
254 /* ctb_size/8 elements per CTB */
255 qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
256 pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
257
258 ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
259 u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
260
261 vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
262 horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
263
264 /* ctb_size/8 is the number of edges per CTB
265 * ctb_size/4 is the number of BS values needed per edge
266 * divided by 8 for the number of bytes
267 * 2 is the number of bits needed for each BS value */
268 /*
269 memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) / 8 * 2 );
270 memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2 );
271 */
272 memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + ((ctb_size >> 5) << 1));
273 memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
274
275 /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
276 if(0 != ps_bs_ctxt->i4_ctb_x)
277 {
278 pu4_vert_bs[0] |= vert_bs0_tmp;
279 }
280
281 /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
282 if(0 != ps_bs_ctxt->i4_ctb_y)
283 {
284 pu4_horz_bs[0] |= horz_bs0_tmp;
285 }
286
287 ps_tu = ps_bs_ctxt->ps_tu;
288
289 /* Populating the QP array - if const_qp_in_ctb flag is one, set only the first element */
290 if(u4_qp_const_in_ctb)
291 pu1_qp[0] = ps_tu->b7_qp;
292
293 for(i = 0; i < i4_tu_cnt; i++)
294 {
295 WORD32 start_pos_x;
296 WORD32 start_pos_y;
297 WORD32 tu_size;
298
299
300 UWORD32 u4_bs;
301 ps_tu = ps_bs_ctxt->ps_tu + i;
302
303 /* start_pos_x and start_pos_y are in units of min TU size (4x4) */
304 start_pos_x = ps_tu->b4_pos_x;
305 start_pos_y = ps_tu->b4_pos_y;
306
307 tu_size = 1 << (ps_tu->b3_size + 2);
308 tu_size >>= 2; /* TU size divided by 4 */
309
310 u4_bs = DUP_LSB_10(tu_size);
311
312 /* Only if the current edge falls on 8 pixel grid set BS */
313 if(0 == (start_pos_x & 1))
314 {
315 WORD32 shift;
316 shift = start_pos_y * 2;
317 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
318 * will reduce to the following assuming ctb size is one of 16, 32 and 64
319 * and deblocking is done on 8x8 grid
320 */
321 if(6 != log2_ctb_size)
322 shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
323 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
324 }
325 /* Only if the current edge falls on 8 pixel grid set BS */
326 if(0 == (start_pos_y & 1))
327 {
328 WORD32 shift;
329 shift = start_pos_x * 2;
330 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
331 * will reduce to the following assuming ctb size is one of 16, 32 and 64
332 * and deblocking is done on 8x8 grid
333 */
334 if(6 != log2_ctb_size)
335 shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
336 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
337 }
338
339 /* Populating the QP array */
340 if(0 == u4_qp_const_in_ctb)
341 {
342 if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
343 {
344 WORD32 row, col;
345 for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
346 {
347 for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
348 {
349 pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
350 }
351 }
352 }
353 }
354
355 }
356 {
357 /*Determine if the slice is dependent, and is its left neighbor belongs to the same slice, in a different tile*/
358 UWORD32 ctb_addr;
359 WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
360 /* If left neighbor is not available, then set BS for entire first column to zero */
361 if(!ps_pps->i1_tiles_enabled_flag)
362 {
363 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
364 (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
365 (0 == ps_bs_ctxt->i4_ctb_x))
366 {
367 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
368 }
369 }
370 else
371 {
372 //If across-tiles is disabled
373 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
374 {
375 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
376 }
377 else
378 {
379 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
380 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
381 if(ps_bs_ctxt->i4_ctb_x)
382 {
383 ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
384 left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
385 }
386 /*If the 1st slice in a new tile is a dependent slice*/
387 if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
388 {
389 /* Removed reduntant checks */
390 if((0 == i1_loop_filter_across_slices_enabled_flag && (
391 ((slice_idx != left_slice_idx) && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
392 ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) ||
393 (0 == ps_bs_ctxt->i4_ctb_x))
394 {
395 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
396 }
397 }
398 }
399 }
400
401 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
402 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
403 if(ps_bs_ctxt->i4_ctb_y)
404 {
405 ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
406 top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
407 }
408
409 /* If top neighbor is not available, then set BS for entire first row to zero */
410 /* Removed reduntant checks */
411 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
412 || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
413 || (0 == ps_bs_ctxt->i4_ctb_y))
414 {
415 pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
416 }
417 }
418
419 /**
420 * Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
421 * (They might have been set to non zero values because of CBF of the current CTB)
422 * This block might not be needed for I slices*/
423 {
424 WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
425 WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
426 if(num_rows_remaining < (ctb_size >> 3))
427 {
428 /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
429 * will reduce to the following assuming ctb size is one of 16, 32 and 64
430 * and deblocking is done on 8x8 grid
431 */
432 WORD32 offset;
433 offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
434 if(6 != log2_ctb_size)
435 offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
436
437 memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
438 }
439
440 if(num_cols_remaining < (ctb_size >> 3))
441 {
442 /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
443 * will reduce to the following assuming ctb size is one of 16, 32 and 64
444 * and deblocking is done on 8x8 grid
445 */
446
447 WORD32 offset;
448 offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
449 if(6 != log2_ctb_size)
450 offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
451
452 memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
453 }
454 }
455
456 return 0;
457 }
ihevcd_ctb_boundary_strength_pbslice(bs_ctxt_t * ps_bs_ctxt)458 WORD32 ihevcd_ctb_boundary_strength_pbslice(bs_ctxt_t *ps_bs_ctxt)
459 {
460 sps_t *ps_sps;
461 pps_t *ps_pps;
462 WORD32 cur_ctb_idx, next_ctb_idx = 0;
463 WORD32 i4_tu_cnt;
464 WORD32 i4_pu_cnt;
465 tu_t *ps_tu;
466
467 UWORD32 *pu4_vert_bs;
468 UWORD32 *pu4_horz_bs;
469 WORD32 bs_strd;
470 WORD32 vert_bs0_tmp;
471 WORD32 horz_bs0_tmp;
472 UWORD8 *pu1_qp;
473 WORD32 qp_strd;
474 UWORD32 u4_qp_const_in_ctb;
475 WORD32 ctb_indx;
476 WORD32 log2_ctb_size;
477 WORD32 ctb_size;
478
479 WORD32 i;
480 WORD8 i1_loop_filter_across_tiles_enabled_flag;
481 WORD8 i1_loop_filter_across_slices_enabled_flag;
482
483 PROFILE_DISABLE_BOUNDARY_STRENGTH();
484
485 ps_sps = ps_bs_ctxt->ps_sps;
486 ps_pps = ps_bs_ctxt->ps_pps;
487
488 log2_ctb_size = ps_sps->i1_log2_ctb_size;
489 ctb_size = (1 << log2_ctb_size);
490
491 /* strides are in units of number of bytes */
492 /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
493 bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
494
495 pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
496 (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
497 ps_bs_ctxt->i4_ctb_y * bs_strd);
498 pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
499 (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
500 ps_bs_ctxt->i4_ctb_y * bs_strd);
501
502 vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
503 horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
504
505 ps_tu = ps_bs_ctxt->ps_tu;
506
507 /* ctb_size/8 elements per CTB */
508 qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
509 pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
510
511 ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
512 u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
513
514 i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
515 i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
516
517 /* ctb_size/8 is the number of edges per CTB
518 * ctb_size/4 is the number of BS values needed per edge
519 * divided by 8 for the number of bytes
520 * 2 is the number of bits needed for each BS value */
521 /*
522 memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) * 2 / 8 );
523 memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) * 2 / 8 );
524 */
525 memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + (ctb_size >> 4));
526 memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
527
528 /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
529 if(0 != ps_bs_ctxt->i4_ctb_x)
530 {
531 pu4_vert_bs[0] |= vert_bs0_tmp;
532 }
533
534 /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
535 if(0 != ps_bs_ctxt->i4_ctb_y)
536 {
537 pu4_horz_bs[0] |= horz_bs0_tmp;
538 }
539 /* pu4_horz_bs[bs_strd / 4] corresponds to pu4_horz_bs[0] of the bottom CTB */
540 *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) = 0;
541
542 cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
543 + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
544 next_ctb_idx = ps_bs_ctxt->i4_next_tu_ctb_cnt;
545 if(1 == ps_bs_ctxt->ps_codec->i4_num_cores)
546 {
547 i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx % RESET_TU_BUF_NCTB];
548 }
549 else
550 {
551 i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx];
552 }
553
554 ps_tu = ps_bs_ctxt->ps_tu;
555 if(u4_qp_const_in_ctb)
556 pu1_qp[0] = ps_tu->b7_qp;
557
558 /* For all TUs in the CTB For left and top edges, check if there are coded coefficients on either sides of the edge */
559 for(i = 0; i < i4_tu_cnt; i++)
560 {
561 WORD32 start_pos_x;
562 WORD32 start_pos_y;
563 WORD32 end_pos_x;
564 WORD32 end_pos_y;
565 WORD32 tu_size;
566 UWORD32 u4_bs;
567 WORD32 intra_flag;
568 UWORD8 *pu1_pic_intra_flag;
569
570 ps_tu = ps_bs_ctxt->ps_tu + i;
571
572 start_pos_x = ps_tu->b4_pos_x;
573 start_pos_y = ps_tu->b4_pos_y;
574
575 tu_size = 1 << (ps_tu->b3_size + 2);
576 tu_size >>= 2;
577
578 end_pos_x = start_pos_x + tu_size;
579 end_pos_y = start_pos_y + tu_size;
580
581 {
582 WORD32 tu_abs_x = (ps_bs_ctxt->i4_ctb_x << log2_ctb_size) + (start_pos_x << 2);
583 WORD32 tu_abs_y = (ps_bs_ctxt->i4_ctb_y << log2_ctb_size) + (start_pos_y << 2);
584
585 WORD32 numbytes_row = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
586
587 pu1_pic_intra_flag = ps_bs_ctxt->ps_codec->pu1_pic_intra_flag;
588 pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
589 pu1_pic_intra_flag += (tu_abs_x >> 6);
590
591 intra_flag = *pu1_pic_intra_flag;
592 intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
593 }
594 if(intra_flag)
595 {
596 u4_bs = DUP_LSB_10(tu_size);
597
598 /* Only if the current edge falls on 8 pixel grid set BS */
599 if(0 == (start_pos_x & 1))
600 {
601 WORD32 shift;
602 shift = start_pos_y * 2;
603 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
604 * will reduce to the following assuming ctb size is one of 16, 32 and 64
605 * and deblocking is done on 8x8 grid
606 */
607 if(6 != log2_ctb_size)
608 shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
609 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
610 }
611 /* Only if the current edge falls on 8 pixel grid set BS */
612 if(0 == (start_pos_y & 1))
613 {
614 WORD32 shift;
615 shift = start_pos_x * 2;
616 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
617 * will reduce to the following assuming ctb size is one of 16, 32 and 64
618 * and deblocking is done on 8x8 grid
619 */
620 if(6 != log2_ctb_size)
621 shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
622 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
623 }
624 }
625
626
627 /* If the current TU is coded then set both top edge and left edge BS to 1 and go to next TU */
628 if(ps_tu->b1_y_cbf)
629 {
630 u4_bs = DUP_LSB_01(tu_size);
631
632 /* Only if the current edge falls on 8 pixel grid set BS */
633 if(0 == (start_pos_x & 1))
634 {
635 WORD32 shift;
636 shift = start_pos_y * 2;
637 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
638 * will reduce to the following assuming ctb size is one of 16, 32 and 64
639 * and deblocking is done on 8x8 grid
640 */
641 if(6 != log2_ctb_size)
642 shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
643 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
644 }
645 /* Only if the current edge falls on 8 pixel grid set BS */
646 if(0 == (start_pos_y & 1))
647 {
648 WORD32 shift;
649 shift = start_pos_x * 2;
650 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
651 * will reduce to the following assuming ctb size is one of 16, 32 and 64
652 * and deblocking is done on 8x8 grid
653 */
654 if(6 != log2_ctb_size)
655 shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
656 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
657 }
658 /* Only if the current edge falls on 8 pixel grid set BS */
659 if(0 == (end_pos_x & 1))
660 {
661 if(!(ctb_size / 8 == (end_pos_x >> 1) && ps_bs_ctxt->i4_ctb_x == ps_sps->i2_pic_wd_in_ctb - 1))
662 {
663 WORD32 shift;
664 shift = start_pos_y * 2;
665 shift += (((end_pos_x >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
666 pu4_vert_bs[end_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
667 }
668 }
669 /* Only if the current edge falls on 8 pixel grid set BS */
670 if(0 == (end_pos_y & 1))
671 {
672 /* If end_pos_y corresponds to the bottom of the CTB, write to pu4_horz_bs[0] of the bottom CTB */
673 if(ctb_size / 8 == (end_pos_y >> 1))
674 {
675 *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) |= (u4_bs << (start_pos_x * 2));
676 }
677 else
678 {
679 WORD32 shift;
680 shift = start_pos_x * 2;
681 shift += (((end_pos_y >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
682 pu4_horz_bs[end_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
683 }
684 }
685 }
686
687 if(0 == u4_qp_const_in_ctb)
688 {
689 if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
690 {
691 WORD32 row, col;
692 for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
693 {
694 for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
695 {
696 pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
697 }
698 }
699 }
700 }
701 }
702
703 /* For all PUs in the CTB,
704 For left and top edges, compute BS */
705
706 cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
707 + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
708
709 {
710 WORD32 next_ctb_idx;
711 next_ctb_idx = ps_bs_ctxt->i4_next_pu_ctb_cnt;
712 i4_pu_cnt = ps_bs_ctxt->pu4_pic_pu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_pu_idx[cur_ctb_idx];
713 }
714
715 for(i = 0; i < i4_pu_cnt; i++)
716 {
717 WORD32 start_pos_x;
718 WORD32 start_pos_y;
719 WORD32 end_pos_x;
720 WORD32 end_pos_y;
721 WORD32 pu_wd, pu_ht;
722 UWORD32 u4_bs;
723 pu_t *ps_pu = ps_bs_ctxt->ps_pu + i;
724 pu_t *ps_ngbr_pu;
725 UWORD32 u4_ngbr_pu_indx;
726
727 start_pos_x = ps_pu->b4_pos_x;
728 start_pos_y = ps_pu->b4_pos_y;
729
730 pu_wd = (ps_pu->b4_wd + 1);
731 pu_ht = (ps_pu->b4_ht + 1);
732
733 end_pos_x = start_pos_x + pu_wd;
734 end_pos_y = start_pos_y + pu_ht;
735
736 /* If the current PU is intra, set Boundary strength as 2 for both top and left edge */
737 /* Need not mask the BS to zero even if it was set to 1 already since BS 2 and 3 are assumed to be the same in leaf level functions */
738 if(ps_pu->b1_intra_flag)
739 {
740 u4_bs = DUP_LSB_10(pu_ht);
741
742 /* Only if the current edge falls on 8 pixel grid set BS */
743 if(0 == (start_pos_x & 1))
744 {
745 WORD32 shift;
746 shift = start_pos_y * 2;
747 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
748 * will reduce to the following assuming ctb size is one of 16, 32 and 64
749 * and deblocking is done on 8x8 grid
750 */
751 if(6 != log2_ctb_size)
752 shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
753 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
754 }
755
756 u4_bs = DUP_LSB_10(pu_wd);
757
758 /* Only if the current edge falls on 8 pixel grid set BS */
759 if(0 == (start_pos_y & 1))
760 {
761 WORD32 shift;
762 shift = start_pos_x * 2;
763 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
764 * will reduce to the following assuming ctb size is one of 16, 32 and 64
765 * and deblocking is done on 8x8 grid
766 */
767 if(6 != log2_ctb_size)
768 shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
769 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
770 }
771 }
772
773 else
774 {
775 /* Vertical edge */
776 /* Process only if the edge is not a frame edge */
777 if(0 != ps_bs_ctxt->i4_ctb_x + start_pos_x)
778 {
779 do
780 {
781 WORD32 pu_ngbr_ht;
782 WORD32 min_pu_ht;
783 WORD32 ngbr_end_pos_y;
784 UWORD32 ngbr_pu_idx_strd;
785 ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
786 u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y + 1) * ngbr_pu_idx_strd + (start_pos_x)];
787 ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
788
789 pu_ngbr_ht = ps_ngbr_pu->b4_ht + 1;
790 ngbr_end_pos_y = ps_ngbr_pu->b4_pos_y + pu_ngbr_ht;
791
792 min_pu_ht = MIN(ngbr_end_pos_y, end_pos_y) - start_pos_y;
793
794 if(ps_ngbr_pu->b1_intra_flag)
795 {
796 u4_bs = DUP_LSB_10(min_pu_ht);
797
798 /* Only if the current edge falls on 8 pixel grid set BS */
799 if(0 == (start_pos_x & 1))
800 {
801 WORD32 shift;
802 shift = start_pos_y * 2;
803 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
804 * will reduce to the following assuming ctb size is one of 16, 32 and 64
805 * and deblocking is done on 8x8 grid
806 */
807 if(6 != log2_ctb_size)
808 shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
809 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
810 }
811 }
812 else
813 {
814 u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
815 if(u4_bs)
816 {
817 u4_bs = DUP_LSB_01(min_pu_ht);
818 if(0 == (start_pos_x & 1))
819 {
820 WORD32 shift;
821 shift = start_pos_y * 2;
822 /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
823 * will reduce to the following assuming ctb size is one of 16, 32 and 64
824 * and deblocking is done on 8x8 grid
825 */
826 if(6 != log2_ctb_size)
827 shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
828 pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
829 }
830 }
831 }
832
833 pu_ht -= min_pu_ht;
834 start_pos_y += min_pu_ht;
835 }while(pu_ht > 0);
836
837 /* Reinitialising since the values are updated in the previous loop */
838 pu_ht = ps_pu->b4_ht + 1;
839 start_pos_y = ps_pu->b4_pos_y;
840 }
841
842 /* Horizontal edge */
843 /* Process only if the edge is not a frame edge */
844 if(0 != ps_bs_ctxt->i4_ctb_y + start_pos_y)
845 {
846 do
847 {
848 WORD32 pu_ngbr_wd;
849 WORD32 min_pu_wd;
850 WORD32 ngbr_end_pos_x;
851 UWORD32 ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
852 u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y)*ngbr_pu_idx_strd + (start_pos_x + 1)];
853 ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
854
855 pu_ngbr_wd = ps_ngbr_pu->b4_wd + 1;
856 ngbr_end_pos_x = ps_ngbr_pu->b4_pos_x + pu_ngbr_wd;
857
858 min_pu_wd = MIN(ngbr_end_pos_x, end_pos_x) - start_pos_x;
859
860 if(ps_ngbr_pu->b1_intra_flag)
861 {
862 u4_bs = DUP_LSB_10(min_pu_wd);
863
864 /* Only if the current edge falls on 8 pixel grid set BS */
865 if(0 == (start_pos_y & 1))
866 {
867 WORD32 shift;
868 shift = start_pos_x * 2;
869 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
870 * will reduce to the following assuming ctb size is one of 16, 32 and 64
871 * and deblocking is done on 8x8 grid
872 */
873 if(6 != log2_ctb_size)
874 shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
875 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
876 }
877 }
878 else
879 {
880 u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
881 if(u4_bs)
882 {
883 u4_bs = DUP_LSB_01(min_pu_wd);
884
885 /* Only if the current edge falls on 8 pixel grid set BS */
886 if(0 == (start_pos_y & 1))
887 {
888 WORD32 shift;
889 shift = start_pos_x * 2;
890 /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
891 * will reduce to the following assuming ctb size is one of 16, 32 and 64
892 * and deblocking is done on 8x8 grid
893 */
894 if(6 != log2_ctb_size)
895 shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
896 pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
897 }
898 }
899 }
900
901 pu_wd -= min_pu_wd;
902 start_pos_x += min_pu_wd;
903 }while(pu_wd > 0);
904
905 /* Reinitialising since the values are updated in the previous loop */
906 pu_wd = ps_pu->b4_wd + 1;
907 start_pos_x = ps_pu->b4_pos_x;
908 }
909 }
910 }
911
912 {
913 /* If left neighbor is not available, then set BS for entire first column to zero */
914 UWORD32 ctb_addr;
915 WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
916
917 if(!ps_pps->i1_tiles_enabled_flag)
918 {
919 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
920 (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
921 (0 == ps_bs_ctxt->i4_ctb_x))
922 {
923 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
924 }
925 }
926 else
927 {
928 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
929 {
930 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
931 }
932 else
933 {
934
935 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
936 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
937
938 if(ps_bs_ctxt->i4_ctb_x)
939 {
940 ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
941 left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
942 }
943
944 if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
945 {
946 /* Removed reduntant checks */
947 if((0 == i1_loop_filter_across_slices_enabled_flag && (
948 (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
949 ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) || (0 == ps_bs_ctxt->i4_ctb_x))
950 {
951 pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
952 }
953 }
954 }
955 }
956
957 ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
958 slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
959 if(ps_bs_ctxt->i4_ctb_y)
960 {
961 ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
962 top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
963 }
964 /* If top neighbor is not available, then set BS for entire first row to zero */
965 /* Removed reduntant checks */
966 if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
967 || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
968 || (0 == ps_bs_ctxt->i4_ctb_y))
969 {
970 pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
971 }
972 }
973
974 /**
975 * Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
976 * (They might have set to non zero values because of CBF of the current CTB)*/
977 {
978 WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
979 WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
980 if(num_rows_remaining < (ctb_size >> 3))
981 {
982 /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
983 * will reduce to the following assuming ctb size is one of 16, 32 and 64
984 * and deblocking is done on 8x8 grid
985 */
986 WORD32 offset;
987 offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
988 if(6 != log2_ctb_size)
989 offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
990
991 memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
992 }
993
994 if(num_cols_remaining < (ctb_size >> 3))
995 {
996 /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
997 * will reduce to the following assuming ctb size is one of 16, 32 and 64
998 * and deblocking is done on 8x8 grid
999 */
1000
1001 WORD32 offset;
1002 offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
1003 if(6 != log2_ctb_size)
1004 offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
1005
1006 memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
1007 }
1008 }
1009 return 0;
1010 }
1011