1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevc_sao.c
22 *
23 * @brief
24 * Contains function definitions for sample adaptive offset process
25 *
26 * @author
27 * Srinivas T
28 *
29 * @par List of Functions:
30 *
31 * @remarks
32 * None
33 *
34 *******************************************************************************
35 */
36
37 #include <stdio.h>
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <assert.h>
42
43 #include "ihevc_typedefs.h"
44 #include "iv.h"
45 #include "ivd.h"
46 #include "ihevcd_cxa.h"
47 #include "ithread.h"
48
49 #include "ihevc_defs.h"
50 #include "ihevc_debug.h"
51 #include "ihevc_defs.h"
52 #include "ihevc_structs.h"
53 #include "ihevc_macros.h"
54 #include "ihevc_platform_macros.h"
55 #include "ihevc_cabac_tables.h"
56 #include "ihevc_sao.h"
57 #include "ihevc_mem_fns.h"
58
59 #include "ihevc_error.h"
60 #include "ihevc_common_tables.h"
61
62 #include "ihevcd_trace.h"
63 #include "ihevcd_defs.h"
64 #include "ihevcd_function_selector.h"
65 #include "ihevcd_structs.h"
66 #include "ihevcd_error.h"
67 #include "ihevcd_nal.h"
68 #include "ihevcd_bitstream.h"
69 #include "ihevcd_job_queue.h"
70 #include "ihevcd_utils.h"
71
72 #include "ihevc_deblk.h"
73 #include "ihevc_deblk_tables.h"
74 #include "ihevcd_profile.h"
75 #include "ihevcd_sao.h"
76 #include "ihevcd_debug.h"
77
78 #define SAO_SHIFT_CTB 8
79
80 /**
81 * SAO at CTB level is implemented for a shifted CTB (shifted by 8 pixels in both the x and y directions)
82 */
ihevcd_sao_ctb(sao_ctxt_t * ps_sao_ctxt)83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84 {
85 codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86 UWORD8 *pu1_src_luma;
87 UWORD8 *pu1_src_chroma;
88 WORD32 src_strd;
89 WORD32 ctb_size;
90 WORD32 log2_ctb_size;
91 sps_t *ps_sps;
92 sao_t *ps_sao;
93 WORD32 row, col;
94 UWORD8 au1_avail_luma[8];
95 UWORD8 au1_avail_chroma[8];
96 WORD32 i;
97 UWORD8 *pu1_src_top_luma;
98 UWORD8 *pu1_src_top_chroma;
99 UWORD8 *pu1_src_left_luma;
100 UWORD8 *pu1_src_left_chroma;
101 UWORD8 au1_src_top_right[2];
102 UWORD8 au1_src_bot_left[2];
103 UWORD8 *pu1_no_loop_filter_flag;
104 WORD32 loop_filter_strd;
105
106 WORD8 ai1_offset_y[5];
107 WORD8 ai1_offset_cb[5];
108 WORD8 ai1_offset_cr[5];
109
110 PROFILE_DISABLE_SAO();
111
112 ai1_offset_y[0] = 0;
113 ai1_offset_cb[0] = 0;
114 ai1_offset_cr[0] = 0;
115
116 ps_sps = ps_sao_ctxt->ps_sps;
117 log2_ctb_size = ps_sps->i1_log2_ctb_size;
118 ctb_size = (1 << log2_ctb_size);
119 src_strd = ps_sao_ctxt->ps_codec->i4_strd;
120 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
121 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
122
123 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
124 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
125
126 /* Current CTB */
127 {
128 WORD32 sao_wd_luma;
129 WORD32 sao_wd_chroma;
130 WORD32 sao_ht_luma;
131 WORD32 sao_ht_chroma;
132
133 WORD32 remaining_rows;
134 WORD32 remaining_cols;
135
136 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
137 sao_wd_luma = MIN(ctb_size, remaining_cols);
138 sao_wd_chroma = MIN(ctb_size, remaining_cols);
139
140 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
141 sao_ht_luma = MIN(ctb_size, remaining_rows);
142 sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
143
144 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
145 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
146 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
147 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
148
149 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
150 ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
151 ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
152
153 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
154 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
155 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
156 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
157
158 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
159 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
160 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
161 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
162
163 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
164 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
165 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
166 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
167
168 for(i = 0; i < 8; i++)
169 {
170 au1_avail_luma[i] = 255;
171 au1_avail_chroma[i] = 255;
172 }
173
174
175 if(0 == ps_sao_ctxt->i4_ctb_x)
176 {
177 au1_avail_luma[0] = 0;
178 au1_avail_luma[4] = 0;
179 au1_avail_luma[6] = 0;
180
181 au1_avail_chroma[0] = 0;
182 au1_avail_chroma[4] = 0;
183 au1_avail_chroma[6] = 0;
184 }
185
186 if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
187 {
188 au1_avail_luma[1] = 0;
189 au1_avail_luma[5] = 0;
190 au1_avail_luma[7] = 0;
191
192 au1_avail_chroma[1] = 0;
193 au1_avail_chroma[5] = 0;
194 au1_avail_chroma[7] = 0;
195 }
196
197 if(0 == ps_sao_ctxt->i4_ctb_y)
198 {
199 au1_avail_luma[2] = 0;
200 au1_avail_luma[4] = 0;
201 au1_avail_luma[5] = 0;
202
203 au1_avail_chroma[2] = 0;
204 au1_avail_chroma[4] = 0;
205 au1_avail_chroma[5] = 0;
206 }
207
208 if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
209 {
210 au1_avail_luma[3] = 0;
211 au1_avail_luma[6] = 0;
212 au1_avail_luma[7] = 0;
213
214 au1_avail_chroma[3] = 0;
215 au1_avail_chroma[6] = 0;
216 au1_avail_chroma[7] = 0;
217 }
218
219
220 if(0 == ps_sao->b3_y_type_idx)
221 {
222 /* Update left, top and top-left */
223 for(row = 0; row < sao_ht_luma; row++)
224 {
225 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
226 }
227 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
228
229 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
230
231 }
232 else
233 {
234 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
235 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
236 WORD32 tmp_strd = MAX_CTB_SIZE + 2;
237 WORD32 no_loop_filter_enabled = 0;
238
239 /* Check the loop filter flags and copy the original values for back up */
240 {
241 UWORD32 u4_no_loop_filter_flag;
242 WORD32 min_cu = 8;
243 UWORD8 *pu1_src_tmp = pu1_src_luma;
244
245 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
246 {
247 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
248 ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
249 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
250
251 if(u4_no_loop_filter_flag)
252 {
253 WORD32 tmp_wd = sao_wd_luma;
254 no_loop_filter_enabled = 1;
255 while(tmp_wd > 0)
256 {
257 if(CTZ(u4_no_loop_filter_flag))
258 {
259 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
260 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
261 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
262 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
263 }
264 else
265 {
266 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
267 {
268 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
269 {
270 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
271 }
272 }
273
274 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
275 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
276 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
277 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
278 }
279 }
280
281 pu1_src_tmp -= sao_wd_luma;
282 }
283
284 pu1_src_tmp += min_cu * src_strd;
285 pu1_src_copy += min_cu * tmp_strd;
286 }
287 }
288
289 if(1 == ps_sao->b3_y_type_idx)
290 {
291 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
292 src_strd,
293 pu1_src_left_luma,
294 pu1_src_top_luma,
295 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
296 ps_sao->b5_y_band_pos,
297 ai1_offset_y,
298 sao_wd_luma,
299 sao_ht_luma);
300 }
301 else // if(2 <= ps_sao->b3_y_type_idx)
302 {
303 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
304 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
305 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
306 src_strd,
307 pu1_src_left_luma,
308 pu1_src_top_luma,
309 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
310 au1_src_top_right,
311 au1_src_bot_left,
312 au1_avail_luma,
313 ai1_offset_y,
314 sao_wd_luma,
315 sao_ht_luma);
316 }
317
318 /* Check the loop filter flags and copy the original values back if they are set */
319 if(no_loop_filter_enabled)
320 {
321 UWORD32 u4_no_loop_filter_flag;
322 WORD32 min_cu = 8;
323 UWORD8 *pu1_src_tmp = pu1_src_luma;
324
325 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
326 {
327 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
328 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
329
330 if(u4_no_loop_filter_flag)
331 {
332 WORD32 tmp_wd = sao_wd_luma;
333 while(tmp_wd > 0)
334 {
335 if(CTZ(u4_no_loop_filter_flag))
336 {
337 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
338 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
339 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
340 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
341 }
342 else
343 {
344 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
345 {
346 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
347 {
348 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
349 }
350 }
351
352 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
353 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
354 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
355 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
356 }
357 }
358
359 pu1_src_tmp -= sao_wd_luma;
360 }
361
362 pu1_src_tmp += min_cu * src_strd;
363 pu1_src_copy += min_cu * tmp_strd;
364 }
365 }
366
367 }
368
369 if(0 == ps_sao->b3_cb_type_idx)
370 {
371 for(row = 0; row < sao_ht_chroma; row++)
372 {
373 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
374 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
375 }
376 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
377 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
378
379 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
380 }
381 else
382 {
383 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
384 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
385 WORD32 tmp_strd = MAX_CTB_SIZE + 4;
386 WORD32 no_loop_filter_enabled = 0;
387
388 /* Check the loop filter flags and copy the original values for back up */
389 {
390 UWORD32 u4_no_loop_filter_flag;
391 WORD32 min_cu = 4;
392 UWORD8 *pu1_src_tmp = pu1_src_chroma;
393
394 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
395 {
396 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
397 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
398
399 if(u4_no_loop_filter_flag)
400 {
401 WORD32 tmp_wd = sao_wd_chroma;
402 no_loop_filter_enabled = 1;
403 while(tmp_wd > 0)
404 {
405 if(CTZ(u4_no_loop_filter_flag))
406 {
407 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
408 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
409 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
410 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
411 }
412 else
413 {
414 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
415 {
416 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
417 {
418 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
419 }
420 }
421
422 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
423 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
424 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
425 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
426 }
427 }
428
429 pu1_src_tmp -= sao_wd_chroma;
430 }
431
432 pu1_src_tmp += min_cu * src_strd;
433 pu1_src_copy += min_cu * tmp_strd;
434 }
435 }
436
437 if(1 == ps_sao->b3_cb_type_idx)
438 {
439 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
440 src_strd,
441 pu1_src_left_chroma,
442 pu1_src_top_chroma,
443 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
444 ps_sao->b5_cb_band_pos,
445 ps_sao->b5_cr_band_pos,
446 ai1_offset_cb,
447 ai1_offset_cr,
448 sao_wd_chroma,
449 sao_ht_chroma
450 );
451 }
452 else // if(2 <= ps_sao->b3_cb_type_idx)
453 {
454 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
455 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
456 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
457 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
458 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
459 src_strd,
460 pu1_src_left_chroma,
461 pu1_src_top_chroma,
462 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
463 au1_src_top_right,
464 au1_src_bot_left,
465 au1_avail_chroma,
466 ai1_offset_cb,
467 ai1_offset_cr,
468 sao_wd_chroma,
469 sao_ht_chroma);
470 }
471
472 /* Check the loop filter flags and copy the original values back if they are set */
473 if(no_loop_filter_enabled)
474 {
475 UWORD32 u4_no_loop_filter_flag;
476 WORD32 min_cu = 4;
477 UWORD8 *pu1_src_tmp = pu1_src_chroma;
478
479 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
480 {
481 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
482 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
483
484 if(u4_no_loop_filter_flag)
485 {
486 WORD32 tmp_wd = sao_wd_chroma;
487 while(tmp_wd > 0)
488 {
489 if(CTZ(u4_no_loop_filter_flag))
490 {
491 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
492 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
493 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
494 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
495 }
496 else
497 {
498 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
499 {
500 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
501 {
502 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
503 }
504 }
505
506 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
507 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
508 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
509 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
510 }
511 }
512
513 pu1_src_tmp -= sao_wd_chroma;
514 }
515
516 pu1_src_tmp += min_cu * src_strd;
517 pu1_src_copy += min_cu * tmp_strd;
518 }
519 }
520
521 }
522
523 }
524 }
525
ihevcd_sao_shift_ctb(sao_ctxt_t * ps_sao_ctxt)526 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
527 {
528 codec_t *ps_codec = ps_sao_ctxt->ps_codec;
529 UWORD8 *pu1_src_luma;
530 UWORD8 *pu1_src_chroma;
531 WORD32 src_strd;
532 WORD32 ctb_size;
533 WORD32 log2_ctb_size;
534 sps_t *ps_sps;
535 sao_t *ps_sao;
536 pps_t *ps_pps;
537 slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
538 tile_t *ps_tile;
539 UWORD16 *pu1_slice_idx;
540 UWORD16 *pu1_tile_idx;
541 WORD32 row, col;
542 UWORD8 au1_avail_luma[8];
543 UWORD8 au1_avail_chroma[8];
544 UWORD8 au1_tile_slice_boundary[8];
545 UWORD8 au4_ilf_across_tile_slice_enable[8];
546 WORD32 i;
547 UWORD8 *pu1_src_top_luma;
548 UWORD8 *pu1_src_top_chroma;
549 UWORD8 *pu1_src_left_luma;
550 UWORD8 *pu1_src_left_chroma;
551 UWORD8 au1_src_top_right[2];
552 UWORD8 au1_src_bot_left[2];
553 UWORD8 *pu1_no_loop_filter_flag;
554 UWORD8 *pu1_src_backup_luma;
555 UWORD8 *pu1_src_backup_chroma;
556 WORD32 backup_strd;
557 WORD32 loop_filter_strd;
558
559 WORD32 no_loop_filter_enabled_luma = 0;
560 WORD32 no_loop_filter_enabled_chroma = 0;
561 UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
562 UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
563 UWORD8 *pu1_sao_src_luma_top_left_ctb;
564 UWORD8 *pu1_sao_src_chroma_top_left_ctb;
565 UWORD8 *pu1_sao_src_top_left_luma_top_right;
566 UWORD8 *pu1_sao_src_top_left_chroma_top_right;
567 UWORD8 u1_sao_src_top_left_luma_bot_left;
568 UWORD8 *pu1_sao_src_top_left_luma_bot_left;
569 UWORD8 *au1_sao_src_top_left_chroma_bot_left;
570 UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
571
572 WORD8 ai1_offset_y[5];
573 WORD8 ai1_offset_cb[5];
574 WORD8 ai1_offset_cr[5];
575 WORD32 chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
576
577 PROFILE_DISABLE_SAO();
578
579 ai1_offset_y[0] = 0;
580 ai1_offset_cb[0] = 0;
581 ai1_offset_cr[0] = 0;
582
583 ps_sps = ps_sao_ctxt->ps_sps;
584 ps_pps = ps_sao_ctxt->ps_pps;
585 ps_tile = ps_sao_ctxt->ps_tile;
586
587 log2_ctb_size = ps_sps->i1_log2_ctb_size;
588 ctb_size = (1 << log2_ctb_size);
589 src_strd = ps_sao_ctxt->ps_codec->i4_strd;
590 ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
591 ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
592
593 pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
594 pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
595 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
596 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
597
598 /*Stores the left value for each row ctbs- Needed for column tiles*/
599 pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
600 pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
601 pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
602 pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
603 u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
604 pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
605 au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
606 pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
607 pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
608 pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
609
610 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
611 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
612 backup_strd = 2 * MAX_CTB_SIZE;
613
614 DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
615
616 {
617 /* Check the loop filter flags and copy the original values for back up */
618 /* Luma */
619 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
620 {
621 UWORD32 u4_no_loop_filter_flag;
622 WORD32 loop_filter_bit_pos;
623 WORD32 log2_min_cu = 3;
624 WORD32 min_cu = (1 << log2_min_cu);
625 UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
626 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
627 WORD32 sao_blk_wd = ctb_size;
628 WORD32 remaining_rows;
629 WORD32 remaining_cols;
630
631 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
632 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
633 if(remaining_rows <= SAO_SHIFT_CTB)
634 sao_blk_ht += remaining_rows;
635 if(remaining_cols <= SAO_SHIFT_CTB)
636 sao_blk_wd += remaining_cols;
637
638 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
639 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
640
641 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
642
643 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
644 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
645 if(ps_sao_ctxt->i4_ctb_x > 0)
646 loop_filter_bit_pos -= 1;
647
648 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
649 (loop_filter_bit_pos >> 3);
650
651 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
652 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
653 {
654 WORD32 tmp_wd = sao_blk_wd;
655
656 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
657 (loop_filter_bit_pos & 7);
658 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
659
660 if(u4_no_loop_filter_flag)
661 {
662 no_loop_filter_enabled_luma = 1;
663 while(tmp_wd > 0)
664 {
665 if(CTZ(u4_no_loop_filter_flag))
666 {
667 pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
668 pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
669 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
670 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
671 }
672 else
673 {
674 for(row = 0; row < min_cu; row++)
675 {
676 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
677 {
678 pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
679 }
680 }
681 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
682 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
683 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
684 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
685 }
686 }
687
688 pu1_src_tmp_luma -= sao_blk_wd;
689 pu1_src_backup_luma -= sao_blk_wd;
690 }
691
692 pu1_src_tmp_luma += (src_strd << log2_min_cu);
693 pu1_src_backup_luma += (backup_strd << log2_min_cu);
694 }
695 }
696
697 /* Chroma */
698 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
699 {
700 UWORD32 u4_no_loop_filter_flag;
701 WORD32 loop_filter_bit_pos;
702 WORD32 log2_min_cu = 3;
703 WORD32 min_cu = (1 << log2_min_cu);
704 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
705 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
706 WORD32 sao_blk_wd = ctb_size;
707 WORD32 remaining_rows;
708 WORD32 remaining_cols;
709
710 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
711 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
712 if(remaining_rows <= 2 * SAO_SHIFT_CTB)
713 sao_blk_ht += remaining_rows;
714 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
715 sao_blk_wd += remaining_cols;
716
717 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
718 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
719
720 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
721
722 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
723 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
724 if(ps_sao_ctxt->i4_ctb_x > 0)
725 loop_filter_bit_pos -= 2;
726
727 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
728 (loop_filter_bit_pos >> 3);
729
730 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
731 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
732 {
733 WORD32 tmp_wd = sao_blk_wd;
734
735 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
736 (loop_filter_bit_pos & 7);
737 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
738
739 if(u4_no_loop_filter_flag)
740 {
741 no_loop_filter_enabled_chroma = 1;
742 while(tmp_wd > 0)
743 {
744 if(CTZ(u4_no_loop_filter_flag))
745 {
746 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
747 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
748 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
749 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
750 }
751 else
752 {
753 for(row = 0; row < min_cu / 2; row++)
754 {
755 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
756 {
757 pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
758 }
759 }
760
761 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
762 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
763 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
764 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
765 }
766 }
767
768 pu1_src_tmp_chroma -= sao_blk_wd;
769 pu1_src_backup_chroma -= sao_blk_wd;
770 }
771
772 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
773 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
774 }
775 }
776 }
777
778 DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
779
780 /* Top-left CTB */
781 if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
782 {
783 WORD32 sao_wd_luma = SAO_SHIFT_CTB;
784 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
785 WORD32 sao_ht_luma = SAO_SHIFT_CTB;
786 WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
787
788 WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
789 WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
790 WORD32 au4_idx_tl[8], idx_tl;
791
792
793 pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
794 pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
795 ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
796 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
797 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
798 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
799 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
800
801 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
802 {
803 if(0 == ps_sao->b3_y_type_idx)
804 {
805 /* Update left, top and top-left */
806 for(row = 0; row < sao_ht_luma; row++)
807 {
808 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
809 }
810 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
811
812 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
813
814
815 }
816
817 else if(1 == ps_sao->b3_y_type_idx)
818 {
819 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
820 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
821 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
822 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
823
824 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
825 src_strd,
826 pu1_src_left_luma,
827 pu1_src_top_luma,
828 pu1_sao_src_luma_top_left_ctb,
829 ps_sao->b5_y_band_pos,
830 ai1_offset_y,
831 sao_wd_luma,
832 sao_ht_luma
833 );
834 }
835
836 else // if(2 <= ps_sao->b3_y_type_idx)
837 {
838 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
839 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
840 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
841 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
842
843 for(i = 0; i < 8; i++)
844 {
845 au1_avail_luma[i] = 255;
846 au1_tile_slice_boundary[i] = 0;
847 au4_idx_tl[i] = 0;
848 au4_ilf_across_tile_slice_enable[i] = 1;
849 }
850
851 /******************************************************************
852 * Derive the Top-left CTB's neighbor pixel's slice indices.
853 *
854 * TL_T
855 * 4 _2__5________
856 * 0 | | |
857 * TL_L | TL | 1 TL_R|
858 * |____|_______|____
859 * 6|TL_D|7 | |
860 * | 3 | | |
861 * |____|_______| |
862 * | |
863 * | |
864 * |____________|
865 *
866 *****************************************************************/
867
868 /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
869 {
870 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
871 {
872 {
873 /*Assuming that sao shift is uniform along x and y directions*/
874 if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
875 {
876 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
877 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
878 }
879 else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
880 {
881 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
882 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
883 }
884 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
885 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
886
887 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
888 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
889
890 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1;
891 ctby_tl_d = ps_sao_ctxt->i4_ctb_y;
892
893 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
894 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
895 }
896
897 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
898 {
899 /*Calculate slice indices for neighbor pixels*/
900 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
901 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
902 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
903 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
904 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
905 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
906
907 if((0 == (1 << log2_ctb_size) - sao_wd_luma))
908 {
909 if(ps_sao_ctxt->i4_ctb_x == 1)
910 {
911 au4_idx_tl[6] = -1;
912 au4_idx_tl[4] = -1;
913 }
914 else
915 {
916 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
917 }
918 if(ps_sao_ctxt->i4_ctb_y == 1)
919 {
920 au4_idx_tl[5] = -1;
921 au4_idx_tl[4] = -1;
922 }
923 else
924 {
925 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
926 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
927 }
928 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
929 }
930
931 /* Verify that the neighbor ctbs don't cross pic boundary.
932 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
933 * of the pixel having a greater address is checked. Accordingly, set the availability flags.
934 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
935 * the respective pixel's flags are checked
936 */
937
938 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
939 {
940 au4_ilf_across_tile_slice_enable[4] = 0;
941 au4_ilf_across_tile_slice_enable[6] = 0;
942 }
943 else
944 {
945 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
946 }
947 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
948 {
949 au4_ilf_across_tile_slice_enable[5] = 0;
950 au4_ilf_across_tile_slice_enable[4] = 0;
951 }
952 else
953 {
954 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
955 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
956 }
957 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
958 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
959 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
960 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
961 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
962
963 /*
964 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
965 * of the pixel having a greater address is checked. Accordingly, set the availability flags.
966 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
967 * the respective pixel's flags are checked
968 */
969 for(i = 0; i < 8; i++)
970 {
971 /*Sets the edges that lie on the slice/tile boundary*/
972 if(au4_idx_tl[i] != idx_tl)
973 {
974 au1_tile_slice_boundary[i] = 1;
975 }
976 else
977 {
978 au4_ilf_across_tile_slice_enable[i] = 1;
979 }
980 }
981
982 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
983 }
984
985 if(ps_pps->i1_tiles_enabled_flag)
986 {
987 /* Calculate availability flags at tile boundary */
988 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
989 {
990 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
991 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
992 {
993 /*Set the boundary arrays*/
994 /*Calculate tile indices for neighbor pixels*/
995 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
996 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
997 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
998 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
999 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1000 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1001
1002 if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1003 {
1004 if(ps_sao_ctxt->i4_ctb_x == 1)
1005 {
1006 au4_idx_tl[6] = -1;
1007 au4_idx_tl[4] = -1;
1008 }
1009 else
1010 {
1011 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1012 }
1013 if(ps_sao_ctxt->i4_ctb_y == 1)
1014 {
1015 au4_idx_tl[5] = -1;
1016 au4_idx_tl[4] = -1;
1017 }
1018 else
1019 {
1020 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1021 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1022 }
1023 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1024 }
1025 for(i = 0; i < 8; i++)
1026 {
1027 /*Sets the edges that lie on the tile boundary*/
1028 if(au4_idx_tl[i] != idx_tl)
1029 {
1030 au1_tile_slice_boundary[i] |= 1;
1031 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1032 }
1033 }
1034 }
1035 }
1036 }
1037
1038
1039 /*Set availability flags based on tile and slice boundaries*/
1040 for(i = 0; i < 8; i++)
1041 {
1042 /*Sets the edges that lie on the slice/tile boundary*/
1043 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1044 {
1045 au1_avail_luma[i] = 0;
1046 }
1047 }
1048 }
1049 }
1050
1051 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1052 {
1053 au1_avail_luma[0] = 0;
1054 au1_avail_luma[4] = 0;
1055 au1_avail_luma[6] = 0;
1056 }
1057
1058 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1059 {
1060 au1_avail_luma[1] = 0;
1061 au1_avail_luma[5] = 0;
1062 au1_avail_luma[7] = 0;
1063 }
1064 //y==1 case
1065 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1066 {
1067 au1_avail_luma[2] = 0;
1068 au1_avail_luma[4] = 0;
1069 au1_avail_luma[5] = 0;
1070 }
1071 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1072 {
1073 au1_avail_luma[3] = 0;
1074 au1_avail_luma[6] = 0;
1075 au1_avail_luma[7] = 0;
1076 }
1077
1078 {
1079 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1080 u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1081 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1082 src_strd,
1083 pu1_src_left_luma,
1084 pu1_src_top_luma,
1085 pu1_sao_src_luma_top_left_ctb,
1086 au1_src_top_right,
1087 &u1_sao_src_top_left_luma_bot_left,
1088 au1_avail_luma,
1089 ai1_offset_y,
1090 sao_wd_luma,
1091 sao_ht_luma);
1092 }
1093 }
1094
1095 }
1096
1097 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1098 {
1099 if(0 == ps_sao->b3_cb_type_idx)
1100 {
1101 for(row = 0; row < sao_ht_chroma; row++)
1102 {
1103 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1104 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1105 }
1106 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1107 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1108
1109 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1110
1111 }
1112
1113 else if(1 == ps_sao->b3_cb_type_idx)
1114 {
1115 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1116 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1117 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1118 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1119
1120 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1121 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1122 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1123 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1124
1125 if(chroma_yuv420sp_vu)
1126 {
1127 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1128 src_strd,
1129 pu1_src_left_chroma,
1130 pu1_src_top_chroma,
1131 pu1_sao_src_chroma_top_left_ctb,
1132 ps_sao->b5_cr_band_pos,
1133 ps_sao->b5_cb_band_pos,
1134 ai1_offset_cr,
1135 ai1_offset_cb,
1136 sao_wd_chroma,
1137 sao_ht_chroma
1138 );
1139 }
1140 else
1141 {
1142 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1143 src_strd,
1144 pu1_src_left_chroma,
1145 pu1_src_top_chroma,
1146 pu1_sao_src_chroma_top_left_ctb,
1147 ps_sao->b5_cb_band_pos,
1148 ps_sao->b5_cr_band_pos,
1149 ai1_offset_cb,
1150 ai1_offset_cr,
1151 sao_wd_chroma,
1152 sao_ht_chroma
1153 );
1154 }
1155 }
1156
1157 else // if(2 <= ps_sao->b3_cb_type_idx)
1158 {
1159 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1160 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1161 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1162 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1163
1164 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1165 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1166 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1167 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1168 for(i = 0; i < 8; i++)
1169 {
1170 au1_avail_chroma[i] = 255;
1171 au1_tile_slice_boundary[i] = 0;
1172 au4_idx_tl[i] = 0;
1173 au4_ilf_across_tile_slice_enable[i] = 1;
1174 }
1175 /*In case of slices*/
1176 {
1177 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1178 {
1179 if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1180 {
1181 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1182 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1183 }
1184 else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1185 {
1186 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1187 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1188 }
1189 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1190 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1191
1192 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1193 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1194
1195 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1;
1196 ctby_tl_d = ps_sao_ctxt->i4_ctb_y;
1197
1198 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1199 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1200
1201 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1202 {
1203
1204 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1205 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1206 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1207 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1208 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1209 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1210
1211 if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1212 {
1213 if(ps_sao_ctxt->i4_ctb_x == 1)
1214 {
1215 au4_idx_tl[6] = -1;
1216 au4_idx_tl[4] = -1;
1217 }
1218 else
1219 {
1220 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1221 }
1222 if(ps_sao_ctxt->i4_ctb_y == 1)
1223 {
1224 au4_idx_tl[5] = -1;
1225 au4_idx_tl[4] = -1;
1226 }
1227 else
1228 {
1229 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1230 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1231 }
1232 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1233 }
1234
1235 /* Verify that the neighbor ctbs don't cross pic boundary
1236 * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1237 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1238 {
1239 au4_ilf_across_tile_slice_enable[4] = 0;
1240 au4_ilf_across_tile_slice_enable[6] = 0;
1241 }
1242 else
1243 {
1244 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1245 }
1246 if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1247 {
1248 au4_ilf_across_tile_slice_enable[5] = 0;
1249 au4_ilf_across_tile_slice_enable[4] = 0;
1250 }
1251 else
1252 {
1253 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1254 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1255 }
1256 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1257 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1258 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1259 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1260 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1261 /*
1262 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1263 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1264 */
1265 for(i = 0; i < 8; i++)
1266 {
1267 /*Sets the edges that lie on the slice/tile boundary*/
1268 if(au4_idx_tl[i] != idx_tl)
1269 {
1270 au1_tile_slice_boundary[i] = 1;
1271 }
1272 else
1273 {
1274 au4_ilf_across_tile_slice_enable[i] = 1;
1275 }
1276 }
1277
1278 /*Reset indices*/
1279 for(i = 0; i < 8; i++)
1280 {
1281 au4_idx_tl[i] = 0;
1282 }
1283 }
1284 if(ps_pps->i1_tiles_enabled_flag)
1285 {
1286 /* Calculate availability flags at tile boundary */
1287 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1288 {
1289 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1290 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1291 {
1292 /*Set the boundary arrays*/
1293 /*Calculate tile indices for neighbor pixels*/
1294 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1295 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1296 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1297 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1298 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1299 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1300
1301 if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1302 {
1303 if(ps_sao_ctxt->i4_ctb_x == 1)
1304 {
1305 au4_idx_tl[6] = -1;
1306 au4_idx_tl[4] = -1;
1307 }
1308 else
1309 {
1310 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1311 }
1312 if(ps_sao_ctxt->i4_ctb_y == 1)
1313 {
1314 au4_idx_tl[5] = -1;
1315 au4_idx_tl[4] = -1;
1316 }
1317 else
1318 {
1319 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1320 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1321 }
1322 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1323 }
1324 for(i = 0; i < 8; i++)
1325 {
1326 /*Sets the edges that lie on the tile boundary*/
1327 if(au4_idx_tl[i] != idx_tl)
1328 {
1329 au1_tile_slice_boundary[i] |= 1;
1330 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1331 }
1332 }
1333 }
1334 }
1335 }
1336
1337 for(i = 0; i < 8; i++)
1338 {
1339 /*Sets the edges that lie on the slice/tile boundary*/
1340 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1341 {
1342 au1_avail_chroma[i] = 0;
1343 }
1344 }
1345 }
1346 }
1347
1348 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1349 {
1350 au1_avail_chroma[0] = 0;
1351 au1_avail_chroma[4] = 0;
1352 au1_avail_chroma[6] = 0;
1353 }
1354 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1355 {
1356 au1_avail_chroma[1] = 0;
1357 au1_avail_chroma[5] = 0;
1358 au1_avail_chroma[7] = 0;
1359 }
1360
1361 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1362 {
1363 au1_avail_chroma[2] = 0;
1364 au1_avail_chroma[4] = 0;
1365 au1_avail_chroma[5] = 0;
1366 }
1367 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1368 {
1369 au1_avail_chroma[3] = 0;
1370 au1_avail_chroma[6] = 0;
1371 au1_avail_chroma[7] = 0;
1372 }
1373
1374 {
1375 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1376 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1377 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1378 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1379 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1380 {
1381 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1382 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1383 }
1384
1385 if(chroma_yuv420sp_vu)
1386 {
1387 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1388 src_strd,
1389 pu1_src_left_chroma,
1390 pu1_src_top_chroma,
1391 pu1_sao_src_chroma_top_left_ctb,
1392 au1_src_top_right,
1393 au1_sao_src_top_left_chroma_bot_left,
1394 au1_avail_chroma,
1395 ai1_offset_cr,
1396 ai1_offset_cb,
1397 sao_wd_chroma,
1398 sao_ht_chroma);
1399 }
1400 else
1401 {
1402 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1403 src_strd,
1404 pu1_src_left_chroma,
1405 pu1_src_top_chroma,
1406 pu1_sao_src_chroma_top_left_ctb,
1407 au1_src_top_right,
1408 au1_sao_src_top_left_chroma_bot_left,
1409 au1_avail_chroma,
1410 ai1_offset_cb,
1411 ai1_offset_cr,
1412 sao_wd_chroma,
1413 sao_ht_chroma);
1414 }
1415 }
1416 }
1417 }
1418
1419 pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1420 pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1421 ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1422 }
1423
1424
1425 /* Top CTB */
1426 if((ps_sao_ctxt->i4_ctb_y > 0))
1427 {
1428 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1429 WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1430 WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1431 WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1432
1433 WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1434 WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1435 WORD32 au4_idx_t[8], idx_t;
1436
1437 WORD32 remaining_cols;
1438
1439 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1440 if(remaining_cols <= SAO_SHIFT_CTB)
1441 {
1442 sao_wd_luma += remaining_cols;
1443 }
1444 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1445 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1446 {
1447 sao_wd_chroma += remaining_cols;
1448 }
1449
1450 pu1_src_luma -= (sao_ht_luma * src_strd);
1451 pu1_src_chroma -= (sao_ht_chroma * src_strd);
1452 ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1453 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1454 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1455 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1456 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1457
1458 if(0 != sao_wd_luma)
1459 {
1460 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1461 {
1462 if(0 == ps_sao->b3_y_type_idx)
1463 {
1464 /* Update left, top and top-left */
1465 for(row = 0; row < sao_ht_luma; row++)
1466 {
1467 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1468 }
1469 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1470
1471 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1472
1473 }
1474
1475 else if(1 == ps_sao->b3_y_type_idx)
1476 {
1477 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1478 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1479 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1480 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1481
1482 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1483 src_strd,
1484 pu1_src_left_luma,
1485 pu1_src_top_luma,
1486 pu1_sao_src_luma_top_left_ctb,
1487 ps_sao->b5_y_band_pos,
1488 ai1_offset_y,
1489 sao_wd_luma,
1490 sao_ht_luma
1491 );
1492 }
1493
1494 else // if(2 <= ps_sao->b3_y_type_idx)
1495 {
1496 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1497 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1498 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1499 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1500
1501 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1502 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1503 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1504
1505 for(i = 0; i < 8; i++)
1506 {
1507
1508 au4_ilf_across_tile_slice_enable[i] = 1;
1509 }
1510 /******************************************************************
1511 * Derive the Top CTB's neighbor pixel's slice indices.
1512 *
1513 * T_T
1514 * ____________
1515 * | | |
1516 * | T_L| T |T_R
1517 * | | ______|____
1518 * | | T_D | |
1519 * | | | |
1520 * |____|_______| |
1521 * | |
1522 * | |
1523 * |____________|
1524 *
1525 *****************************************************************/
1526
1527 /*In case of slices*/
1528 {
1529 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1530 {
1531
1532 ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1533 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1534
1535 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1536 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1537
1538 ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1539 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1540
1541 ctbx_t_d = ps_sao_ctxt->i4_ctb_x;
1542 ctby_t_d = ps_sao_ctxt->i4_ctb_y;
1543
1544 ctbx_t = ps_sao_ctxt->i4_ctb_x;
1545 ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1546
1547 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1548 {
1549 /*Calculate neighbor ctb slice indices*/
1550 if(0 == ps_sao_ctxt->i4_ctb_x)
1551 {
1552 au4_idx_t[0] = -1;
1553 au4_idx_t[6] = -1;
1554 au4_idx_t[4] = -1;
1555 }
1556 else
1557 {
1558 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1559 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1560 }
1561 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1562 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1563 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1564 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1565
1566 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1567 if(0 == ps_sao_ctxt->i4_ctb_x)
1568 {
1569 au4_ilf_across_tile_slice_enable[4] = 0;
1570 au4_ilf_across_tile_slice_enable[6] = 0;
1571 au4_ilf_across_tile_slice_enable[0] = 0;
1572 }
1573 else
1574 {
1575 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1576 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1577 }
1578
1579
1580
1581 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1582 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1583 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1584 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1585 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1586 /*
1587 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1588 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1589 */
1590
1591 for(i = 0; i < 8; i++)
1592 {
1593 /*Sets the edges that lie on the slice/tile boundary*/
1594 if(au4_idx_t[i] != idx_t)
1595 {
1596 au1_tile_slice_boundary[i] = 1;
1597 /*Check for slice flag at such boundaries*/
1598 }
1599 else
1600 {
1601 au4_ilf_across_tile_slice_enable[i] = 1;
1602 }
1603 }
1604 /*Reset indices*/
1605 for(i = 0; i < 8; i++)
1606 {
1607 au4_idx_t[i] = 0;
1608 }
1609 }
1610
1611 if(ps_pps->i1_tiles_enabled_flag)
1612 {
1613 /* Calculate availability flags at tile boundary */
1614 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1615 {
1616 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1617 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1618 {
1619 /*Calculate neighbor ctb tile indices*/
1620 if(0 == ps_sao_ctxt->i4_ctb_x)
1621 {
1622 au4_idx_t[0] = -1;
1623 au4_idx_t[6] = -1;
1624 au4_idx_t[4] = -1;
1625 }
1626 else
1627 {
1628 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1629 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1630 }
1631 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1632 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1633 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1634 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1635
1636 for(i = 0; i < 8; i++)
1637 {
1638 /*Sets the edges that lie on the tile boundary*/
1639 if(au4_idx_t[i] != idx_t)
1640 {
1641 au1_tile_slice_boundary[i] |= 1;
1642 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1643 }
1644 }
1645 }
1646 }
1647 }
1648
1649 for(i = 0; i < 8; i++)
1650 {
1651 /*Sets the edges that lie on the slice/tile boundary*/
1652 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1653 {
1654 au1_avail_luma[i] = 0;
1655 }
1656 }
1657 }
1658 }
1659
1660
1661 if(0 == ps_sao_ctxt->i4_ctb_x)
1662 {
1663 au1_avail_luma[0] = 0;
1664 au1_avail_luma[4] = 0;
1665 au1_avail_luma[6] = 0;
1666 }
1667
1668 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1669 {
1670 au1_avail_luma[1] = 0;
1671 au1_avail_luma[5] = 0;
1672 au1_avail_luma[7] = 0;
1673 }
1674
1675 if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1676 {
1677 au1_avail_luma[2] = 0;
1678 au1_avail_luma[4] = 0;
1679 au1_avail_luma[5] = 0;
1680 }
1681
1682 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1683 {
1684 au1_avail_luma[3] = 0;
1685 au1_avail_luma[6] = 0;
1686 au1_avail_luma[7] = 0;
1687 }
1688
1689 {
1690 au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1691 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1692 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1693 src_strd,
1694 pu1_src_left_luma,
1695 pu1_src_top_luma,
1696 pu1_sao_src_luma_top_left_ctb,
1697 au1_src_top_right,
1698 &u1_sao_src_top_left_luma_bot_left,
1699 au1_avail_luma,
1700 ai1_offset_y,
1701 sao_wd_luma,
1702 sao_ht_luma);
1703 }
1704 }
1705 }
1706 }
1707
1708 if(0 != sao_wd_chroma)
1709 {
1710 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1711 {
1712 if(0 == ps_sao->b3_cb_type_idx)
1713 {
1714
1715 for(row = 0; row < sao_ht_chroma; row++)
1716 {
1717 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1718 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1719 }
1720 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1721 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1722
1723 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1724
1725 }
1726
1727 else if(1 == ps_sao->b3_cb_type_idx)
1728 {
1729 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1730 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1731 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1732 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1733
1734 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1735 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1736 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1737 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1738
1739 if(chroma_yuv420sp_vu)
1740 {
1741 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1742 src_strd,
1743 pu1_src_left_chroma,
1744 pu1_src_top_chroma,
1745 pu1_sao_src_chroma_top_left_ctb,
1746 ps_sao->b5_cr_band_pos,
1747 ps_sao->b5_cb_band_pos,
1748 ai1_offset_cr,
1749 ai1_offset_cb,
1750 sao_wd_chroma,
1751 sao_ht_chroma
1752 );
1753 }
1754 else
1755 {
1756 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1757 src_strd,
1758 pu1_src_left_chroma,
1759 pu1_src_top_chroma,
1760 pu1_sao_src_chroma_top_left_ctb,
1761 ps_sao->b5_cb_band_pos,
1762 ps_sao->b5_cr_band_pos,
1763 ai1_offset_cb,
1764 ai1_offset_cr,
1765 sao_wd_chroma,
1766 sao_ht_chroma
1767 );
1768 }
1769 }
1770 else // if(2 <= ps_sao->b3_cb_type_idx)
1771 {
1772 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1773 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1774 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1775 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1776
1777 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1778 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1779 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1780 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1781
1782 for(i = 0; i < 8; i++)
1783 {
1784 au1_avail_chroma[i] = 255;
1785 au1_tile_slice_boundary[i] = 0;
1786 au4_idx_t[i] = 0;
1787 au4_ilf_across_tile_slice_enable[i] = 1;
1788 }
1789
1790 {
1791 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1792 {
1793 ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1794 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1795
1796 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1797 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1798
1799 ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1800 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1801
1802 ctbx_t_d = ps_sao_ctxt->i4_ctb_x;
1803 ctby_t_d = ps_sao_ctxt->i4_ctb_y;
1804
1805 ctbx_t = ps_sao_ctxt->i4_ctb_x;
1806 ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1807
1808 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1809 {
1810 if(0 == ps_sao_ctxt->i4_ctb_x)
1811 {
1812 au4_idx_t[0] = -1;
1813 au4_idx_t[6] = -1;
1814 au4_idx_t[4] = -1;
1815 }
1816 else
1817 {
1818 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1819 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1820 }
1821 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1822 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1823 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1824 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1825
1826 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1827
1828 if(0 == ps_sao_ctxt->i4_ctb_x)
1829 {
1830 au4_ilf_across_tile_slice_enable[4] = 0;
1831 au4_ilf_across_tile_slice_enable[6] = 0;
1832 au4_ilf_across_tile_slice_enable[0] = 0;
1833 }
1834 else
1835 {
1836 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1837 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1838 }
1839
1840 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1841 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1842 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1843 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1844 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1845 /*
1846 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1847 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1848 */
1849 for(i = 0; i < 8; i++)
1850 {
1851 /*Sets the edges that lie on the slice/tile boundary*/
1852 if(au4_idx_t[i] != idx_t)
1853 {
1854 au1_tile_slice_boundary[i] = 1;
1855 }
1856 else
1857 {
1858 /*Indicates that the neighbour belongs to same/dependent slice*/
1859 au4_ilf_across_tile_slice_enable[i] = 1;
1860 }
1861 }
1862 /*Reset indices*/
1863 for(i = 0; i < 8; i++)
1864 {
1865 au4_idx_t[i] = 0;
1866 }
1867 }
1868 if(ps_pps->i1_tiles_enabled_flag)
1869 {
1870 /* Calculate availability flags at tile boundary */
1871 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1872 {
1873 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1874 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1875 {
1876 /*Calculate neighbor ctb slice indices*/
1877 if(0 == ps_sao_ctxt->i4_ctb_x)
1878 {
1879 au4_idx_t[0] = -1;
1880 au4_idx_t[6] = -1;
1881 au4_idx_t[4] = -1;
1882 }
1883 else
1884 {
1885 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1886 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1887 }
1888 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1889 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1890 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1891 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1892
1893 for(i = 0; i < 8; i++)
1894 {
1895 /*Sets the edges that lie on the tile boundary*/
1896 if(au4_idx_t[i] != idx_t)
1897 {
1898 au1_tile_slice_boundary[i] |= 1;
1899 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1900 }
1901 }
1902 }
1903 }
1904 }
1905 for(i = 0; i < 8; i++)
1906 {
1907 /*Sets the edges that lie on the slice/tile boundary*/
1908 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1909 {
1910 au1_avail_chroma[i] = 0;
1911 }
1912 }
1913
1914 }
1915 }
1916 if(0 == ps_sao_ctxt->i4_ctb_x)
1917 {
1918 au1_avail_chroma[0] = 0;
1919 au1_avail_chroma[4] = 0;
1920 au1_avail_chroma[6] = 0;
1921 }
1922
1923 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1924 {
1925 au1_avail_chroma[1] = 0;
1926 au1_avail_chroma[5] = 0;
1927 au1_avail_chroma[7] = 0;
1928 }
1929
1930 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1931 {
1932 au1_avail_chroma[2] = 0;
1933 au1_avail_chroma[4] = 0;
1934 au1_avail_chroma[5] = 0;
1935 }
1936
1937 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1938 {
1939 au1_avail_chroma[3] = 0;
1940 au1_avail_chroma[6] = 0;
1941 au1_avail_chroma[7] = 0;
1942 }
1943
1944 {
1945 au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
1946 au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
1947 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1948 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1949
1950 if(chroma_yuv420sp_vu)
1951 {
1952 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1953 src_strd,
1954 pu1_src_left_chroma,
1955 pu1_src_top_chroma,
1956 pu1_sao_src_chroma_top_left_ctb,
1957 au1_src_top_right,
1958 au1_sao_src_top_left_chroma_bot_left,
1959 au1_avail_chroma,
1960 ai1_offset_cr,
1961 ai1_offset_cb,
1962 sao_wd_chroma,
1963 sao_ht_chroma);
1964 }
1965 else
1966 {
1967 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1968 src_strd,
1969 pu1_src_left_chroma,
1970 pu1_src_top_chroma,
1971 pu1_sao_src_chroma_top_left_ctb,
1972 au1_src_top_right,
1973 au1_sao_src_top_left_chroma_bot_left,
1974 au1_avail_chroma,
1975 ai1_offset_cb,
1976 ai1_offset_cr,
1977 sao_wd_chroma,
1978 sao_ht_chroma);
1979 }
1980 }
1981
1982 }
1983 }
1984 }
1985
1986 pu1_src_luma += sao_ht_luma * src_strd;
1987 pu1_src_chroma += sao_ht_chroma * src_strd;
1988 ps_sao += (ps_sps->i2_pic_wd_in_ctb);
1989 }
1990
1991 /* Left CTB */
1992 if(ps_sao_ctxt->i4_ctb_x > 0)
1993 {
1994 WORD32 sao_wd_luma = SAO_SHIFT_CTB;
1995 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
1996 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
1997 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
1998
1999 WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2000 WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2001 WORD32 au4_idx_l[8], idx_l;
2002
2003 WORD32 remaining_rows;
2004 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2005 if(remaining_rows <= SAO_SHIFT_CTB)
2006 {
2007 sao_ht_luma += remaining_rows;
2008 }
2009 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2010 if(remaining_rows <= SAO_SHIFT_CTB)
2011 {
2012 sao_ht_chroma += remaining_rows;
2013 }
2014
2015 pu1_src_luma -= sao_wd_luma;
2016 pu1_src_chroma -= sao_wd_chroma;
2017 ps_sao -= 1;
2018 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2019 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2020 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2021 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2022
2023
2024 if(0 != sao_ht_luma)
2025 {
2026 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2027 {
2028 if(0 == ps_sao->b3_y_type_idx)
2029 {
2030 /* Update left, top and top-left */
2031 for(row = 0; row < sao_ht_luma; row++)
2032 {
2033 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2034 }
2035 /*Update in next location*/
2036 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2037
2038 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2039
2040 }
2041
2042 else if(1 == ps_sao->b3_y_type_idx)
2043 {
2044 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2045 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2046 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2047 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2048
2049 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2050 src_strd,
2051 pu1_src_left_luma,
2052 pu1_src_top_luma,
2053 pu1_sao_src_top_left_luma_curr_ctb,
2054 ps_sao->b5_y_band_pos,
2055 ai1_offset_y,
2056 sao_wd_luma,
2057 sao_ht_luma
2058 );
2059 }
2060
2061 else // if(2 <= ps_sao->b3_y_type_idx)
2062 {
2063 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2064 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2065 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2066 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2067
2068 for(i = 0; i < 8; i++)
2069 {
2070 au1_avail_luma[i] = 255;
2071 au1_tile_slice_boundary[i] = 0;
2072 au4_idx_l[i] = 0;
2073 au4_ilf_across_tile_slice_enable[i] = 1;
2074 }
2075 /******************************************************************
2076 * Derive the Left CTB's neighbour pixels' slice indices.
2077 *
2078 *
2079 * ____________
2080 * | | |
2081 * | L_T| |
2082 * |____|_______|____
2083 * | | | |
2084 * L_L | L | L_R | |
2085 * |____|_______| |
2086 * | |
2087 * L_D | |
2088 * |____________|
2089 *
2090 *****************************************************************/
2091
2092 /*In case of slices or tiles*/
2093 {
2094 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2095 {
2096 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2097 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2098
2099 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2100 ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2101
2102 ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2103 ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2104
2105 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1;
2106 ctby_l_d = ps_sao_ctxt->i4_ctb_y;
2107
2108 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2109 ctby_l = ps_sao_ctxt->i4_ctb_y;
2110
2111 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2112 {
2113 if(0 == ps_sao_ctxt->i4_ctb_y)
2114 {
2115 au4_idx_l[2] = -1;
2116 au4_idx_l[4] = -1;
2117 au4_idx_l[5] = -1;
2118 }
2119 else
2120 {
2121 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2122 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2123 }
2124 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2125 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2126 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2127 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2128
2129 /*Verify that the neighbor ctbs don't cross pic boundary.*/
2130 if(0 == ps_sao_ctxt->i4_ctb_y)
2131 {
2132 au4_ilf_across_tile_slice_enable[2] = 0;
2133 au4_ilf_across_tile_slice_enable[4] = 0;
2134 au4_ilf_across_tile_slice_enable[5] = 0;
2135 }
2136 else
2137 {
2138 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2139 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2140
2141 }
2142 //TODO: ILF flag checks for [0] and [6] are missing.
2143 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2144 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2145 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2146 /*
2147 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2148 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2149 */
2150 for(i = 0; i < 8; i++)
2151 {
2152 /*Sets the edges that lie on the slice/tile boundary*/
2153 if(au4_idx_l[i] != idx_l)
2154 {
2155 au1_tile_slice_boundary[i] = 1;
2156 }
2157 else
2158 {
2159 au4_ilf_across_tile_slice_enable[i] = 1;
2160 }
2161 }
2162 /*Reset indices*/
2163 for(i = 0; i < 8; i++)
2164 {
2165 au4_idx_l[i] = 0;
2166 }
2167 }
2168
2169 if(ps_pps->i1_tiles_enabled_flag)
2170 {
2171 /* Calculate availability flags at tile boundary */
2172 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2173 {
2174 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2175 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2176 {
2177 if(0 == ps_sao_ctxt->i4_ctb_y)
2178 {
2179 au4_idx_l[2] = -1;
2180 au4_idx_l[4] = -1;
2181 au4_idx_l[5] = -1;
2182 }
2183 else
2184 {
2185 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2186 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2187 }
2188
2189 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2190 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2191 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2192 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2193
2194 for(i = 0; i < 8; i++)
2195 {
2196 /*Sets the edges that lie on the slice/tile boundary*/
2197 if(au4_idx_l[i] != idx_l)
2198 {
2199 au1_tile_slice_boundary[i] |= 1;
2200 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2201 }
2202 }
2203 }
2204 }
2205 }
2206
2207 for(i = 0; i < 8; i++)
2208 {
2209 /*Sets the edges that lie on the slice/tile boundary*/
2210 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2211 {
2212 au1_avail_luma[i] = 0;
2213 }
2214 }
2215 }
2216 }
2217 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2218 {
2219 au1_avail_luma[0] = 0;
2220 au1_avail_luma[4] = 0;
2221 au1_avail_luma[6] = 0;
2222 }
2223 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2224 {
2225 au1_avail_luma[1] = 0;
2226 au1_avail_luma[5] = 0;
2227 au1_avail_luma[7] = 0;
2228 }
2229
2230 if(0 == ps_sao_ctxt->i4_ctb_y)
2231 {
2232 au1_avail_luma[2] = 0;
2233 au1_avail_luma[4] = 0;
2234 au1_avail_luma[5] = 0;
2235 }
2236
2237 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma)
2238 {
2239 au1_avail_luma[3] = 0;
2240 au1_avail_luma[6] = 0;
2241 au1_avail_luma[7] = 0;
2242 }
2243
2244 {
2245 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2246 u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2247 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2248 src_strd,
2249 pu1_src_left_luma,
2250 pu1_src_top_luma,
2251 pu1_sao_src_top_left_luma_curr_ctb,
2252 au1_src_top_right,
2253 &u1_sao_src_top_left_luma_bot_left,
2254 au1_avail_luma,
2255 ai1_offset_y,
2256 sao_wd_luma,
2257 sao_ht_luma);
2258 }
2259
2260 }
2261 }
2262 }
2263
2264 if(0 != sao_ht_chroma)
2265 {
2266 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
2267 {
2268 if(0 == ps_sao->b3_cb_type_idx)
2269 {
2270 for(row = 0; row < sao_ht_chroma; row++)
2271 {
2272 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2273 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2274 }
2275 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2276 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2277
2278 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2279 }
2280
2281 else if(1 == ps_sao->b3_cb_type_idx)
2282 {
2283 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2284 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2285 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2286 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2287
2288 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2289 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2290 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2291 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2292
2293 if(chroma_yuv420sp_vu)
2294 {
2295 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2296 src_strd,
2297 pu1_src_left_chroma,
2298 pu1_src_top_chroma,
2299 pu1_sao_src_top_left_chroma_curr_ctb,
2300 ps_sao->b5_cr_band_pos,
2301 ps_sao->b5_cb_band_pos,
2302 ai1_offset_cr,
2303 ai1_offset_cb,
2304 sao_wd_chroma,
2305 sao_ht_chroma
2306 );
2307 }
2308 else
2309 {
2310 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2311 src_strd,
2312 pu1_src_left_chroma,
2313 pu1_src_top_chroma,
2314 pu1_sao_src_top_left_chroma_curr_ctb,
2315 ps_sao->b5_cb_band_pos,
2316 ps_sao->b5_cr_band_pos,
2317 ai1_offset_cb,
2318 ai1_offset_cr,
2319 sao_wd_chroma,
2320 sao_ht_chroma
2321 );
2322 }
2323 }
2324
2325 else // if(2 <= ps_sao->b3_cb_type_idx)
2326 {
2327 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2328 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2329 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2330 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2331
2332 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2333 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2334 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2335 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2336
2337 for(i = 0; i < 8; i++)
2338 {
2339 au1_avail_chroma[i] = 255;
2340 au1_tile_slice_boundary[i] = 0;
2341 au4_idx_l[i] = 0;
2342 au4_ilf_across_tile_slice_enable[i] = 1;
2343 }
2344 /*In case of slices or tiles*/
2345 {
2346 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2347 {
2348 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2349 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2350
2351 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2352 ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2353
2354 ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2355 ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2356
2357 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1;
2358 ctby_l_d = ps_sao_ctxt->i4_ctb_y;
2359
2360 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2361 ctby_l = ps_sao_ctxt->i4_ctb_y;
2362
2363 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2364 {
2365 if(0 == ps_sao_ctxt->i4_ctb_y)
2366 {
2367 au4_idx_l[2] = -1;
2368 au4_idx_l[4] = -1;
2369 au4_idx_l[5] = -1;
2370 }
2371 else
2372 {
2373 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2374 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2375 }
2376 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2377 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2378 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2379 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2380
2381 /*Verify that the neighbour ctbs don't cross pic boundary.*/
2382 if(0 == ps_sao_ctxt->i4_ctb_y)
2383 {
2384 au4_ilf_across_tile_slice_enable[2] = 0;
2385 au4_ilf_across_tile_slice_enable[4] = 0;
2386 au4_ilf_across_tile_slice_enable[5] = 0;
2387 }
2388 else
2389 {
2390 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2391 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2392 }
2393 // au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2394 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2395 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2396 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2397 /*
2398 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2399 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2400 */
2401 for(i = 0; i < 8; i++)
2402 {
2403 /*Sets the edges that lie on the slice/tile boundary*/
2404 if(au4_idx_l[i] != idx_l)
2405 {
2406 au1_tile_slice_boundary[i] = 1;
2407 }
2408 else
2409 {
2410 au4_ilf_across_tile_slice_enable[i] = 1;
2411 }
2412 }
2413 /*Reset indices*/
2414 for(i = 0; i < 8; i++)
2415 {
2416 au4_idx_l[i] = 0;
2417 }
2418 }
2419 if(ps_pps->i1_tiles_enabled_flag)
2420 {
2421 /* Calculate availability flags at tile boundary */
2422 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2423 {
2424 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2425 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2426 {
2427 if(0 == ps_sao_ctxt->i4_ctb_y)
2428 {
2429 au4_idx_l[2] = -1;
2430 au4_idx_l[4] = -1;
2431 au4_idx_l[5] = -1;
2432 }
2433 else
2434 {
2435 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2436 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2437 }
2438
2439 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2440 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2441 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2442 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2443
2444 for(i = 0; i < 8; i++)
2445 {
2446 /*Sets the edges that lie on the slice/tile boundary*/
2447 if(au4_idx_l[i] != idx_l)
2448 {
2449 au1_tile_slice_boundary[i] |= 1;
2450 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2451 }
2452 }
2453 }
2454 }
2455 }
2456 for(i = 0; i < 8; i++)
2457 {
2458 /*Sets the edges that lie on the slice/tile boundary*/
2459 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2460 {
2461 au1_avail_chroma[i] = 0;
2462 }
2463 }
2464 }
2465 }
2466 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2467 {
2468 au1_avail_chroma[0] = 0;
2469 au1_avail_chroma[4] = 0;
2470 au1_avail_chroma[6] = 0;
2471 }
2472
2473 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2474 {
2475 au1_avail_chroma[1] = 0;
2476 au1_avail_chroma[5] = 0;
2477 au1_avail_chroma[7] = 0;
2478 }
2479
2480 if(0 == ps_sao_ctxt->i4_ctb_y)
2481 {
2482 au1_avail_chroma[2] = 0;
2483 au1_avail_chroma[4] = 0;
2484 au1_avail_chroma[5] = 0;
2485 }
2486
2487 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma)
2488 {
2489 au1_avail_chroma[3] = 0;
2490 au1_avail_chroma[6] = 0;
2491 au1_avail_chroma[7] = 0;
2492 }
2493
2494 {
2495 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2496 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2497 au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2498 au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2499 //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2500 //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2501 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2502 {
2503 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2504 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2505 }
2506
2507
2508 if(chroma_yuv420sp_vu)
2509 {
2510 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2511 src_strd,
2512 pu1_src_left_chroma,
2513 pu1_src_top_chroma,
2514 pu1_sao_src_top_left_chroma_curr_ctb,
2515 au1_src_top_right,
2516 au1_src_bot_left,
2517 au1_avail_chroma,
2518 ai1_offset_cr,
2519 ai1_offset_cb,
2520 sao_wd_chroma,
2521 sao_ht_chroma);
2522 }
2523 else
2524 {
2525 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2526 src_strd,
2527 pu1_src_left_chroma,
2528 pu1_src_top_chroma,
2529 pu1_sao_src_top_left_chroma_curr_ctb,
2530 au1_src_top_right,
2531 au1_src_bot_left,
2532 au1_avail_chroma,
2533 ai1_offset_cb,
2534 ai1_offset_cr,
2535 sao_wd_chroma,
2536 sao_ht_chroma);
2537 }
2538 }
2539
2540 }
2541 }
2542
2543 }
2544 pu1_src_luma += sao_wd_luma;
2545 pu1_src_chroma += sao_wd_chroma;
2546 ps_sao += 1;
2547 }
2548
2549
2550 /* Current CTB */
2551 {
2552 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2553 WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2554 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2555 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2556 WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2557 WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2558 WORD32 au4_idx_c[8], idx_c;
2559
2560 WORD32 remaining_rows;
2561 WORD32 remaining_cols;
2562
2563 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2564 if(remaining_cols <= SAO_SHIFT_CTB)
2565 {
2566 sao_wd_luma += remaining_cols;
2567 }
2568 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2569 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2570 {
2571 sao_wd_chroma += remaining_cols;
2572 }
2573
2574 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2575 if(remaining_rows <= SAO_SHIFT_CTB)
2576 {
2577 sao_ht_luma += remaining_rows;
2578 }
2579 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2580 if(remaining_rows <= SAO_SHIFT_CTB)
2581 {
2582 sao_ht_chroma += remaining_rows;
2583 }
2584
2585 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2586 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2587 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2588 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2589
2590 if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2591 {
2592 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2593 {
2594 if(0 == ps_sao->b3_y_type_idx)
2595 {
2596 /* Update left, top and top-left */
2597 for(row = 0; row < sao_ht_luma; row++)
2598 {
2599 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2600 }
2601 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2602
2603 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2604
2605 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2606
2607 }
2608
2609 else if(1 == ps_sao->b3_y_type_idx)
2610 {
2611 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2612 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2613 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2614 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2615
2616 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2617 src_strd,
2618 pu1_src_left_luma,
2619 pu1_src_top_luma,
2620 pu1_sao_src_top_left_luma_curr_ctb,
2621 ps_sao->b5_y_band_pos,
2622 ai1_offset_y,
2623 sao_wd_luma,
2624 sao_ht_luma
2625 );
2626 }
2627
2628 else // if(2 <= ps_sao->b3_y_type_idx)
2629 {
2630 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2631 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2632 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2633 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2634
2635 for(i = 0; i < 8; i++)
2636 {
2637 au1_avail_luma[i] = 255;
2638 au1_tile_slice_boundary[i] = 0;
2639 au4_idx_c[i] = 0;
2640 au4_ilf_across_tile_slice_enable[i] = 1;
2641 }
2642 /******************************************************************
2643 * Derive the Current CTB's neighbour pixels' slice indices.
2644 *
2645 *
2646 * ____________
2647 * | | |
2648 * | | C_T |
2649 * |____|_______|____
2650 * | | | |
2651 * | C_L| C | C_R|
2652 * |____|_______| |
2653 * | C_D |
2654 * | |
2655 * |____________|
2656 *
2657 *****************************************************************/
2658
2659 /*In case of slices or tiles*/
2660 {
2661 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2662 {
2663 ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2664 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2665
2666 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2667 ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2668
2669 ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2670 ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2671
2672 ctbx_c_d = ps_sao_ctxt->i4_ctb_x;
2673 ctby_c_d = ps_sao_ctxt->i4_ctb_y;
2674
2675 ctbx_c = ps_sao_ctxt->i4_ctb_x;
2676 ctby_c = ps_sao_ctxt->i4_ctb_y;
2677
2678 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2679 {
2680 if(0 == ps_sao_ctxt->i4_ctb_x)
2681 {
2682 au4_idx_c[6] = -1;
2683 au4_idx_c[0] = -1;
2684 au4_idx_c[4] = -1;
2685 }
2686 else
2687 {
2688 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2689 }
2690
2691 if(0 == ps_sao_ctxt->i4_ctb_y)
2692 {
2693 au4_idx_c[2] = -1;
2694 au4_idx_c[5] = -1;
2695 au4_idx_c[4] = -1;
2696 }
2697 else
2698 {
2699 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2700 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2701 }
2702 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2703 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2704 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2705
2706 if(0 == ps_sao_ctxt->i4_ctb_x)
2707 {
2708 au4_ilf_across_tile_slice_enable[6] = 0;
2709 au4_ilf_across_tile_slice_enable[0] = 0;
2710 au4_ilf_across_tile_slice_enable[4] = 0;
2711 }
2712 else
2713 {
2714 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2715 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2716 }
2717 if(0 == ps_sao_ctxt->i4_ctb_y)
2718 {
2719 au4_ilf_across_tile_slice_enable[2] = 0;
2720 au4_ilf_across_tile_slice_enable[4] = 0;
2721 au4_ilf_across_tile_slice_enable[5] = 0;
2722 }
2723 else
2724 {
2725 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2726 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2727 }
2728 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2729 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2730 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2731
2732 /*
2733 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2734 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2735 */
2736 for(i = 0; i < 8; i++)
2737 {
2738 /*Sets the edges that lie on the slice/tile boundary*/
2739 if(au4_idx_c[i] != idx_c)
2740 {
2741 au1_tile_slice_boundary[i] = 1;
2742 }
2743 else
2744 {
2745 au4_ilf_across_tile_slice_enable[i] = 1;
2746 }
2747 }
2748 /*Reset indices*/
2749 for(i = 0; i < 8; i++)
2750 {
2751 au4_idx_c[i] = 0;
2752 }
2753 }
2754
2755 if(ps_pps->i1_tiles_enabled_flag)
2756 {
2757 /* Calculate availability flags at tile boundary */
2758 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2759 {
2760 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2761 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2762 {
2763 if(0 == ps_sao_ctxt->i4_ctb_x)
2764 {
2765 au4_idx_c[6] = -1;
2766 au4_idx_c[0] = -1;
2767 au4_idx_c[4] = -1;
2768 }
2769 else
2770 {
2771 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2772 }
2773
2774 if(0 == ps_sao_ctxt->i4_ctb_y)
2775 {
2776 au4_idx_c[2] = -1;
2777 au4_idx_c[5] = -1;
2778 au4_idx_c[4] = -1;
2779 }
2780 else
2781 {
2782 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2783 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2784 }
2785 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2786 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2787 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2788
2789 for(i = 0; i < 8; i++)
2790 {
2791 /*Sets the edges that lie on the slice/tile boundary*/
2792 if(au4_idx_c[i] != idx_c)
2793 {
2794 au1_tile_slice_boundary[i] |= 1;
2795 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2796 }
2797 }
2798 }
2799 }
2800 }
2801
2802 for(i = 0; i < 8; i++)
2803 {
2804 /*Sets the edges that lie on the slice/tile boundary*/
2805 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2806 {
2807 au1_avail_luma[i] = 0;
2808 }
2809 }
2810
2811 }
2812 }
2813 if(0 == ps_sao_ctxt->i4_ctb_x)
2814 {
2815 au1_avail_luma[0] = 0;
2816 au1_avail_luma[4] = 0;
2817 au1_avail_luma[6] = 0;
2818 }
2819
2820 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2821 {
2822 au1_avail_luma[1] = 0;
2823 au1_avail_luma[5] = 0;
2824 au1_avail_luma[7] = 0;
2825 }
2826
2827 if(0 == ps_sao_ctxt->i4_ctb_y)
2828 {
2829 au1_avail_luma[2] = 0;
2830 au1_avail_luma[4] = 0;
2831 au1_avail_luma[5] = 0;
2832 }
2833
2834 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma)
2835 {
2836 au1_avail_luma[3] = 0;
2837 au1_avail_luma[6] = 0;
2838 au1_avail_luma[7] = 0;
2839 }
2840
2841 {
2842 au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2843 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2844
2845 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2846 src_strd,
2847 pu1_src_left_luma,
2848 pu1_src_top_luma,
2849 pu1_sao_src_top_left_luma_curr_ctb,
2850 au1_src_top_right,
2851 &u1_sao_src_top_left_luma_bot_left,
2852 au1_avail_luma,
2853 ai1_offset_y,
2854 sao_wd_luma,
2855 sao_ht_luma);
2856 }
2857 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2858 pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2859 }
2860 }
2861 }
2862
2863 if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
2864 {
2865 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
2866 {
2867 if(0 == ps_sao->b3_cb_type_idx)
2868 {
2869 for(row = 0; row < sao_ht_chroma; row++)
2870 {
2871 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2872 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2873 }
2874 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2875 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2876
2877 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2878
2879 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
2880 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
2881 }
2882
2883 else if(1 == ps_sao->b3_cb_type_idx)
2884 {
2885 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2886 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2887 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2888 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2889
2890 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2891 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2892 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2893 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2894
2895 if(chroma_yuv420sp_vu)
2896 {
2897 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2898 src_strd,
2899 pu1_src_left_chroma,
2900 pu1_src_top_chroma,
2901 pu1_sao_src_top_left_chroma_curr_ctb,
2902 ps_sao->b5_cr_band_pos,
2903 ps_sao->b5_cb_band_pos,
2904 ai1_offset_cr,
2905 ai1_offset_cb,
2906 sao_wd_chroma,
2907 sao_ht_chroma
2908 );
2909 }
2910 else
2911 {
2912 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2913 src_strd,
2914 pu1_src_left_chroma,
2915 pu1_src_top_chroma,
2916 pu1_sao_src_top_left_chroma_curr_ctb,
2917 ps_sao->b5_cb_band_pos,
2918 ps_sao->b5_cr_band_pos,
2919 ai1_offset_cb,
2920 ai1_offset_cr,
2921 sao_wd_chroma,
2922 sao_ht_chroma
2923 );
2924 }
2925 }
2926
2927 else // if(2 <= ps_sao->b3_cb_type_idx)
2928 {
2929 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2930 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2931 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2932 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2933
2934 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2935 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2936 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2937 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2938
2939 for(i = 0; i < 8; i++)
2940 {
2941 au1_avail_chroma[i] = 255;
2942 au1_tile_slice_boundary[i] = 0;
2943 au4_idx_c[i] = 0;
2944 au4_ilf_across_tile_slice_enable[i] = 1;
2945 }
2946 {
2947 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2948 {
2949 ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2950 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2951
2952 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2953 ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2954
2955 ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2956 ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2957
2958 ctbx_c_d = ps_sao_ctxt->i4_ctb_x;
2959 ctby_c_d = ps_sao_ctxt->i4_ctb_y;
2960
2961 ctbx_c = ps_sao_ctxt->i4_ctb_x;
2962 ctby_c = ps_sao_ctxt->i4_ctb_y;
2963
2964 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2965 {
2966 if(0 == ps_sao_ctxt->i4_ctb_x)
2967 {
2968 au4_idx_c[0] = -1;
2969 au4_idx_c[4] = -1;
2970 au4_idx_c[6] = -1;
2971 }
2972 else
2973 {
2974 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2975 }
2976
2977 if(0 == ps_sao_ctxt->i4_ctb_y)
2978 {
2979 au4_idx_c[2] = -1;
2980 au4_idx_c[4] = -1;
2981 au4_idx_c[5] = -1;
2982 }
2983 else
2984 {
2985 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2986 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2987 }
2988 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2989 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2990 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2991
2992 if(0 == ps_sao_ctxt->i4_ctb_x)
2993 {
2994 au4_ilf_across_tile_slice_enable[0] = 0;
2995 au4_ilf_across_tile_slice_enable[4] = 0;
2996 au4_ilf_across_tile_slice_enable[6] = 0;
2997 }
2998 else
2999 {
3000 au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3001 au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3002 }
3003
3004 if(0 == ps_sao_ctxt->i4_ctb_y)
3005 {
3006 au4_ilf_across_tile_slice_enable[2] = 0;
3007 au4_ilf_across_tile_slice_enable[4] = 0;
3008 au4_ilf_across_tile_slice_enable[5] = 0;
3009 }
3010 else
3011 {
3012 au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3013 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3014 }
3015
3016 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3017 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3018 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3019
3020 /*
3021 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3022 * of the pixel having a greater address is checked. Accordingly, set the availability flags
3023 */
3024 for(i = 0; i < 8; i++)
3025 {
3026 /*Sets the edges that lie on the slice/tile boundary*/
3027 if(au4_idx_c[i] != idx_c)
3028 {
3029 au1_tile_slice_boundary[i] = 1;
3030 }
3031 else
3032 {
3033 au4_ilf_across_tile_slice_enable[i] = 1;
3034 }
3035 }
3036 /*Reset indices*/
3037 for(i = 0; i < 8; i++)
3038 {
3039 au4_idx_c[i] = 0;
3040 }
3041 }
3042
3043 if(ps_pps->i1_tiles_enabled_flag)
3044 {
3045 /* Calculate availability flags at tile boundary */
3046 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3047 {
3048 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3049 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3050 {
3051 if(0 == ps_sao_ctxt->i4_ctb_x)
3052 {
3053 au4_idx_c[6] = -1;
3054 au4_idx_c[0] = -1;
3055 au4_idx_c[4] = -1;
3056 }
3057 else
3058 {
3059 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3060 }
3061
3062 if(0 == ps_sao_ctxt->i4_ctb_y)
3063 {
3064 au4_idx_c[2] = -1;
3065 au4_idx_c[5] = -1;
3066 au4_idx_c[4] = -1;
3067 }
3068 else
3069 {
3070 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3071 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3072 }
3073 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3074 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3075 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3076
3077 for(i = 0; i < 8; i++)
3078 {
3079 /*Sets the edges that lie on the slice/tile boundary*/
3080 if(au4_idx_c[i] != idx_c)
3081 {
3082 au1_tile_slice_boundary[i] |= 1;
3083 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3084 }
3085 }
3086 }
3087 }
3088 }
3089
3090 for(i = 0; i < 8; i++)
3091 {
3092 /*Sets the edges that lie on the slice/tile boundary*/
3093 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3094 {
3095 au1_avail_chroma[i] = 0;
3096 }
3097 }
3098 }
3099 }
3100
3101 if(0 == ps_sao_ctxt->i4_ctb_x)
3102 {
3103 au1_avail_chroma[0] = 0;
3104 au1_avail_chroma[4] = 0;
3105 au1_avail_chroma[6] = 0;
3106 }
3107
3108 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3109 {
3110 au1_avail_chroma[1] = 0;
3111 au1_avail_chroma[5] = 0;
3112 au1_avail_chroma[7] = 0;
3113 }
3114
3115 if(0 == ps_sao_ctxt->i4_ctb_y)
3116 {
3117 au1_avail_chroma[2] = 0;
3118 au1_avail_chroma[4] = 0;
3119 au1_avail_chroma[5] = 0;
3120 }
3121
3122 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma)
3123 {
3124 au1_avail_chroma[3] = 0;
3125 au1_avail_chroma[6] = 0;
3126 au1_avail_chroma[7] = 0;
3127 }
3128
3129 {
3130 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3131 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3132
3133 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3134 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3135
3136 if(chroma_yuv420sp_vu)
3137 {
3138 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3139 src_strd,
3140 pu1_src_left_chroma,
3141 pu1_src_top_chroma,
3142 pu1_sao_src_top_left_chroma_curr_ctb,
3143 au1_src_top_right,
3144 au1_sao_src_top_left_chroma_bot_left,
3145 au1_avail_chroma,
3146 ai1_offset_cr,
3147 ai1_offset_cb,
3148 sao_wd_chroma,
3149 sao_ht_chroma);
3150 }
3151 else
3152 {
3153 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3154 src_strd,
3155 pu1_src_left_chroma,
3156 pu1_src_top_chroma,
3157 pu1_sao_src_top_left_chroma_curr_ctb,
3158 au1_src_top_right,
3159 au1_sao_src_top_left_chroma_bot_left,
3160 au1_avail_chroma,
3161 ai1_offset_cb,
3162 ai1_offset_cr,
3163 sao_wd_chroma,
3164 sao_ht_chroma);
3165 }
3166 }
3167
3168 }
3169 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3170 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3171
3172 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3173 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3174 }
3175
3176 }
3177 }
3178
3179
3180
3181
3182 /* If no loop filter is enabled copy the backed up values */
3183 {
3184 /* Luma */
3185 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && no_loop_filter_enabled_luma)
3186 {
3187 UWORD32 u4_no_loop_filter_flag;
3188 WORD32 loop_filter_bit_pos;
3189 WORD32 log2_min_cu = 3;
3190 WORD32 min_cu = (1 << log2_min_cu);
3191 UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3192 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3193 WORD32 sao_blk_wd = ctb_size;
3194 WORD32 remaining_rows;
3195 WORD32 remaining_cols;
3196
3197 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3198 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3199 if(remaining_rows <= SAO_SHIFT_CTB)
3200 sao_blk_ht += remaining_rows;
3201 if(remaining_cols <= SAO_SHIFT_CTB)
3202 sao_blk_wd += remaining_cols;
3203
3204 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3205 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3206
3207 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3208
3209 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3210 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3211 if(ps_sao_ctxt->i4_ctb_x > 0)
3212 loop_filter_bit_pos -= 1;
3213
3214 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3215 (loop_filter_bit_pos >> 3);
3216
3217 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3218 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3219 {
3220 WORD32 tmp_wd = sao_blk_wd;
3221
3222 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3223 (loop_filter_bit_pos & 7);
3224 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3225
3226 if(u4_no_loop_filter_flag)
3227 {
3228 while(tmp_wd > 0)
3229 {
3230 if(CTZ(u4_no_loop_filter_flag))
3231 {
3232 pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3233 pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3234 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
3235 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
3236 }
3237 else
3238 {
3239 for(row = 0; row < min_cu; row++)
3240 {
3241 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3242 {
3243 pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3244 }
3245 }
3246 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3247 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3248 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
3249 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
3250 }
3251 }
3252
3253 pu1_src_tmp_luma -= sao_blk_wd;
3254 pu1_src_backup_luma -= sao_blk_wd;
3255 }
3256
3257 pu1_src_tmp_luma += (src_strd << log2_min_cu);
3258 pu1_src_backup_luma += (backup_strd << log2_min_cu);
3259 }
3260 }
3261
3262 /* Chroma */
3263 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && no_loop_filter_enabled_chroma)
3264 {
3265 UWORD32 u4_no_loop_filter_flag;
3266 WORD32 loop_filter_bit_pos;
3267 WORD32 log2_min_cu = 3;
3268 WORD32 min_cu = (1 << log2_min_cu);
3269 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3270 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3271 WORD32 sao_blk_wd = ctb_size;
3272 WORD32 remaining_rows;
3273 WORD32 remaining_cols;
3274
3275 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3276 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3277 if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3278 sao_blk_ht += remaining_rows;
3279 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3280 sao_blk_wd += remaining_cols;
3281
3282 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3283 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3284
3285 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3286
3287 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3288 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3289 if(ps_sao_ctxt->i4_ctb_x > 0)
3290 loop_filter_bit_pos -= 2;
3291
3292 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3293 (loop_filter_bit_pos >> 3);
3294
3295 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3296 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3297 {
3298 WORD32 tmp_wd = sao_blk_wd;
3299
3300 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3301 (loop_filter_bit_pos & 7);
3302 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3303
3304 if(u4_no_loop_filter_flag)
3305 {
3306 while(tmp_wd > 0)
3307 {
3308 if(CTZ(u4_no_loop_filter_flag))
3309 {
3310 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3311 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3312 tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
3313 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
3314 }
3315 else
3316 {
3317 for(row = 0; row < min_cu / 2; row++)
3318 {
3319 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3320 {
3321 pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3322 }
3323 }
3324
3325 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3326 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3327 tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
3328 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
3329 }
3330 }
3331
3332 pu1_src_tmp_chroma -= sao_blk_wd;
3333 pu1_src_backup_chroma -= sao_blk_wd;
3334 }
3335
3336 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3337 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3338 }
3339 }
3340 }
3341
3342 }
3343
3344