1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevc_sao.c
22 *
23 * @brief
24 * Contains function definitions for sample adaptive offset process
25 *
26 * @author
27 * Srinivas T
28 *
29 * @par List of Functions:
30 *
31 * @remarks
32 * None
33 *
34 *******************************************************************************
35 */
36
37 #include <stdio.h>
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <assert.h>
42
43 #include "ihevc_typedefs.h"
44 #include "iv.h"
45 #include "ivd.h"
46 #include "ihevcd_cxa.h"
47 #include "ithread.h"
48
49 #include "ihevc_defs.h"
50 #include "ihevc_debug.h"
51 #include "ihevc_defs.h"
52 #include "ihevc_structs.h"
53 #include "ihevc_macros.h"
54 #include "ihevc_platform_macros.h"
55 #include "ihevc_cabac_tables.h"
56 #include "ihevc_sao.h"
57 #include "ihevc_mem_fns.h"
58
59 #include "ihevc_error.h"
60 #include "ihevc_common_tables.h"
61
62 #include "ihevcd_trace.h"
63 #include "ihevcd_defs.h"
64 #include "ihevcd_function_selector.h"
65 #include "ihevcd_structs.h"
66 #include "ihevcd_error.h"
67 #include "ihevcd_nal.h"
68 #include "ihevcd_bitstream.h"
69 #include "ihevcd_job_queue.h"
70 #include "ihevcd_utils.h"
71
72 #include "ihevc_deblk.h"
73 #include "ihevc_deblk_tables.h"
74 #include "ihevcd_profile.h"
75 #include "ihevcd_sao.h"
76 #include "ihevcd_debug.h"
77
78 #define SAO_SHIFT_CTB 8
79
80 /**
81 * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
82 */
ihevcd_sao_ctb(sao_ctxt_t * ps_sao_ctxt)83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84 {
85 codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86 UWORD8 *pu1_src_luma;
87 UWORD8 *pu1_src_chroma;
88 WORD32 src_strd;
89 WORD32 ctb_size;
90 WORD32 log2_ctb_size;
91 sps_t *ps_sps;
92 sao_t *ps_sao;
93 WORD32 row, col;
94 UWORD8 au1_avail_luma[8];
95 UWORD8 au1_avail_chroma[8];
96 WORD32 i;
97 UWORD8 *pu1_src_top_luma;
98 UWORD8 *pu1_src_top_chroma;
99 UWORD8 *pu1_src_left_luma;
100 UWORD8 *pu1_src_left_chroma;
101 UWORD8 au1_src_top_right[2];
102 UWORD8 au1_src_bot_left[2];
103 UWORD8 *pu1_no_loop_filter_flag;
104 WORD32 loop_filter_strd;
105
106 WORD8 ai1_offset_y[5];
107 WORD8 ai1_offset_cb[5];
108 WORD8 ai1_offset_cr[5];
109
110 PROFILE_DISABLE_SAO();
111
112 ai1_offset_y[0] = 0;
113 ai1_offset_cb[0] = 0;
114 ai1_offset_cr[0] = 0;
115
116 ps_sps = ps_sao_ctxt->ps_sps;
117 log2_ctb_size = ps_sps->i1_log2_ctb_size;
118 ctb_size = (1 << log2_ctb_size);
119 src_strd = ps_sao_ctxt->ps_codec->i4_strd;
120 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
121 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
122
123 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
124 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
125
126 /* Current CTB */
127 {
128 WORD32 sao_wd_luma;
129 WORD32 sao_wd_chroma;
130 WORD32 sao_ht_luma;
131 WORD32 sao_ht_chroma;
132
133 WORD32 remaining_rows;
134 WORD32 remaining_cols;
135
136 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
137 sao_wd_luma = MIN(ctb_size, remaining_cols);
138 sao_wd_chroma = MIN(ctb_size, remaining_cols);
139
140 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
141 sao_ht_luma = MIN(ctb_size, remaining_rows);
142 sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
143
144 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
145 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
146 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
147 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
148
149 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
150 ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
151 ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
152
153 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
154 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
155 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
156 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
157
158 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
159 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
160 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
161 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
162
163 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
164 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
165 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
166 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
167
168 for(i = 0; i < 8; i++)
169 {
170 au1_avail_luma[i] = 255;
171 au1_avail_chroma[i] = 255;
172 }
173
174
175 if(0 == ps_sao_ctxt->i4_ctb_x)
176 {
177 au1_avail_luma[0] = 0;
178 au1_avail_luma[4] = 0;
179 au1_avail_luma[6] = 0;
180
181 au1_avail_chroma[0] = 0;
182 au1_avail_chroma[4] = 0;
183 au1_avail_chroma[6] = 0;
184 }
185
186 if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
187 {
188 au1_avail_luma[1] = 0;
189 au1_avail_luma[5] = 0;
190 au1_avail_luma[7] = 0;
191
192 au1_avail_chroma[1] = 0;
193 au1_avail_chroma[5] = 0;
194 au1_avail_chroma[7] = 0;
195 }
196
197 if(0 == ps_sao_ctxt->i4_ctb_y)
198 {
199 au1_avail_luma[2] = 0;
200 au1_avail_luma[4] = 0;
201 au1_avail_luma[5] = 0;
202
203 au1_avail_chroma[2] = 0;
204 au1_avail_chroma[4] = 0;
205 au1_avail_chroma[5] = 0;
206 }
207
208 if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
209 {
210 au1_avail_luma[3] = 0;
211 au1_avail_luma[6] = 0;
212 au1_avail_luma[7] = 0;
213
214 au1_avail_chroma[3] = 0;
215 au1_avail_chroma[6] = 0;
216 au1_avail_chroma[7] = 0;
217 }
218
219
220 if(0 == ps_sao->b3_y_type_idx)
221 {
222 /* Update left, top and top-left */
223 for(row = 0; row < sao_ht_luma; row++)
224 {
225 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
226 }
227 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
228
229 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
230
231 }
232 else
233 {
234 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
235 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
236 WORD32 tmp_strd = MAX_CTB_SIZE + 2;
237 WORD32 no_loop_filter_enabled = 0;
238
239 /* Check the loop filter flags and copy the original values for back up */
240 {
241 UWORD32 u4_no_loop_filter_flag;
242 WORD32 min_cu = 8;
243 UWORD8 *pu1_src_tmp = pu1_src_luma;
244
245 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
246 {
247 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
248 ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
249 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
250
251 if(u4_no_loop_filter_flag)
252 {
253 WORD32 tmp_wd = sao_wd_luma;
254 no_loop_filter_enabled = 1;
255 while(tmp_wd > 0)
256 {
257 if(CTZ(u4_no_loop_filter_flag))
258 {
259 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
260 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
261 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
262 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
263 }
264 else
265 {
266 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
267 {
268 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
269 {
270 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
271 }
272 }
273
274 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
275 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
276 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
277 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
278 }
279 }
280
281 pu1_src_tmp -= sao_wd_luma;
282 }
283
284 pu1_src_tmp += min_cu * src_strd;
285 pu1_src_copy += min_cu * tmp_strd;
286 }
287 }
288
289 if(1 == ps_sao->b3_y_type_idx)
290 {
291 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
292 src_strd,
293 pu1_src_left_luma,
294 pu1_src_top_luma,
295 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
296 ps_sao->b5_y_band_pos,
297 ai1_offset_y,
298 sao_wd_luma,
299 sao_ht_luma);
300 }
301 else // if(2 <= ps_sao->b3_y_type_idx)
302 {
303 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
304 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
305 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
306 src_strd,
307 pu1_src_left_luma,
308 pu1_src_top_luma,
309 ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
310 au1_src_top_right,
311 au1_src_bot_left,
312 au1_avail_luma,
313 ai1_offset_y,
314 sao_wd_luma,
315 sao_ht_luma);
316 }
317
318 /* Check the loop filter flags and copy the original values back if they are set */
319 if(no_loop_filter_enabled)
320 {
321 UWORD32 u4_no_loop_filter_flag;
322 WORD32 min_cu = 8;
323 UWORD8 *pu1_src_tmp = pu1_src_luma;
324
325 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
326 {
327 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
328 u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
329
330 if(u4_no_loop_filter_flag)
331 {
332 WORD32 tmp_wd = sao_wd_luma;
333 while(tmp_wd > 0)
334 {
335 if(CTZ(u4_no_loop_filter_flag))
336 {
337 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
338 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
339 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
340 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
341 }
342 else
343 {
344 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
345 {
346 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
347 {
348 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
349 }
350 }
351
352 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
353 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
354 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
355 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
356 }
357 }
358
359 pu1_src_tmp -= sao_wd_luma;
360 }
361
362 pu1_src_tmp += min_cu * src_strd;
363 pu1_src_copy += min_cu * tmp_strd;
364 }
365 }
366
367 }
368
369 if(0 == ps_sao->b3_cb_type_idx)
370 {
371 for(row = 0; row < sao_ht_chroma; row++)
372 {
373 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
374 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
375 }
376 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
377 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
378
379 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
380 }
381 else
382 {
383 UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
384 UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
385 WORD32 tmp_strd = MAX_CTB_SIZE + 4;
386 WORD32 no_loop_filter_enabled = 0;
387
388 /* Check the loop filter flags and copy the original values for back up */
389 {
390 UWORD32 u4_no_loop_filter_flag;
391 WORD32 min_cu = 4;
392 UWORD8 *pu1_src_tmp = pu1_src_chroma;
393
394 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
395 {
396 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
397 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
398
399 if(u4_no_loop_filter_flag)
400 {
401 WORD32 tmp_wd = sao_wd_chroma;
402 no_loop_filter_enabled = 1;
403 while(tmp_wd > 0)
404 {
405 if(CTZ(u4_no_loop_filter_flag))
406 {
407 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
408 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
409 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
410 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
411 }
412 else
413 {
414 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
415 {
416 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
417 {
418 pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
419 }
420 }
421
422 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
423 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
424 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
425 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
426 }
427 }
428
429 pu1_src_tmp -= sao_wd_chroma;
430 }
431
432 pu1_src_tmp += min_cu * src_strd;
433 pu1_src_copy += min_cu * tmp_strd;
434 }
435 }
436
437 if(1 == ps_sao->b3_cb_type_idx)
438 {
439 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
440 src_strd,
441 pu1_src_left_chroma,
442 pu1_src_top_chroma,
443 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
444 ps_sao->b5_cb_band_pos,
445 ps_sao->b5_cr_band_pos,
446 ai1_offset_cb,
447 ai1_offset_cr,
448 sao_wd_chroma,
449 sao_ht_chroma
450 );
451 }
452 else // if(2 <= ps_sao->b3_cb_type_idx)
453 {
454 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
455 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
456 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
457 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
458 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
459 src_strd,
460 pu1_src_left_chroma,
461 pu1_src_top_chroma,
462 ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
463 au1_src_top_right,
464 au1_src_bot_left,
465 au1_avail_chroma,
466 ai1_offset_cb,
467 ai1_offset_cr,
468 sao_wd_chroma,
469 sao_ht_chroma);
470 }
471
472 /* Check the loop filter flags and copy the original values back if they are set */
473 if(no_loop_filter_enabled)
474 {
475 UWORD32 u4_no_loop_filter_flag;
476 WORD32 min_cu = 4;
477 UWORD8 *pu1_src_tmp = pu1_src_chroma;
478
479 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
480 {
481 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
482 u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
483
484 if(u4_no_loop_filter_flag)
485 {
486 WORD32 tmp_wd = sao_wd_chroma;
487 while(tmp_wd > 0)
488 {
489 if(CTZ(u4_no_loop_filter_flag))
490 {
491 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
492 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
493 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
494 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
495 }
496 else
497 {
498 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
499 {
500 for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
501 {
502 pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
503 }
504 }
505
506 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
507 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
508 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
509 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
510 }
511 }
512
513 pu1_src_tmp -= sao_wd_chroma;
514 }
515
516 pu1_src_tmp += min_cu * src_strd;
517 pu1_src_copy += min_cu * tmp_strd;
518 }
519 }
520
521 }
522
523 }
524 }
525
ihevcd_sao_shift_ctb(sao_ctxt_t * ps_sao_ctxt)526 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
527 {
528 codec_t *ps_codec = ps_sao_ctxt->ps_codec;
529 UWORD8 *pu1_src_luma;
530 UWORD8 *pu1_src_chroma;
531 WORD32 src_strd;
532 WORD32 ctb_size;
533 WORD32 log2_ctb_size;
534 sps_t *ps_sps;
535 sao_t *ps_sao;
536 pps_t *ps_pps;
537 slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
538 tile_t *ps_tile;
539 UWORD16 *pu1_slice_idx;
540 UWORD16 *pu1_tile_idx;
541 WORD32 row, col;
542 UWORD8 au1_avail_luma[8];
543 UWORD8 au1_avail_chroma[8];
544 UWORD8 au1_tile_slice_boundary[8];
545 UWORD8 au4_ilf_across_tile_slice_enable[8];
546 WORD32 i;
547 UWORD8 *pu1_src_top_luma;
548 UWORD8 *pu1_src_top_chroma;
549 UWORD8 *pu1_src_left_luma;
550 UWORD8 *pu1_src_left_chroma;
551 UWORD8 au1_src_top_right[2];
552 UWORD8 au1_src_bot_left[2];
553 UWORD8 *pu1_no_loop_filter_flag;
554 UWORD8 *pu1_src_backup_luma;
555 UWORD8 *pu1_src_backup_chroma;
556 WORD32 backup_strd;
557 WORD32 loop_filter_strd;
558
559 WORD32 no_loop_filter_enabled_luma = 0;
560 WORD32 no_loop_filter_enabled_chroma = 0;
561 UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
562 UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
563 UWORD8 *pu1_sao_src_luma_top_left_ctb;
564 UWORD8 *pu1_sao_src_chroma_top_left_ctb;
565 UWORD8 *pu1_sao_src_top_left_luma_top_right;
566 UWORD8 *pu1_sao_src_top_left_chroma_top_right;
567 UWORD8 u1_sao_src_top_left_luma_bot_left;
568 UWORD8 *pu1_sao_src_top_left_luma_bot_left;
569 UWORD8 *au1_sao_src_top_left_chroma_bot_left;
570 UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
571 /* Only 5 values are used, but arrays are large
572 enough so that SIMD functions can read 64 bits at a time */
573 WORD8 ai1_offset_y[8];
574 WORD8 ai1_offset_cb[8];
575 WORD8 ai1_offset_cr[8];
576 WORD32 chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
577
578 PROFILE_DISABLE_SAO();
579
580 ai1_offset_y[0] = 0;
581 ai1_offset_cb[0] = 0;
582 ai1_offset_cr[0] = 0;
583
584 ps_sps = ps_sao_ctxt->ps_sps;
585 ps_pps = ps_sao_ctxt->ps_pps;
586 ps_tile = ps_sao_ctxt->ps_tile;
587
588 log2_ctb_size = ps_sps->i1_log2_ctb_size;
589 ctb_size = (1 << log2_ctb_size);
590 src_strd = ps_sao_ctxt->ps_codec->i4_strd;
591 ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
592 ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
593
594 pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
595 pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
596 pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
597 pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
598
599 /*Stores the left value for each row ctbs- Needed for column tiles*/
600 pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
601 pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
602 pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
603 pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
604 u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
605 pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
606 au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
607 pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
608 pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
609 pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
610
611 ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
612 loop_filter_strd = (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
613 backup_strd = 2 * MAX_CTB_SIZE;
614
615 DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
616
617 {
618 /* Check the loop filter flags and copy the original values for back up */
619 /* Luma */
620
621 /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
622 * can belong to different slice with their own sao_enable flag */
623 {
624 UWORD32 u4_no_loop_filter_flag;
625 WORD32 loop_filter_bit_pos;
626 WORD32 log2_min_cu = 3;
627 WORD32 min_cu = (1 << log2_min_cu);
628 UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
629 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
630 WORD32 sao_blk_wd = ctb_size;
631 WORD32 remaining_rows;
632 WORD32 remaining_cols;
633
634 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
635 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
636 if(remaining_rows <= SAO_SHIFT_CTB)
637 sao_blk_ht += remaining_rows;
638 if(remaining_cols <= SAO_SHIFT_CTB)
639 sao_blk_wd += remaining_cols;
640
641 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
642 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
643
644 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
645
646 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
647 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
648 if(ps_sao_ctxt->i4_ctb_x > 0)
649 loop_filter_bit_pos -= 1;
650
651 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
652 (loop_filter_bit_pos >> 3);
653
654 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
655 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
656 {
657 WORD32 tmp_wd = sao_blk_wd;
658
659 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
660 (loop_filter_bit_pos & 7);
661 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
662
663 if(u4_no_loop_filter_flag)
664 {
665 no_loop_filter_enabled_luma = 1;
666 while(tmp_wd > 0)
667 {
668 if(CTZ(u4_no_loop_filter_flag))
669 {
670 pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
671 pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
672 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
673 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
674 }
675 else
676 {
677 for(row = 0; row < min_cu; row++)
678 {
679 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
680 {
681 pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
682 }
683 }
684 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
685 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
686 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
687 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
688 }
689 }
690
691 pu1_src_tmp_luma -= sao_blk_wd;
692 pu1_src_backup_luma -= sao_blk_wd;
693 }
694
695 pu1_src_tmp_luma += (src_strd << log2_min_cu);
696 pu1_src_backup_luma += (backup_strd << log2_min_cu);
697 }
698 }
699
700 /* Chroma */
701
702 {
703 UWORD32 u4_no_loop_filter_flag;
704 WORD32 loop_filter_bit_pos;
705 WORD32 log2_min_cu = 3;
706 WORD32 min_cu = (1 << log2_min_cu);
707 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
708 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
709 WORD32 sao_blk_wd = ctb_size;
710 WORD32 remaining_rows;
711 WORD32 remaining_cols;
712
713 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
714 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
715 if(remaining_rows <= 2 * SAO_SHIFT_CTB)
716 sao_blk_ht += remaining_rows;
717 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
718 sao_blk_wd += remaining_cols;
719
720 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
721 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
722
723 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
724
725 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
726 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
727 if(ps_sao_ctxt->i4_ctb_x > 0)
728 loop_filter_bit_pos -= 2;
729
730 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
731 (loop_filter_bit_pos >> 3);
732
733 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
734 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
735 {
736 WORD32 tmp_wd = sao_blk_wd;
737
738 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
739 (loop_filter_bit_pos & 7);
740 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
741
742 if(u4_no_loop_filter_flag)
743 {
744 no_loop_filter_enabled_chroma = 1;
745 while(tmp_wd > 0)
746 {
747 if(CTZ(u4_no_loop_filter_flag))
748 {
749 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
750 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
751 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
752 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
753 }
754 else
755 {
756 for(row = 0; row < min_cu / 2; row++)
757 {
758 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
759 {
760 pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
761 }
762 }
763
764 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
765 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
766 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
767 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
768 }
769 }
770
771 pu1_src_tmp_chroma -= sao_blk_wd;
772 pu1_src_backup_chroma -= sao_blk_wd;
773 }
774
775 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
776 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
777 }
778 }
779 }
780
781 DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
782
783 /* Top-left CTB */
784 if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
785 {
786 WORD32 sao_wd_luma = SAO_SHIFT_CTB;
787 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
788 WORD32 sao_ht_luma = SAO_SHIFT_CTB;
789 WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
790
791 WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
792 WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
793 WORD32 au4_idx_tl[8], idx_tl;
794
795 slice_header_t *ps_slice_hdr_top_left;
796 {
797 WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
798 (ps_sao_ctxt->i4_ctb_x - 1);
799 ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
800 }
801
802
803 pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
804 pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
805 ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
806 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
807 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
808 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
809 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
810
811 if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
812 {
813 if(0 == ps_sao->b3_y_type_idx)
814 {
815 /* Update left, top and top-left */
816 for(row = 0; row < sao_ht_luma; row++)
817 {
818 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
819 }
820 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
821
822 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
823
824
825 }
826
827 else if(1 == ps_sao->b3_y_type_idx)
828 {
829 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
830 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
831 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
832 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
833
834 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
835 src_strd,
836 pu1_src_left_luma,
837 pu1_src_top_luma,
838 pu1_sao_src_luma_top_left_ctb,
839 ps_sao->b5_y_band_pos,
840 ai1_offset_y,
841 sao_wd_luma,
842 sao_ht_luma
843 );
844 }
845
846 else // if(2 <= ps_sao->b3_y_type_idx)
847 {
848 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
849 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
850 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
851 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
852
853 for(i = 0; i < 8; i++)
854 {
855 au1_avail_luma[i] = 255;
856 au1_tile_slice_boundary[i] = 0;
857 au4_idx_tl[i] = 0;
858 au4_ilf_across_tile_slice_enable[i] = 1;
859 }
860
861 /******************************************************************
862 * Derive the Top-left CTB's neighbor pixel's slice indices.
863 *
864 * TL_T
865 * 4 _2__5________
866 * 0 | | |
867 * TL_L | TL | 1 TL_R|
868 * |____|_______|____
869 * 6|TL_D|7 | |
870 * | 3 | | |
871 * |____|_______| |
872 * | |
873 * | |
874 * |____________|
875 *
876 *****************************************************************/
877
878 /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
879 {
880 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
881 {
882 {
883 /*Assuming that sao shift is uniform along x and y directions*/
884 if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
885 {
886 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
887 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
888 }
889 else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
890 {
891 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
892 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
893 }
894 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
895 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
896
897 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
898 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
899
900 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1;
901 ctby_tl_d = ps_sao_ctxt->i4_ctb_y;
902
903 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
904 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
905 }
906
907 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
908 {
909 /*Calculate slice indices for neighbor pixels*/
910 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
911 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
912 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
913 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
914 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
915 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
916
917 if((0 == (1 << log2_ctb_size) - sao_wd_luma))
918 {
919 if(ps_sao_ctxt->i4_ctb_x == 1)
920 {
921 au4_idx_tl[6] = -1;
922 au4_idx_tl[4] = -1;
923 }
924 else
925 {
926 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
927 }
928 if(ps_sao_ctxt->i4_ctb_y == 1)
929 {
930 au4_idx_tl[5] = -1;
931 au4_idx_tl[4] = -1;
932 }
933 else
934 {
935 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
936 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
937 }
938 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
939 }
940
941 /* Verify that the neighbor ctbs don't cross pic boundary.
942 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
943 * of the pixel having a greater address is checked. Accordingly, set the availability flags.
944 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
945 * the respective pixel's flags are checked
946 */
947
948 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
949 {
950 au4_ilf_across_tile_slice_enable[4] = 0;
951 au4_ilf_across_tile_slice_enable[6] = 0;
952 }
953 else
954 {
955 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
956 }
957 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
958 {
959 au4_ilf_across_tile_slice_enable[5] = 0;
960 au4_ilf_across_tile_slice_enable[4] = 0;
961 }
962 else
963 {
964 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
965 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
966 }
967 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
968 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
969 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
970 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
971 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
972
973 if(au4_idx_tl[5] > idx_tl)
974 {
975 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
976 }
977
978 /*
979 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
980 * of the pixel having a greater address is checked. Accordingly, set the availability flags.
981 * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
982 * the respective pixel's flags are checked
983 */
984 for(i = 0; i < 8; i++)
985 {
986 /*Sets the edges that lie on the slice/tile boundary*/
987 if(au4_idx_tl[i] != idx_tl)
988 {
989 au1_tile_slice_boundary[i] = 1;
990 }
991 else
992 {
993 au4_ilf_across_tile_slice_enable[i] = 1;
994 }
995 }
996
997 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
998 }
999
1000 if(ps_pps->i1_tiles_enabled_flag)
1001 {
1002 /* Calculate availability flags at tile boundary */
1003 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1004 {
1005 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1006 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1007 {
1008 /*Set the boundary arrays*/
1009 /*Calculate tile indices for neighbor pixels*/
1010 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1011 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1012 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1013 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1014 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1015 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1016
1017 if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1018 {
1019 if(ps_sao_ctxt->i4_ctb_x == 1)
1020 {
1021 au4_idx_tl[6] = -1;
1022 au4_idx_tl[4] = -1;
1023 }
1024 else
1025 {
1026 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1027 }
1028 if(ps_sao_ctxt->i4_ctb_y == 1)
1029 {
1030 au4_idx_tl[5] = -1;
1031 au4_idx_tl[4] = -1;
1032 }
1033 else
1034 {
1035 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1036 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1037 }
1038 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1039 }
1040 for(i = 0; i < 8; i++)
1041 {
1042 /*Sets the edges that lie on the tile boundary*/
1043 if(au4_idx_tl[i] != idx_tl)
1044 {
1045 au1_tile_slice_boundary[i] |= 1;
1046 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1047 }
1048 }
1049 }
1050 }
1051 }
1052
1053
1054 /*Set availability flags based on tile and slice boundaries*/
1055 for(i = 0; i < 8; i++)
1056 {
1057 /*Sets the edges that lie on the slice/tile boundary*/
1058 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1059 {
1060 au1_avail_luma[i] = 0;
1061 }
1062 }
1063 }
1064 }
1065
1066 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1067 {
1068 au1_avail_luma[0] = 0;
1069 au1_avail_luma[4] = 0;
1070 au1_avail_luma[6] = 0;
1071 }
1072
1073 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1074 {
1075 au1_avail_luma[1] = 0;
1076 au1_avail_luma[5] = 0;
1077 au1_avail_luma[7] = 0;
1078 }
1079 //y==1 case
1080 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1081 {
1082 au1_avail_luma[2] = 0;
1083 au1_avail_luma[4] = 0;
1084 au1_avail_luma[5] = 0;
1085 }
1086 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1087 {
1088 au1_avail_luma[3] = 0;
1089 au1_avail_luma[6] = 0;
1090 au1_avail_luma[7] = 0;
1091 }
1092
1093 {
1094 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1095 u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1096 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1097 src_strd,
1098 pu1_src_left_luma,
1099 pu1_src_top_luma,
1100 pu1_sao_src_luma_top_left_ctb,
1101 au1_src_top_right,
1102 &u1_sao_src_top_left_luma_bot_left,
1103 au1_avail_luma,
1104 ai1_offset_y,
1105 sao_wd_luma,
1106 sao_ht_luma);
1107 }
1108 }
1109
1110 }
1111 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1112 {
1113 /* Update left, top and top-left */
1114 for(row = 0; row < sao_ht_luma; row++)
1115 {
1116 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1117 }
1118 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1119
1120 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1121 }
1122
1123 if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
1124 {
1125 if(0 == ps_sao->b3_cb_type_idx)
1126 {
1127 for(row = 0; row < sao_ht_chroma; row++)
1128 {
1129 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1130 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1131 }
1132 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1133 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1134
1135 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1136
1137 }
1138
1139 else if(1 == ps_sao->b3_cb_type_idx)
1140 {
1141 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1142 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1143 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1144 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1145
1146 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1147 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1148 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1149 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1150
1151 if(chroma_yuv420sp_vu)
1152 {
1153 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1154 src_strd,
1155 pu1_src_left_chroma,
1156 pu1_src_top_chroma,
1157 pu1_sao_src_chroma_top_left_ctb,
1158 ps_sao->b5_cr_band_pos,
1159 ps_sao->b5_cb_band_pos,
1160 ai1_offset_cr,
1161 ai1_offset_cb,
1162 sao_wd_chroma,
1163 sao_ht_chroma
1164 );
1165 }
1166 else
1167 {
1168 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1169 src_strd,
1170 pu1_src_left_chroma,
1171 pu1_src_top_chroma,
1172 pu1_sao_src_chroma_top_left_ctb,
1173 ps_sao->b5_cb_band_pos,
1174 ps_sao->b5_cr_band_pos,
1175 ai1_offset_cb,
1176 ai1_offset_cr,
1177 sao_wd_chroma,
1178 sao_ht_chroma
1179 );
1180 }
1181 }
1182
1183 else // if(2 <= ps_sao->b3_cb_type_idx)
1184 {
1185 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1186 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1187 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1188 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1189
1190 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1191 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1192 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1193 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1194 for(i = 0; i < 8; i++)
1195 {
1196 au1_avail_chroma[i] = 255;
1197 au1_tile_slice_boundary[i] = 0;
1198 au4_idx_tl[i] = 0;
1199 au4_ilf_across_tile_slice_enable[i] = 1;
1200 }
1201 /*In case of slices*/
1202 {
1203 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1204 {
1205 if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1206 {
1207 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1208 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1209 }
1210 else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1211 {
1212 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1213 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1214 }
1215 ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1216 ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1217
1218 ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1219 ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1220
1221 ctbx_tl_d = ps_sao_ctxt->i4_ctb_x - 1;
1222 ctby_tl_d = ps_sao_ctxt->i4_ctb_y;
1223
1224 ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1225 ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1226
1227 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1228 {
1229
1230 idx_tl = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1231 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1232 au4_idx_tl[0] = pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1233 au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1234 au4_idx_tl[3] = au4_idx_tl[6] = pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1235 au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1236
1237 if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1238 {
1239 if(ps_sao_ctxt->i4_ctb_x == 1)
1240 {
1241 au4_idx_tl[6] = -1;
1242 au4_idx_tl[4] = -1;
1243 }
1244 else
1245 {
1246 au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1247 }
1248 if(ps_sao_ctxt->i4_ctb_y == 1)
1249 {
1250 au4_idx_tl[5] = -1;
1251 au4_idx_tl[4] = -1;
1252 }
1253 else
1254 {
1255 au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1256 au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1257 }
1258 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1259 }
1260
1261 /* Verify that the neighbor ctbs don't cross pic boundary
1262 * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1263 if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1264 {
1265 au4_ilf_across_tile_slice_enable[4] = 0;
1266 au4_ilf_across_tile_slice_enable[6] = 0;
1267 }
1268 else
1269 {
1270 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1271 }
1272 if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1273 {
1274 au4_ilf_across_tile_slice_enable[5] = 0;
1275 au4_ilf_across_tile_slice_enable[4] = 0;
1276 }
1277 else
1278 {
1279 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1280 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1281 }
1282 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1283 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1284 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1285 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1286 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1287 /*
1288 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1289 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1290 */
1291 for(i = 0; i < 8; i++)
1292 {
1293 /*Sets the edges that lie on the slice/tile boundary*/
1294 if(au4_idx_tl[i] != idx_tl)
1295 {
1296 au1_tile_slice_boundary[i] = 1;
1297 }
1298 else
1299 {
1300 au4_ilf_across_tile_slice_enable[i] = 1;
1301 }
1302 }
1303
1304 /*Reset indices*/
1305 for(i = 0; i < 8; i++)
1306 {
1307 au4_idx_tl[i] = 0;
1308 }
1309 }
1310 if(ps_pps->i1_tiles_enabled_flag)
1311 {
1312 /* Calculate availability flags at tile boundary */
1313 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1314 {
1315 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1316 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1317 {
1318 /*Set the boundary arrays*/
1319 /*Calculate tile indices for neighbor pixels*/
1320 idx_tl = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1321 au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1322 au4_idx_tl[0] = pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1323 au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1324 au4_idx_tl[3] = au4_idx_tl[6] = pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1325 au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1326
1327 if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1328 {
1329 if(ps_sao_ctxt->i4_ctb_x == 1)
1330 {
1331 au4_idx_tl[6] = -1;
1332 au4_idx_tl[4] = -1;
1333 }
1334 else
1335 {
1336 au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1337 }
1338 if(ps_sao_ctxt->i4_ctb_y == 1)
1339 {
1340 au4_idx_tl[5] = -1;
1341 au4_idx_tl[4] = -1;
1342 }
1343 else
1344 {
1345 au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1346 au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1347 }
1348 au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1349 }
1350 for(i = 0; i < 8; i++)
1351 {
1352 /*Sets the edges that lie on the tile boundary*/
1353 if(au4_idx_tl[i] != idx_tl)
1354 {
1355 au1_tile_slice_boundary[i] |= 1;
1356 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1357 }
1358 }
1359 }
1360 }
1361 }
1362
1363 for(i = 0; i < 8; i++)
1364 {
1365 /*Sets the edges that lie on the slice/tile boundary*/
1366 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1367 {
1368 au1_avail_chroma[i] = 0;
1369 }
1370 }
1371 }
1372 }
1373
1374 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1375 {
1376 au1_avail_chroma[0] = 0;
1377 au1_avail_chroma[4] = 0;
1378 au1_avail_chroma[6] = 0;
1379 }
1380 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1381 {
1382 au1_avail_chroma[1] = 0;
1383 au1_avail_chroma[5] = 0;
1384 au1_avail_chroma[7] = 0;
1385 }
1386
1387 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1388 {
1389 au1_avail_chroma[2] = 0;
1390 au1_avail_chroma[4] = 0;
1391 au1_avail_chroma[5] = 0;
1392 }
1393 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1394 {
1395 au1_avail_chroma[3] = 0;
1396 au1_avail_chroma[6] = 0;
1397 au1_avail_chroma[7] = 0;
1398 }
1399
1400 {
1401 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1402 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1403 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1404 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1405 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1406 {
1407 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1408 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1409 }
1410
1411 if(chroma_yuv420sp_vu)
1412 {
1413 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1414 src_strd,
1415 pu1_src_left_chroma,
1416 pu1_src_top_chroma,
1417 pu1_sao_src_chroma_top_left_ctb,
1418 au1_src_top_right,
1419 au1_sao_src_top_left_chroma_bot_left,
1420 au1_avail_chroma,
1421 ai1_offset_cr,
1422 ai1_offset_cb,
1423 sao_wd_chroma,
1424 sao_ht_chroma);
1425 }
1426 else
1427 {
1428 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1429 src_strd,
1430 pu1_src_left_chroma,
1431 pu1_src_top_chroma,
1432 pu1_sao_src_chroma_top_left_ctb,
1433 au1_src_top_right,
1434 au1_sao_src_top_left_chroma_bot_left,
1435 au1_avail_chroma,
1436 ai1_offset_cb,
1437 ai1_offset_cr,
1438 sao_wd_chroma,
1439 sao_ht_chroma);
1440 }
1441 }
1442 }
1443 }
1444 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1445 {
1446 for(row = 0; row < sao_ht_chroma; row++)
1447 {
1448 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1449 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1450 }
1451 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1452 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1453
1454 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1455 }
1456
1457 pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1458 pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1459 ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1460 }
1461
1462
1463 /* Top CTB */
1464 if((ps_sao_ctxt->i4_ctb_y > 0))
1465 {
1466 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1467 WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1468 WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1469 WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1470
1471 WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1472 WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1473 WORD32 au4_idx_t[8], idx_t;
1474
1475 WORD32 remaining_cols;
1476
1477 slice_header_t *ps_slice_hdr_top;
1478 {
1479 WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
1480 (ps_sao_ctxt->i4_ctb_x);
1481 ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
1482 }
1483
1484 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1485 if(remaining_cols <= SAO_SHIFT_CTB)
1486 {
1487 sao_wd_luma += remaining_cols;
1488 }
1489 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1490 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1491 {
1492 sao_wd_chroma += remaining_cols;
1493 }
1494
1495 pu1_src_luma -= (sao_ht_luma * src_strd);
1496 pu1_src_chroma -= (sao_ht_chroma * src_strd);
1497 ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1498 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1499 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1500 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1501 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1502
1503 if(0 != sao_wd_luma)
1504 {
1505 if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
1506 {
1507 if(0 == ps_sao->b3_y_type_idx)
1508 {
1509 /* Update left, top and top-left */
1510 for(row = 0; row < sao_ht_luma; row++)
1511 {
1512 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1513 }
1514 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1515
1516 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1517
1518 }
1519
1520 else if(1 == ps_sao->b3_y_type_idx)
1521 {
1522 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1523 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1524 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1525 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1526
1527 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1528 src_strd,
1529 pu1_src_left_luma,
1530 pu1_src_top_luma,
1531 pu1_sao_src_luma_top_left_ctb,
1532 ps_sao->b5_y_band_pos,
1533 ai1_offset_y,
1534 sao_wd_luma,
1535 sao_ht_luma
1536 );
1537 }
1538
1539 else // if(2 <= ps_sao->b3_y_type_idx)
1540 {
1541 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1542 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1543 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1544 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1545
1546 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1547 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1548 ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1549
1550 for(i = 0; i < 8; i++)
1551 {
1552
1553 au4_ilf_across_tile_slice_enable[i] = 1;
1554 }
1555 /******************************************************************
1556 * Derive the Top CTB's neighbor pixels' slice indices.
1557 *
1558 * T_T
1559 * ____________
1560 * | | |
1561 * | T_L| T |T_R
1562 * | | ______|____
1563 * | | T_D | |
1564 * | | | |
1565 * |____|_______| |
1566 * | |
1567 * | |
1568 * |____________|
1569 *
1570 *****************************************************************/
1571
1572 /*In case of slices*/
1573 {
1574 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1575 {
1576
1577 ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1578 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1579
1580 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1581 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1582
1583 ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1584 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1585
1586 ctbx_t_d = ps_sao_ctxt->i4_ctb_x;
1587 ctby_t_d = ps_sao_ctxt->i4_ctb_y;
1588
1589 ctbx_t = ps_sao_ctxt->i4_ctb_x;
1590 ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1591
1592 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1593 {
1594 /*Calculate neighbor ctb slice indices*/
1595 if(0 == ps_sao_ctxt->i4_ctb_x)
1596 {
1597 au4_idx_t[0] = -1;
1598 au4_idx_t[6] = -1;
1599 au4_idx_t[4] = -1;
1600 }
1601 else
1602 {
1603 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1604 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1605 }
1606 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1607 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1608 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1609 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1610
1611 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1612 if(0 == ps_sao_ctxt->i4_ctb_x)
1613 {
1614 au4_ilf_across_tile_slice_enable[4] = 0;
1615 au4_ilf_across_tile_slice_enable[6] = 0;
1616 au4_ilf_across_tile_slice_enable[0] = 0;
1617 }
1618 else
1619 {
1620 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1621 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1622 }
1623
1624
1625
1626 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1627 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1628 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1629 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1630 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1631
1632 if(au4_idx_t[6] < idx_t)
1633 {
1634 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1635 }
1636
1637 /*
1638 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1639 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1640 */
1641
1642 for(i = 0; i < 8; i++)
1643 {
1644 /*Sets the edges that lie on the slice/tile boundary*/
1645 if(au4_idx_t[i] != idx_t)
1646 {
1647 au1_tile_slice_boundary[i] = 1;
1648 /*Check for slice flag at such boundaries*/
1649 }
1650 else
1651 {
1652 au4_ilf_across_tile_slice_enable[i] = 1;
1653 }
1654 }
1655 /*Reset indices*/
1656 for(i = 0; i < 8; i++)
1657 {
1658 au4_idx_t[i] = 0;
1659 }
1660 }
1661
1662 if(ps_pps->i1_tiles_enabled_flag)
1663 {
1664 /* Calculate availability flags at tile boundary */
1665 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1666 {
1667 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1668 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1669 {
1670 /*Calculate neighbor ctb tile indices*/
1671 if(0 == ps_sao_ctxt->i4_ctb_x)
1672 {
1673 au4_idx_t[0] = -1;
1674 au4_idx_t[6] = -1;
1675 au4_idx_t[4] = -1;
1676 }
1677 else
1678 {
1679 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1680 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1681 }
1682 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1683 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1684 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1685 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1686
1687 for(i = 0; i < 8; i++)
1688 {
1689 /*Sets the edges that lie on the tile boundary*/
1690 if(au4_idx_t[i] != idx_t)
1691 {
1692 au1_tile_slice_boundary[i] |= 1;
1693 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1694 }
1695 }
1696 }
1697 }
1698 }
1699
1700 for(i = 0; i < 8; i++)
1701 {
1702 /*Sets the edges that lie on the slice/tile boundary*/
1703 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1704 {
1705 au1_avail_luma[i] = 0;
1706 }
1707 }
1708 }
1709 }
1710
1711
1712 if(0 == ps_sao_ctxt->i4_ctb_x)
1713 {
1714 au1_avail_luma[0] = 0;
1715 au1_avail_luma[4] = 0;
1716 au1_avail_luma[6] = 0;
1717 }
1718
1719 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1720 {
1721 au1_avail_luma[1] = 0;
1722 au1_avail_luma[5] = 0;
1723 au1_avail_luma[7] = 0;
1724 }
1725
1726 if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1727 {
1728 au1_avail_luma[2] = 0;
1729 au1_avail_luma[4] = 0;
1730 au1_avail_luma[5] = 0;
1731 }
1732
1733 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1734 {
1735 au1_avail_luma[3] = 0;
1736 au1_avail_luma[6] = 0;
1737 au1_avail_luma[7] = 0;
1738 }
1739
1740 {
1741 au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1742 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1743 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1744 src_strd,
1745 pu1_src_left_luma,
1746 pu1_src_top_luma,
1747 pu1_sao_src_luma_top_left_ctb,
1748 au1_src_top_right,
1749 &u1_sao_src_top_left_luma_bot_left,
1750 au1_avail_luma,
1751 ai1_offset_y,
1752 sao_wd_luma,
1753 sao_ht_luma);
1754 }
1755 }
1756 }
1757 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1758 {
1759 /* Update left, top and top-left */
1760 for(row = 0; row < sao_ht_luma; row++)
1761 {
1762 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1763 }
1764 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1765
1766 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1767 }
1768 }
1769
1770 if(0 != sao_wd_chroma)
1771 {
1772 if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
1773 {
1774 if(0 == ps_sao->b3_cb_type_idx)
1775 {
1776
1777 for(row = 0; row < sao_ht_chroma; row++)
1778 {
1779 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1780 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1781 }
1782 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1783 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1784
1785 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1786
1787 }
1788
1789 else if(1 == ps_sao->b3_cb_type_idx)
1790 {
1791 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1792 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1793 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1794 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1795
1796 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1797 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1798 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1799 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1800
1801 if(chroma_yuv420sp_vu)
1802 {
1803 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1804 src_strd,
1805 pu1_src_left_chroma,
1806 pu1_src_top_chroma,
1807 pu1_sao_src_chroma_top_left_ctb,
1808 ps_sao->b5_cr_band_pos,
1809 ps_sao->b5_cb_band_pos,
1810 ai1_offset_cr,
1811 ai1_offset_cb,
1812 sao_wd_chroma,
1813 sao_ht_chroma
1814 );
1815 }
1816 else
1817 {
1818 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1819 src_strd,
1820 pu1_src_left_chroma,
1821 pu1_src_top_chroma,
1822 pu1_sao_src_chroma_top_left_ctb,
1823 ps_sao->b5_cb_band_pos,
1824 ps_sao->b5_cr_band_pos,
1825 ai1_offset_cb,
1826 ai1_offset_cr,
1827 sao_wd_chroma,
1828 sao_ht_chroma
1829 );
1830 }
1831 }
1832 else // if(2 <= ps_sao->b3_cb_type_idx)
1833 {
1834 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1835 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1836 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1837 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1838
1839 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1840 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1841 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1842 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1843
1844 for(i = 0; i < 8; i++)
1845 {
1846 au1_avail_chroma[i] = 255;
1847 au1_tile_slice_boundary[i] = 0;
1848 au4_idx_t[i] = 0;
1849 au4_ilf_across_tile_slice_enable[i] = 1;
1850 }
1851
1852 {
1853 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1854 {
1855 ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1856 ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1857
1858 ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1859 ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1860
1861 ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1862 ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1863
1864 ctbx_t_d = ps_sao_ctxt->i4_ctb_x;
1865 ctby_t_d = ps_sao_ctxt->i4_ctb_y;
1866
1867 ctbx_t = ps_sao_ctxt->i4_ctb_x;
1868 ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1869
1870 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1871 {
1872 if(0 == ps_sao_ctxt->i4_ctb_x)
1873 {
1874 au4_idx_t[0] = -1;
1875 au4_idx_t[6] = -1;
1876 au4_idx_t[4] = -1;
1877 }
1878 else
1879 {
1880 au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1881 au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1882 }
1883 idx_t = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1884 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1885 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1886 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1887
1888 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1889
1890 if(0 == ps_sao_ctxt->i4_ctb_x)
1891 {
1892 au4_ilf_across_tile_slice_enable[4] = 0;
1893 au4_ilf_across_tile_slice_enable[6] = 0;
1894 au4_ilf_across_tile_slice_enable[0] = 0;
1895 }
1896 else
1897 {
1898 au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1899 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1900 }
1901
1902 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1903 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1904 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1905 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1906 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1907
1908 if(idx_t > au4_idx_t[6])
1909 {
1910 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1911 }
1912
1913 /*
1914 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1915 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1916 */
1917 for(i = 0; i < 8; i++)
1918 {
1919 /*Sets the edges that lie on the slice/tile boundary*/
1920 if(au4_idx_t[i] != idx_t)
1921 {
1922 au1_tile_slice_boundary[i] = 1;
1923 }
1924 else
1925 {
1926 /*Indicates that the neighbour belongs to same/dependent slice*/
1927 au4_ilf_across_tile_slice_enable[i] = 1;
1928 }
1929 }
1930 /*Reset indices*/
1931 for(i = 0; i < 8; i++)
1932 {
1933 au4_idx_t[i] = 0;
1934 }
1935 }
1936 if(ps_pps->i1_tiles_enabled_flag)
1937 {
1938 /* Calculate availability flags at tile boundary */
1939 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1940 {
1941 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1942 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1943 {
1944 /*Calculate neighbor ctb tile indices*/
1945 if(0 == ps_sao_ctxt->i4_ctb_x)
1946 {
1947 au4_idx_t[0] = -1;
1948 au4_idx_t[6] = -1;
1949 au4_idx_t[4] = -1;
1950 }
1951 else
1952 {
1953 au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1954 au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1955 }
1956 idx_t = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1957 au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1958 au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1959 au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1960
1961 for(i = 0; i < 8; i++)
1962 {
1963 /*Sets the edges that lie on the tile boundary*/
1964 if(au4_idx_t[i] != idx_t)
1965 {
1966 au1_tile_slice_boundary[i] |= 1;
1967 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1968 }
1969 }
1970 }
1971 }
1972 }
1973 for(i = 0; i < 8; i++)
1974 {
1975 /*Sets the edges that lie on the slice/tile boundary*/
1976 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1977 {
1978 au1_avail_chroma[i] = 0;
1979 }
1980 }
1981
1982 }
1983 }
1984 if(0 == ps_sao_ctxt->i4_ctb_x)
1985 {
1986 au1_avail_chroma[0] = 0;
1987 au1_avail_chroma[4] = 0;
1988 au1_avail_chroma[6] = 0;
1989 }
1990
1991 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1992 {
1993 au1_avail_chroma[1] = 0;
1994 au1_avail_chroma[5] = 0;
1995 au1_avail_chroma[7] = 0;
1996 }
1997
1998 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1999 {
2000 au1_avail_chroma[2] = 0;
2001 au1_avail_chroma[4] = 0;
2002 au1_avail_chroma[5] = 0;
2003 }
2004
2005 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
2006 {
2007 au1_avail_chroma[3] = 0;
2008 au1_avail_chroma[6] = 0;
2009 au1_avail_chroma[7] = 0;
2010 }
2011
2012 {
2013 au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
2014 au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
2015 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2016 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2017
2018 if(chroma_yuv420sp_vu)
2019 {
2020 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2021 src_strd,
2022 pu1_src_left_chroma,
2023 pu1_src_top_chroma,
2024 pu1_sao_src_chroma_top_left_ctb,
2025 au1_src_top_right,
2026 au1_sao_src_top_left_chroma_bot_left,
2027 au1_avail_chroma,
2028 ai1_offset_cr,
2029 ai1_offset_cb,
2030 sao_wd_chroma,
2031 sao_ht_chroma);
2032 }
2033 else
2034 {
2035 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2036 src_strd,
2037 pu1_src_left_chroma,
2038 pu1_src_top_chroma,
2039 pu1_sao_src_chroma_top_left_ctb,
2040 au1_src_top_right,
2041 au1_sao_src_top_left_chroma_bot_left,
2042 au1_avail_chroma,
2043 ai1_offset_cb,
2044 ai1_offset_cr,
2045 sao_wd_chroma,
2046 sao_ht_chroma);
2047 }
2048 }
2049
2050 }
2051 }
2052 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2053 {
2054 for(row = 0; row < sao_ht_chroma; row++)
2055 {
2056 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2057 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2058 }
2059 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2060 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2061
2062 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2063 }
2064 }
2065
2066 pu1_src_luma += sao_ht_luma * src_strd;
2067 pu1_src_chroma += sao_ht_chroma * src_strd;
2068 ps_sao += (ps_sps->i2_pic_wd_in_ctb);
2069 }
2070
2071 /* Left CTB */
2072 if(ps_sao_ctxt->i4_ctb_x > 0)
2073 {
2074 WORD32 sao_wd_luma = SAO_SHIFT_CTB;
2075 WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
2076 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2077 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2078
2079 WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2080 WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2081 WORD32 au4_idx_l[8], idx_l;
2082
2083 WORD32 remaining_rows;
2084 slice_header_t *ps_slice_hdr_left;
2085 {
2086 WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
2087 (ps_sao_ctxt->i4_ctb_x - 1);
2088 ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
2089 }
2090
2091 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2092 if(remaining_rows <= SAO_SHIFT_CTB)
2093 {
2094 sao_ht_luma += remaining_rows;
2095 }
2096 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2097 if(remaining_rows <= SAO_SHIFT_CTB)
2098 {
2099 sao_ht_chroma += remaining_rows;
2100 }
2101
2102 pu1_src_luma -= sao_wd_luma;
2103 pu1_src_chroma -= sao_wd_chroma;
2104 ps_sao -= 1;
2105 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2106 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2107 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2108 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2109
2110
2111 if(0 != sao_ht_luma)
2112 {
2113 if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
2114 {
2115 if(0 == ps_sao->b3_y_type_idx)
2116 {
2117 /* Update left, top and top-left */
2118 for(row = 0; row < sao_ht_luma; row++)
2119 {
2120 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2121 }
2122 /*Update in next location*/
2123 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2124
2125 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2126
2127 }
2128
2129 else if(1 == ps_sao->b3_y_type_idx)
2130 {
2131 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2132 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2133 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2134 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2135
2136 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2137 src_strd,
2138 pu1_src_left_luma,
2139 pu1_src_top_luma,
2140 pu1_sao_src_top_left_luma_curr_ctb,
2141 ps_sao->b5_y_band_pos,
2142 ai1_offset_y,
2143 sao_wd_luma,
2144 sao_ht_luma
2145 );
2146 }
2147
2148 else // if(2 <= ps_sao->b3_y_type_idx)
2149 {
2150 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2151 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2152 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2153 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2154
2155 for(i = 0; i < 8; i++)
2156 {
2157 au1_avail_luma[i] = 255;
2158 au1_tile_slice_boundary[i] = 0;
2159 au4_idx_l[i] = 0;
2160 au4_ilf_across_tile_slice_enable[i] = 1;
2161 }
2162 /******************************************************************
2163 * Derive the left CTB's neighbour pixels' slice indices.
2164 *
2165 *
2166 * ____________
2167 * | | |
2168 * | L_T| |
2169 * |____|_______|____
2170 * | | | |
2171 * L_L | L | L_R | |
2172 * |____|_______| |
2173 * | |
2174 * L_D | |
2175 * |____________|
2176 *
2177 *****************************************************************/
2178
2179 /*In case of slices or tiles*/
2180 {
2181 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2182 {
2183 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2184 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2185
2186 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2187 ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2188
2189 ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2190 ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2191
2192 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1;
2193 ctby_l_d = ps_sao_ctxt->i4_ctb_y;
2194
2195 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2196 ctby_l = ps_sao_ctxt->i4_ctb_y;
2197
2198 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2199 {
2200 if(0 == ps_sao_ctxt->i4_ctb_y)
2201 {
2202 au4_idx_l[2] = -1;
2203 au4_idx_l[4] = -1;
2204 au4_idx_l[5] = -1;
2205 }
2206 else
2207 {
2208 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2209 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2210 }
2211 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2212 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2213 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2214 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2215
2216 /*Verify that the neighbor ctbs don't cross pic boundary.*/
2217 if(0 == ps_sao_ctxt->i4_ctb_y)
2218 {
2219 au4_ilf_across_tile_slice_enable[2] = 0;
2220 au4_ilf_across_tile_slice_enable[4] = 0;
2221 au4_ilf_across_tile_slice_enable[5] = 0;
2222 }
2223 else
2224 {
2225 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2226 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2227
2228 }
2229 //TODO: ILF flag checks for [0] and [6] is missing.
2230 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2231 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2232 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2233
2234 if(idx_l < au4_idx_l[5])
2235 {
2236 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2237 }
2238
2239 /*
2240 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2241 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2242 */
2243 for(i = 0; i < 8; i++)
2244 {
2245 /*Sets the edges that lie on the slice/tile boundary*/
2246 if(au4_idx_l[i] != idx_l)
2247 {
2248 au1_tile_slice_boundary[i] = 1;
2249 }
2250 else
2251 {
2252 au4_ilf_across_tile_slice_enable[i] = 1;
2253 }
2254 }
2255 /*Reset indices*/
2256 for(i = 0; i < 8; i++)
2257 {
2258 au4_idx_l[i] = 0;
2259 }
2260 }
2261
2262 if(ps_pps->i1_tiles_enabled_flag)
2263 {
2264 /* Calculate availability flags at tile boundary */
2265 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2266 {
2267 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2268 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2269 {
2270 if(0 == ps_sao_ctxt->i4_ctb_y)
2271 {
2272 au4_idx_l[2] = -1;
2273 au4_idx_l[4] = -1;
2274 au4_idx_l[5] = -1;
2275 }
2276 else
2277 {
2278 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2279 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2280 }
2281
2282 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2283 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2284 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2285 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2286
2287 for(i = 0; i < 8; i++)
2288 {
2289 /*Sets the edges that lie on the slice/tile boundary*/
2290 if(au4_idx_l[i] != idx_l)
2291 {
2292 au1_tile_slice_boundary[i] |= 1;
2293 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2294 }
2295 }
2296 }
2297 }
2298 }
2299
2300 for(i = 0; i < 8; i++)
2301 {
2302 /*Sets the edges that lie on the slice/tile boundary*/
2303 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2304 {
2305 au1_avail_luma[i] = 0;
2306 }
2307 }
2308 }
2309 }
2310 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2311 {
2312 au1_avail_luma[0] = 0;
2313 au1_avail_luma[4] = 0;
2314 au1_avail_luma[6] = 0;
2315 }
2316 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2317 {
2318 au1_avail_luma[1] = 0;
2319 au1_avail_luma[5] = 0;
2320 au1_avail_luma[7] = 0;
2321 }
2322
2323 if(0 == ps_sao_ctxt->i4_ctb_y)
2324 {
2325 au1_avail_luma[2] = 0;
2326 au1_avail_luma[4] = 0;
2327 au1_avail_luma[5] = 0;
2328 }
2329
2330 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma)
2331 {
2332 au1_avail_luma[3] = 0;
2333 au1_avail_luma[6] = 0;
2334 au1_avail_luma[7] = 0;
2335 }
2336
2337 {
2338 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2339 u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2340 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2341 src_strd,
2342 pu1_src_left_luma,
2343 pu1_src_top_luma,
2344 pu1_sao_src_top_left_luma_curr_ctb,
2345 au1_src_top_right,
2346 &u1_sao_src_top_left_luma_bot_left,
2347 au1_avail_luma,
2348 ai1_offset_y,
2349 sao_wd_luma,
2350 sao_ht_luma);
2351 }
2352
2353 }
2354 }
2355 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2356 {
2357 /* Update left, top and top-left */
2358 for(row = 0; row < sao_ht_luma; row++)
2359 {
2360 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2361 }
2362 /*Update in next location*/
2363 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2364
2365 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2366 }
2367 }
2368
2369 if(0 != sao_ht_chroma)
2370 {
2371 if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
2372 {
2373 if(0 == ps_sao->b3_cb_type_idx)
2374 {
2375 for(row = 0; row < sao_ht_chroma; row++)
2376 {
2377 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2378 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2379 }
2380 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2381 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2382
2383 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2384 }
2385
2386 else if(1 == ps_sao->b3_cb_type_idx)
2387 {
2388 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2389 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2390 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2391 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2392
2393 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2394 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2395 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2396 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2397
2398 if(chroma_yuv420sp_vu)
2399 {
2400 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2401 src_strd,
2402 pu1_src_left_chroma,
2403 pu1_src_top_chroma,
2404 pu1_sao_src_top_left_chroma_curr_ctb,
2405 ps_sao->b5_cr_band_pos,
2406 ps_sao->b5_cb_band_pos,
2407 ai1_offset_cr,
2408 ai1_offset_cb,
2409 sao_wd_chroma,
2410 sao_ht_chroma
2411 );
2412 }
2413 else
2414 {
2415 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2416 src_strd,
2417 pu1_src_left_chroma,
2418 pu1_src_top_chroma,
2419 pu1_sao_src_top_left_chroma_curr_ctb,
2420 ps_sao->b5_cb_band_pos,
2421 ps_sao->b5_cr_band_pos,
2422 ai1_offset_cb,
2423 ai1_offset_cr,
2424 sao_wd_chroma,
2425 sao_ht_chroma
2426 );
2427 }
2428 }
2429
2430 else // if(2 <= ps_sao->b3_cb_type_idx)
2431 {
2432 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2433 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2434 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2435 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2436
2437 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2438 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2439 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2440 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2441
2442 for(i = 0; i < 8; i++)
2443 {
2444 au1_avail_chroma[i] = 255;
2445 au1_tile_slice_boundary[i] = 0;
2446 au4_idx_l[i] = 0;
2447 au4_ilf_across_tile_slice_enable[i] = 1;
2448 }
2449 /*In case of slices or tiles*/
2450 {
2451 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2452 {
2453 ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2454 ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2455
2456 ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2457 ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2458
2459 ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2460 ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2461
2462 ctbx_l_d = ps_sao_ctxt->i4_ctb_x - 1;
2463 ctby_l_d = ps_sao_ctxt->i4_ctb_y;
2464
2465 ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2466 ctby_l = ps_sao_ctxt->i4_ctb_y;
2467
2468 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2469 {
2470 if(0 == ps_sao_ctxt->i4_ctb_y)
2471 {
2472 au4_idx_l[2] = -1;
2473 au4_idx_l[4] = -1;
2474 au4_idx_l[5] = -1;
2475 }
2476 else
2477 {
2478 au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2479 au4_idx_l[5] = pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2480 }
2481 idx_l = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2482 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2483 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2484 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2485
2486 /*Verify that the neighbour ctbs don't cross pic boundary.*/
2487 if(0 == ps_sao_ctxt->i4_ctb_y)
2488 {
2489 au4_ilf_across_tile_slice_enable[2] = 0;
2490 au4_ilf_across_tile_slice_enable[4] = 0;
2491 au4_ilf_across_tile_slice_enable[5] = 0;
2492 }
2493 else
2494 {
2495 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2496 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2497 }
2498
2499 if(au4_idx_l[5] > idx_l)
2500 {
2501 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2502 }
2503
2504 // au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2505 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2506 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2507 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2508 /*
2509 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2510 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2511 */
2512 for(i = 0; i < 8; i++)
2513 {
2514 /*Sets the edges that lie on the slice/tile boundary*/
2515 if(au4_idx_l[i] != idx_l)
2516 {
2517 au1_tile_slice_boundary[i] = 1;
2518 }
2519 else
2520 {
2521 au4_ilf_across_tile_slice_enable[i] = 1;
2522 }
2523 }
2524 /*Reset indices*/
2525 for(i = 0; i < 8; i++)
2526 {
2527 au4_idx_l[i] = 0;
2528 }
2529 }
2530 if(ps_pps->i1_tiles_enabled_flag)
2531 {
2532 /* Calculate availability flags at tile boundary */
2533 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2534 {
2535 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2536 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2537 {
2538 if(0 == ps_sao_ctxt->i4_ctb_y)
2539 {
2540 au4_idx_l[2] = -1;
2541 au4_idx_l[4] = -1;
2542 au4_idx_l[5] = -1;
2543 }
2544 else
2545 {
2546 au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2547 au4_idx_l[5] = pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2548 }
2549
2550 idx_l = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2551 au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2552 au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2553 au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2554
2555 for(i = 0; i < 8; i++)
2556 {
2557 /*Sets the edges that lie on the slice/tile boundary*/
2558 if(au4_idx_l[i] != idx_l)
2559 {
2560 au1_tile_slice_boundary[i] |= 1;
2561 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2562 }
2563 }
2564 }
2565 }
2566 }
2567 for(i = 0; i < 8; i++)
2568 {
2569 /*Sets the edges that lie on the slice/tile boundary*/
2570 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2571 {
2572 au1_avail_chroma[i] = 0;
2573 }
2574 }
2575 }
2576 }
2577 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2578 {
2579 au1_avail_chroma[0] = 0;
2580 au1_avail_chroma[4] = 0;
2581 au1_avail_chroma[6] = 0;
2582 }
2583
2584 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2585 {
2586 au1_avail_chroma[1] = 0;
2587 au1_avail_chroma[5] = 0;
2588 au1_avail_chroma[7] = 0;
2589 }
2590
2591 if(0 == ps_sao_ctxt->i4_ctb_y)
2592 {
2593 au1_avail_chroma[2] = 0;
2594 au1_avail_chroma[4] = 0;
2595 au1_avail_chroma[5] = 0;
2596 }
2597
2598 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma)
2599 {
2600 au1_avail_chroma[3] = 0;
2601 au1_avail_chroma[6] = 0;
2602 au1_avail_chroma[7] = 0;
2603 }
2604
2605 {
2606 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2607 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2608 au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2609 au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2610 //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2611 //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2612 if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2613 {
2614 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2615 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2616 }
2617
2618
2619 if(chroma_yuv420sp_vu)
2620 {
2621 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2622 src_strd,
2623 pu1_src_left_chroma,
2624 pu1_src_top_chroma,
2625 pu1_sao_src_top_left_chroma_curr_ctb,
2626 au1_src_top_right,
2627 au1_src_bot_left,
2628 au1_avail_chroma,
2629 ai1_offset_cr,
2630 ai1_offset_cb,
2631 sao_wd_chroma,
2632 sao_ht_chroma);
2633 }
2634 else
2635 {
2636 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2637 src_strd,
2638 pu1_src_left_chroma,
2639 pu1_src_top_chroma,
2640 pu1_sao_src_top_left_chroma_curr_ctb,
2641 au1_src_top_right,
2642 au1_src_bot_left,
2643 au1_avail_chroma,
2644 ai1_offset_cb,
2645 ai1_offset_cr,
2646 sao_wd_chroma,
2647 sao_ht_chroma);
2648 }
2649 }
2650
2651 }
2652 }
2653 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2654 {
2655 for(row = 0; row < sao_ht_chroma; row++)
2656 {
2657 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2658 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2659 }
2660 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2661 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2662
2663 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2664 }
2665
2666 }
2667 pu1_src_luma += sao_wd_luma;
2668 pu1_src_chroma += sao_wd_chroma;
2669 ps_sao += 1;
2670 }
2671
2672
2673 /* Current CTB */
2674 {
2675 WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2676 WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2677 WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2678 WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2679 WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2680 WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2681 WORD32 au4_idx_c[8], idx_c;
2682
2683 WORD32 remaining_rows;
2684 WORD32 remaining_cols;
2685
2686 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2687 if(remaining_cols <= SAO_SHIFT_CTB)
2688 {
2689 sao_wd_luma += remaining_cols;
2690 }
2691 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2692 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2693 {
2694 sao_wd_chroma += remaining_cols;
2695 }
2696
2697 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2698 if(remaining_rows <= SAO_SHIFT_CTB)
2699 {
2700 sao_ht_luma += remaining_rows;
2701 }
2702 remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2703 if(remaining_rows <= SAO_SHIFT_CTB)
2704 {
2705 sao_ht_chroma += remaining_rows;
2706 }
2707
2708 pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2709 pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2710 pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2711 pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2712
2713 if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2714 {
2715 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2716 {
2717 if(0 == ps_sao->b3_y_type_idx)
2718 {
2719 /* Update left, top and top-left */
2720 for(row = 0; row < sao_ht_luma; row++)
2721 {
2722 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2723 }
2724 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2725
2726 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2727
2728 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2729
2730 }
2731
2732 else if(1 == ps_sao->b3_y_type_idx)
2733 {
2734 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2735 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2736 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2737 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2738
2739 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2740 src_strd,
2741 pu1_src_left_luma,
2742 pu1_src_top_luma,
2743 pu1_sao_src_top_left_luma_curr_ctb,
2744 ps_sao->b5_y_band_pos,
2745 ai1_offset_y,
2746 sao_wd_luma,
2747 sao_ht_luma
2748 );
2749 }
2750
2751 else // if(2 <= ps_sao->b3_y_type_idx)
2752 {
2753 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2754 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2755 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2756 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2757
2758 for(i = 0; i < 8; i++)
2759 {
2760 au1_avail_luma[i] = 255;
2761 au1_tile_slice_boundary[i] = 0;
2762 au4_idx_c[i] = 0;
2763 au4_ilf_across_tile_slice_enable[i] = 1;
2764 }
2765 /******************************************************************
2766 * Derive the Top-left CTB's neighbour pixel's slice indices.
2767 *
2768 *
2769 * ____________
2770 * | | |
2771 * | | C_T |
2772 * |____|_______|____
2773 * | | | |
2774 * | C_L| C | C_R|
2775 * |____|_______| |
2776 * | C_D |
2777 * | |
2778 * |____________|
2779 *
2780 *****************************************************************/
2781
2782 /*In case of slices*/
2783 {
2784 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2785 {
2786 ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2787 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2788
2789 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2790 ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2791
2792 ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2793 ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2794
2795 ctbx_c_d = ps_sao_ctxt->i4_ctb_x;
2796 ctby_c_d = ps_sao_ctxt->i4_ctb_y;
2797
2798 ctbx_c = ps_sao_ctxt->i4_ctb_x;
2799 ctby_c = ps_sao_ctxt->i4_ctb_y;
2800
2801 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2802 {
2803 if(0 == ps_sao_ctxt->i4_ctb_x)
2804 {
2805 au4_idx_c[6] = -1;
2806 au4_idx_c[0] = -1;
2807 au4_idx_c[4] = -1;
2808 }
2809 else
2810 {
2811 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2812 }
2813
2814 if(0 == ps_sao_ctxt->i4_ctb_y)
2815 {
2816 au4_idx_c[2] = -1;
2817 au4_idx_c[5] = -1;
2818 au4_idx_c[4] = -1;
2819 }
2820 else
2821 {
2822 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2823 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2824 }
2825 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2826 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2827 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2828
2829 if(0 == ps_sao_ctxt->i4_ctb_x)
2830 {
2831 au4_ilf_across_tile_slice_enable[6] = 0;
2832 au4_ilf_across_tile_slice_enable[0] = 0;
2833 au4_ilf_across_tile_slice_enable[4] = 0;
2834 }
2835 else
2836 {
2837 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2838 au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2839 }
2840 if(0 == ps_sao_ctxt->i4_ctb_y)
2841 {
2842 au4_ilf_across_tile_slice_enable[2] = 0;
2843 au4_ilf_across_tile_slice_enable[4] = 0;
2844 au4_ilf_across_tile_slice_enable[5] = 0;
2845 }
2846 else
2847 {
2848 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2849 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2850 }
2851 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2852 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2853 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2854
2855 if(au4_idx_c[6] < idx_c)
2856 {
2857 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2858 }
2859
2860 /*
2861 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2862 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2863 */
2864 for(i = 0; i < 8; i++)
2865 {
2866 /*Sets the edges that lie on the slice/tile boundary*/
2867 if(au4_idx_c[i] != idx_c)
2868 {
2869 au1_tile_slice_boundary[i] = 1;
2870 }
2871 else
2872 {
2873 au4_ilf_across_tile_slice_enable[i] = 1;
2874 }
2875 }
2876 /*Reset indices*/
2877 for(i = 0; i < 8; i++)
2878 {
2879 au4_idx_c[i] = 0;
2880 }
2881 }
2882
2883 if(ps_pps->i1_tiles_enabled_flag)
2884 {
2885 /* Calculate availability flags at slice boundary */
2886 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2887 {
2888 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2889 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2890 {
2891 if(0 == ps_sao_ctxt->i4_ctb_x)
2892 {
2893 au4_idx_c[6] = -1;
2894 au4_idx_c[0] = -1;
2895 au4_idx_c[4] = -1;
2896 }
2897 else
2898 {
2899 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2900 }
2901
2902 if(0 == ps_sao_ctxt->i4_ctb_y)
2903 {
2904 au4_idx_c[2] = -1;
2905 au4_idx_c[5] = -1;
2906 au4_idx_c[4] = -1;
2907 }
2908 else
2909 {
2910 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2911 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2912 }
2913 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2914 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2915 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2916
2917 for(i = 0; i < 8; i++)
2918 {
2919 /*Sets the edges that lie on the slice/tile boundary*/
2920 if(au4_idx_c[i] != idx_c)
2921 {
2922 au1_tile_slice_boundary[i] |= 1;
2923 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2924 }
2925 }
2926 }
2927 }
2928 }
2929
2930 for(i = 0; i < 8; i++)
2931 {
2932 /*Sets the edges that lie on the slice/tile boundary*/
2933 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2934 {
2935 au1_avail_luma[i] = 0;
2936 }
2937 }
2938
2939 }
2940 }
2941 if(0 == ps_sao_ctxt->i4_ctb_x)
2942 {
2943 au1_avail_luma[0] = 0;
2944 au1_avail_luma[4] = 0;
2945 au1_avail_luma[6] = 0;
2946 }
2947
2948 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2949 {
2950 au1_avail_luma[1] = 0;
2951 au1_avail_luma[5] = 0;
2952 au1_avail_luma[7] = 0;
2953 }
2954
2955 if(0 == ps_sao_ctxt->i4_ctb_y)
2956 {
2957 au1_avail_luma[2] = 0;
2958 au1_avail_luma[4] = 0;
2959 au1_avail_luma[5] = 0;
2960 }
2961
2962 if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) <= sao_ht_luma)
2963 {
2964 au1_avail_luma[3] = 0;
2965 au1_avail_luma[6] = 0;
2966 au1_avail_luma[7] = 0;
2967 }
2968
2969 {
2970 au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2971 u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2972
2973 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2974 src_strd,
2975 pu1_src_left_luma,
2976 pu1_src_top_luma,
2977 pu1_sao_src_top_left_luma_curr_ctb,
2978 au1_src_top_right,
2979 &u1_sao_src_top_left_luma_bot_left,
2980 au1_avail_luma,
2981 ai1_offset_y,
2982 sao_wd_luma,
2983 sao_ht_luma);
2984 }
2985 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2986 pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2987 }
2988 }
2989 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2990 {
2991 /* Update left, top and top-left */
2992 for(row = 0; row < sao_ht_luma; row++)
2993 {
2994 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2995 }
2996 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2997
2998 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2999
3000 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
3001 }
3002 }
3003
3004 if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
3005 {
3006 if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
3007 {
3008 if(0 == ps_sao->b3_cb_type_idx)
3009 {
3010 for(row = 0; row < sao_ht_chroma; row++)
3011 {
3012 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3013 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3014 }
3015 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3016 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3017
3018 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3019
3020 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3021 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3022 }
3023
3024 else if(1 == ps_sao->b3_cb_type_idx)
3025 {
3026 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3027 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3028 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3029 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3030
3031 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3032 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3033 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3034 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3035
3036 if(chroma_yuv420sp_vu)
3037 {
3038 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3039 src_strd,
3040 pu1_src_left_chroma,
3041 pu1_src_top_chroma,
3042 pu1_sao_src_top_left_chroma_curr_ctb,
3043 ps_sao->b5_cr_band_pos,
3044 ps_sao->b5_cb_band_pos,
3045 ai1_offset_cr,
3046 ai1_offset_cb,
3047 sao_wd_chroma,
3048 sao_ht_chroma
3049 );
3050 }
3051 else
3052 {
3053 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3054 src_strd,
3055 pu1_src_left_chroma,
3056 pu1_src_top_chroma,
3057 pu1_sao_src_top_left_chroma_curr_ctb,
3058 ps_sao->b5_cb_band_pos,
3059 ps_sao->b5_cr_band_pos,
3060 ai1_offset_cb,
3061 ai1_offset_cr,
3062 sao_wd_chroma,
3063 sao_ht_chroma
3064 );
3065 }
3066 }
3067
3068 else // if(2 <= ps_sao->b3_cb_type_idx)
3069 {
3070 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3071 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3072 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3073 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3074
3075 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3076 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3077 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3078 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3079
3080 for(i = 0; i < 8; i++)
3081 {
3082 au1_avail_chroma[i] = 255;
3083 au1_tile_slice_boundary[i] = 0;
3084 au4_idx_c[i] = 0;
3085 au4_ilf_across_tile_slice_enable[i] = 1;
3086 }
3087 {
3088 if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3089 {
3090 ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
3091 ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
3092
3093 ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
3094 ctby_c_l = ps_sao_ctxt->i4_ctb_y;
3095
3096 ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
3097 ctby_c_r = ps_sao_ctxt->i4_ctb_y;
3098
3099 ctbx_c_d = ps_sao_ctxt->i4_ctb_x;
3100 ctby_c_d = ps_sao_ctxt->i4_ctb_y;
3101
3102 ctbx_c = ps_sao_ctxt->i4_ctb_x;
3103 ctby_c = ps_sao_ctxt->i4_ctb_y;
3104
3105 if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
3106 {
3107 if(0 == ps_sao_ctxt->i4_ctb_x)
3108 {
3109 au4_idx_c[0] = -1;
3110 au4_idx_c[4] = -1;
3111 au4_idx_c[6] = -1;
3112 }
3113 else
3114 {
3115 au4_idx_c[0] = au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3116 }
3117
3118 if(0 == ps_sao_ctxt->i4_ctb_y)
3119 {
3120 au4_idx_c[2] = -1;
3121 au4_idx_c[4] = -1;
3122 au4_idx_c[5] = -1;
3123 }
3124 else
3125 {
3126 au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3127 au4_idx_c[4] = pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3128 }
3129 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3130 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3131 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3132
3133 if(0 == ps_sao_ctxt->i4_ctb_x)
3134 {
3135 au4_ilf_across_tile_slice_enable[0] = 0;
3136 au4_ilf_across_tile_slice_enable[4] = 0;
3137 au4_ilf_across_tile_slice_enable[6] = 0;
3138 }
3139 else
3140 {
3141 au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3142 au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3143 }
3144
3145 if(0 == ps_sao_ctxt->i4_ctb_y)
3146 {
3147 au4_ilf_across_tile_slice_enable[2] = 0;
3148 au4_ilf_across_tile_slice_enable[4] = 0;
3149 au4_ilf_across_tile_slice_enable[5] = 0;
3150 }
3151 else
3152 {
3153 au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3154 au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3155 }
3156
3157 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3158 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3159 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3160
3161 if(idx_c > au4_idx_c[6])
3162 {
3163 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3164 }
3165
3166 /*
3167 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3168 * of the pixel having a greater address is checked. Accordingly, set the availability flags
3169 */
3170 for(i = 0; i < 8; i++)
3171 {
3172 /*Sets the edges that lie on the slice/tile boundary*/
3173 if(au4_idx_c[i] != idx_c)
3174 {
3175 au1_tile_slice_boundary[i] = 1;
3176 }
3177 else
3178 {
3179 au4_ilf_across_tile_slice_enable[i] = 1;
3180 }
3181 }
3182 /*Reset indices*/
3183 for(i = 0; i < 8; i++)
3184 {
3185 au4_idx_c[i] = 0;
3186 }
3187 }
3188
3189 if(ps_pps->i1_tiles_enabled_flag)
3190 {
3191 /* Calculate availability flags at slice boundary */
3192 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3193 {
3194 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3195 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3196 {
3197 if(0 == ps_sao_ctxt->i4_ctb_x)
3198 {
3199 au4_idx_c[6] = -1;
3200 au4_idx_c[0] = -1;
3201 au4_idx_c[4] = -1;
3202 }
3203 else
3204 {
3205 au4_idx_c[0] = au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3206 }
3207
3208 if(0 == ps_sao_ctxt->i4_ctb_y)
3209 {
3210 au4_idx_c[2] = -1;
3211 au4_idx_c[5] = -1;
3212 au4_idx_c[4] = -1;
3213 }
3214 else
3215 {
3216 au4_idx_c[4] = pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3217 au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3218 }
3219 idx_c = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3220 au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3221 au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3222
3223 for(i = 0; i < 8; i++)
3224 {
3225 /*Sets the edges that lie on the slice/tile boundary*/
3226 if(au4_idx_c[i] != idx_c)
3227 {
3228 au1_tile_slice_boundary[i] |= 1;
3229 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3230 }
3231 }
3232 }
3233 }
3234 }
3235
3236 for(i = 0; i < 8; i++)
3237 {
3238 /*Sets the edges that lie on the slice/tile boundary*/
3239 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3240 {
3241 au1_avail_chroma[i] = 0;
3242 }
3243 }
3244 }
3245 }
3246
3247 if(0 == ps_sao_ctxt->i4_ctb_x)
3248 {
3249 au1_avail_chroma[0] = 0;
3250 au1_avail_chroma[4] = 0;
3251 au1_avail_chroma[6] = 0;
3252 }
3253
3254 if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3255 {
3256 au1_avail_chroma[1] = 0;
3257 au1_avail_chroma[5] = 0;
3258 au1_avail_chroma[7] = 0;
3259 }
3260
3261 if(0 == ps_sao_ctxt->i4_ctb_y)
3262 {
3263 au1_avail_chroma[2] = 0;
3264 au1_avail_chroma[4] = 0;
3265 au1_avail_chroma[5] = 0;
3266 }
3267
3268 if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) <= sao_ht_chroma)
3269 {
3270 au1_avail_chroma[3] = 0;
3271 au1_avail_chroma[6] = 0;
3272 au1_avail_chroma[7] = 0;
3273 }
3274
3275 {
3276 au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3277 au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3278
3279 au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3280 au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3281
3282 if(chroma_yuv420sp_vu)
3283 {
3284 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3285 src_strd,
3286 pu1_src_left_chroma,
3287 pu1_src_top_chroma,
3288 pu1_sao_src_top_left_chroma_curr_ctb,
3289 au1_src_top_right,
3290 au1_sao_src_top_left_chroma_bot_left,
3291 au1_avail_chroma,
3292 ai1_offset_cr,
3293 ai1_offset_cb,
3294 sao_wd_chroma,
3295 sao_ht_chroma);
3296 }
3297 else
3298 {
3299 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3300 src_strd,
3301 pu1_src_left_chroma,
3302 pu1_src_top_chroma,
3303 pu1_sao_src_top_left_chroma_curr_ctb,
3304 au1_src_top_right,
3305 au1_sao_src_top_left_chroma_bot_left,
3306 au1_avail_chroma,
3307 ai1_offset_cb,
3308 ai1_offset_cr,
3309 sao_wd_chroma,
3310 sao_ht_chroma);
3311 }
3312 }
3313
3314 }
3315 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3316 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3317
3318 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3319 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3320 }
3321 else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3322 {
3323 for(row = 0; row < sao_ht_chroma; row++)
3324 {
3325 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3326 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3327 }
3328 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3329 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3330
3331 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3332
3333 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3334 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3335 }
3336
3337 }
3338 }
3339
3340
3341
3342
3343 /* If no loop filter is enabled copy the backed up values */
3344 {
3345 /* Luma */
3346 if(no_loop_filter_enabled_luma)
3347 {
3348 UWORD32 u4_no_loop_filter_flag;
3349 WORD32 loop_filter_bit_pos;
3350 WORD32 log2_min_cu = 3;
3351 WORD32 min_cu = (1 << log2_min_cu);
3352 UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3353 WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3354 WORD32 sao_blk_wd = ctb_size;
3355 WORD32 remaining_rows;
3356 WORD32 remaining_cols;
3357
3358 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3359 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3360 if(remaining_rows <= SAO_SHIFT_CTB)
3361 sao_blk_ht += remaining_rows;
3362 if(remaining_cols <= SAO_SHIFT_CTB)
3363 sao_blk_wd += remaining_cols;
3364
3365 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3366 pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3367
3368 pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3369
3370 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3371 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3372 if(ps_sao_ctxt->i4_ctb_x > 0)
3373 loop_filter_bit_pos -= 1;
3374
3375 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3376 (loop_filter_bit_pos >> 3);
3377
3378 for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3379 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3380 {
3381 WORD32 tmp_wd = sao_blk_wd;
3382
3383 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3384 (loop_filter_bit_pos & 7);
3385 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3386
3387 if(u4_no_loop_filter_flag)
3388 {
3389 while(tmp_wd > 0)
3390 {
3391 if(CTZ(u4_no_loop_filter_flag))
3392 {
3393 pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3394 pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3395 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3396 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
3397 }
3398 else
3399 {
3400 for(row = 0; row < min_cu; row++)
3401 {
3402 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3403 {
3404 pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3405 }
3406 }
3407 pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3408 pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3409 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3410 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
3411 }
3412 }
3413
3414 pu1_src_tmp_luma -= sao_blk_wd;
3415 pu1_src_backup_luma -= sao_blk_wd;
3416 }
3417
3418 pu1_src_tmp_luma += (src_strd << log2_min_cu);
3419 pu1_src_backup_luma += (backup_strd << log2_min_cu);
3420 }
3421 }
3422
3423 /* Chroma */
3424 if(no_loop_filter_enabled_chroma)
3425 {
3426 UWORD32 u4_no_loop_filter_flag;
3427 WORD32 loop_filter_bit_pos;
3428 WORD32 log2_min_cu = 3;
3429 WORD32 min_cu = (1 << log2_min_cu);
3430 UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3431 WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3432 WORD32 sao_blk_wd = ctb_size;
3433 WORD32 remaining_rows;
3434 WORD32 remaining_cols;
3435
3436 remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3437 remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3438 if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3439 sao_blk_ht += remaining_rows;
3440 if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3441 sao_blk_wd += remaining_cols;
3442
3443 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3444 pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3445
3446 pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3447
3448 loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3449 (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3450 if(ps_sao_ctxt->i4_ctb_x > 0)
3451 loop_filter_bit_pos -= 2;
3452
3453 pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3454 (loop_filter_bit_pos >> 3);
3455
3456 for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3457 i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3458 {
3459 WORD32 tmp_wd = sao_blk_wd;
3460
3461 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3462 (loop_filter_bit_pos & 7);
3463 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3464
3465 if(u4_no_loop_filter_flag)
3466 {
3467 while(tmp_wd > 0)
3468 {
3469 if(CTZ(u4_no_loop_filter_flag))
3470 {
3471 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3472 pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3473 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3474 u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
3475 }
3476 else
3477 {
3478 for(row = 0; row < min_cu / 2; row++)
3479 {
3480 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3481 {
3482 pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3483 }
3484 }
3485
3486 pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3487 pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3488 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3489 u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
3490 }
3491 }
3492
3493 pu1_src_tmp_chroma -= sao_blk_wd;
3494 pu1_src_backup_chroma -= sao_blk_wd;
3495 }
3496
3497 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3498 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3499 }
3500 }
3501 }
3502
3503 }
3504
3505