• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19  *******************************************************************************
20  * @file
21  *  ihevc_sao.c
22  *
23  * @brief
24  *  Contains function definitions for sample adaptive offset process
25  *
26  * @author
27  *  Srinivas T
28  *
29  * @par List of Functions:
30  *
31  * @remarks
32  *  None
33  *
34  *******************************************************************************
35  */
36 
37 #include <stdio.h>
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <assert.h>
42 
43 #include "ihevc_typedefs.h"
44 #include "iv.h"
45 #include "ivd.h"
46 #include "ihevcd_cxa.h"
47 #include "ithread.h"
48 
49 #include "ihevc_defs.h"
50 #include "ihevc_debug.h"
51 #include "ihevc_defs.h"
52 #include "ihevc_structs.h"
53 #include "ihevc_macros.h"
54 #include "ihevc_platform_macros.h"
55 #include "ihevc_cabac_tables.h"
56 #include "ihevc_sao.h"
57 #include "ihevc_mem_fns.h"
58 
59 #include "ihevc_error.h"
60 #include "ihevc_common_tables.h"
61 
62 #include "ihevcd_trace.h"
63 #include "ihevcd_defs.h"
64 #include "ihevcd_function_selector.h"
65 #include "ihevcd_structs.h"
66 #include "ihevcd_error.h"
67 #include "ihevcd_nal.h"
68 #include "ihevcd_bitstream.h"
69 #include "ihevcd_job_queue.h"
70 #include "ihevcd_utils.h"
71 
72 #include "ihevc_deblk.h"
73 #include "ihevc_deblk_tables.h"
74 #include "ihevcd_profile.h"
75 #include "ihevcd_sao.h"
76 #include "ihevcd_debug.h"
77 
78 #define SAO_SHIFT_CTB    8
79 
80 /**
81  * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
82  */
ihevcd_sao_ctb(sao_ctxt_t * ps_sao_ctxt)83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84 {
85     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86     UWORD8 *pu1_src_luma;
87     UWORD8 *pu1_src_chroma;
88     WORD32 src_strd;
89     WORD32 ctb_size;
90     WORD32 log2_ctb_size;
91     sps_t *ps_sps;
92     sao_t *ps_sao;
93     WORD32 row, col;
94     UWORD8 au1_avail_luma[8];
95     UWORD8 au1_avail_chroma[8];
96     WORD32 i;
97     UWORD8 *pu1_src_top_luma;
98     UWORD8 *pu1_src_top_chroma;
99     UWORD8 *pu1_src_left_luma;
100     UWORD8 *pu1_src_left_chroma;
101     UWORD8 au1_src_top_right[2];
102     UWORD8 au1_src_bot_left[2];
103     UWORD8 *pu1_no_loop_filter_flag;
104     WORD32 loop_filter_strd;
105 
106     WORD8 ai1_offset_y[5];
107     WORD8 ai1_offset_cb[5];
108     WORD8 ai1_offset_cr[5];
109 
110     PROFILE_DISABLE_SAO();
111 
112     ai1_offset_y[0] = 0;
113     ai1_offset_cb[0] = 0;
114     ai1_offset_cr[0] = 0;
115 
116     ps_sps = ps_sao_ctxt->ps_sps;
117     log2_ctb_size = ps_sps->i1_log2_ctb_size;
118     ctb_size = (1 << log2_ctb_size);
119     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
120     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
121     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
122 
123     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
124     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
125 
126     /* Current CTB */
127     {
128         WORD32 sao_wd_luma;
129         WORD32 sao_wd_chroma;
130         WORD32 sao_ht_luma;
131         WORD32 sao_ht_chroma;
132 
133         WORD32 remaining_rows;
134         WORD32 remaining_cols;
135 
136         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
137         sao_wd_luma = MIN(ctb_size, remaining_cols);
138         sao_wd_chroma = MIN(ctb_size, remaining_cols);
139 
140         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
141         sao_ht_luma = MIN(ctb_size, remaining_rows);
142         sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
143 
144         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
145         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
146         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
147         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
148 
149         pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
150                         ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
151                         ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
152 
153         ai1_offset_y[1] = ps_sao->b4_y_offset_1;
154         ai1_offset_y[2] = ps_sao->b4_y_offset_2;
155         ai1_offset_y[3] = ps_sao->b4_y_offset_3;
156         ai1_offset_y[4] = ps_sao->b4_y_offset_4;
157 
158         ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
159         ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
160         ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
161         ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
162 
163         ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
164         ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
165         ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
166         ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
167 
168         for(i = 0; i < 8; i++)
169         {
170             au1_avail_luma[i] = 255;
171             au1_avail_chroma[i] = 255;
172         }
173 
174 
175         if(0 == ps_sao_ctxt->i4_ctb_x)
176         {
177             au1_avail_luma[0] = 0;
178             au1_avail_luma[4] = 0;
179             au1_avail_luma[6] = 0;
180 
181             au1_avail_chroma[0] = 0;
182             au1_avail_chroma[4] = 0;
183             au1_avail_chroma[6] = 0;
184         }
185 
186         if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
187         {
188             au1_avail_luma[1] = 0;
189             au1_avail_luma[5] = 0;
190             au1_avail_luma[7] = 0;
191 
192             au1_avail_chroma[1] = 0;
193             au1_avail_chroma[5] = 0;
194             au1_avail_chroma[7] = 0;
195         }
196 
197         if(0 == ps_sao_ctxt->i4_ctb_y)
198         {
199             au1_avail_luma[2] = 0;
200             au1_avail_luma[4] = 0;
201             au1_avail_luma[5] = 0;
202 
203             au1_avail_chroma[2] = 0;
204             au1_avail_chroma[4] = 0;
205             au1_avail_chroma[5] = 0;
206         }
207 
208         if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
209         {
210             au1_avail_luma[3] = 0;
211             au1_avail_luma[6] = 0;
212             au1_avail_luma[7] = 0;
213 
214             au1_avail_chroma[3] = 0;
215             au1_avail_chroma[6] = 0;
216             au1_avail_chroma[7] = 0;
217         }
218 
219 
220         if(0 == ps_sao->b3_y_type_idx)
221         {
222             /* Update left, top and top-left */
223             for(row = 0; row < sao_ht_luma; row++)
224             {
225                 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
226             }
227             ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
228 
229             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
230 
231         }
232         else
233         {
234             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
235             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
236             WORD32 tmp_strd = MAX_CTB_SIZE + 2;
237             WORD32 no_loop_filter_enabled = 0;
238 
239             /* Check the loop filter flags and copy the original values for back up */
240             {
241                 UWORD32 u4_no_loop_filter_flag;
242                 WORD32 min_cu = 8;
243                 UWORD8 *pu1_src_tmp = pu1_src_luma;
244 
245                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
246                 {
247                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
248                                     ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
249                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
250 
251                     if(u4_no_loop_filter_flag)
252                     {
253                         WORD32 tmp_wd = sao_wd_luma;
254                         no_loop_filter_enabled = 1;
255                         while(tmp_wd > 0)
256                         {
257                             if(CTZ(u4_no_loop_filter_flag))
258                             {
259                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
260                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
261                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
262                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
263                             }
264                             else
265                             {
266                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
267                                 {
268                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
269                                     {
270                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
271                                     }
272                                 }
273 
274                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
275                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
276                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
277                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
278                             }
279                         }
280 
281                         pu1_src_tmp -= sao_wd_luma;
282                     }
283 
284                     pu1_src_tmp += min_cu * src_strd;
285                     pu1_src_copy += min_cu * tmp_strd;
286                 }
287             }
288 
289             if(1 == ps_sao->b3_y_type_idx)
290             {
291                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
292                                                                           src_strd,
293                                                                           pu1_src_left_luma,
294                                                                           pu1_src_top_luma,
295                                                                           ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
296                                                                           ps_sao->b5_y_band_pos,
297                                                                           ai1_offset_y,
298                                                                           sao_wd_luma,
299                                                                           sao_ht_luma);
300             }
301             else // if(2 <= ps_sao->b3_y_type_idx)
302             {
303                 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
304                 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
305                 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
306                                                                   src_strd,
307                                                                   pu1_src_left_luma,
308                                                                   pu1_src_top_luma,
309                                                                   ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
310                                                                   au1_src_top_right,
311                                                                   au1_src_bot_left,
312                                                                   au1_avail_luma,
313                                                                   ai1_offset_y,
314                                                                   sao_wd_luma,
315                                                                   sao_ht_luma);
316             }
317 
318             /* Check the loop filter flags and copy the original values back if they are set */
319             if(no_loop_filter_enabled)
320             {
321                 UWORD32 u4_no_loop_filter_flag;
322                 WORD32 min_cu = 8;
323                 UWORD8 *pu1_src_tmp = pu1_src_luma;
324 
325                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
326                 {
327                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
328                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
329 
330                     if(u4_no_loop_filter_flag)
331                     {
332                         WORD32 tmp_wd = sao_wd_luma;
333                         while(tmp_wd > 0)
334                         {
335                             if(CTZ(u4_no_loop_filter_flag))
336                             {
337                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
338                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
339                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
340                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
341                             }
342                             else
343                             {
344                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
345                                 {
346                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
347                                     {
348                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
349                                     }
350                                 }
351 
352                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
353                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
354                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
355                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
356                             }
357                         }
358 
359                         pu1_src_tmp -= sao_wd_luma;
360                     }
361 
362                     pu1_src_tmp += min_cu * src_strd;
363                     pu1_src_copy += min_cu * tmp_strd;
364                 }
365             }
366 
367         }
368 
369         if(0 == ps_sao->b3_cb_type_idx)
370         {
371             for(row = 0; row < sao_ht_chroma; row++)
372             {
373                 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
374                 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
375             }
376             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
377             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
378 
379             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
380         }
381         else
382         {
383             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
384             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
385             WORD32 tmp_strd = MAX_CTB_SIZE + 4;
386             WORD32 no_loop_filter_enabled = 0;
387 
388             /* Check the loop filter flags and copy the original values for back up */
389             {
390                 UWORD32 u4_no_loop_filter_flag;
391                 WORD32 min_cu = 4;
392                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
393 
394                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
395                 {
396                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
397                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
398 
399                     if(u4_no_loop_filter_flag)
400                     {
401                         WORD32 tmp_wd = sao_wd_chroma;
402                         no_loop_filter_enabled = 1;
403                         while(tmp_wd > 0)
404                         {
405                             if(CTZ(u4_no_loop_filter_flag))
406                             {
407                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
408                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
409                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
410                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
411                             }
412                             else
413                             {
414                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
415                                 {
416                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
417                                     {
418                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
419                                     }
420                                 }
421 
422                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
423                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
424                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
425                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
426                             }
427                         }
428 
429                         pu1_src_tmp -= sao_wd_chroma;
430                     }
431 
432                     pu1_src_tmp += min_cu * src_strd;
433                     pu1_src_copy += min_cu * tmp_strd;
434                 }
435             }
436 
437             if(1 == ps_sao->b3_cb_type_idx)
438             {
439                 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
440                                                                             src_strd,
441                                                                             pu1_src_left_chroma,
442                                                                             pu1_src_top_chroma,
443                                                                             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
444                                                                             ps_sao->b5_cb_band_pos,
445                                                                             ps_sao->b5_cr_band_pos,
446                                                                             ai1_offset_cb,
447                                                                             ai1_offset_cr,
448                                                                             sao_wd_chroma,
449                                                                             sao_ht_chroma
450                                                                            );
451             }
452             else // if(2 <= ps_sao->b3_cb_type_idx)
453             {
454                 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
455                 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
456                 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
457                 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
458                 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
459                                                                      src_strd,
460                                                                      pu1_src_left_chroma,
461                                                                      pu1_src_top_chroma,
462                                                                      ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
463                                                                      au1_src_top_right,
464                                                                      au1_src_bot_left,
465                                                                      au1_avail_chroma,
466                                                                      ai1_offset_cb,
467                                                                      ai1_offset_cr,
468                                                                      sao_wd_chroma,
469                                                                      sao_ht_chroma);
470             }
471 
472             /* Check the loop filter flags and copy the original values back if they are set */
473             if(no_loop_filter_enabled)
474             {
475                 UWORD32 u4_no_loop_filter_flag;
476                 WORD32 min_cu = 4;
477                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
478 
479                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
480                 {
481                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
482                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
483 
484                     if(u4_no_loop_filter_flag)
485                     {
486                         WORD32 tmp_wd = sao_wd_chroma;
487                         while(tmp_wd > 0)
488                         {
489                             if(CTZ(u4_no_loop_filter_flag))
490                             {
491                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
492                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
493                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
494                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
495                             }
496                             else
497                             {
498                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
499                                 {
500                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
501                                     {
502                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
503                                     }
504                                 }
505 
506                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
507                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
508                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
509                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
510                             }
511                         }
512 
513                         pu1_src_tmp -= sao_wd_chroma;
514                     }
515 
516                     pu1_src_tmp += min_cu * src_strd;
517                     pu1_src_copy += min_cu * tmp_strd;
518                 }
519             }
520 
521         }
522 
523     }
524 }
525 
ihevcd_sao_shift_ctb(sao_ctxt_t * ps_sao_ctxt)526 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
527 {
528     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
529     UWORD8 *pu1_src_luma;
530     UWORD8 *pu1_src_chroma;
531     WORD32 src_strd;
532     WORD32 ctb_size;
533     WORD32 log2_ctb_size;
534     sps_t *ps_sps;
535     sao_t *ps_sao;
536     pps_t *ps_pps;
537     slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
538     tile_t *ps_tile;
539     UWORD16 *pu1_slice_idx;
540     UWORD16 *pu1_tile_idx;
541     WORD32 row, col;
542     UWORD8 au1_avail_luma[8];
543     UWORD8 au1_avail_chroma[8];
544     UWORD8 au1_tile_slice_boundary[8];
545     UWORD8 au4_ilf_across_tile_slice_enable[8];
546     WORD32 i;
547     UWORD8 *pu1_src_top_luma;
548     UWORD8 *pu1_src_top_chroma;
549     UWORD8 *pu1_src_left_luma;
550     UWORD8 *pu1_src_left_chroma;
551     UWORD8 au1_src_top_right[2];
552     UWORD8 au1_src_bot_left[2];
553     UWORD8 *pu1_no_loop_filter_flag;
554     UWORD8 *pu1_src_backup_luma;
555     UWORD8 *pu1_src_backup_chroma;
556     WORD32 backup_strd;
557     WORD32 loop_filter_strd;
558 
559     WORD32 no_loop_filter_enabled_luma = 0;
560     WORD32 no_loop_filter_enabled_chroma = 0;
561     UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
562     UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
563     UWORD8 *pu1_sao_src_luma_top_left_ctb;
564     UWORD8 *pu1_sao_src_chroma_top_left_ctb;
565     UWORD8 *pu1_sao_src_top_left_luma_top_right;
566     UWORD8 *pu1_sao_src_top_left_chroma_top_right;
567     UWORD8  u1_sao_src_top_left_luma_bot_left;
568     UWORD8  *pu1_sao_src_top_left_luma_bot_left;
569     UWORD8 *au1_sao_src_top_left_chroma_bot_left;
570     UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
571 
572     WORD8 ai1_offset_y[5];
573     WORD8 ai1_offset_cb[5];
574     WORD8 ai1_offset_cr[5];
575     WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
576 
577     PROFILE_DISABLE_SAO();
578 
579     ai1_offset_y[0] = 0;
580     ai1_offset_cb[0] = 0;
581     ai1_offset_cr[0] = 0;
582 
583     ps_sps = ps_sao_ctxt->ps_sps;
584     ps_pps = ps_sao_ctxt->ps_pps;
585     ps_tile = ps_sao_ctxt->ps_tile;
586 
587     log2_ctb_size = ps_sps->i1_log2_ctb_size;
588     ctb_size = (1 << log2_ctb_size);
589     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
590     ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
591     ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
592 
593     pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
594     pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
595     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
596     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
597 
598     /*Stores the left value for each row ctbs- Needed for column tiles*/
599     pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
600     pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
601     pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
602     pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
603     u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
604     pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
605     au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
606     pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
607     pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
608     pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
609 
610     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
611     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
612     backup_strd = 2 * MAX_CTB_SIZE;
613 
614     DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
615 
616     {
617         /* Check the loop filter flags and copy the original values for back up */
618         /* Luma */
619         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
620         {
621             UWORD32 u4_no_loop_filter_flag;
622             WORD32 loop_filter_bit_pos;
623             WORD32 log2_min_cu = 3;
624             WORD32 min_cu = (1 << log2_min_cu);
625             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
626             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
627             WORD32 sao_blk_wd = ctb_size;
628             WORD32 remaining_rows;
629             WORD32 remaining_cols;
630 
631             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
632             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
633             if(remaining_rows <= SAO_SHIFT_CTB)
634                 sao_blk_ht += remaining_rows;
635             if(remaining_cols <= SAO_SHIFT_CTB)
636                 sao_blk_wd += remaining_cols;
637 
638             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
639             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
640 
641             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
642 
643             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
644                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
645             if(ps_sao_ctxt->i4_ctb_x > 0)
646                 loop_filter_bit_pos -= 1;
647 
648             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
649                             (loop_filter_bit_pos >> 3);
650 
651             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
652                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
653             {
654                 WORD32 tmp_wd = sao_blk_wd;
655 
656                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
657                                 (loop_filter_bit_pos & 7);
658                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
659 
660                 if(u4_no_loop_filter_flag)
661                 {
662                     no_loop_filter_enabled_luma = 1;
663                     while(tmp_wd > 0)
664                     {
665                         if(CTZ(u4_no_loop_filter_flag))
666                         {
667                             pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
668                             pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
669                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
670                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
671                         }
672                         else
673                         {
674                             for(row = 0; row < min_cu; row++)
675                             {
676                                 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
677                                 {
678                                     pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
679                                 }
680                             }
681                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
682                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
683                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
684                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
685                         }
686                     }
687 
688                     pu1_src_tmp_luma -= sao_blk_wd;
689                     pu1_src_backup_luma -= sao_blk_wd;
690                 }
691 
692                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
693                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
694             }
695         }
696 
697         /* Chroma */
698         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
699         {
700             UWORD32 u4_no_loop_filter_flag;
701             WORD32 loop_filter_bit_pos;
702             WORD32 log2_min_cu = 3;
703             WORD32 min_cu = (1 << log2_min_cu);
704             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
705             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
706             WORD32 sao_blk_wd = ctb_size;
707             WORD32 remaining_rows;
708             WORD32 remaining_cols;
709 
710             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
711             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
712             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
713                 sao_blk_ht += remaining_rows;
714             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
715                 sao_blk_wd += remaining_cols;
716 
717             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
718             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
719 
720             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
721 
722             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
723                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
724             if(ps_sao_ctxt->i4_ctb_x > 0)
725                 loop_filter_bit_pos -= 2;
726 
727             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
728                             (loop_filter_bit_pos >> 3);
729 
730             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
731                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
732             {
733                 WORD32 tmp_wd = sao_blk_wd;
734 
735                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
736                                 (loop_filter_bit_pos & 7);
737                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
738 
739                 if(u4_no_loop_filter_flag)
740                 {
741                     no_loop_filter_enabled_chroma = 1;
742                     while(tmp_wd > 0)
743                     {
744                         if(CTZ(u4_no_loop_filter_flag))
745                         {
746                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
747                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
748                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
749                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
750                         }
751                         else
752                         {
753                             for(row = 0; row < min_cu / 2; row++)
754                             {
755                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
756                                 {
757                                     pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
758                                 }
759                             }
760 
761                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
762                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
763                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
764                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
765                         }
766                     }
767 
768                     pu1_src_tmp_chroma -= sao_blk_wd;
769                     pu1_src_backup_chroma -= sao_blk_wd;
770                 }
771 
772                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
773                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
774             }
775         }
776     }
777 
778     DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
779 
780     /* Top-left CTB */
781     if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
782     {
783         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
784         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
785         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
786         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
787 
788         WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
789         WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
790         WORD32 au4_idx_tl[8], idx_tl;
791 
792 
793         pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
794         pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
795         ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
796         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
797         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
798         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
799         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
800 
801         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
802         {
803             if(0 == ps_sao->b3_y_type_idx)
804             {
805                 /* Update left, top and top-left */
806                 for(row = 0; row < sao_ht_luma; row++)
807                 {
808                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
809                 }
810                 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
811 
812                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
813 
814 
815             }
816 
817             else if(1 == ps_sao->b3_y_type_idx)
818             {
819                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
820                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
821                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
822                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
823 
824                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
825                                                                           src_strd,
826                                                                           pu1_src_left_luma,
827                                                                           pu1_src_top_luma,
828                                                                           pu1_sao_src_luma_top_left_ctb,
829                                                                           ps_sao->b5_y_band_pos,
830                                                                           ai1_offset_y,
831                                                                           sao_wd_luma,
832                                                                           sao_ht_luma
833                                                                          );
834             }
835 
836             else // if(2 <= ps_sao->b3_y_type_idx)
837             {
838                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
839                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
840                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
841                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
842 
843                 for(i = 0; i < 8; i++)
844                 {
845                     au1_avail_luma[i] = 255;
846                     au1_tile_slice_boundary[i] = 0;
847                     au4_idx_tl[i] = 0;
848                     au4_ilf_across_tile_slice_enable[i] = 1;
849                 }
850 
851                 /******************************************************************
852                  * Derive the  Top-left CTB's neighbor pixel's slice indices.
853                  *
854                  *          TL_T
855                  *       4  _2__5________
856                  *     0   |    |       |
857                  *    TL_L | TL | 1 TL_R|
858                  *         |____|_______|____
859                  *        6|TL_D|7      |    |
860                  *         | 3  |       |    |
861                  *         |____|_______|    |
862                  *              |            |
863                  *              |            |
864                  *              |____________|
865                  *
866                  *****************************************************************/
867 
868                 /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
869                 {
870                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
871                     {
872                         {
873                             /*Assuming that sao shift is uniform along x and y directions*/
874                             if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
875                             {
876                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
877                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
878                             }
879                             else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
880                             {
881                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
882                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
883                             }
884                             ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
885                             ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
886 
887                             ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
888                             ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
889 
890                             ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
891                             ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
892 
893                             ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
894                             ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
895                         }
896 
897                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
898                         {
899                             /*Calculate slice indices for neighbor pixels*/
900                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
901                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
902                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
903                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
904                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
905                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
906 
907                             if((0 == (1 << log2_ctb_size) - sao_wd_luma))
908                             {
909                                 if(ps_sao_ctxt->i4_ctb_x == 1)
910                                 {
911                                     au4_idx_tl[6] = -1;
912                                     au4_idx_tl[4] = -1;
913                                 }
914                                 else
915                                 {
916                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
917                                 }
918                                 if(ps_sao_ctxt->i4_ctb_y == 1)
919                                 {
920                                     au4_idx_tl[5] = -1;
921                                     au4_idx_tl[4] = -1;
922                                 }
923                                 else
924                                 {
925                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
926                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
927                                 }
928                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
929                             }
930 
931                             /* Verify that the neighbor ctbs don't cross pic boundary.
932                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
933                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
934                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
935                              * the respective pixel's flags are checked
936                              */
937 
938                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
939                             {
940                                 au4_ilf_across_tile_slice_enable[4] = 0;
941                                 au4_ilf_across_tile_slice_enable[6] = 0;
942                             }
943                             else
944                             {
945                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
946                             }
947                             if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
948                             {
949                                 au4_ilf_across_tile_slice_enable[5] = 0;
950                                 au4_ilf_across_tile_slice_enable[4] = 0;
951                             }
952                             else
953                             {
954                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
955                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
956                             }
957                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
958                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
959                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
960                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
961                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
962 
963                             /*
964                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
965                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
966                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
967                              * the respective pixel's flags are checked
968                              */
969                             for(i = 0; i < 8; i++)
970                             {
971                                 /*Sets the edges that lie on the slice/tile boundary*/
972                                 if(au4_idx_tl[i] != idx_tl)
973                                 {
974                                     au1_tile_slice_boundary[i] = 1;
975                                 }
976                                 else
977                                 {
978                                     au4_ilf_across_tile_slice_enable[i] = 1;
979                                 }
980                             }
981 
982                             ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
983                         }
984 
985                         if(ps_pps->i1_tiles_enabled_flag)
986                         {
987                             /* Calculate availability flags at tile boundary */
988                             if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
989                             {
990                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
991                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
992                                 {
993                                     /*Set the boundary arrays*/
994                                     /*Calculate tile indices for neighbor pixels*/
995                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
996                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
997                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
998                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
999                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1000                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1001 
1002                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1003                                     {
1004                                         if(ps_sao_ctxt->i4_ctb_x == 1)
1005                                         {
1006                                             au4_idx_tl[6] = -1;
1007                                             au4_idx_tl[4] = -1;
1008                                         }
1009                                         else
1010                                         {
1011                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1012                                         }
1013                                         if(ps_sao_ctxt->i4_ctb_y == 1)
1014                                         {
1015                                             au4_idx_tl[5] = -1;
1016                                             au4_idx_tl[4] = -1;
1017                                         }
1018                                         else
1019                                         {
1020                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1021                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1022                                         }
1023                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1024                                     }
1025                                     for(i = 0; i < 8; i++)
1026                                     {
1027                                         /*Sets the edges that lie on the tile boundary*/
1028                                         if(au4_idx_tl[i] != idx_tl)
1029                                         {
1030                                             au1_tile_slice_boundary[i] |= 1;
1031                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1032                                         }
1033                                     }
1034                                 }
1035                             }
1036                         }
1037 
1038 
1039                         /*Set availability flags based on tile and slice boundaries*/
1040                         for(i = 0; i < 8; i++)
1041                         {
1042                             /*Sets the edges that lie on the slice/tile boundary*/
1043                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1044                             {
1045                                 au1_avail_luma[i] = 0;
1046                             }
1047                         }
1048                     }
1049                 }
1050 
1051                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1052                 {
1053                     au1_avail_luma[0] = 0;
1054                     au1_avail_luma[4] = 0;
1055                     au1_avail_luma[6] = 0;
1056                 }
1057 
1058                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1059                 {
1060                     au1_avail_luma[1] = 0;
1061                     au1_avail_luma[5] = 0;
1062                     au1_avail_luma[7] = 0;
1063                 }
1064                 //y==1 case
1065                 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1066                 {
1067                     au1_avail_luma[2] = 0;
1068                     au1_avail_luma[4] = 0;
1069                     au1_avail_luma[5] = 0;
1070                 }
1071                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1072                 {
1073                     au1_avail_luma[3] = 0;
1074                     au1_avail_luma[6] = 0;
1075                     au1_avail_luma[7] = 0;
1076                 }
1077 
1078                 {
1079                     au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1080                     u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1081                     ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1082                                                                       src_strd,
1083                                                                       pu1_src_left_luma,
1084                                                                       pu1_src_top_luma,
1085                                                                       pu1_sao_src_luma_top_left_ctb,
1086                                                                       au1_src_top_right,
1087                                                                       &u1_sao_src_top_left_luma_bot_left,
1088                                                                       au1_avail_luma,
1089                                                                       ai1_offset_y,
1090                                                                       sao_wd_luma,
1091                                                                       sao_ht_luma);
1092                 }
1093             }
1094 
1095         }
1096 
1097         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1098         {
1099             if(0 == ps_sao->b3_cb_type_idx)
1100             {
1101                 for(row = 0; row < sao_ht_chroma; row++)
1102                 {
1103                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1104                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1105                 }
1106                 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1107                 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1108 
1109                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1110 
1111             }
1112 
1113             else if(1 == ps_sao->b3_cb_type_idx)
1114             {
1115                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1116                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1117                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1118                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1119 
1120                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1121                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1122                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1123                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1124 
1125                 if(chroma_yuv420sp_vu)
1126                 {
1127                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1128                                                                                 src_strd,
1129                                                                                 pu1_src_left_chroma,
1130                                                                                 pu1_src_top_chroma,
1131                                                                                 pu1_sao_src_chroma_top_left_ctb,
1132                                                                                 ps_sao->b5_cr_band_pos,
1133                                                                                 ps_sao->b5_cb_band_pos,
1134                                                                                 ai1_offset_cr,
1135                                                                                 ai1_offset_cb,
1136                                                                                 sao_wd_chroma,
1137                                                                                 sao_ht_chroma
1138                                                                                );
1139                 }
1140                 else
1141                 {
1142                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1143                                                                                 src_strd,
1144                                                                                 pu1_src_left_chroma,
1145                                                                                 pu1_src_top_chroma,
1146                                                                                 pu1_sao_src_chroma_top_left_ctb,
1147                                                                                 ps_sao->b5_cb_band_pos,
1148                                                                                 ps_sao->b5_cr_band_pos,
1149                                                                                 ai1_offset_cb,
1150                                                                                 ai1_offset_cr,
1151                                                                                 sao_wd_chroma,
1152                                                                                 sao_ht_chroma
1153                                                                                );
1154                 }
1155             }
1156 
1157             else // if(2 <= ps_sao->b3_cb_type_idx)
1158             {
1159                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1160                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1161                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1162                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1163 
1164                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1165                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1166                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1167                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1168                 for(i = 0; i < 8; i++)
1169                 {
1170                     au1_avail_chroma[i] = 255;
1171                     au1_tile_slice_boundary[i] = 0;
1172                     au4_idx_tl[i] = 0;
1173                     au4_ilf_across_tile_slice_enable[i] = 1;
1174                 }
1175                 /*In case of slices*/
1176                 {
1177                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1178                     {
1179                         if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1180                         {
1181                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1182                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1183                         }
1184                         else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1185                         {
1186                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1187                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1188                         }
1189                         ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1190                         ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1191 
1192                         ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1193                         ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1194 
1195                         ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
1196                         ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
1197 
1198                         ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1199                         ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1200 
1201                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1202                         {
1203 
1204                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1205                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1206                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1207                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1208                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1209                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1210 
1211                             if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1212                             {
1213                                 if(ps_sao_ctxt->i4_ctb_x == 1)
1214                                 {
1215                                     au4_idx_tl[6] = -1;
1216                                     au4_idx_tl[4] = -1;
1217                                 }
1218                                 else
1219                                 {
1220                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1221                                 }
1222                                 if(ps_sao_ctxt->i4_ctb_y == 1)
1223                                 {
1224                                     au4_idx_tl[5] = -1;
1225                                     au4_idx_tl[4] = -1;
1226                                 }
1227                                 else
1228                                 {
1229                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1230                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1231                                 }
1232                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1233                             }
1234 
1235                             /* Verify that the neighbor ctbs don't cross pic boundary
1236                              * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1237                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1238                             {
1239                                 au4_ilf_across_tile_slice_enable[4] = 0;
1240                                 au4_ilf_across_tile_slice_enable[6] = 0;
1241                             }
1242                             else
1243                             {
1244                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1245                             }
1246                             if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1247                             {
1248                                 au4_ilf_across_tile_slice_enable[5] = 0;
1249                                 au4_ilf_across_tile_slice_enable[4] = 0;
1250                             }
1251                             else
1252                             {
1253                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1254                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1255                             }
1256                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1257                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1258                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1259                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1260                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1261                             /*
1262                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1263                              * of the pixel having a greater address is checked. Accordingly, set the availability flags
1264                              */
1265                             for(i = 0; i < 8; i++)
1266                             {
1267                                 /*Sets the edges that lie on the slice/tile boundary*/
1268                                 if(au4_idx_tl[i] != idx_tl)
1269                                 {
1270                                     au1_tile_slice_boundary[i] = 1;
1271                                 }
1272                                 else
1273                                 {
1274                                     au4_ilf_across_tile_slice_enable[i] = 1;
1275                                 }
1276                             }
1277 
1278                             /*Reset indices*/
1279                             for(i = 0; i < 8; i++)
1280                             {
1281                                 au4_idx_tl[i] = 0;
1282                             }
1283                         }
1284                         if(ps_pps->i1_tiles_enabled_flag)
1285                         {
1286                             /* Calculate availability flags at tile boundary */
1287                             if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1288                             {
1289                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1290                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1291                                 {
1292                                     /*Set the boundary arrays*/
1293                                     /*Calculate tile indices for neighbor pixels*/
1294                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1295                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1296                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1297                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1298                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1299                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1300 
1301                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1302                                     {
1303                                         if(ps_sao_ctxt->i4_ctb_x == 1)
1304                                         {
1305                                             au4_idx_tl[6] = -1;
1306                                             au4_idx_tl[4] = -1;
1307                                         }
1308                                         else
1309                                         {
1310                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1311                                         }
1312                                         if(ps_sao_ctxt->i4_ctb_y == 1)
1313                                         {
1314                                             au4_idx_tl[5] = -1;
1315                                             au4_idx_tl[4] = -1;
1316                                         }
1317                                         else
1318                                         {
1319                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1320                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1321                                         }
1322                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1323                                     }
1324                                     for(i = 0; i < 8; i++)
1325                                     {
1326                                         /*Sets the edges that lie on the tile boundary*/
1327                                         if(au4_idx_tl[i] != idx_tl)
1328                                         {
1329                                             au1_tile_slice_boundary[i] |= 1;
1330                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1331                                         }
1332                                     }
1333                                 }
1334                             }
1335                         }
1336 
1337                         for(i = 0; i < 8; i++)
1338                         {
1339                             /*Sets the edges that lie on the slice/tile boundary*/
1340                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1341                             {
1342                                 au1_avail_chroma[i] = 0;
1343                             }
1344                         }
1345                     }
1346                 }
1347 
1348                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1349                 {
1350                     au1_avail_chroma[0] = 0;
1351                     au1_avail_chroma[4] = 0;
1352                     au1_avail_chroma[6] = 0;
1353                 }
1354                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1355                 {
1356                     au1_avail_chroma[1] = 0;
1357                     au1_avail_chroma[5] = 0;
1358                     au1_avail_chroma[7] = 0;
1359                 }
1360 
1361                 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1362                 {
1363                     au1_avail_chroma[2] = 0;
1364                     au1_avail_chroma[4] = 0;
1365                     au1_avail_chroma[5] = 0;
1366                 }
1367                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1368                 {
1369                     au1_avail_chroma[3] = 0;
1370                     au1_avail_chroma[6] = 0;
1371                     au1_avail_chroma[7] = 0;
1372                 }
1373 
1374                 {
1375                     au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1376                     au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1377                     au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1378                     au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1379                     if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1380                     {
1381                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1382                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1383                     }
1384 
1385                     if(chroma_yuv420sp_vu)
1386                     {
1387                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1388                                                                              src_strd,
1389                                                                              pu1_src_left_chroma,
1390                                                                              pu1_src_top_chroma,
1391                                                                              pu1_sao_src_chroma_top_left_ctb,
1392                                                                              au1_src_top_right,
1393                                                                              au1_sao_src_top_left_chroma_bot_left,
1394                                                                              au1_avail_chroma,
1395                                                                              ai1_offset_cr,
1396                                                                              ai1_offset_cb,
1397                                                                              sao_wd_chroma,
1398                                                                              sao_ht_chroma);
1399                     }
1400                     else
1401                     {
1402                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1403                                                                              src_strd,
1404                                                                              pu1_src_left_chroma,
1405                                                                              pu1_src_top_chroma,
1406                                                                              pu1_sao_src_chroma_top_left_ctb,
1407                                                                              au1_src_top_right,
1408                                                                              au1_sao_src_top_left_chroma_bot_left,
1409                                                                              au1_avail_chroma,
1410                                                                              ai1_offset_cb,
1411                                                                              ai1_offset_cr,
1412                                                                              sao_wd_chroma,
1413                                                                              sao_ht_chroma);
1414                     }
1415                 }
1416             }
1417         }
1418 
1419         pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1420         pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1421         ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1422     }
1423 
1424 
1425     /* Top CTB */
1426     if((ps_sao_ctxt->i4_ctb_y > 0))
1427     {
1428         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1429         WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1430         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1431         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1432 
1433         WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1434         WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1435         WORD32 au4_idx_t[8], idx_t;
1436 
1437         WORD32 remaining_cols;
1438 
1439         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1440         if(remaining_cols <= SAO_SHIFT_CTB)
1441         {
1442             sao_wd_luma += remaining_cols;
1443         }
1444         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1445         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1446         {
1447             sao_wd_chroma += remaining_cols;
1448         }
1449 
1450         pu1_src_luma -= (sao_ht_luma * src_strd);
1451         pu1_src_chroma -= (sao_ht_chroma * src_strd);
1452         ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1453         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1454         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1455         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1456         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1457 
1458         if(0 != sao_wd_luma)
1459         {
1460             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1461             {
1462                 if(0 == ps_sao->b3_y_type_idx)
1463                 {
1464                     /* Update left, top and top-left */
1465                     for(row = 0; row < sao_ht_luma; row++)
1466                     {
1467                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1468                     }
1469                     pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1470 
1471                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1472 
1473                 }
1474 
1475                 else if(1 == ps_sao->b3_y_type_idx)
1476                 {
1477                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1478                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1479                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1480                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1481 
1482                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1483                                                                               src_strd,
1484                                                                               pu1_src_left_luma,
1485                                                                               pu1_src_top_luma,
1486                                                                               pu1_sao_src_luma_top_left_ctb,
1487                                                                               ps_sao->b5_y_band_pos,
1488                                                                               ai1_offset_y,
1489                                                                               sao_wd_luma,
1490                                                                               sao_ht_luma
1491                                                                              );
1492                 }
1493 
1494                 else // if(2 <= ps_sao->b3_y_type_idx)
1495                 {
1496                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1497                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1498                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1499                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1500 
1501                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1502                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1503                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1504 
1505                     for(i = 0; i < 8; i++)
1506                     {
1507 
1508                         au4_ilf_across_tile_slice_enable[i] = 1;
1509                     }
1510                     /******************************************************************
1511                      * Derive the Top CTB's neighbor pixels' slice indices.
1512                      *
1513                      *               T_T
1514                      *          ____________
1515                      *         |    |       |
1516                      *         | T_L|  T    |T_R
1517                      *         |    | ______|____
1518                      *         |    |  T_D  |    |
1519                      *         |    |       |    |
1520                      *         |____|_______|    |
1521                      *              |            |
1522                      *              |            |
1523                      *              |____________|
1524                      *
1525                      *****************************************************************/
1526 
1527                     /*In case of slices*/
1528                     {
1529                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1530                         {
1531 
1532                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1533                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1534 
1535                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1536                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1537 
1538                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1539                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1540 
1541                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1542                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1543 
1544                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
1545                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1546 
1547                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1548                             {
1549                                 /*Calculate neighbor ctb slice indices*/
1550                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1551                                 {
1552                                     au4_idx_t[0] = -1;
1553                                     au4_idx_t[6] = -1;
1554                                     au4_idx_t[4] = -1;
1555                                 }
1556                                 else
1557                                 {
1558                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1559                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1560                                 }
1561                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1562                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1563                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1564                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1565 
1566                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1567                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1568                                 {
1569                                     au4_ilf_across_tile_slice_enable[4] = 0;
1570                                     au4_ilf_across_tile_slice_enable[6] = 0;
1571                                     au4_ilf_across_tile_slice_enable[0] = 0;
1572                                 }
1573                                 else
1574                                 {
1575                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1576                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1577                                 }
1578 
1579 
1580 
1581                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1582                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1583                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1584                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1585                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1586                                 /*
1587                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1588                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
1589                                  */
1590 
1591                                 for(i = 0; i < 8; i++)
1592                                 {
1593                                     /*Sets the edges that lie on the slice/tile boundary*/
1594                                     if(au4_idx_t[i] != idx_t)
1595                                     {
1596                                         au1_tile_slice_boundary[i] = 1;
1597                                         /*Check for slice flag at such boundaries*/
1598                                     }
1599                                     else
1600                                     {
1601                                         au4_ilf_across_tile_slice_enable[i] = 1;
1602                                     }
1603                                 }
1604                                 /*Reset indices*/
1605                                 for(i = 0; i < 8; i++)
1606                                 {
1607                                     au4_idx_t[i] = 0;
1608                                 }
1609                             }
1610 
1611                             if(ps_pps->i1_tiles_enabled_flag)
1612                             {
1613                                 /* Calculate availability flags at tile boundary */
1614                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1615                                 {
1616                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1617                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1618                                     {
1619                                         /*Calculate neighbor ctb tile indices*/
1620                                         if(0 == ps_sao_ctxt->i4_ctb_x)
1621                                         {
1622                                             au4_idx_t[0] = -1;
1623                                             au4_idx_t[6] = -1;
1624                                             au4_idx_t[4] = -1;
1625                                         }
1626                                         else
1627                                         {
1628                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1629                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1630                                         }
1631                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1632                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1633                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1634                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1635 
1636                                         for(i = 0; i < 8; i++)
1637                                         {
1638                                             /*Sets the edges that lie on the tile boundary*/
1639                                             if(au4_idx_t[i] != idx_t)
1640                                             {
1641                                                 au1_tile_slice_boundary[i] |= 1;
1642                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1643                                             }
1644                                         }
1645                                     }
1646                                 }
1647                             }
1648 
1649                             for(i = 0; i < 8; i++)
1650                             {
1651                                 /*Sets the edges that lie on the slice/tile boundary*/
1652                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1653                                 {
1654                                     au1_avail_luma[i] = 0;
1655                                 }
1656                             }
1657                         }
1658                     }
1659 
1660 
1661                     if(0 == ps_sao_ctxt->i4_ctb_x)
1662                     {
1663                         au1_avail_luma[0] = 0;
1664                         au1_avail_luma[4] = 0;
1665                         au1_avail_luma[6] = 0;
1666                     }
1667 
1668                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1669                     {
1670                         au1_avail_luma[1] = 0;
1671                         au1_avail_luma[5] = 0;
1672                         au1_avail_luma[7] = 0;
1673                     }
1674 
1675                     if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1676                     {
1677                         au1_avail_luma[2] = 0;
1678                         au1_avail_luma[4] = 0;
1679                         au1_avail_luma[5] = 0;
1680                     }
1681 
1682                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1683                     {
1684                         au1_avail_luma[3] = 0;
1685                         au1_avail_luma[6] = 0;
1686                         au1_avail_luma[7] = 0;
1687                     }
1688 
1689                     {
1690                         au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1691                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1692                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1693                                                                           src_strd,
1694                                                                           pu1_src_left_luma,
1695                                                                           pu1_src_top_luma,
1696                                                                           pu1_sao_src_luma_top_left_ctb,
1697                                                                           au1_src_top_right,
1698                                                                           &u1_sao_src_top_left_luma_bot_left,
1699                                                                           au1_avail_luma,
1700                                                                           ai1_offset_y,
1701                                                                           sao_wd_luma,
1702                                                                           sao_ht_luma);
1703                     }
1704                 }
1705             }
1706         }
1707 
1708         if(0 != sao_wd_chroma)
1709         {
1710             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1711             {
1712                 if(0 == ps_sao->b3_cb_type_idx)
1713                 {
1714 
1715                     for(row = 0; row < sao_ht_chroma; row++)
1716                     {
1717                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1718                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1719                     }
1720                     pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1721                     pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1722 
1723                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1724 
1725                 }
1726 
1727                 else if(1 == ps_sao->b3_cb_type_idx)
1728                 {
1729                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1730                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1731                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1732                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1733 
1734                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1735                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1736                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1737                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1738 
1739                     if(chroma_yuv420sp_vu)
1740                     {
1741                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1742                                                                                     src_strd,
1743                                                                                     pu1_src_left_chroma,
1744                                                                                     pu1_src_top_chroma,
1745                                                                                     pu1_sao_src_chroma_top_left_ctb,
1746                                                                                     ps_sao->b5_cr_band_pos,
1747                                                                                     ps_sao->b5_cb_band_pos,
1748                                                                                     ai1_offset_cr,
1749                                                                                     ai1_offset_cb,
1750                                                                                     sao_wd_chroma,
1751                                                                                     sao_ht_chroma
1752                                                                                    );
1753                     }
1754                     else
1755                     {
1756                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1757                                                                                     src_strd,
1758                                                                                     pu1_src_left_chroma,
1759                                                                                     pu1_src_top_chroma,
1760                                                                                     pu1_sao_src_chroma_top_left_ctb,
1761                                                                                     ps_sao->b5_cb_band_pos,
1762                                                                                     ps_sao->b5_cr_band_pos,
1763                                                                                     ai1_offset_cb,
1764                                                                                     ai1_offset_cr,
1765                                                                                     sao_wd_chroma,
1766                                                                                     sao_ht_chroma
1767                                                                                    );
1768                     }
1769                 }
1770                 else // if(2 <= ps_sao->b3_cb_type_idx)
1771                 {
1772                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1773                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1774                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1775                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1776 
1777                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1778                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1779                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1780                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1781 
1782                     for(i = 0; i < 8; i++)
1783                     {
1784                         au1_avail_chroma[i] = 255;
1785                         au1_tile_slice_boundary[i] = 0;
1786                         au4_idx_t[i] = 0;
1787                         au4_ilf_across_tile_slice_enable[i] = 1;
1788                     }
1789 
1790                     {
1791                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1792                         {
1793                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1794                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1795 
1796                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1797                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1798 
1799                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1800                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1801 
1802                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1803                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1804 
1805                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
1806                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1807 
1808                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1809                             {
1810                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1811                                 {
1812                                     au4_idx_t[0] = -1;
1813                                     au4_idx_t[6] = -1;
1814                                     au4_idx_t[4] = -1;
1815                                 }
1816                                 else
1817                                 {
1818                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1819                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1820                                 }
1821                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1822                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1823                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1824                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1825 
1826                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1827 
1828                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1829                                 {
1830                                     au4_ilf_across_tile_slice_enable[4] = 0;
1831                                     au4_ilf_across_tile_slice_enable[6] = 0;
1832                                     au4_ilf_across_tile_slice_enable[0] = 0;
1833                                 }
1834                                 else
1835                                 {
1836                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1837                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1838                                 }
1839 
1840                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1841                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1842                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1843                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1844                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1845                                 /*
1846                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1847                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
1848                                  */
1849                                 for(i = 0; i < 8; i++)
1850                                 {
1851                                     /*Sets the edges that lie on the slice/tile boundary*/
1852                                     if(au4_idx_t[i] != idx_t)
1853                                     {
1854                                         au1_tile_slice_boundary[i] = 1;
1855                                     }
1856                                     else
1857                                     {
1858                                         /*Indicates that the neighbour belongs to same/dependent slice*/
1859                                         au4_ilf_across_tile_slice_enable[i] = 1;
1860                                     }
1861                                 }
1862                                 /*Reset indices*/
1863                                 for(i = 0; i < 8; i++)
1864                                 {
1865                                     au4_idx_t[i] = 0;
1866                                 }
1867                             }
1868                             if(ps_pps->i1_tiles_enabled_flag)
1869                             {
1870                                 /* Calculate availability flags at tile boundary */
1871                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1872                                 {
1873                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1874                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1875                                     {
1876                                         /*Calculate neighbor ctb slice indices*/
1877                                         if(0 == ps_sao_ctxt->i4_ctb_x)
1878                                         {
1879                                             au4_idx_t[0] = -1;
1880                                             au4_idx_t[6] = -1;
1881                                             au4_idx_t[4] = -1;
1882                                         }
1883                                         else
1884                                         {
1885                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1886                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1887                                         }
1888                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1889                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1890                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1891                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1892 
1893                                         for(i = 0; i < 8; i++)
1894                                         {
1895                                             /*Sets the edges that lie on the tile boundary*/
1896                                             if(au4_idx_t[i] != idx_t)
1897                                             {
1898                                                 au1_tile_slice_boundary[i] |= 1;
1899                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1900                                             }
1901                                         }
1902                                     }
1903                                 }
1904                             }
1905                             for(i = 0; i < 8; i++)
1906                             {
1907                                 /*Sets the edges that lie on the slice/tile boundary*/
1908                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1909                                 {
1910                                     au1_avail_chroma[i] = 0;
1911                                 }
1912                             }
1913 
1914                         }
1915                     }
1916                     if(0 == ps_sao_ctxt->i4_ctb_x)
1917                     {
1918                         au1_avail_chroma[0] = 0;
1919                         au1_avail_chroma[4] = 0;
1920                         au1_avail_chroma[6] = 0;
1921                     }
1922 
1923                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1924                     {
1925                         au1_avail_chroma[1] = 0;
1926                         au1_avail_chroma[5] = 0;
1927                         au1_avail_chroma[7] = 0;
1928                     }
1929 
1930                     if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1931                     {
1932                         au1_avail_chroma[2] = 0;
1933                         au1_avail_chroma[4] = 0;
1934                         au1_avail_chroma[5] = 0;
1935                     }
1936 
1937                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1938                     {
1939                         au1_avail_chroma[3] = 0;
1940                         au1_avail_chroma[6] = 0;
1941                         au1_avail_chroma[7] = 0;
1942                     }
1943 
1944                     {
1945                         au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
1946                         au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
1947                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1948                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1949 
1950                         if(chroma_yuv420sp_vu)
1951                         {
1952                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1953                                                                                  src_strd,
1954                                                                                  pu1_src_left_chroma,
1955                                                                                  pu1_src_top_chroma,
1956                                                                                  pu1_sao_src_chroma_top_left_ctb,
1957                                                                                  au1_src_top_right,
1958                                                                                  au1_sao_src_top_left_chroma_bot_left,
1959                                                                                  au1_avail_chroma,
1960                                                                                  ai1_offset_cr,
1961                                                                                  ai1_offset_cb,
1962                                                                                  sao_wd_chroma,
1963                                                                                  sao_ht_chroma);
1964                         }
1965                         else
1966                         {
1967                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1968                                                                                  src_strd,
1969                                                                                  pu1_src_left_chroma,
1970                                                                                  pu1_src_top_chroma,
1971                                                                                  pu1_sao_src_chroma_top_left_ctb,
1972                                                                                  au1_src_top_right,
1973                                                                                  au1_sao_src_top_left_chroma_bot_left,
1974                                                                                  au1_avail_chroma,
1975                                                                                  ai1_offset_cb,
1976                                                                                  ai1_offset_cr,
1977                                                                                  sao_wd_chroma,
1978                                                                                  sao_ht_chroma);
1979                         }
1980                     }
1981 
1982                 }
1983             }
1984         }
1985 
1986         pu1_src_luma += sao_ht_luma * src_strd;
1987         pu1_src_chroma += sao_ht_chroma * src_strd;
1988         ps_sao += (ps_sps->i2_pic_wd_in_ctb);
1989     }
1990 
1991     /* Left CTB */
1992     if(ps_sao_ctxt->i4_ctb_x > 0)
1993     {
1994         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
1995         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
1996         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
1997         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
1998 
1999         WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2000         WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2001         WORD32 au4_idx_l[8], idx_l;
2002 
2003         WORD32 remaining_rows;
2004         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2005         if(remaining_rows <= SAO_SHIFT_CTB)
2006         {
2007             sao_ht_luma += remaining_rows;
2008         }
2009         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2010         if(remaining_rows <= SAO_SHIFT_CTB)
2011         {
2012             sao_ht_chroma += remaining_rows;
2013         }
2014 
2015         pu1_src_luma -= sao_wd_luma;
2016         pu1_src_chroma -= sao_wd_chroma;
2017         ps_sao -= 1;
2018         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2019         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2020         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2021         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2022 
2023 
2024         if(0 != sao_ht_luma)
2025         {
2026             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2027             {
2028                 if(0 == ps_sao->b3_y_type_idx)
2029                 {
2030                     /* Update left, top and top-left */
2031                     for(row = 0; row < sao_ht_luma; row++)
2032                     {
2033                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2034                     }
2035                     /*Update in next location*/
2036                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2037 
2038                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2039 
2040                 }
2041 
2042                 else if(1 == ps_sao->b3_y_type_idx)
2043                 {
2044                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2045                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2046                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2047                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2048 
2049                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2050                                                                               src_strd,
2051                                                                               pu1_src_left_luma,
2052                                                                               pu1_src_top_luma,
2053                                                                               pu1_sao_src_top_left_luma_curr_ctb,
2054                                                                               ps_sao->b5_y_band_pos,
2055                                                                               ai1_offset_y,
2056                                                                               sao_wd_luma,
2057                                                                               sao_ht_luma
2058                                                                              );
2059                 }
2060 
2061                 else // if(2 <= ps_sao->b3_y_type_idx)
2062                 {
2063                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2064                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2065                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2066                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2067 
2068                     for(i = 0; i < 8; i++)
2069                     {
2070                         au1_avail_luma[i] = 255;
2071                         au1_tile_slice_boundary[i] = 0;
2072                         au4_idx_l[i] = 0;
2073                         au4_ilf_across_tile_slice_enable[i] = 1;
2074                     }
2075                     /******************************************************************
2076                      * Derive the Left CTB's neighbour pixel's slice indices.
2077                      *
2078                      *
2079                      *          ____________
2080                      *         |    |       |
2081                      *         | L_T|       |
2082                      *         |____|_______|____
2083                      *         |    |       |    |
2084                      *     L_L |  L |  L_R  |    |
2085                      *         |____|_______|    |
2086                      *              |            |
2087                      *          L_D |            |
2088                      *              |____________|
2089                      *
2090                      *****************************************************************/
2091 
2092                     /*In case of slices or tiles*/
2093                     {
2094                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2095                         {
2096                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2097                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2098 
2099                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2100                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2101 
2102                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2103                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2104 
2105                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2106                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2107 
2108                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2109                             ctby_l = ps_sao_ctxt->i4_ctb_y;
2110 
2111                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2112                             {
2113                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2114                                 {
2115                                     au4_idx_l[2] = -1;
2116                                     au4_idx_l[4] = -1;
2117                                     au4_idx_l[5] = -1;
2118                                 }
2119                                 else
2120                                 {
2121                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2122                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2123                                 }
2124                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2125                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2126                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2127                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2128 
2129                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
2130                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2131                                 {
2132                                     au4_ilf_across_tile_slice_enable[2] = 0;
2133                                     au4_ilf_across_tile_slice_enable[4] = 0;
2134                                     au4_ilf_across_tile_slice_enable[5] = 0;
2135                                 }
2136                                 else
2137                                 {
2138                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2139                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2140 
2141                                 }
2142                                 //TODO: ILF flag checks for [0] and [6] are missing.
2143                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2144                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2145                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2146                                 /*
2147                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2148                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
2149                                  */
2150                                 for(i = 0; i < 8; i++)
2151                                 {
2152                                     /*Sets the edges that lie on the slice/tile boundary*/
2153                                     if(au4_idx_l[i] != idx_l)
2154                                     {
2155                                         au1_tile_slice_boundary[i] = 1;
2156                                     }
2157                                     else
2158                                     {
2159                                         au4_ilf_across_tile_slice_enable[i] = 1;
2160                                     }
2161                                 }
2162                                 /*Reset indices*/
2163                                 for(i = 0; i < 8; i++)
2164                                 {
2165                                     au4_idx_l[i] = 0;
2166                                 }
2167                             }
2168 
2169                             if(ps_pps->i1_tiles_enabled_flag)
2170                             {
2171                                 /* Calculate availability flags at tile boundary */
2172                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2173                                 {
2174                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2175                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2176                                     {
2177                                         if(0 == ps_sao_ctxt->i4_ctb_y)
2178                                         {
2179                                             au4_idx_l[2] = -1;
2180                                             au4_idx_l[4] = -1;
2181                                             au4_idx_l[5] = -1;
2182                                         }
2183                                         else
2184                                         {
2185                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2186                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2187                                         }
2188 
2189                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2190                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2191                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2192                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2193 
2194                                         for(i = 0; i < 8; i++)
2195                                         {
2196                                             /*Sets the edges that lie on the slice/tile boundary*/
2197                                             if(au4_idx_l[i] != idx_l)
2198                                             {
2199                                                 au1_tile_slice_boundary[i] |= 1;
2200                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2201                                             }
2202                                         }
2203                                     }
2204                                 }
2205                             }
2206 
2207                             for(i = 0; i < 8; i++)
2208                             {
2209                                 /*Mark as unavailable the neighbours across a slice/tile boundary when filtering across it is disabled*/
2210                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2211                                 {
2212                                     au1_avail_luma[i] = 0;
2213                                 }
2214                             }
2215                         }
2216                     }
2217                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2218                     {
2219                         au1_avail_luma[0] = 0;
2220                         au1_avail_luma[4] = 0;
2221                         au1_avail_luma[6] = 0;
2222                     }
2223                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2224                     {
2225                         au1_avail_luma[1] = 0;
2226                         au1_avail_luma[5] = 0;
2227                         au1_avail_luma[7] = 0;
2228                     }
2229 
2230                     if(0 == ps_sao_ctxt->i4_ctb_y)
2231                     {
2232                         au1_avail_luma[2] = 0;
2233                         au1_avail_luma[4] = 0;
2234                         au1_avail_luma[5] = 0;
2235                     }
2236 
2237                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2238                     {
2239                         au1_avail_luma[3] = 0;
2240                         au1_avail_luma[6] = 0;
2241                         au1_avail_luma[7] = 0;
2242                     }
2243 
2244                     {
2245                         au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2246                         u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2247                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2248                                                                           src_strd,
2249                                                                           pu1_src_left_luma,
2250                                                                           pu1_src_top_luma,
2251                                                                           pu1_sao_src_top_left_luma_curr_ctb,
2252                                                                           au1_src_top_right,
2253                                                                           &u1_sao_src_top_left_luma_bot_left,
2254                                                                           au1_avail_luma,
2255                                                                           ai1_offset_y,
2256                                                                           sao_wd_luma,
2257                                                                           sao_ht_luma);
2258                     }
2259 
2260                 }
2261             }
2262         }
2263 
2264         if(0 != sao_ht_chroma)
2265         {
2266             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
2267             {
2268                 if(0 == ps_sao->b3_cb_type_idx)
2269                 {
2270                     for(row = 0; row < sao_ht_chroma; row++)
2271                     {
2272                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2273                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2274                     }
2275                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2276                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2277 
2278                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2279                 }
2280 
2281                 else if(1 == ps_sao->b3_cb_type_idx)
2282                 {
2283                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2284                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2285                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2286                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2287 
2288                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2289                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2290                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2291                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2292 
2293                     if(chroma_yuv420sp_vu)
2294                     {
2295                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2296                                                                                     src_strd,
2297                                                                                     pu1_src_left_chroma,
2298                                                                                     pu1_src_top_chroma,
2299                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
2300                                                                                     ps_sao->b5_cr_band_pos,
2301                                                                                     ps_sao->b5_cb_band_pos,
2302                                                                                     ai1_offset_cr,
2303                                                                                     ai1_offset_cb,
2304                                                                                     sao_wd_chroma,
2305                                                                                     sao_ht_chroma
2306                                                                                    );
2307                     }
2308                     else
2309                     {
2310                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2311                                                                                     src_strd,
2312                                                                                     pu1_src_left_chroma,
2313                                                                                     pu1_src_top_chroma,
2314                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
2315                                                                                     ps_sao->b5_cb_band_pos,
2316                                                                                     ps_sao->b5_cr_band_pos,
2317                                                                                     ai1_offset_cb,
2318                                                                                     ai1_offset_cr,
2319                                                                                     sao_wd_chroma,
2320                                                                                     sao_ht_chroma
2321                                                                                    );
2322                     }
2323                 }
2324 
2325                 else // if(2 <= ps_sao->b3_cb_type_idx)
2326                 {
2327                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2328                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2329                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2330                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2331 
2332                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2333                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2334                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2335                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2336 
2337                     for(i = 0; i < 8; i++)
2338                     {
2339                         au1_avail_chroma[i] = 255;
2340                         au1_tile_slice_boundary[i] = 0;
2341                         au4_idx_l[i] = 0;
2342                         au4_ilf_across_tile_slice_enable[i] = 1;
2343                     }
2344                     /*In case of slices*/
2345                     {
2346                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2347                         {
2348                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2349                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2350 
2351                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2352                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2353 
2354                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2355                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2356 
2357                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2358                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2359 
2360                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2361                             ctby_l = ps_sao_ctxt->i4_ctb_y;
2362 
2363                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2364                             {
2365                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2366                                 {
2367                                     au4_idx_l[2] = -1;
2368                                     au4_idx_l[4] = -1;
2369                                     au4_idx_l[5] = -1;
2370                                 }
2371                                 else
2372                                 {
2373                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2374                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2375                                 }
2376                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2377                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2378                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2379                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2380 
2381                                 /*Verify that the neighbour ctbs don't cross pic boundary.*/
2382                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2383                                 {
2384                                     au4_ilf_across_tile_slice_enable[2] = 0;
2385                                     au4_ilf_across_tile_slice_enable[4] = 0;
2386                                     au4_ilf_across_tile_slice_enable[5] = 0;
2387                                 }
2388                                 else
2389                                 {
2390                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2391                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2392                                 }
2393                                 //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2394                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2395                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2396                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2397                                 /*
2398                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2399                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
2400                                  */
2401                                 for(i = 0; i < 8; i++)
2402                                 {
2403                                     /*Sets the edges that lie on the slice/tile boundary*/
2404                                     if(au4_idx_l[i] != idx_l)
2405                                     {
2406                                         au1_tile_slice_boundary[i] = 1;
2407                                     }
2408                                     else
2409                                     {
2410                                         au4_ilf_across_tile_slice_enable[i] = 1;
2411                                     }
2412                                 }
2413                                 /*Reset indices*/
2414                                 for(i = 0; i < 8; i++)
2415                                 {
2416                                     au4_idx_l[i] = 0;
2417                                 }
2418                             }
2419                             if(ps_pps->i1_tiles_enabled_flag)
2420                             {
2421                                 /* Calculate availability flags at tile boundary */
2422                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2423                                 {
2424                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2425                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2426                                     {
2427                                         if(0 == ps_sao_ctxt->i4_ctb_y)
2428                                         {
2429                                             au4_idx_l[2] = -1;
2430                                             au4_idx_l[4] = -1;
2431                                             au4_idx_l[5] = -1;
2432                                         }
2433                                         else
2434                                         {
2435                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2436                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2437                                         }
2438 
2439                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2440                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2441                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2442                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2443 
2444                                         for(i = 0; i < 8; i++)
2445                                         {
2446                                             /*Sets the edges that lie on the slice/tile boundary*/
2447                                             if(au4_idx_l[i] != idx_l)
2448                                             {
2449                                                 au1_tile_slice_boundary[i] |= 1;
2450                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2451                                             }
2452                                         }
2453                                     }
2454                                 }
2455                             }
2456                             for(i = 0; i < 8; i++)
2457                             {
2458                                 /*Mark as unavailable the neighbours across a slice/tile boundary when filtering across it is disabled*/
2459                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2460                                 {
2461                                     au1_avail_chroma[i] = 0;
2462                                 }
2463                             }
2464                         }
2465                     }
2466                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2467                     {
2468                         au1_avail_chroma[0] = 0;
2469                         au1_avail_chroma[4] = 0;
2470                         au1_avail_chroma[6] = 0;
2471                     }
2472 
2473                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2474                     {
2475                         au1_avail_chroma[1] = 0;
2476                         au1_avail_chroma[5] = 0;
2477                         au1_avail_chroma[7] = 0;
2478                     }
2479 
2480                     if(0 == ps_sao_ctxt->i4_ctb_y)
2481                     {
2482                         au1_avail_chroma[2] = 0;
2483                         au1_avail_chroma[4] = 0;
2484                         au1_avail_chroma[5] = 0;
2485                     }
2486 
2487                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
2488                     {
2489                         au1_avail_chroma[3] = 0;
2490                         au1_avail_chroma[6] = 0;
2491                         au1_avail_chroma[7] = 0;
2492                     }
2493 
2494                     {
2495                         au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2496                         au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2497                         au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2498                         au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2499                         //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2500                         //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2501                         if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2502                         {
2503                             au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2504                             au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2505                         }
2506 
2507 
2508                         if(chroma_yuv420sp_vu)
2509                         {
2510                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2511                                                                                  src_strd,
2512                                                                                  pu1_src_left_chroma,
2513                                                                                  pu1_src_top_chroma,
2514                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
2515                                                                                  au1_src_top_right,
2516                                                                                  au1_src_bot_left,
2517                                                                                  au1_avail_chroma,
2518                                                                                  ai1_offset_cr,
2519                                                                                  ai1_offset_cb,
2520                                                                                  sao_wd_chroma,
2521                                                                                  sao_ht_chroma);
2522                         }
2523                         else
2524                         {
2525                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2526                                                                                  src_strd,
2527                                                                                  pu1_src_left_chroma,
2528                                                                                  pu1_src_top_chroma,
2529                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
2530                                                                                  au1_src_top_right,
2531                                                                                  au1_src_bot_left,
2532                                                                                  au1_avail_chroma,
2533                                                                                  ai1_offset_cb,
2534                                                                                  ai1_offset_cr,
2535                                                                                  sao_wd_chroma,
2536                                                                                  sao_ht_chroma);
2537                         }
2538                     }
2539 
2540                 }
2541             }
2542 
2543         }
2544         pu1_src_luma += sao_wd_luma;
2545         pu1_src_chroma += sao_wd_chroma;
2546         ps_sao += 1;
2547     }
2548 
2549 
2550     /* Current CTB */
2551     {
2552         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2553         WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2554         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2555         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2556         WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2557         WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2558         WORD32 au4_idx_c[8], idx_c;
2559 
2560         WORD32 remaining_rows;
2561         WORD32 remaining_cols;
2562 
2563         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2564         if(remaining_cols <= SAO_SHIFT_CTB)
2565         {
2566             sao_wd_luma += remaining_cols;
2567         }
2568         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2569         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2570         {
2571             sao_wd_chroma += remaining_cols;
2572         }
2573 
2574         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2575         if(remaining_rows <= SAO_SHIFT_CTB)
2576         {
2577             sao_ht_luma += remaining_rows;
2578         }
2579         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2580         if(remaining_rows <= SAO_SHIFT_CTB)
2581         {
2582             sao_ht_chroma += remaining_rows;
2583         }
2584 
2585         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2586         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2587         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2588         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2589 
2590         if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2591         {
2592             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2593             {
2594                 if(0 == ps_sao->b3_y_type_idx)
2595                 {
2596                     /* Update left, top and top-left */
2597                     for(row = 0; row < sao_ht_luma; row++)
2598                     {
2599                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2600                     }
2601                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2602 
2603                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2604 
2605                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2606 
2607                 }
2608 
2609                 else if(1 == ps_sao->b3_y_type_idx)
2610                 {
2611                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2612                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2613                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2614                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2615 
2616                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2617                                                                               src_strd,
2618                                                                               pu1_src_left_luma,
2619                                                                               pu1_src_top_luma,
2620                                                                               pu1_sao_src_top_left_luma_curr_ctb,
2621                                                                               ps_sao->b5_y_band_pos,
2622                                                                               ai1_offset_y,
2623                                                                               sao_wd_luma,
2624                                                                               sao_ht_luma
2625                                                                              );
2626                 }
2627 
2628                 else // if(2 <= ps_sao->b3_y_type_idx)
2629                 {
2630                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2631                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2632                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2633                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2634 
2635                     for(i = 0; i < 8; i++)
2636                     {
2637                         au1_avail_luma[i] = 255;
2638                         au1_tile_slice_boundary[i] = 0;
2639                         au4_idx_c[i] = 0;
2640                         au4_ilf_across_tile_slice_enable[i] = 1;
2641                     }
2642                     /******************************************************************
2643                      * Derive the slice indices of the current CTB's neighbouring pixels.
2644                      *
2645                      *
2646                      *          ____________
2647                      *         |    |       |
2648                      *         |    | C_T   |
2649                      *         |____|_______|____
2650                      *         |    |       |    |
2651                      *         | C_L|   C   | C_R|
2652                      *         |____|_______|    |
2653                      *              |  C_D       |
2654                      *              |            |
2655                      *              |____________|
2656                      *
2657                      *****************************************************************/
2658 
2659                     /*In case of slices*/
2660                     {
2661                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2662                         {
2663                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2664                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2665 
2666                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2667                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2668 
2669                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2670                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2671 
2672                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2673                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2674 
2675                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
2676                             ctby_c = ps_sao_ctxt->i4_ctb_y;
2677 
2678                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2679                             {
2680                                 if(0 == ps_sao_ctxt->i4_ctb_x)
2681                                 {
2682                                     au4_idx_c[6] = -1;
2683                                     au4_idx_c[0] = -1;
2684                                     au4_idx_c[4] = -1;
2685                                 }
2686                                 else
2687                                 {
2688                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2689                                 }
2690 
2691                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2692                                 {
2693                                     au4_idx_c[2] = -1;
2694                                     au4_idx_c[5] = -1;
2695                                     au4_idx_c[4] = -1;
2696                                 }
2697                                 else
2698                                 {
2699                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2700                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2701                                 }
2702                                 idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2703                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2704                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2705 
2706                                 if(0 == ps_sao_ctxt->i4_ctb_x)
2707                                 {
2708                                     au4_ilf_across_tile_slice_enable[6] = 0;
2709                                     au4_ilf_across_tile_slice_enable[0] = 0;
2710                                     au4_ilf_across_tile_slice_enable[4] = 0;
2711                                 }
2712                                 else
2713                                 {
2714                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2715                                     au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2716                                 }
2717                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2718                                 {
2719                                     au4_ilf_across_tile_slice_enable[2] = 0;
2720                                     au4_ilf_across_tile_slice_enable[4] = 0;
2721                                     au4_ilf_across_tile_slice_enable[5] = 0;
2722                                 }
2723                                 else
2724                                 {
2725                                     au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2726                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2727                                 }
2728                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2729                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2730                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2731 
2732                                 /*
2733                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2734                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
2735                                  */
2736                                 for(i = 0; i < 8; i++)
2737                                 {
2738                                     /*Sets the edges that lie on the slice/tile boundary*/
2739                                     if(au4_idx_c[i] != idx_c)
2740                                     {
2741                                         au1_tile_slice_boundary[i] = 1;
2742                                     }
2743                                     else
2744                                     {
2745                                         au4_ilf_across_tile_slice_enable[i] = 1;
2746                                     }
2747                                 }
2748                                 /*Reset indices*/
2749                                 for(i = 0; i < 8; i++)
2750                                 {
2751                                     au4_idx_c[i] = 0;
2752                                 }
2753                             }
2754 
2755                             if(ps_pps->i1_tiles_enabled_flag)
2756                             {
2757                                 /* Calculate availability flags at tile boundary (CTB lies on the first column/row of a tile other than the one at the picture origin) */
2758                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2759                                 {
2760                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2761                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2762                                     {
2763                                         if(0 == ps_sao_ctxt->i4_ctb_x)
2764                                         {
2765                                             au4_idx_c[6] = -1;
2766                                             au4_idx_c[0] = -1;
2767                                             au4_idx_c[4] = -1;
2768                                         }
2769                                         else
2770                                         {
2771                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2772                                         }
2773 
2774                                         if(0 == ps_sao_ctxt->i4_ctb_y)
2775                                         {
2776                                             au4_idx_c[2] = -1;
2777                                             au4_idx_c[5] = -1;
2778                                             au4_idx_c[4] = -1;
2779                                         }
2780                                         else
2781                                         {
2782                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2783                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2784                                         }
2785                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2786                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2787                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2788 
2789                                         for(i = 0; i < 8; i++)
2790                                         {
2791                                             /*Sets the edges that lie on the slice/tile boundary*/
2792                                             if(au4_idx_c[i] != idx_c)
2793                                             {
2794                                                 au1_tile_slice_boundary[i] |= 1;
2795                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2796                                             }
2797                                         }
2798                                     }
2799                                 }
2800                             }
2801 
2802                             for(i = 0; i < 8; i++)
2803                             {
2804                                 /* Mark a neighbor unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled */
2805                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2806                                 {
2807                                     au1_avail_luma[i] = 0;
2808                                 }
2809                             }
2810 
2811                         }
2812                     }
2813                     if(0 == ps_sao_ctxt->i4_ctb_x)
2814                     {
2815                         au1_avail_luma[0] = 0;
2816                         au1_avail_luma[4] = 0;
2817                         au1_avail_luma[6] = 0;
2818                     }
2819 
2820                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2821                     {
2822                         au1_avail_luma[1] = 0;
2823                         au1_avail_luma[5] = 0;
2824                         au1_avail_luma[7] = 0;
2825                     }
2826 
2827                     if(0 == ps_sao_ctxt->i4_ctb_y)
2828                     {
2829                         au1_avail_luma[2] = 0;
2830                         au1_avail_luma[4] = 0;
2831                         au1_avail_luma[5] = 0;
2832                     }
2833 
2834                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2835                     {
2836                         au1_avail_luma[3] = 0;
2837                         au1_avail_luma[6] = 0;
2838                         au1_avail_luma[7] = 0;
2839                     }
2840 
2841                     {
2842                         au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2843                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2844 
2845                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2846                                                                           src_strd,
2847                                                                           pu1_src_left_luma,
2848                                                                           pu1_src_top_luma,
2849                                                                           pu1_sao_src_top_left_luma_curr_ctb,
2850                                                                           au1_src_top_right,
2851                                                                           &u1_sao_src_top_left_luma_bot_left,
2852                                                                           au1_avail_luma,
2853                                                                           ai1_offset_y,
2854                                                                           sao_wd_luma,
2855                                                                           sao_ht_luma);
2856                     }
2857                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2858                     pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2859                 }
2860             }
2861         }
2862 
2863         if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
2864         {
2865             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
2866             {
2867                 if(0 == ps_sao->b3_cb_type_idx)
2868                 {
2869                     for(row = 0; row < sao_ht_chroma; row++)
2870                     {
2871                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2872                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2873                     }
2874                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2875                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2876 
2877                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2878 
2879                     pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
2880                     pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
2881                 }
2882 
2883                 else if(1 == ps_sao->b3_cb_type_idx)
2884                 {
2885                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2886                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2887                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2888                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2889 
2890                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2891                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2892                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2893                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2894 
2895                     if(chroma_yuv420sp_vu)
2896                     {
2897                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2898                                                                                     src_strd,
2899                                                                                     pu1_src_left_chroma,
2900                                                                                     pu1_src_top_chroma,
2901                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
2902                                                                                     ps_sao->b5_cr_band_pos,
2903                                                                                     ps_sao->b5_cb_band_pos,
2904                                                                                     ai1_offset_cr,
2905                                                                                     ai1_offset_cb,
2906                                                                                     sao_wd_chroma,
2907                                                                                     sao_ht_chroma
2908                                                                                    );
2909                     }
2910                     else
2911                     {
2912                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2913                                                                                     src_strd,
2914                                                                                     pu1_src_left_chroma,
2915                                                                                     pu1_src_top_chroma,
2916                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
2917                                                                                     ps_sao->b5_cb_band_pos,
2918                                                                                     ps_sao->b5_cr_band_pos,
2919                                                                                     ai1_offset_cb,
2920                                                                                     ai1_offset_cr,
2921                                                                                     sao_wd_chroma,
2922                                                                                     sao_ht_chroma
2923                                                                                    );
2924                     }
2925                 }
2926 
2927                 else // if(2 <= ps_sao->b3_cb_type_idx)
2928                 {
2929                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2930                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2931                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2932                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2933 
2934                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2935                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2936                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2937                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2938 
2939                     for(i = 0; i < 8; i++)
2940                     {
2941                         au1_avail_chroma[i] = 255;
2942                         au1_tile_slice_boundary[i] = 0;
2943                         au4_idx_c[i] = 0;
2944                         au4_ilf_across_tile_slice_enable[i] = 1;
2945                     }
2946                     {
2947                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2948                         {
2949                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2950                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2951 
2952                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2953                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2954 
2955                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2956                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2957 
2958                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2959                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2960 
2961                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
2962                             ctby_c = ps_sao_ctxt->i4_ctb_y;
2963 
2964                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2965                             {
2966                                 if(0 == ps_sao_ctxt->i4_ctb_x)
2967                                 {
2968                                     au4_idx_c[0] = -1;
2969                                     au4_idx_c[4] = -1;
2970                                     au4_idx_c[6] = -1;
2971                                 }
2972                                 else
2973                                 {
2974                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2975                                 }
2976 
2977                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2978                                 {
2979                                     au4_idx_c[2] = -1;
2980                                     au4_idx_c[4] = -1;
2981                                     au4_idx_c[5] = -1;
2982                                 }
2983                                 else
2984                                 {
2985                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2986                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2987                                 }
2988                                 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2989                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2990                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2991 
2992                                 if(0 == ps_sao_ctxt->i4_ctb_x)
2993                                 {
2994                                     au4_ilf_across_tile_slice_enable[0] = 0;
2995                                     au4_ilf_across_tile_slice_enable[4] = 0;
2996                                     au4_ilf_across_tile_slice_enable[6] = 0;
2997                                 }
2998                                 else
2999                                 {
3000                                     au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3001                                     au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3002                                 }
3003 
3004                                 if(0 == ps_sao_ctxt->i4_ctb_y)
3005                                 {
3006                                     au4_ilf_across_tile_slice_enable[2] = 0;
3007                                     au4_ilf_across_tile_slice_enable[4] = 0;
3008                                     au4_ilf_across_tile_slice_enable[5] = 0;
3009                                 }
3010                                 else
3011                                 {
3012                                     au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3013                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3014                                 }
3015 
3016                                 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3017                                 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3018                                 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3019 
3020                                 /*
3021                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3022                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
3023                                  */
3024                                 for(i = 0; i < 8; i++)
3025                                 {
3026                                     /*Sets the edges that lie on the slice/tile boundary*/
3027                                     if(au4_idx_c[i] != idx_c)
3028                                     {
3029                                         au1_tile_slice_boundary[i] = 1;
3030                                     }
3031                                     else
3032                                     {
3033                                         au4_ilf_across_tile_slice_enable[i] = 1;
3034                                     }
3035                                 }
3036                                 /*Reset indices*/
3037                                 for(i = 0; i < 8; i++)
3038                                 {
3039                                     au4_idx_c[i] = 0;
3040                                 }
3041                             }
3042 
3043                             if(ps_pps->i1_tiles_enabled_flag)
3044                             {
3045                                 /* Calculate availability flags at tile boundary (CTB lies on the first column/row of a tile other than the one at the picture origin) */
3046                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3047                                 {
3048                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3049                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3050                                     {
3051                                         if(0 == ps_sao_ctxt->i4_ctb_x)
3052                                         {
3053                                             au4_idx_c[6] = -1;
3054                                             au4_idx_c[0] = -1;
3055                                             au4_idx_c[4] = -1;
3056                                         }
3057                                         else
3058                                         {
3059                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3060                                         }
3061 
3062                                         if(0 == ps_sao_ctxt->i4_ctb_y)
3063                                         {
3064                                             au4_idx_c[2] = -1;
3065                                             au4_idx_c[5] = -1;
3066                                             au4_idx_c[4] = -1;
3067                                         }
3068                                         else
3069                                         {
3070                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3071                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3072                                         }
3073                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3074                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3075                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3076 
3077                                         for(i = 0; i < 8; i++)
3078                                         {
3079                                             /*Sets the edges that lie on the slice/tile boundary*/
3080                                             if(au4_idx_c[i] != idx_c)
3081                                             {
3082                                                 au1_tile_slice_boundary[i] |= 1;
3083                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3084                                             }
3085                                         }
3086                                     }
3087                                 }
3088                             }
3089 
3090                             for(i = 0; i < 8; i++)
3091                             {
3092                                 /* Mark a neighbor unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled */
3093                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3094                                 {
3095                                     au1_avail_chroma[i] = 0;
3096                                 }
3097                             }
3098                         }
3099                     }
3100 
3101                     if(0 == ps_sao_ctxt->i4_ctb_x)
3102                     {
3103                         au1_avail_chroma[0] = 0;
3104                         au1_avail_chroma[4] = 0;
3105                         au1_avail_chroma[6] = 0;
3106                     }
3107 
3108                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3109                     {
3110                         au1_avail_chroma[1] = 0;
3111                         au1_avail_chroma[5] = 0;
3112                         au1_avail_chroma[7] = 0;
3113                     }
3114 
3115                     if(0 == ps_sao_ctxt->i4_ctb_y)
3116                     {
3117                         au1_avail_chroma[2] = 0;
3118                         au1_avail_chroma[4] = 0;
3119                         au1_avail_chroma[5] = 0;
3120                     }
3121 
3122                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
3123                     {
3124                         au1_avail_chroma[3] = 0;
3125                         au1_avail_chroma[6] = 0;
3126                         au1_avail_chroma[7] = 0;
3127                     }
3128 
3129                     {
3130                         au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3131                         au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3132 
3133                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3134                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3135 
3136                         if(chroma_yuv420sp_vu)
3137                         {
3138                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3139                                                                                  src_strd,
3140                                                                                  pu1_src_left_chroma,
3141                                                                                  pu1_src_top_chroma,
3142                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
3143                                                                                  au1_src_top_right,
3144                                                                                  au1_sao_src_top_left_chroma_bot_left,
3145                                                                                  au1_avail_chroma,
3146                                                                                  ai1_offset_cr,
3147                                                                                  ai1_offset_cb,
3148                                                                                  sao_wd_chroma,
3149                                                                                  sao_ht_chroma);
3150                         }
3151                         else
3152                         {
3153                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3154                                                                                  src_strd,
3155                                                                                  pu1_src_left_chroma,
3156                                                                                  pu1_src_top_chroma,
3157                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
3158                                                                                  au1_src_top_right,
3159                                                                                  au1_sao_src_top_left_chroma_bot_left,
3160                                                                                  au1_avail_chroma,
3161                                                                                  ai1_offset_cb,
3162                                                                                  ai1_offset_cr,
3163                                                                                  sao_wd_chroma,
3164                                                                                  sao_ht_chroma);
3165                         }
3166                     }
3167 
3168                 }
3169                 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3170                 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3171 
3172                 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3173                 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3174             }
3175 
3176         }
3177     }
3178 
3179 
3180 
3181 
3182 /* If no loop filter is enabled copy the backed up values */
3183     {
3184         /* Luma */
3185         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && no_loop_filter_enabled_luma)
3186         {
3187             UWORD32 u4_no_loop_filter_flag;
3188             WORD32 loop_filter_bit_pos;
3189             WORD32 log2_min_cu = 3;
3190             WORD32 min_cu = (1 << log2_min_cu);
3191             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3192             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3193             WORD32 sao_blk_wd = ctb_size;
3194             WORD32 remaining_rows;
3195             WORD32 remaining_cols;
3196 
3197             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3198             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3199             if(remaining_rows <= SAO_SHIFT_CTB)
3200                 sao_blk_ht += remaining_rows;
3201             if(remaining_cols <= SAO_SHIFT_CTB)
3202                 sao_blk_wd += remaining_cols;
3203 
3204             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3205             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3206 
3207             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3208 
3209             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3210                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3211             if(ps_sao_ctxt->i4_ctb_x > 0)
3212                 loop_filter_bit_pos -= 1;
3213 
3214             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3215                             (loop_filter_bit_pos >> 3);
3216 
3217             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3218                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3219             {
3220                 WORD32 tmp_wd = sao_blk_wd;
3221 
3222                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3223                                 (loop_filter_bit_pos & 7);
3224                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3225 
3226                 if(u4_no_loop_filter_flag)
3227                 {
3228                     while(tmp_wd > 0)
3229                     {
3230                         if(CTZ(u4_no_loop_filter_flag))
3231                         {
3232                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3233                             pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3234                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
3235                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3236                         }
3237                         else
3238                         {
3239                             for(row = 0; row < min_cu; row++)
3240                             {
3241                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3242                                 {
3243                                     pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3244                                 }
3245                             }
3246                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3247                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3248                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
3249                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3250                         }
3251                     }
3252 
3253                     pu1_src_tmp_luma -= sao_blk_wd;
3254                     pu1_src_backup_luma -= sao_blk_wd;
3255                 }
3256 
3257                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
3258                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
3259             }
3260         }
3261 
3262         /* Chroma */
3263         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && no_loop_filter_enabled_chroma)
3264         {
3265             UWORD32 u4_no_loop_filter_flag;
3266             WORD32 loop_filter_bit_pos;
3267             WORD32 log2_min_cu = 3;
3268             WORD32 min_cu = (1 << log2_min_cu);
3269             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3270             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3271             WORD32 sao_blk_wd = ctb_size;
3272             WORD32 remaining_rows;
3273             WORD32 remaining_cols;
3274 
3275             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3276             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3277             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3278                 sao_blk_ht += remaining_rows;
3279             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3280                 sao_blk_wd += remaining_cols;
3281 
3282             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3283             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3284 
3285             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3286 
3287             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3288                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3289             if(ps_sao_ctxt->i4_ctb_x > 0)
3290                 loop_filter_bit_pos -= 2;
3291 
3292             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3293                             (loop_filter_bit_pos >> 3);
3294 
3295             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3296                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3297             {
3298                 WORD32 tmp_wd = sao_blk_wd;
3299 
3300                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3301                                 (loop_filter_bit_pos & 7);
3302                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3303 
3304                 if(u4_no_loop_filter_flag)
3305                 {
3306                     while(tmp_wd > 0)
3307                     {
3308                         if(CTZ(u4_no_loop_filter_flag))
3309                         {
3310                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3311                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3312                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
3313                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3314                         }
3315                         else
3316                         {
3317                             for(row = 0; row < min_cu / 2; row++)
3318                             {
3319                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3320                                 {
3321                                     pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3322                                 }
3323                             }
3324 
3325                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3326                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3327                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
3328                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3329                         }
3330                     }
3331 
3332                     pu1_src_tmp_chroma -= sao_blk_wd;
3333                     pu1_src_backup_chroma -= sao_blk_wd;
3334                 }
3335 
3336                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3337                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3338             }
3339         }
3340     }
3341 
3342 }
3343 
3344