• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19  *******************************************************************************
20  * @file
21  *  ihevc_sao.c
22  *
23  * @brief
24  *  Contains function definitions for sample adaptive offset process
25  *
26  * @author
27  *  Srinivas T
28  *
29  * @par List of Functions:
30  *
31  * @remarks
32  *  None
33  *
34  *******************************************************************************
35  */
36 
37 #include <stdio.h>
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <assert.h>
42 
43 #include "ihevc_typedefs.h"
44 #include "iv.h"
45 #include "ivd.h"
46 #include "ihevcd_cxa.h"
47 #include "ithread.h"
48 
49 #include "ihevc_defs.h"
50 #include "ihevc_debug.h"
51 #include "ihevc_defs.h"
52 #include "ihevc_structs.h"
53 #include "ihevc_macros.h"
54 #include "ihevc_platform_macros.h"
55 #include "ihevc_cabac_tables.h"
56 #include "ihevc_sao.h"
57 #include "ihevc_mem_fns.h"
58 
59 #include "ihevc_error.h"
60 #include "ihevc_common_tables.h"
61 
62 #include "ihevcd_trace.h"
63 #include "ihevcd_defs.h"
64 #include "ihevcd_function_selector.h"
65 #include "ihevcd_structs.h"
66 #include "ihevcd_error.h"
67 #include "ihevcd_nal.h"
68 #include "ihevcd_bitstream.h"
69 #include "ihevcd_job_queue.h"
70 #include "ihevcd_utils.h"
71 
72 #include "ihevc_deblk.h"
73 #include "ihevc_deblk_tables.h"
74 #include "ihevcd_profile.h"
75 #include "ihevcd_sao.h"
76 #include "ihevcd_debug.h"
77 
78 #define SAO_SHIFT_CTB    8
79 
80 /**
81  * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
82  */
ihevcd_sao_ctb(sao_ctxt_t * ps_sao_ctxt)83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84 {
85     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86     UWORD8 *pu1_src_luma;
87     UWORD8 *pu1_src_chroma;
88     WORD32 src_strd;
89     WORD32 ctb_size;
90     WORD32 log2_ctb_size;
91     sps_t *ps_sps;
92     sao_t *ps_sao;
93     WORD32 row, col;
94     UWORD8 au1_avail_luma[8];
95     UWORD8 au1_avail_chroma[8];
96     WORD32 i;
97     UWORD8 *pu1_src_top_luma;
98     UWORD8 *pu1_src_top_chroma;
99     UWORD8 *pu1_src_left_luma;
100     UWORD8 *pu1_src_left_chroma;
101     UWORD8 au1_src_top_right[2];
102     UWORD8 au1_src_bot_left[2];
103     UWORD8 *pu1_no_loop_filter_flag;
104     WORD32 loop_filter_strd;
105 
106     WORD8 ai1_offset_y[5];
107     WORD8 ai1_offset_cb[5];
108     WORD8 ai1_offset_cr[5];
109 
110     PROFILE_DISABLE_SAO();
111 
112     ai1_offset_y[0] = 0;
113     ai1_offset_cb[0] = 0;
114     ai1_offset_cr[0] = 0;
115 
116     ps_sps = ps_sao_ctxt->ps_sps;
117     log2_ctb_size = ps_sps->i1_log2_ctb_size;
118     ctb_size = (1 << log2_ctb_size);
119     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
120     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
121     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
122 
123     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
124     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
125 
126     /* Current CTB */
127     {
128         WORD32 sao_wd_luma;
129         WORD32 sao_wd_chroma;
130         WORD32 sao_ht_luma;
131         WORD32 sao_ht_chroma;
132 
133         WORD32 remaining_rows;
134         WORD32 remaining_cols;
135 
136         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
137         sao_wd_luma = MIN(ctb_size, remaining_cols);
138         sao_wd_chroma = MIN(ctb_size, remaining_cols);
139 
140         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
141         sao_ht_luma = MIN(ctb_size, remaining_rows);
142         sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
143 
144         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
145         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
146         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
147         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
148 
149         pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
150                         ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
151                         ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
152 
153         ai1_offset_y[1] = ps_sao->b4_y_offset_1;
154         ai1_offset_y[2] = ps_sao->b4_y_offset_2;
155         ai1_offset_y[3] = ps_sao->b4_y_offset_3;
156         ai1_offset_y[4] = ps_sao->b4_y_offset_4;
157 
158         ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
159         ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
160         ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
161         ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
162 
163         ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
164         ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
165         ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
166         ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
167 
168         for(i = 0; i < 8; i++)
169         {
170             au1_avail_luma[i] = 255;
171             au1_avail_chroma[i] = 255;
172         }
173 
174 
175         if(0 == ps_sao_ctxt->i4_ctb_x)
176         {
177             au1_avail_luma[0] = 0;
178             au1_avail_luma[4] = 0;
179             au1_avail_luma[6] = 0;
180 
181             au1_avail_chroma[0] = 0;
182             au1_avail_chroma[4] = 0;
183             au1_avail_chroma[6] = 0;
184         }
185 
186         if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
187         {
188             au1_avail_luma[1] = 0;
189             au1_avail_luma[5] = 0;
190             au1_avail_luma[7] = 0;
191 
192             au1_avail_chroma[1] = 0;
193             au1_avail_chroma[5] = 0;
194             au1_avail_chroma[7] = 0;
195         }
196 
197         if(0 == ps_sao_ctxt->i4_ctb_y)
198         {
199             au1_avail_luma[2] = 0;
200             au1_avail_luma[4] = 0;
201             au1_avail_luma[5] = 0;
202 
203             au1_avail_chroma[2] = 0;
204             au1_avail_chroma[4] = 0;
205             au1_avail_chroma[5] = 0;
206         }
207 
208         if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
209         {
210             au1_avail_luma[3] = 0;
211             au1_avail_luma[6] = 0;
212             au1_avail_luma[7] = 0;
213 
214             au1_avail_chroma[3] = 0;
215             au1_avail_chroma[6] = 0;
216             au1_avail_chroma[7] = 0;
217         }
218 
219 
220         if(0 == ps_sao->b3_y_type_idx)
221         {
222             /* Update left, top and top-left */
223             for(row = 0; row < sao_ht_luma; row++)
224             {
225                 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
226             }
227             ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
228 
229             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
230 
231         }
232         else
233         {
234             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
235             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
236             WORD32 tmp_strd = MAX_CTB_SIZE + 2;
237             WORD32 no_loop_filter_enabled = 0;
238 
239             /* Check the loop filter flags and copy the original values for back up */
240             {
241                 UWORD32 u4_no_loop_filter_flag;
242                 WORD32 min_cu = 8;
243                 UWORD8 *pu1_src_tmp = pu1_src_luma;
244 
245                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
246                 {
247                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
248                                     ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
249                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
250 
251                     if(u4_no_loop_filter_flag)
252                     {
253                         WORD32 tmp_wd = sao_wd_luma;
254                         no_loop_filter_enabled = 1;
255                         while(tmp_wd > 0)
256                         {
257                             if(CTZ(u4_no_loop_filter_flag))
258                             {
259                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
260                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
261                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
262                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
263                             }
264                             else
265                             {
266                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
267                                 {
268                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
269                                     {
270                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
271                                     }
272                                 }
273 
274                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
275                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
276                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
277                                 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
278                             }
279                         }
280 
281                         pu1_src_tmp -= sao_wd_luma;
282                     }
283 
284                     pu1_src_tmp += min_cu * src_strd;
285                     pu1_src_copy += min_cu * tmp_strd;
286                 }
287             }
288 
289             if(1 == ps_sao->b3_y_type_idx)
290             {
291                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
292                                                                           src_strd,
293                                                                           pu1_src_left_luma,
294                                                                           pu1_src_top_luma,
295                                                                           ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
296                                                                           ps_sao->b5_y_band_pos,
297                                                                           ai1_offset_y,
298                                                                           sao_wd_luma,
299                                                                           sao_ht_luma);
300             }
301             else // if(2 <= ps_sao->b3_y_type_idx)
302             {
303                 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
304                 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
305                 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
306                                                                   src_strd,
307                                                                   pu1_src_left_luma,
308                                                                   pu1_src_top_luma,
309                                                                   ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
310                                                                   au1_src_top_right,
311                                                                   au1_src_bot_left,
312                                                                   au1_avail_luma,
313                                                                   ai1_offset_y,
314                                                                   sao_wd_luma,
315                                                                   sao_ht_luma);
316             }
317 
318             /* Check the loop filter flags and copy the original values back if they are set */
319             if(no_loop_filter_enabled)
320             {
321                 UWORD32 u4_no_loop_filter_flag;
322                 WORD32 min_cu = 8;
323                 UWORD8 *pu1_src_tmp = pu1_src_luma;
324 
325                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
326                 {
327                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
328                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
329 
330                     if(u4_no_loop_filter_flag)
331                     {
332                         WORD32 tmp_wd = sao_wd_luma;
333                         while(tmp_wd > 0)
334                         {
335                             if(CTZ(u4_no_loop_filter_flag))
336                             {
337                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
338                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
339                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
340                                 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
341                             }
342                             else
343                             {
344                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
345                                 {
346                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
347                                     {
348                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
349                                     }
350                                 }
351 
352                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
353                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
354                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
355                                 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
356                             }
357                         }
358 
359                         pu1_src_tmp -= sao_wd_luma;
360                     }
361 
362                     pu1_src_tmp += min_cu * src_strd;
363                     pu1_src_copy += min_cu * tmp_strd;
364                 }
365             }
366 
367         }
368 
369         if(0 == ps_sao->b3_cb_type_idx)
370         {
371             for(row = 0; row < sao_ht_chroma; row++)
372             {
373                 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
374                 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
375             }
376             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
377             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
378 
379             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
380         }
381         else
382         {
383             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
384             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
385             WORD32 tmp_strd = MAX_CTB_SIZE + 4;
386             WORD32 no_loop_filter_enabled = 0;
387 
388             /* Check the loop filter flags and copy the original values for back up */
389             {
390                 UWORD32 u4_no_loop_filter_flag;
391                 WORD32 min_cu = 4;
392                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
393 
394                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
395                 {
396                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
397                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
398 
399                     if(u4_no_loop_filter_flag)
400                     {
401                         WORD32 tmp_wd = sao_wd_chroma;
402                         no_loop_filter_enabled = 1;
403                         while(tmp_wd > 0)
404                         {
405                             if(CTZ(u4_no_loop_filter_flag))
406                             {
407                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
408                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
409                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
410                                 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
411                             }
412                             else
413                             {
414                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
415                                 {
416                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
417                                     {
418                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
419                                     }
420                                 }
421 
422                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
423                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
424                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
425                                 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
426                             }
427                         }
428 
429                         pu1_src_tmp -= sao_wd_chroma;
430                     }
431 
432                     pu1_src_tmp += min_cu * src_strd;
433                     pu1_src_copy += min_cu * tmp_strd;
434                 }
435             }
436 
437             if(1 == ps_sao->b3_cb_type_idx)
438             {
439                 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
440                                                                             src_strd,
441                                                                             pu1_src_left_chroma,
442                                                                             pu1_src_top_chroma,
443                                                                             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
444                                                                             ps_sao->b5_cb_band_pos,
445                                                                             ps_sao->b5_cr_band_pos,
446                                                                             ai1_offset_cb,
447                                                                             ai1_offset_cr,
448                                                                             sao_wd_chroma,
449                                                                             sao_ht_chroma
450                                                                            );
451             }
452             else // if(2 <= ps_sao->b3_cb_type_idx)
453             {
454                 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
455                 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
456                 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
457                 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
458                 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
459                                                                      src_strd,
460                                                                      pu1_src_left_chroma,
461                                                                      pu1_src_top_chroma,
462                                                                      ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
463                                                                      au1_src_top_right,
464                                                                      au1_src_bot_left,
465                                                                      au1_avail_chroma,
466                                                                      ai1_offset_cb,
467                                                                      ai1_offset_cr,
468                                                                      sao_wd_chroma,
469                                                                      sao_ht_chroma);
470             }
471 
472             /* Check the loop filter flags and copy the original values back if they are set */
473             if(no_loop_filter_enabled)
474             {
475                 UWORD32 u4_no_loop_filter_flag;
476                 WORD32 min_cu = 4;
477                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
478 
479                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
480                 {
481                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
482                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
483 
484                     if(u4_no_loop_filter_flag)
485                     {
486                         WORD32 tmp_wd = sao_wd_chroma;
487                         while(tmp_wd > 0)
488                         {
489                             if(CTZ(u4_no_loop_filter_flag))
490                             {
491                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
492                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
493                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
494                                 tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
495                             }
496                             else
497                             {
498                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
499                                 {
500                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
501                                     {
502                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
503                                     }
504                                 }
505 
506                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
507                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
508                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
509                                 tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
510                             }
511                         }
512 
513                         pu1_src_tmp -= sao_wd_chroma;
514                     }
515 
516                     pu1_src_tmp += min_cu * src_strd;
517                     pu1_src_copy += min_cu * tmp_strd;
518                 }
519             }
520 
521         }
522 
523     }
524 }
525 
ihevcd_sao_shift_ctb(sao_ctxt_t * ps_sao_ctxt)526 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
527 {
528     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
529     UWORD8 *pu1_src_luma;
530     UWORD8 *pu1_src_chroma;
531     WORD32 src_strd;
532     WORD32 ctb_size;
533     WORD32 log2_ctb_size;
534     sps_t *ps_sps;
535     sao_t *ps_sao;
536     pps_t *ps_pps;
537     slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
538     tile_t *ps_tile;
539     UWORD16 *pu1_slice_idx;
540     UWORD16 *pu1_tile_idx;
541     WORD32 row, col;
542     UWORD8 au1_avail_luma[8];
543     UWORD8 au1_avail_chroma[8];
544     UWORD8 au1_tile_slice_boundary[8];
545     UWORD8 au4_ilf_across_tile_slice_enable[8];
546     WORD32 i;
547     UWORD8 *pu1_src_top_luma;
548     UWORD8 *pu1_src_top_chroma;
549     UWORD8 *pu1_src_left_luma;
550     UWORD8 *pu1_src_left_chroma;
551     UWORD8 au1_src_top_right[2];
552     UWORD8 au1_src_bot_left[2];
553     UWORD8 *pu1_no_loop_filter_flag;
554     UWORD8 *pu1_src_backup_luma;
555     UWORD8 *pu1_src_backup_chroma;
556     WORD32 backup_strd;
557     WORD32 loop_filter_strd;
558 
559     WORD32 no_loop_filter_enabled_luma = 0;
560     WORD32 no_loop_filter_enabled_chroma = 0;
561     UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
562     UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
563     UWORD8 *pu1_sao_src_luma_top_left_ctb;
564     UWORD8 *pu1_sao_src_chroma_top_left_ctb;
565     UWORD8 *pu1_sao_src_top_left_luma_top_right;
566     UWORD8 *pu1_sao_src_top_left_chroma_top_right;
567     UWORD8  u1_sao_src_top_left_luma_bot_left;
568     UWORD8  *pu1_sao_src_top_left_luma_bot_left;
569     UWORD8 *au1_sao_src_top_left_chroma_bot_left;
570     UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
571     /* Only 5 values are used, but arrays are large
572      enough so that SIMD functions can read 64 bits at a time */
573     WORD8 ai1_offset_y[8];
574     WORD8 ai1_offset_cb[8];
575     WORD8 ai1_offset_cr[8];
576     WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
577 
578     PROFILE_DISABLE_SAO();
579 
580     ai1_offset_y[0] = 0;
581     ai1_offset_cb[0] = 0;
582     ai1_offset_cr[0] = 0;
583 
584     ps_sps = ps_sao_ctxt->ps_sps;
585     ps_pps = ps_sao_ctxt->ps_pps;
586     ps_tile = ps_sao_ctxt->ps_tile;
587 
588     log2_ctb_size = ps_sps->i1_log2_ctb_size;
589     ctb_size = (1 << log2_ctb_size);
590     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
591     ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
592     ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
593 
594     pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
595     pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
596     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
597     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
598 
599     /*Stores the left value for each row ctbs- Needed for column tiles*/
600     pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
601     pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
602     pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
603     pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
604     u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
605     pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
606     au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
607     pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
608     pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
609     pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
610 
611     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
612     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
613     backup_strd = 2 * MAX_CTB_SIZE;
614 
615     DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
616 
617     {
618         /* Check the loop filter flags and copy the original values for back up */
619         /* Luma */
620 
621         /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
622          * can belong to different slice with their own sao_enable flag */
623         {
624             UWORD32 u4_no_loop_filter_flag;
625             WORD32 loop_filter_bit_pos;
626             WORD32 log2_min_cu = 3;
627             WORD32 min_cu = (1 << log2_min_cu);
628             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
629             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
630             WORD32 sao_blk_wd = ctb_size;
631             WORD32 remaining_rows;
632             WORD32 remaining_cols;
633 
634             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
635             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
636             if(remaining_rows <= SAO_SHIFT_CTB)
637                 sao_blk_ht += remaining_rows;
638             if(remaining_cols <= SAO_SHIFT_CTB)
639                 sao_blk_wd += remaining_cols;
640 
641             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
642             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
643 
644             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
645 
646             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
647                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
648             if(ps_sao_ctxt->i4_ctb_x > 0)
649                 loop_filter_bit_pos -= 1;
650 
651             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
652                             (loop_filter_bit_pos >> 3);
653 
654             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
655                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
656             {
657                 WORD32 tmp_wd = sao_blk_wd;
658 
659                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
660                                 (loop_filter_bit_pos & 7);
661                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
662 
663                 if(u4_no_loop_filter_flag)
664                 {
665                     no_loop_filter_enabled_luma = 1;
666                     while(tmp_wd > 0)
667                     {
668                         if(CTZ(u4_no_loop_filter_flag))
669                         {
670                             pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
671                             pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
672                             tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
673                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
674                         }
675                         else
676                         {
677                             for(row = 0; row < min_cu; row++)
678                             {
679                                 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
680                                 {
681                                     pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
682                                 }
683                             }
684                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
685                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
686                             tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
687                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
688                         }
689                     }
690 
691                     pu1_src_tmp_luma -= sao_blk_wd;
692                     pu1_src_backup_luma -= sao_blk_wd;
693                 }
694 
695                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
696                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
697             }
698         }
699 
700         /* Chroma */
701 
702         {
703             UWORD32 u4_no_loop_filter_flag;
704             WORD32 loop_filter_bit_pos;
705             WORD32 log2_min_cu = 3;
706             WORD32 min_cu = (1 << log2_min_cu);
707             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
708             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
709             WORD32 sao_blk_wd = ctb_size;
710             WORD32 remaining_rows;
711             WORD32 remaining_cols;
712 
713             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
714             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
715             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
716                 sao_blk_ht += remaining_rows;
717             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
718                 sao_blk_wd += remaining_cols;
719 
720             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
721             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
722 
723             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
724 
725             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
726                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
727             if(ps_sao_ctxt->i4_ctb_x > 0)
728                 loop_filter_bit_pos -= 2;
729 
730             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
731                             (loop_filter_bit_pos >> 3);
732 
733             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
734                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
735             {
736                 WORD32 tmp_wd = sao_blk_wd;
737 
738                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
739                                 (loop_filter_bit_pos & 7);
740                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
741 
742                 if(u4_no_loop_filter_flag)
743                 {
744                     no_loop_filter_enabled_chroma = 1;
745                     while(tmp_wd > 0)
746                     {
747                         if(CTZ(u4_no_loop_filter_flag))
748                         {
749                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
750                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
751                             tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
752                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
753                         }
754                         else
755                         {
756                             for(row = 0; row < min_cu / 2; row++)
757                             {
758                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
759                                 {
760                                     pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
761                                 }
762                             }
763 
764                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
765                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
766                             tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
767                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
768                         }
769                     }
770 
771                     pu1_src_tmp_chroma -= sao_blk_wd;
772                     pu1_src_backup_chroma -= sao_blk_wd;
773                 }
774 
775                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
776                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
777             }
778         }
779     }
780 
781     DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
782 
783     /* Top-left CTB */
784     if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
785     {
786         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
787         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
788         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
789         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
790 
791         WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
792         WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
793         WORD32 au4_idx_tl[8], idx_tl;
794 
795         slice_header_t *ps_slice_hdr_top_left;
796         {
797             WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
798                                         (ps_sao_ctxt->i4_ctb_x - 1);
799             ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
800         }
801 
802 
803         pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
804         pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
805         ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
806         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
807         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
808         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
809         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
810 
811         if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
812         {
813             if(0 == ps_sao->b3_y_type_idx)
814             {
815                 /* Update left, top and top-left */
816                 for(row = 0; row < sao_ht_luma; row++)
817                 {
818                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
819                 }
820                 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
821 
822                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
823 
824 
825             }
826 
827             else if(1 == ps_sao->b3_y_type_idx)
828             {
829                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
830                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
831                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
832                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
833 
834                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
835                                                                           src_strd,
836                                                                           pu1_src_left_luma,
837                                                                           pu1_src_top_luma,
838                                                                           pu1_sao_src_luma_top_left_ctb,
839                                                                           ps_sao->b5_y_band_pos,
840                                                                           ai1_offset_y,
841                                                                           sao_wd_luma,
842                                                                           sao_ht_luma
843                                                                          );
844             }
845 
846             else // if(2 <= ps_sao->b3_y_type_idx)
847             {
848                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
849                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
850                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
851                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
852 
853                 for(i = 0; i < 8; i++)
854                 {
855                     au1_avail_luma[i] = 255;
856                     au1_tile_slice_boundary[i] = 0;
857                     au4_idx_tl[i] = 0;
858                     au4_ilf_across_tile_slice_enable[i] = 1;
859                 }
860 
861                 /******************************************************************
862                  * Derive the  Top-left CTB's neighbor pixel's slice indices.
863                  *
864                  *          TL_T
865                  *       4  _2__5________
866                  *     0   |    |       |
867                  *    TL_L | TL | 1 TL_R|
868                  *         |____|_______|____
869                  *        6|TL_D|7      |    |
870                  *         | 3  |       |    |
871                  *         |____|_______|    |
872                  *              |            |
873                  *              |            |
874                  *              |____________|
875                  *
876                  *****************************************************************/
877 
878                 /* Slice/tile boundary checks are needed only for multi-slice or tiled clips; skip them otherwise */
879                 {
880                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
881                     {
882                         {
883                             /*Assuming that sao shift is uniform along x and y directions*/
884                             if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
885                             {
886                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
887                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
888                             }
889                             else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
890                             {
891                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
892                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
893                             }
894                             ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
895                             ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
896 
897                             ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
898                             ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
899 
900                             ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
901                             ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
902 
903                             ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
904                             ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
905                         }
906 
907                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
908                         {
909                             /*Calculate slice indices for neighbor pixels*/
910                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
911                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
912                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
913                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
914                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
915                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
916 
917                             if((0 == (1 << log2_ctb_size) - sao_wd_luma))
918                             {
919                                 if(ps_sao_ctxt->i4_ctb_x == 1)
920                                 {
921                                     au4_idx_tl[6] = -1;
922                                     au4_idx_tl[4] = -1;
923                                 }
924                                 else
925                                 {
926                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
927                                 }
928                                 if(ps_sao_ctxt->i4_ctb_y == 1)
929                                 {
930                                     au4_idx_tl[5] = -1;
931                                     au4_idx_tl[4] = -1;
932                                 }
933                                 else
934                                 {
935                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
936                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
937                                 }
938                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
939                             }
940 
941                             /* Verify that the neighbor CTBs don't cross the picture boundary.
942                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
943                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
944                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
945                              * the respective pixel's flags are checked
946                              */
947 
948                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
949                             {
950                                 au4_ilf_across_tile_slice_enable[4] = 0;
951                                 au4_ilf_across_tile_slice_enable[6] = 0;
952                             }
953                             else
954                             {
955                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
956                             }
957                             if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
958                             {
959                                 au4_ilf_across_tile_slice_enable[5] = 0;
960                                 au4_ilf_across_tile_slice_enable[4] = 0;
961                             }
962                             else
963                             {
964                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
965                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
966                             }
967                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
968                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
969                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
970                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
971                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
972 
973                             if(au4_idx_tl[5] > idx_tl)
974                             {
975                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
976                             }
977 
978                             /*
979                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
980                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
981                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
982                              * the respective pixel's flags are checked
983                              */
984                             for(i = 0; i < 8; i++)
985                             {
986                                 /*Sets the edges that lie on the slice/tile boundary*/
987                                 if(au4_idx_tl[i] != idx_tl)
988                                 {
989                                     au1_tile_slice_boundary[i] = 1;
990                                 }
991                                 else
992                                 {
993                                     au4_ilf_across_tile_slice_enable[i] = 1;
994                                 }
995                             }
996 
997                             ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
998                         }
999 
1000                         if(ps_pps->i1_tiles_enabled_flag)
1001                         {
1002                             /* Calculate availability flags at tile boundary */
1003                             if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1004                             {
1005                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1006                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1007                                 {
1008                                     /*Set the boundary arrays*/
1009                                     /*Calculate tile indices for neighbor pixels*/
1010                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1011                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1012                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1013                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1014                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1015                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1016 
1017                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1018                                     {
1019                                         if(ps_sao_ctxt->i4_ctb_x == 1)
1020                                         {
1021                                             au4_idx_tl[6] = -1;
1022                                             au4_idx_tl[4] = -1;
1023                                         }
1024                                         else
1025                                         {
1026                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1027                                         }
1028                                         if(ps_sao_ctxt->i4_ctb_y == 1)
1029                                         {
1030                                             au4_idx_tl[5] = -1;
1031                                             au4_idx_tl[4] = -1;
1032                                         }
1033                                         else
1034                                         {
1035                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1036                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1037                                         }
1038                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1039                                     }
1040                                     for(i = 0; i < 8; i++)
1041                                     {
1042                                         /*Sets the edges that lie on the tile boundary*/
1043                                         if(au4_idx_tl[i] != idx_tl)
1044                                         {
1045                                             au1_tile_slice_boundary[i] |= 1;
1046                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1047                                         }
1048                                     }
1049                                 }
1050                             }
1051                         }
1052 
1053 
1054                         /*Set availability flags based on tile and slice boundaries*/
1055                         for(i = 0; i < 8; i++)
1056                         {
1057                             /* Mark as unavailable the edges that lie on a slice/tile boundary across which in-loop filtering is disabled */
1058                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1059                             {
1060                                 au1_avail_luma[i] = 0;
1061                             }
1062                         }
1063                     }
1064                 }
1065 
1066                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1067                 {
1068                     au1_avail_luma[0] = 0;
1069                     au1_avail_luma[4] = 0;
1070                     au1_avail_luma[6] = 0;
1071                 }
1072 
1073                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1074                 {
1075                     au1_avail_luma[1] = 0;
1076                     au1_avail_luma[5] = 0;
1077                     au1_avail_luma[7] = 0;
1078                 }
1079                 //y==1 case
1080                 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1081                 {
1082                     au1_avail_luma[2] = 0;
1083                     au1_avail_luma[4] = 0;
1084                     au1_avail_luma[5] = 0;
1085                 }
1086                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1087                 {
1088                     au1_avail_luma[3] = 0;
1089                     au1_avail_luma[6] = 0;
1090                     au1_avail_luma[7] = 0;
1091                 }
1092 
1093                 {
1094                     au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1095                     u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1096                     ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1097                                                                       src_strd,
1098                                                                       pu1_src_left_luma,
1099                                                                       pu1_src_top_luma,
1100                                                                       pu1_sao_src_luma_top_left_ctb,
1101                                                                       au1_src_top_right,
1102                                                                       &u1_sao_src_top_left_luma_bot_left,
1103                                                                       au1_avail_luma,
1104                                                                       ai1_offset_y,
1105                                                                       sao_wd_luma,
1106                                                                       sao_ht_luma);
1107                 }
1108             }
1109 
1110         }
1111         else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1112         {
1113             /* Update left, top and top-left */
1114             for(row = 0; row < sao_ht_luma; row++)
1115             {
1116                 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1117             }
1118             pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1119 
1120             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1121         }
1122 
1123         if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
1124         {
1125             if(0 == ps_sao->b3_cb_type_idx)
1126             {
1127                 for(row = 0; row < sao_ht_chroma; row++)
1128                 {
1129                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1130                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1131                 }
1132                 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1133                 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1134 
1135                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1136 
1137             }
1138 
1139             else if(1 == ps_sao->b3_cb_type_idx)
1140             {
1141                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1142                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1143                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1144                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1145 
1146                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1147                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1148                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1149                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1150 
1151                 if(chroma_yuv420sp_vu)
1152                 {
1153                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1154                                                                                 src_strd,
1155                                                                                 pu1_src_left_chroma,
1156                                                                                 pu1_src_top_chroma,
1157                                                                                 pu1_sao_src_chroma_top_left_ctb,
1158                                                                                 ps_sao->b5_cr_band_pos,
1159                                                                                 ps_sao->b5_cb_band_pos,
1160                                                                                 ai1_offset_cr,
1161                                                                                 ai1_offset_cb,
1162                                                                                 sao_wd_chroma,
1163                                                                                 sao_ht_chroma
1164                                                                                );
1165                 }
1166                 else
1167                 {
1168                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1169                                                                                 src_strd,
1170                                                                                 pu1_src_left_chroma,
1171                                                                                 pu1_src_top_chroma,
1172                                                                                 pu1_sao_src_chroma_top_left_ctb,
1173                                                                                 ps_sao->b5_cb_band_pos,
1174                                                                                 ps_sao->b5_cr_band_pos,
1175                                                                                 ai1_offset_cb,
1176                                                                                 ai1_offset_cr,
1177                                                                                 sao_wd_chroma,
1178                                                                                 sao_ht_chroma
1179                                                                                );
1180                 }
1181             }
1182 
1183             else // if(2 <= ps_sao->b3_cb_type_idx)
1184             {
1185                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1186                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1187                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1188                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1189 
1190                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1191                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1192                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1193                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1194                 for(i = 0; i < 8; i++)
1195                 {
1196                     au1_avail_chroma[i] = 255;
1197                     au1_tile_slice_boundary[i] = 0;
1198                     au4_idx_tl[i] = 0;
1199                     au4_ilf_across_tile_slice_enable[i] = 1;
1200                 }
1201                 /*In case of slices*/
1202                 {
1203                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1204                     {
1205                         if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1206                         {
1207                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1208                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1209                         }
1210                         else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1211                         {
1212                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1213                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1214                         }
1215                         ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1216                         ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1217 
1218                         ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1219                         ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1220 
1221                         ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
1222                         ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
1223 
1224                         ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1225                         ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1226 
1227                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1228                         {
1229 
1230                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1231                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1232                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1233                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1234                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1235                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1236 
1237                             if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1238                             {
1239                                 if(ps_sao_ctxt->i4_ctb_x == 1)
1240                                 {
1241                                     au4_idx_tl[6] = -1;
1242                                     au4_idx_tl[4] = -1;
1243                                 }
1244                                 else
1245                                 {
1246                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1247                                 }
1248                                 if(ps_sao_ctxt->i4_ctb_y == 1)
1249                                 {
1250                                     au4_idx_tl[5] = -1;
1251                                     au4_idx_tl[4] = -1;
1252                                 }
1253                                 else
1254                                 {
1255                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1256                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1257                                 }
1258                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1259                             }
1260 
1261                             /* Verify that the neighbor ctbs don't cross pic boundary
1262                              * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1263                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1264                             {
1265                                 au4_ilf_across_tile_slice_enable[4] = 0;
1266                                 au4_ilf_across_tile_slice_enable[6] = 0;
1267                             }
1268                             else
1269                             {
1270                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1271                             }
1272                             if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1273                             {
1274                                 au4_ilf_across_tile_slice_enable[5] = 0;
1275                                 au4_ilf_across_tile_slice_enable[4] = 0;
1276                             }
1277                             else
1278                             {
1279                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1280                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1281                             }
1282                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1283                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1284                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1285                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1286                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1287                             /*
1288                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1289                              * of the pixel having a greater address is checked. Accordingly, set the availability flags
1290                              */
1291                             for(i = 0; i < 8; i++)
1292                             {
1293                                 /*Sets the edges that lie on the slice/tile boundary*/
1294                                 if(au4_idx_tl[i] != idx_tl)
1295                                 {
1296                                     au1_tile_slice_boundary[i] = 1;
1297                                 }
1298                                 else
1299                                 {
1300                                     au4_ilf_across_tile_slice_enable[i] = 1;
1301                                 }
1302                             }
1303 
1304                             /*Reset indices*/
1305                             for(i = 0; i < 8; i++)
1306                             {
1307                                 au4_idx_tl[i] = 0;
1308                             }
1309                         }
1310                         if(ps_pps->i1_tiles_enabled_flag)
1311                         {
1312                             /* Calculate availability flags at tile boundary */
1313                             if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1314                             {
1315                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1316                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1317                                 {
1318                                     /*Set the boundary arrays*/
1319                                     /*Calculate tile indices for neighbor pixels*/
1320                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1321                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1322                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1323                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1324                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1325                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1326 
1327                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1328                                     {
1329                                         if(ps_sao_ctxt->i4_ctb_x == 1)
1330                                         {
1331                                             au4_idx_tl[6] = -1;
1332                                             au4_idx_tl[4] = -1;
1333                                         }
1334                                         else
1335                                         {
1336                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1337                                         }
1338                                         if(ps_sao_ctxt->i4_ctb_y == 1)
1339                                         {
1340                                             au4_idx_tl[5] = -1;
1341                                             au4_idx_tl[4] = -1;
1342                                         }
1343                                         else
1344                                         {
1345                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1346                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1347                                         }
1348                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1349                                     }
1350                                     for(i = 0; i < 8; i++)
1351                                     {
1352                                         /*Sets the edges that lie on the tile boundary*/
1353                                         if(au4_idx_tl[i] != idx_tl)
1354                                         {
1355                                             au1_tile_slice_boundary[i] |= 1;
1356                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1357                                         }
1358                                     }
1359                                 }
1360                             }
1361                         }
1362 
1363                         for(i = 0; i < 8; i++)
1364                         {
1365                             /*Mark a neighbor unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
1366                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1367                             {
1368                                 au1_avail_chroma[i] = 0;
1369                             }
1370                         }
1371                     }
1372                 }
1373 
1374                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1375                 {
1376                     au1_avail_chroma[0] = 0;
1377                     au1_avail_chroma[4] = 0;
1378                     au1_avail_chroma[6] = 0;
1379                 }
1380                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1381                 {
1382                     au1_avail_chroma[1] = 0;
1383                     au1_avail_chroma[5] = 0;
1384                     au1_avail_chroma[7] = 0;
1385                 }
1386 
1387                 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1388                 {
1389                     au1_avail_chroma[2] = 0;
1390                     au1_avail_chroma[4] = 0;
1391                     au1_avail_chroma[5] = 0;
1392                 }
1393                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1394                 {
1395                     au1_avail_chroma[3] = 0;
1396                     au1_avail_chroma[6] = 0;
1397                     au1_avail_chroma[7] = 0;
1398                 }
1399 
1400                 {
1401                     au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1402                     au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1403                     au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1404                     au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1405                     if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1406                     {
1407                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1408                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1409                     }
1410 
1411                     if(chroma_yuv420sp_vu)
1412                     {
1413                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1414                                                                              src_strd,
1415                                                                              pu1_src_left_chroma,
1416                                                                              pu1_src_top_chroma,
1417                                                                              pu1_sao_src_chroma_top_left_ctb,
1418                                                                              au1_src_top_right,
1419                                                                              au1_sao_src_top_left_chroma_bot_left,
1420                                                                              au1_avail_chroma,
1421                                                                              ai1_offset_cr,
1422                                                                              ai1_offset_cb,
1423                                                                              sao_wd_chroma,
1424                                                                              sao_ht_chroma);
1425                     }
1426                     else
1427                     {
1428                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1429                                                                              src_strd,
1430                                                                              pu1_src_left_chroma,
1431                                                                              pu1_src_top_chroma,
1432                                                                              pu1_sao_src_chroma_top_left_ctb,
1433                                                                              au1_src_top_right,
1434                                                                              au1_sao_src_top_left_chroma_bot_left,
1435                                                                              au1_avail_chroma,
1436                                                                              ai1_offset_cb,
1437                                                                              ai1_offset_cr,
1438                                                                              sao_wd_chroma,
1439                                                                              sao_ht_chroma);
1440                     }
1441                 }
1442             }
1443         }
1444         else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1445         {
1446             for(row = 0; row < sao_ht_chroma; row++)
1447             {
1448                 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1449                 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1450             }
1451             pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1452             pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1453 
1454             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1455         }
1456 
1457         pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1458         pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1459         ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1460     }
1461 
1462 
1463     /* Top CTB */
1464     if((ps_sao_ctxt->i4_ctb_y > 0))
1465     {
1466         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1467         WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1468         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1469         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1470 
1471         WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1472         WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1473         WORD32 au4_idx_t[8], idx_t;
1474 
1475         WORD32 remaining_cols;
1476 
1477         slice_header_t *ps_slice_hdr_top;
1478         {
1479             WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
1480                                         (ps_sao_ctxt->i4_ctb_x);
1481             ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
1482         }
1483 
1484         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1485         if(remaining_cols <= SAO_SHIFT_CTB)
1486         {
1487             sao_wd_luma += remaining_cols;
1488         }
1489         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1490         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1491         {
1492             sao_wd_chroma += remaining_cols;
1493         }
1494 
1495         pu1_src_luma -= (sao_ht_luma * src_strd);
1496         pu1_src_chroma -= (sao_ht_chroma * src_strd);
1497         ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1498         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1499         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1500         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1501         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1502 
1503         if(0 != sao_wd_luma)
1504         {
1505             if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
1506             {
1507                 if(0 == ps_sao->b3_y_type_idx)
1508                 {
1509                     /* Update left, top and top-left */
1510                     for(row = 0; row < sao_ht_luma; row++)
1511                     {
1512                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1513                     }
1514                     pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1515 
1516                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1517 
1518                 }
1519 
1520                 else if(1 == ps_sao->b3_y_type_idx)
1521                 {
1522                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1523                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1524                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1525                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1526 
1527                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1528                                                                               src_strd,
1529                                                                               pu1_src_left_luma,
1530                                                                               pu1_src_top_luma,
1531                                                                               pu1_sao_src_luma_top_left_ctb,
1532                                                                               ps_sao->b5_y_band_pos,
1533                                                                               ai1_offset_y,
1534                                                                               sao_wd_luma,
1535                                                                               sao_ht_luma
1536                                                                              );
1537                 }
1538 
1539                 else // if(2 <= ps_sao->b3_y_type_idx)
1540                 {
1541                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1542                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1543                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1544                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1545 
1546                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1547                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1548                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1549 
1550                     for(i = 0; i < 8; i++)
1551                     {
1552 
1553                         au4_ilf_across_tile_slice_enable[i] = 1;
1554                     }
1555                     /******************************************************************
1556                      * Derive the Top CTB's neighbor pixel's slice indices.
1557                      *
1558                      *               T_T
1559                      *          ____________
1560                      *         |    |       |
1561                      *         | T_L|  T    |T_R
1562                      *         |    | ______|____
1563                      *         |    |  T_D  |    |
1564                      *         |    |       |    |
1565                      *         |____|_______|    |
1566                      *              |            |
1567                      *              |            |
1568                      *              |____________|
1569                      *
1570                      *****************************************************************/
1571 
1572                     /*In case of slices*/
1573                     {
1574                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1575                         {
1576 
1577                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1578                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1579 
1580                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1581                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1582 
1583                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1584                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1585 
1586                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1587                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1588 
1589                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
1590                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1591 
1592                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1593                             {
1594                                 /*Calculate neighbor ctb slice indices*/
1595                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1596                                 {
1597                                     au4_idx_t[0] = -1;
1598                                     au4_idx_t[6] = -1;
1599                                     au4_idx_t[4] = -1;
1600                                 }
1601                                 else
1602                                 {
1603                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1604                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1605                                 }
1606                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1607                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1608                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1609                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1610 
1611                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1612                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1613                                 {
1614                                     au4_ilf_across_tile_slice_enable[4] = 0;
1615                                     au4_ilf_across_tile_slice_enable[6] = 0;
1616                                     au4_ilf_across_tile_slice_enable[0] = 0;
1617                                 }
1618                                 else
1619                                 {
1620                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1621                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1622                                 }
1623 
1624 
1625 
1626                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1627                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1628                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1629                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1630                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1631 
1632                                 if(au4_idx_t[6] < idx_t)
1633                                 {
1634                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1635                                 }
1636 
1637                                 /*
1638                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1639                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
1640                                  */
1641 
1642                                 for(i = 0; i < 8; i++)
1643                                 {
1644                                     /*Sets the edges that lie on the slice/tile boundary*/
1645                                     if(au4_idx_t[i] != idx_t)
1646                                     {
1647                                         au1_tile_slice_boundary[i] = 1;
1648                                         /*Check for slice flag at such boundaries*/
1649                                     }
1650                                     else
1651                                     {
1652                                         au4_ilf_across_tile_slice_enable[i] = 1;
1653                                     }
1654                                 }
1655                                 /*Reset indices*/
1656                                 for(i = 0; i < 8; i++)
1657                                 {
1658                                     au4_idx_t[i] = 0;
1659                                 }
1660                             }
1661 
1662                             if(ps_pps->i1_tiles_enabled_flag)
1663                             {
1664                                 /* Calculate availability flags at tile boundary */
1665                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1666                                 {
1667                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1668                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1669                                     {
1670                                         /*Calculate neighbor ctb tile indices*/
1671                                         if(0 == ps_sao_ctxt->i4_ctb_x)
1672                                         {
1673                                             au4_idx_t[0] = -1;
1674                                             au4_idx_t[6] = -1;
1675                                             au4_idx_t[4] = -1;
1676                                         }
1677                                         else
1678                                         {
1679                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1680                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1681                                         }
1682                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1683                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1684                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1685                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1686 
1687                                         for(i = 0; i < 8; i++)
1688                                         {
1689                                             /*Sets the edges that lie on the tile boundary*/
1690                                             if(au4_idx_t[i] != idx_t)
1691                                             {
1692                                                 au1_tile_slice_boundary[i] |= 1;
1693                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1694                                             }
1695                                         }
1696                                     }
1697                                 }
1698                             }
1699 
1700                             for(i = 0; i < 8; i++)
1701                             {
1702                                 /*Sets the edges that lie on the slice/tile boundary*/
1703                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1704                                 {
1705                                     au1_avail_luma[i] = 0;
1706                                 }
1707                             }
1708                         }
1709                     }
1710 
1711 
1712                     if(0 == ps_sao_ctxt->i4_ctb_x)
1713                     {
1714                         au1_avail_luma[0] = 0;
1715                         au1_avail_luma[4] = 0;
1716                         au1_avail_luma[6] = 0;
1717                     }
1718 
1719                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1720                     {
1721                         au1_avail_luma[1] = 0;
1722                         au1_avail_luma[5] = 0;
1723                         au1_avail_luma[7] = 0;
1724                     }
1725 
1726                     if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1727                     {
1728                         au1_avail_luma[2] = 0;
1729                         au1_avail_luma[4] = 0;
1730                         au1_avail_luma[5] = 0;
1731                     }
1732 
1733                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1734                     {
1735                         au1_avail_luma[3] = 0;
1736                         au1_avail_luma[6] = 0;
1737                         au1_avail_luma[7] = 0;
1738                     }
1739 
1740                     {
1741                         au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1742                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1743                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1744                                                                           src_strd,
1745                                                                           pu1_src_left_luma,
1746                                                                           pu1_src_top_luma,
1747                                                                           pu1_sao_src_luma_top_left_ctb,
1748                                                                           au1_src_top_right,
1749                                                                           &u1_sao_src_top_left_luma_bot_left,
1750                                                                           au1_avail_luma,
1751                                                                           ai1_offset_y,
1752                                                                           sao_wd_luma,
1753                                                                           sao_ht_luma);
1754                     }
1755                 }
1756             }
1757             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1758             {
1759                 /* Update left, top and top-left */
1760                 for(row = 0; row < sao_ht_luma; row++)
1761                 {
1762                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1763                 }
1764                 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1765 
1766                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1767             }
1768         }
1769 
1770         if(0 != sao_wd_chroma)
1771         {
1772             if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
1773             {
1774                 if(0 == ps_sao->b3_cb_type_idx)
1775                 {
1776 
1777                     for(row = 0; row < sao_ht_chroma; row++)
1778                     {
1779                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1780                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1781                     }
1782                     pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1783                     pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1784 
1785                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1786 
1787                 }
1788 
1789                 else if(1 == ps_sao->b3_cb_type_idx)
1790                 {
1791                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1792                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1793                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1794                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1795 
1796                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1797                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1798                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1799                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1800 
1801                     if(chroma_yuv420sp_vu)
1802                     {
1803                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1804                                                                                     src_strd,
1805                                                                                     pu1_src_left_chroma,
1806                                                                                     pu1_src_top_chroma,
1807                                                                                     pu1_sao_src_chroma_top_left_ctb,
1808                                                                                     ps_sao->b5_cr_band_pos,
1809                                                                                     ps_sao->b5_cb_band_pos,
1810                                                                                     ai1_offset_cr,
1811                                                                                     ai1_offset_cb,
1812                                                                                     sao_wd_chroma,
1813                                                                                     sao_ht_chroma
1814                                                                                    );
1815                     }
1816                     else
1817                     {
1818                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1819                                                                                     src_strd,
1820                                                                                     pu1_src_left_chroma,
1821                                                                                     pu1_src_top_chroma,
1822                                                                                     pu1_sao_src_chroma_top_left_ctb,
1823                                                                                     ps_sao->b5_cb_band_pos,
1824                                                                                     ps_sao->b5_cr_band_pos,
1825                                                                                     ai1_offset_cb,
1826                                                                                     ai1_offset_cr,
1827                                                                                     sao_wd_chroma,
1828                                                                                     sao_ht_chroma
1829                                                                                    );
1830                     }
1831                 }
1832                 else // if(2 <= ps_sao->b3_cb_type_idx)
1833                 {
1834                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1835                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1836                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1837                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1838 
1839                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1840                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1841                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1842                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1843 
1844                     for(i = 0; i < 8; i++)
1845                     {
1846                         au1_avail_chroma[i] = 255;
1847                         au1_tile_slice_boundary[i] = 0;
1848                         au4_idx_t[i] = 0;
1849                         au4_ilf_across_tile_slice_enable[i] = 1;
1850                     }
1851 
1852                     {
1853                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1854                         {
1855                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1856                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1857 
1858                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1859                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1860 
1861                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1862                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1863 
1864                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1865                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1866 
1867                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
1868                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1869 
1870                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1871                             {
1872                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1873                                 {
1874                                     au4_idx_t[0] = -1;
1875                                     au4_idx_t[6] = -1;
1876                                     au4_idx_t[4] = -1;
1877                                 }
1878                                 else
1879                                 {
1880                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1881                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1882                                 }
1883                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1884                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1885                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1886                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1887 
1888                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
1889 
1890                                 if(0 == ps_sao_ctxt->i4_ctb_x)
1891                                 {
1892                                     au4_ilf_across_tile_slice_enable[4] = 0;
1893                                     au4_ilf_across_tile_slice_enable[6] = 0;
1894                                     au4_ilf_across_tile_slice_enable[0] = 0;
1895                                 }
1896                                 else
1897                                 {
1898                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1899                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1900                                 }
1901 
1902                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1903                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1904                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1905                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1906                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1907 
1908                                 if(idx_t > au4_idx_t[6])
1909                                 {
1910                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1911                                 }
1912 
1913                                 /*
1914                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1915                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
1916                                  */
1917                                 for(i = 0; i < 8; i++)
1918                                 {
1919                                     /*Sets the edges that lie on the slice/tile boundary*/
1920                                     if(au4_idx_t[i] != idx_t)
1921                                     {
1922                                         au1_tile_slice_boundary[i] = 1;
1923                                     }
1924                                     else
1925                                     {
1926                                         /*Indicates that the neighbour belongs to same/dependent slice*/
1927                                         au4_ilf_across_tile_slice_enable[i] = 1;
1928                                     }
1929                                 }
1930                                 /*Reset indices*/
1931                                 for(i = 0; i < 8; i++)
1932                                 {
1933                                     au4_idx_t[i] = 0;
1934                                 }
1935                             }
1936                             if(ps_pps->i1_tiles_enabled_flag)
1937                             {
1938                                 /* Calculate availability flags at tile boundary */
1939                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1940                                 {
1941                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1942                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1943                                     {
1944                                         /*Calculate neighbor ctb tile indices*/
1945                                         if(0 == ps_sao_ctxt->i4_ctb_x)
1946                                         {
1947                                             au4_idx_t[0] = -1;
1948                                             au4_idx_t[6] = -1;
1949                                             au4_idx_t[4] = -1;
1950                                         }
1951                                         else
1952                                         {
1953                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1954                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1955                                         }
1956                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1957                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1958                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1959                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1960 
1961                                         for(i = 0; i < 8; i++)
1962                                         {
1963                                             /*Sets the edges that lie on the tile boundary*/
1964                                             if(au4_idx_t[i] != idx_t)
1965                                             {
1966                                                 au1_tile_slice_boundary[i] |= 1;
1967                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1968                                             }
1969                                         }
1970                                     }
1971                                 }
1972                             }
1973                             for(i = 0; i < 8; i++)
1974                             {
1975                                 /*Sets the edges that lie on the slice/tile boundary*/
1976                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1977                                 {
1978                                     au1_avail_chroma[i] = 0;
1979                                 }
1980                             }
1981 
1982                         }
1983                     }
1984                     if(0 == ps_sao_ctxt->i4_ctb_x)
1985                     {
1986                         au1_avail_chroma[0] = 0;
1987                         au1_avail_chroma[4] = 0;
1988                         au1_avail_chroma[6] = 0;
1989                     }
1990 
1991                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1992                     {
1993                         au1_avail_chroma[1] = 0;
1994                         au1_avail_chroma[5] = 0;
1995                         au1_avail_chroma[7] = 0;
1996                     }
1997 
1998                     if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1999                     {
2000                         au1_avail_chroma[2] = 0;
2001                         au1_avail_chroma[4] = 0;
2002                         au1_avail_chroma[5] = 0;
2003                     }
2004 
2005                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
2006                     {
2007                         au1_avail_chroma[3] = 0;
2008                         au1_avail_chroma[6] = 0;
2009                         au1_avail_chroma[7] = 0;
2010                     }
2011 
2012                     {
2013                         au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
2014                         au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
2015                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2016                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2017 
2018                         if(chroma_yuv420sp_vu)
2019                         {
2020                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2021                                                                                  src_strd,
2022                                                                                  pu1_src_left_chroma,
2023                                                                                  pu1_src_top_chroma,
2024                                                                                  pu1_sao_src_chroma_top_left_ctb,
2025                                                                                  au1_src_top_right,
2026                                                                                  au1_sao_src_top_left_chroma_bot_left,
2027                                                                                  au1_avail_chroma,
2028                                                                                  ai1_offset_cr,
2029                                                                                  ai1_offset_cb,
2030                                                                                  sao_wd_chroma,
2031                                                                                  sao_ht_chroma);
2032                         }
2033                         else
2034                         {
2035                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2036                                                                                  src_strd,
2037                                                                                  pu1_src_left_chroma,
2038                                                                                  pu1_src_top_chroma,
2039                                                                                  pu1_sao_src_chroma_top_left_ctb,
2040                                                                                  au1_src_top_right,
2041                                                                                  au1_sao_src_top_left_chroma_bot_left,
2042                                                                                  au1_avail_chroma,
2043                                                                                  ai1_offset_cb,
2044                                                                                  ai1_offset_cr,
2045                                                                                  sao_wd_chroma,
2046                                                                                  sao_ht_chroma);
2047                         }
2048                     }
2049 
2050                 }
2051             }
2052             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2053             {
2054                 for(row = 0; row < sao_ht_chroma; row++)
2055                 {
2056                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2057                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2058                 }
2059                 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2060                 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2061 
2062                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2063             }
2064         }
2065 
2066         pu1_src_luma += sao_ht_luma * src_strd;
2067         pu1_src_chroma += sao_ht_chroma * src_strd;
2068         ps_sao += (ps_sps->i2_pic_wd_in_ctb);
2069     }
2070 
2071     /* Left CTB */
2072     if(ps_sao_ctxt->i4_ctb_x > 0)
2073     {
2074         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
2075         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
2076         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2077         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2078 
2079         WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2080         WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2081         WORD32 au4_idx_l[8], idx_l;
2082 
2083         WORD32 remaining_rows;
2084         slice_header_t *ps_slice_hdr_left;
2085         {
2086             WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
2087                                         (ps_sao_ctxt->i4_ctb_x - 1);
2088             ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
2089         }
2090 
2091         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2092         if(remaining_rows <= SAO_SHIFT_CTB)
2093         {
2094             sao_ht_luma += remaining_rows;
2095         }
2096         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2097         if(remaining_rows <= SAO_SHIFT_CTB)
2098         {
2099             sao_ht_chroma += remaining_rows;
2100         }
2101 
2102         pu1_src_luma -= sao_wd_luma;
2103         pu1_src_chroma -= sao_wd_chroma;
2104         ps_sao -= 1;
2105         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2106         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2107         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2108         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2109 
2110 
2111         if(0 != sao_ht_luma)
2112         {
2113             if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
2114             {
2115                 if(0 == ps_sao->b3_y_type_idx)
2116                 {
2117                     /* Update left, top and top-left */
2118                     for(row = 0; row < sao_ht_luma; row++)
2119                     {
2120                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2121                     }
2122                     /*Update in next location*/
2123                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2124 
2125                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2126 
2127                 }
2128 
2129                 else if(1 == ps_sao->b3_y_type_idx)
2130                 {
2131                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2132                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2133                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2134                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2135 
2136                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2137                                                                               src_strd,
2138                                                                               pu1_src_left_luma,
2139                                                                               pu1_src_top_luma,
2140                                                                               pu1_sao_src_top_left_luma_curr_ctb,
2141                                                                               ps_sao->b5_y_band_pos,
2142                                                                               ai1_offset_y,
2143                                                                               sao_wd_luma,
2144                                                                               sao_ht_luma
2145                                                                              );
2146                 }
2147 
2148                 else // if(2 <= ps_sao->b3_y_type_idx)
2149                 {
2150                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2151                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2152                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2153                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2154 
2155                     for(i = 0; i < 8; i++)
2156                     {
2157                         au1_avail_luma[i] = 255;
2158                         au1_tile_slice_boundary[i] = 0;
2159                         au4_idx_l[i] = 0;
2160                         au4_ilf_across_tile_slice_enable[i] = 1;
2161                     }
2162                     /******************************************************************
2163                      * Derive the Left CTB's neighbour pixel's slice indices.
2164                      *
2165                      *
2166                      *          ____________
2167                      *         |    |       |
2168                      *         | L_T|       |
2169                      *         |____|_______|____
2170                      *         |    |       |    |
2171                      *     L_L |  L |  L_R  |    |
2172                      *         |____|_______|    |
2173                      *              |            |
2174                      *          L_D |            |
2175                      *              |____________|
2176                      *
2177                      *****************************************************************/
2178 
2179                     /*In case of slices or tiles*/
2180                     {
2181                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2182                         {
2183                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2184                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2185 
2186                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2187                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2188 
2189                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2190                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2191 
2192                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2193                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2194 
2195                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2196                             ctby_l = ps_sao_ctxt->i4_ctb_y;
2197 
2198                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2199                             {
2200                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2201                                 {
2202                                     au4_idx_l[2] = -1;
2203                                     au4_idx_l[4] = -1;
2204                                     au4_idx_l[5] = -1;
2205                                 }
2206                                 else
2207                                 {
2208                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2209                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2210                                 }
2211                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2212                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2213                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2214                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2215 
2216                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
2217                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2218                                 {
2219                                     au4_ilf_across_tile_slice_enable[2] = 0;
2220                                     au4_ilf_across_tile_slice_enable[4] = 0;
2221                                     au4_ilf_across_tile_slice_enable[5] = 0;
2222                                 }
2223                                 else
2224                                 {
2225                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2226                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2227 
2228                                 }
2229                                 //TODO: ILF flag checks for [0] and [6] is missing.
2230                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2231                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2232                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2233 
2234                                 if(idx_l < au4_idx_l[5])
2235                                 {
2236                                     au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2237                                 }
2238 
2239                                 /*
2240                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2241                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
2242                                  */
2243                                 for(i = 0; i < 8; i++)
2244                                 {
2245                                     /*Sets the edges that lie on the slice/tile boundary*/
2246                                     if(au4_idx_l[i] != idx_l)
2247                                     {
2248                                         au1_tile_slice_boundary[i] = 1;
2249                                     }
2250                                     else
2251                                     {
2252                                         au4_ilf_across_tile_slice_enable[i] = 1;
2253                                     }
2254                                 }
2255                                 /*Reset indices*/
2256                                 for(i = 0; i < 8; i++)
2257                                 {
2258                                     au4_idx_l[i] = 0;
2259                                 }
2260                             }
2261 
2262                             if(ps_pps->i1_tiles_enabled_flag)
2263                             {
2264                                 /* Calculate availability flags at tile boundary */
2265                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2266                                 {
2267                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2268                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2269                                     {
2270                                         if(0 == ps_sao_ctxt->i4_ctb_y)
2271                                         {
2272                                             au4_idx_l[2] = -1;
2273                                             au4_idx_l[4] = -1;
2274                                             au4_idx_l[5] = -1;
2275                                         }
2276                                         else
2277                                         {
2278                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2279                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2280                                         }
2281 
2282                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2283                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2284                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2285                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2286 
2287                                         for(i = 0; i < 8; i++)
2288                                         {
2289                                             /*Sets the edges that lie on the slice/tile boundary*/
2290                                             if(au4_idx_l[i] != idx_l)
2291                                             {
2292                                                 au1_tile_slice_boundary[i] |= 1;
2293                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2294                                             }
2295                                         }
2296                                     }
2297                                 }
2298                             }
2299 
2300                             for(i = 0; i < 8; i++)
2301                             {
2302                                 /*Mark neighbours unavailable when they lie across a slice/tile boundary and loop filtering across it is disabled*/
2303                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2304                                 {
2305                                     au1_avail_luma[i] = 0;
2306                                 }
2307                             }
2308                         }
2309                     }
2310                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2311                     {
2312                         au1_avail_luma[0] = 0;
2313                         au1_avail_luma[4] = 0;
2314                         au1_avail_luma[6] = 0;
2315                     }
2316                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2317                     {
2318                         au1_avail_luma[1] = 0;
2319                         au1_avail_luma[5] = 0;
2320                         au1_avail_luma[7] = 0;
2321                     }
2322 
2323                     if(0 == ps_sao_ctxt->i4_ctb_y)
2324                     {
2325                         au1_avail_luma[2] = 0;
2326                         au1_avail_luma[4] = 0;
2327                         au1_avail_luma[5] = 0;
2328                     }
2329 
2330                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2331                     {
2332                         au1_avail_luma[3] = 0;
2333                         au1_avail_luma[6] = 0;
2334                         au1_avail_luma[7] = 0;
2335                     }
2336 
2337                     {
2338                         au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2339                         u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2340                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2341                                                                           src_strd,
2342                                                                           pu1_src_left_luma,
2343                                                                           pu1_src_top_luma,
2344                                                                           pu1_sao_src_top_left_luma_curr_ctb,
2345                                                                           au1_src_top_right,
2346                                                                           &u1_sao_src_top_left_luma_bot_left,
2347                                                                           au1_avail_luma,
2348                                                                           ai1_offset_y,
2349                                                                           sao_wd_luma,
2350                                                                           sao_ht_luma);
2351                     }
2352 
2353                 }
2354             }
2355             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2356             {
2357                 /* Update left, top and top-left */
2358                 for(row = 0; row < sao_ht_luma; row++)
2359                 {
2360                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2361                 }
2362                 /*Update in next location*/
2363                 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2364 
2365                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2366             }
2367         }
2368 
2369         if(0 != sao_ht_chroma)
2370         {
2371             if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
2372             {
2373                 if(0 == ps_sao->b3_cb_type_idx)
2374                 {
2375                     for(row = 0; row < sao_ht_chroma; row++)
2376                     {
2377                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2378                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2379                     }
2380                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2381                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2382 
2383                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2384                 }
2385 
2386                 else if(1 == ps_sao->b3_cb_type_idx)
2387                 {
2388                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2389                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2390                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2391                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2392 
2393                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2394                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2395                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2396                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2397 
2398                     if(chroma_yuv420sp_vu)
2399                     {
2400                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2401                                                                                     src_strd,
2402                                                                                     pu1_src_left_chroma,
2403                                                                                     pu1_src_top_chroma,
2404                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
2405                                                                                     ps_sao->b5_cr_band_pos,
2406                                                                                     ps_sao->b5_cb_band_pos,
2407                                                                                     ai1_offset_cr,
2408                                                                                     ai1_offset_cb,
2409                                                                                     sao_wd_chroma,
2410                                                                                     sao_ht_chroma
2411                                                                                    );
2412                     }
2413                     else
2414                     {
2415                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2416                                                                                     src_strd,
2417                                                                                     pu1_src_left_chroma,
2418                                                                                     pu1_src_top_chroma,
2419                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
2420                                                                                     ps_sao->b5_cb_band_pos,
2421                                                                                     ps_sao->b5_cr_band_pos,
2422                                                                                     ai1_offset_cb,
2423                                                                                     ai1_offset_cr,
2424                                                                                     sao_wd_chroma,
2425                                                                                     sao_ht_chroma
2426                                                                                    );
2427                     }
2428                 }
2429 
2430                 else // if(2 <= ps_sao->b3_cb_type_idx)
2431                 {
2432                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2433                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2434                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2435                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2436 
2437                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2438                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2439                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2440                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2441 
2442                     for(i = 0; i < 8; i++)
2443                     {
2444                         au1_avail_chroma[i] = 255;
2445                         au1_tile_slice_boundary[i] = 0;
2446                         au4_idx_l[i] = 0;
2447                         au4_ilf_across_tile_slice_enable[i] = 1;
2448                     }
2449                     /*In case of slices or tiles*/
2450                     {
2451                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2452                         {
2453                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2454                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2455 
2456                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2457                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2458 
2459                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2460                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2461 
2462                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2463                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2464 
2465                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2466                             ctby_l = ps_sao_ctxt->i4_ctb_y;
2467 
2468                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2469                             {
2470                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2471                                 {
2472                                     au4_idx_l[2] = -1;
2473                                     au4_idx_l[4] = -1;
2474                                     au4_idx_l[5] = -1;
2475                                 }
2476                                 else
2477                                 {
2478                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2479                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2480                                 }
2481                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2482                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2483                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2484                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2485 
2486                                 /*Verify that the neighbour ctbs don't cross pic boundary.*/
2487                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2488                                 {
2489                                     au4_ilf_across_tile_slice_enable[2] = 0;
2490                                     au4_ilf_across_tile_slice_enable[4] = 0;
2491                                     au4_ilf_across_tile_slice_enable[5] = 0;
2492                                 }
2493                                 else
2494                                 {
2495                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2496                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2497                                 }
2498 
2499                                 if(au4_idx_l[5] > idx_l)
2500                                 {
2501                                     au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2502                                 }
2503 
2504                                 //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2505                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2506                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2507                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2508                                 /*
2509                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2510                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
2511                                  */
2512                                 for(i = 0; i < 8; i++)
2513                                 {
2514                                     /*Sets the edges that lie on the slice/tile boundary*/
2515                                     if(au4_idx_l[i] != idx_l)
2516                                     {
2517                                         au1_tile_slice_boundary[i] = 1;
2518                                     }
2519                                     else
2520                                     {
2521                                         au4_ilf_across_tile_slice_enable[i] = 1;
2522                                     }
2523                                 }
2524                                 /*Reset indices*/
2525                                 for(i = 0; i < 8; i++)
2526                                 {
2527                                     au4_idx_l[i] = 0;
2528                                 }
2529                             }
2530                             if(ps_pps->i1_tiles_enabled_flag)
2531                             {
2532                                 /* Calculate availability flags at tile boundary */
2533                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2534                                 {
2535                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2536                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2537                                     {
2538                                         if(0 == ps_sao_ctxt->i4_ctb_y)
2539                                         {
2540                                             au4_idx_l[2] = -1;
2541                                             au4_idx_l[4] = -1;
2542                                             au4_idx_l[5] = -1;
2543                                         }
2544                                         else
2545                                         {
2546                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2547                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2548                                         }
2549 
2550                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2551                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2552                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2553                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2554 
2555                                         for(i = 0; i < 8; i++)
2556                                         {
2557                                             /*Sets the edges that lie on the slice/tile boundary*/
2558                                             if(au4_idx_l[i] != idx_l)
2559                                             {
2560                                                 au1_tile_slice_boundary[i] |= 1;
2561                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2562                                             }
2563                                         }
2564                                     }
2565                                 }
2566                             }
2567                             for(i = 0; i < 8; i++)
2568                             {
2569                                 /*Mark neighbours unavailable when they lie across a slice/tile boundary and loop filtering across it is disabled*/
2570                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2571                                 {
2572                                     au1_avail_chroma[i] = 0;
2573                                 }
2574                             }
2575                         }
2576                     }
2577                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2578                     {
2579                         au1_avail_chroma[0] = 0;
2580                         au1_avail_chroma[4] = 0;
2581                         au1_avail_chroma[6] = 0;
2582                     }
2583 
2584                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2585                     {
2586                         au1_avail_chroma[1] = 0;
2587                         au1_avail_chroma[5] = 0;
2588                         au1_avail_chroma[7] = 0;
2589                     }
2590 
2591                     if(0 == ps_sao_ctxt->i4_ctb_y)
2592                     {
2593                         au1_avail_chroma[2] = 0;
2594                         au1_avail_chroma[4] = 0;
2595                         au1_avail_chroma[5] = 0;
2596                     }
2597 
2598                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
2599                     {
2600                         au1_avail_chroma[3] = 0;
2601                         au1_avail_chroma[6] = 0;
2602                         au1_avail_chroma[7] = 0;
2603                     }
2604 
2605                     {
2606                         au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2607                         au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2608                         au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2609                         au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2610                         //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2611                         //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2612                         if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2613                         {
2614                             au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2615                             au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2616                         }
2617 
2618 
2619                         if(chroma_yuv420sp_vu)
2620                         {
2621                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2622                                                                                  src_strd,
2623                                                                                  pu1_src_left_chroma,
2624                                                                                  pu1_src_top_chroma,
2625                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
2626                                                                                  au1_src_top_right,
2627                                                                                  au1_src_bot_left,
2628                                                                                  au1_avail_chroma,
2629                                                                                  ai1_offset_cr,
2630                                                                                  ai1_offset_cb,
2631                                                                                  sao_wd_chroma,
2632                                                                                  sao_ht_chroma);
2633                         }
2634                         else
2635                         {
2636                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2637                                                                                  src_strd,
2638                                                                                  pu1_src_left_chroma,
2639                                                                                  pu1_src_top_chroma,
2640                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
2641                                                                                  au1_src_top_right,
2642                                                                                  au1_src_bot_left,
2643                                                                                  au1_avail_chroma,
2644                                                                                  ai1_offset_cb,
2645                                                                                  ai1_offset_cr,
2646                                                                                  sao_wd_chroma,
2647                                                                                  sao_ht_chroma);
2648                         }
2649                     }
2650 
2651                 }
2652             }
2653             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2654             {
2655                 for(row = 0; row < sao_ht_chroma; row++)
2656                 {
2657                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2658                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2659                 }
2660                 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2661                 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2662 
2663                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2664             }
2665 
2666         }
2667         pu1_src_luma += sao_wd_luma;
2668         pu1_src_chroma += sao_wd_chroma;
2669         ps_sao += 1;
2670     }
2671 
2672 
2673     /* Current CTB */
2674     {
2675         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2676         WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2677         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2678         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2679         WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2680         WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2681         WORD32 au4_idx_c[8], idx_c;
2682 
2683         WORD32 remaining_rows;
2684         WORD32 remaining_cols;
2685 
2686         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2687         if(remaining_cols <= SAO_SHIFT_CTB)
2688         {
2689             sao_wd_luma += remaining_cols;
2690         }
2691         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2692         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2693         {
2694             sao_wd_chroma += remaining_cols;
2695         }
2696 
2697         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2698         if(remaining_rows <= SAO_SHIFT_CTB)
2699         {
2700             sao_ht_luma += remaining_rows;
2701         }
2702         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2703         if(remaining_rows <= SAO_SHIFT_CTB)
2704         {
2705             sao_ht_chroma += remaining_rows;
2706         }
2707 
2708         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2709         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2710         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2711         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2712 
2713         if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2714         {
2715             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2716             {
2717                 if(0 == ps_sao->b3_y_type_idx)
2718                 {
2719                     /* Update left, top and top-left */
2720                     for(row = 0; row < sao_ht_luma; row++)
2721                     {
2722                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2723                     }
2724                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2725 
2726                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2727 
2728                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2729 
2730                 }
2731 
2732                 else if(1 == ps_sao->b3_y_type_idx)
2733                 {
2734                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2735                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2736                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2737                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2738 
2739                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2740                                                                               src_strd,
2741                                                                               pu1_src_left_luma,
2742                                                                               pu1_src_top_luma,
2743                                                                               pu1_sao_src_top_left_luma_curr_ctb,
2744                                                                               ps_sao->b5_y_band_pos,
2745                                                                               ai1_offset_y,
2746                                                                               sao_wd_luma,
2747                                                                               sao_ht_luma
2748                                                                              );
2749                 }
2750 
2751                 else // if(2 <= ps_sao->b3_y_type_idx)
2752                 {
2753                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2754                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2755                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2756                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2757 
2758                     for(i = 0; i < 8; i++)
2759                     {
2760                         au1_avail_luma[i] = 255;
2761                         au1_tile_slice_boundary[i] = 0;
2762                         au4_idx_c[i] = 0;
2763                         au4_ilf_across_tile_slice_enable[i] = 1;
2764                     }
2765                     /******************************************************************
2766                      * Derive the slice indices of the pixels neighbouring the current CTB.
2767                      *
2768                      *
2769                      *          ____________
2770                      *         |    |       |
2771                      *         |    | C_T   |
2772                      *         |____|_______|____
2773                      *         |    |       |    |
2774                      *         | C_L|   C   | C_R|
2775                      *         |____|_______|    |
2776                      *              |  C_D       |
2777                      *              |            |
2778                      *              |____________|
2779                      *
2780                      *****************************************************************/
2781 
2782                     /*In case of slices*/
2783                     {
2784                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2785                         {
2786                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2787                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2788 
2789                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2790                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2791 
2792                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2793                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2794 
2795                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2796                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2797 
2798                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
2799                             ctby_c = ps_sao_ctxt->i4_ctb_y;
2800 
2801                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2802                             {
2803                                 if(0 == ps_sao_ctxt->i4_ctb_x)
2804                                 {
2805                                     au4_idx_c[6] = -1;
2806                                     au4_idx_c[0] = -1;
2807                                     au4_idx_c[4] = -1;
2808                                 }
2809                                 else
2810                                 {
2811                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2812                                 }
2813 
2814                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2815                                 {
2816                                     au4_idx_c[2] = -1;
2817                                     au4_idx_c[5] = -1;
2818                                     au4_idx_c[4] = -1;
2819                                 }
2820                                 else
2821                                 {
2822                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2823                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2824                                 }
2825                                 idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2826                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2827                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2828 
2829                                 if(0 == ps_sao_ctxt->i4_ctb_x)
2830                                 {
2831                                     au4_ilf_across_tile_slice_enable[6] = 0;
2832                                     au4_ilf_across_tile_slice_enable[0] = 0;
2833                                     au4_ilf_across_tile_slice_enable[4] = 0;
2834                                 }
2835                                 else
2836                                 {
2837                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2838                                     au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2839                                 }
2840                                 if(0 == ps_sao_ctxt->i4_ctb_y)
2841                                 {
2842                                     au4_ilf_across_tile_slice_enable[2] = 0;
2843                                     au4_ilf_across_tile_slice_enable[4] = 0;
2844                                     au4_ilf_across_tile_slice_enable[5] = 0;
2845                                 }
2846                                 else
2847                                 {
2848                                     au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2849                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2850                                 }
2851                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2852                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2853                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2854 
2855                                 if(au4_idx_c[6] < idx_c)
2856                                 {
2857                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2858                                 }
2859 
2860                                 /*
2861                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2862                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
2863                                  */
2864                                 for(i = 0; i < 8; i++)
2865                                 {
2866                                     /*Sets the edges that lie on the slice/tile boundary*/
2867                                     if(au4_idx_c[i] != idx_c)
2868                                     {
2869                                         au1_tile_slice_boundary[i] = 1;
2870                                     }
2871                                     else
2872                                     {
2873                                         au4_ilf_across_tile_slice_enable[i] = 1;
2874                                     }
2875                                 }
2876                                 /*Reset indices*/
2877                                 for(i = 0; i < 8; i++)
2878                                 {
2879                                     au4_idx_c[i] = 0;
2880                                 }
2881                             }
2882 
2883                             if(ps_pps->i1_tiles_enabled_flag)
2884                             {
2885                                 /* Calculate availability flags at tile boundary */
2886                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2887                                 {
2888                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2889                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2890                                     {
2891                                         if(0 == ps_sao_ctxt->i4_ctb_x)
2892                                         {
2893                                             au4_idx_c[6] = -1;
2894                                             au4_idx_c[0] = -1;
2895                                             au4_idx_c[4] = -1;
2896                                         }
2897                                         else
2898                                         {
2899                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2900                                         }
2901 
2902                                         if(0 == ps_sao_ctxt->i4_ctb_y)
2903                                         {
2904                                             au4_idx_c[2] = -1;
2905                                             au4_idx_c[5] = -1;
2906                                             au4_idx_c[4] = -1;
2907                                         }
2908                                         else
2909                                         {
2910                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2911                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2912                                         }
2913                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2914                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2915                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2916 
2917                                         for(i = 0; i < 8; i++)
2918                                         {
2919                                             /*Sets the edges that lie on the slice/tile boundary*/
2920                                             if(au4_idx_c[i] != idx_c)
2921                                             {
2922                                                 au1_tile_slice_boundary[i] |= 1;
2923                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2924                                             }
2925                                         }
2926                                     }
2927                                 }
2928                             }
2929 
2930                             for(i = 0; i < 8; i++)
2931                             {
2932                                 /* Mark a neighbour unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled */
2933                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2934                                 {
2935                                     au1_avail_luma[i] = 0;
2936                                 }
2937                             }
2938 
2939                         }
2940                     }
2941                     if(0 == ps_sao_ctxt->i4_ctb_x)
2942                     {
2943                         au1_avail_luma[0] = 0;
2944                         au1_avail_luma[4] = 0;
2945                         au1_avail_luma[6] = 0;
2946                     }
2947 
2948                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2949                     {
2950                         au1_avail_luma[1] = 0;
2951                         au1_avail_luma[5] = 0;
2952                         au1_avail_luma[7] = 0;
2953                     }
2954 
2955                     if(0 == ps_sao_ctxt->i4_ctb_y)
2956                     {
2957                         au1_avail_luma[2] = 0;
2958                         au1_avail_luma[4] = 0;
2959                         au1_avail_luma[5] = 0;
2960                     }
2961 
2962                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2963                     {
2964                         au1_avail_luma[3] = 0;
2965                         au1_avail_luma[6] = 0;
2966                         au1_avail_luma[7] = 0;
2967                     }
2968 
2969                     {
2970                         au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2971                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2972 
2973                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2974                                                                           src_strd,
2975                                                                           pu1_src_left_luma,
2976                                                                           pu1_src_top_luma,
2977                                                                           pu1_sao_src_top_left_luma_curr_ctb,
2978                                                                           au1_src_top_right,
2979                                                                           &u1_sao_src_top_left_luma_bot_left,
2980                                                                           au1_avail_luma,
2981                                                                           ai1_offset_y,
2982                                                                           sao_wd_luma,
2983                                                                           sao_ht_luma);
2984                     }
2985                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2986                     pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2987                 }
2988             }
2989             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2990             {
2991                 /* Update left, top and top-left */
2992                 for(row = 0; row < sao_ht_luma; row++)
2993                 {
2994                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2995                 }
2996                 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2997 
2998                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2999 
3000                 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
3001             }
3002         }
3003 
3004         if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
3005         {
3006             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
3007             {
3008                 if(0 == ps_sao->b3_cb_type_idx)
3009                 {
3010                     for(row = 0; row < sao_ht_chroma; row++)
3011                     {
3012                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3013                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3014                     }
3015                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3016                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3017 
3018                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3019 
3020                     pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3021                     pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3022                 }
3023 
3024                 else if(1 == ps_sao->b3_cb_type_idx)
3025                 {
3026                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3027                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3028                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3029                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3030 
3031                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3032                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3033                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3034                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3035 
3036                     if(chroma_yuv420sp_vu)
3037                     {
3038                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3039                                                                                     src_strd,
3040                                                                                     pu1_src_left_chroma,
3041                                                                                     pu1_src_top_chroma,
3042                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
3043                                                                                     ps_sao->b5_cr_band_pos,
3044                                                                                     ps_sao->b5_cb_band_pos,
3045                                                                                     ai1_offset_cr,
3046                                                                                     ai1_offset_cb,
3047                                                                                     sao_wd_chroma,
3048                                                                                     sao_ht_chroma
3049                                                                                    );
3050                     }
3051                     else
3052                     {
3053                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3054                                                                                     src_strd,
3055                                                                                     pu1_src_left_chroma,
3056                                                                                     pu1_src_top_chroma,
3057                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
3058                                                                                     ps_sao->b5_cb_band_pos,
3059                                                                                     ps_sao->b5_cr_band_pos,
3060                                                                                     ai1_offset_cb,
3061                                                                                     ai1_offset_cr,
3062                                                                                     sao_wd_chroma,
3063                                                                                     sao_ht_chroma
3064                                                                                    );
3065                     }
3066                 }
3067 
3068                 else // if(2 <= ps_sao->b3_cb_type_idx)
3069                 {
3070                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3071                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3072                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3073                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3074 
3075                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3076                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3077                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3078                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3079 
3080                     for(i = 0; i < 8; i++)
3081                     {
3082                         au1_avail_chroma[i] = 255;
3083                         au1_tile_slice_boundary[i] = 0;
3084                         au4_idx_c[i] = 0;
3085                         au4_ilf_across_tile_slice_enable[i] = 1;
3086                     }
3087                     {
3088                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3089                         {
3090                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
3091                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
3092 
3093                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
3094                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
3095 
3096                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
3097                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
3098 
3099                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
3100                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
3101 
3102                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
3103                             ctby_c = ps_sao_ctxt->i4_ctb_y;
3104 
3105                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
3106                             {
3107                                 if(0 == ps_sao_ctxt->i4_ctb_x)
3108                                 {
3109                                     au4_idx_c[0] = -1;
3110                                     au4_idx_c[4] = -1;
3111                                     au4_idx_c[6] = -1;
3112                                 }
3113                                 else
3114                                 {
3115                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3116                                 }
3117 
3118                                 if(0 == ps_sao_ctxt->i4_ctb_y)
3119                                 {
3120                                     au4_idx_c[2] = -1;
3121                                     au4_idx_c[4] = -1;
3122                                     au4_idx_c[5] = -1;
3123                                 }
3124                                 else
3125                                 {
3126                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3127                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3128                                 }
3129                                 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3130                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3131                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3132 
3133                                 if(0 == ps_sao_ctxt->i4_ctb_x)
3134                                 {
3135                                     au4_ilf_across_tile_slice_enable[0] = 0;
3136                                     au4_ilf_across_tile_slice_enable[4] = 0;
3137                                     au4_ilf_across_tile_slice_enable[6] = 0;
3138                                 }
3139                                 else
3140                                 {
3141                                     au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3142                                     au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3143                                 }
3144 
3145                                 if(0 == ps_sao_ctxt->i4_ctb_y)
3146                                 {
3147                                     au4_ilf_across_tile_slice_enable[2] = 0;
3148                                     au4_ilf_across_tile_slice_enable[4] = 0;
3149                                     au4_ilf_across_tile_slice_enable[5] = 0;
3150                                 }
3151                                 else
3152                                 {
3153                                     au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3154                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3155                                 }
3156 
3157                                 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3158                                 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3159                                 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3160 
3161                                 if(idx_c > au4_idx_c[6])
3162                                 {
3163                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3164                                 }
3165 
3166                                 /*
3167                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3168                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
3169                                  */
3170                                 for(i = 0; i < 8; i++)
3171                                 {
3172                                     /*Sets the edges that lie on the slice/tile boundary*/
3173                                     if(au4_idx_c[i] != idx_c)
3174                                     {
3175                                         au1_tile_slice_boundary[i] = 1;
3176                                     }
3177                                     else
3178                                     {
3179                                         au4_ilf_across_tile_slice_enable[i] = 1;
3180                                     }
3181                                 }
3182                                 /*Reset indices*/
3183                                 for(i = 0; i < 8; i++)
3184                                 {
3185                                     au4_idx_c[i] = 0;
3186                                 }
3187                             }
3188 
3189                             if(ps_pps->i1_tiles_enabled_flag)
3190                             {
3191                                 /* Calculate availability flags at tile boundary */
3192                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3193                                 {
3194                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3195                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3196                                     {
3197                                         if(0 == ps_sao_ctxt->i4_ctb_x)
3198                                         {
3199                                             au4_idx_c[6] = -1;
3200                                             au4_idx_c[0] = -1;
3201                                             au4_idx_c[4] = -1;
3202                                         }
3203                                         else
3204                                         {
3205                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3206                                         }
3207 
3208                                         if(0 == ps_sao_ctxt->i4_ctb_y)
3209                                         {
3210                                             au4_idx_c[2] = -1;
3211                                             au4_idx_c[5] = -1;
3212                                             au4_idx_c[4] = -1;
3213                                         }
3214                                         else
3215                                         {
3216                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3217                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3218                                         }
3219                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3220                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3221                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3222 
3223                                         for(i = 0; i < 8; i++)
3224                                         {
3225                                             /*Sets the edges that lie on the slice/tile boundary*/
3226                                             if(au4_idx_c[i] != idx_c)
3227                                             {
3228                                                 au1_tile_slice_boundary[i] |= 1;
3229                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3230                                             }
3231                                         }
3232                                     }
3233                                 }
3234                             }
3235 
3236                             for(i = 0; i < 8; i++)
3237                             {
3238                                 /*Sets the edges that lie on the slice/tile boundary*/
3239                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3240                                 {
3241                                     au1_avail_chroma[i] = 0;
3242                                 }
3243                             }
3244                         }
3245                     }
3246 
3247                     if(0 == ps_sao_ctxt->i4_ctb_x)
3248                     {
3249                         au1_avail_chroma[0] = 0;
3250                         au1_avail_chroma[4] = 0;
3251                         au1_avail_chroma[6] = 0;
3252                     }
3253 
3254                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3255                     {
3256                         au1_avail_chroma[1] = 0;
3257                         au1_avail_chroma[5] = 0;
3258                         au1_avail_chroma[7] = 0;
3259                     }
3260 
3261                     if(0 == ps_sao_ctxt->i4_ctb_y)
3262                     {
3263                         au1_avail_chroma[2] = 0;
3264                         au1_avail_chroma[4] = 0;
3265                         au1_avail_chroma[5] = 0;
3266                     }
3267 
3268                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
3269                     {
3270                         au1_avail_chroma[3] = 0;
3271                         au1_avail_chroma[6] = 0;
3272                         au1_avail_chroma[7] = 0;
3273                     }
3274 
3275                     {
3276                         au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3277                         au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3278 
3279                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3280                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3281 
3282                         if(chroma_yuv420sp_vu)
3283                         {
3284                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3285                                                                                  src_strd,
3286                                                                                  pu1_src_left_chroma,
3287                                                                                  pu1_src_top_chroma,
3288                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
3289                                                                                  au1_src_top_right,
3290                                                                                  au1_sao_src_top_left_chroma_bot_left,
3291                                                                                  au1_avail_chroma,
3292                                                                                  ai1_offset_cr,
3293                                                                                  ai1_offset_cb,
3294                                                                                  sao_wd_chroma,
3295                                                                                  sao_ht_chroma);
3296                         }
3297                         else
3298                         {
3299                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3300                                                                                  src_strd,
3301                                                                                  pu1_src_left_chroma,
3302                                                                                  pu1_src_top_chroma,
3303                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
3304                                                                                  au1_src_top_right,
3305                                                                                  au1_sao_src_top_left_chroma_bot_left,
3306                                                                                  au1_avail_chroma,
3307                                                                                  ai1_offset_cb,
3308                                                                                  ai1_offset_cr,
3309                                                                                  sao_wd_chroma,
3310                                                                                  sao_ht_chroma);
3311                         }
3312                     }
3313 
3314                 }
3315                 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3316                 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3317 
3318                 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3319                 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3320             }
3321             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3322             {
3323                 for(row = 0; row < sao_ht_chroma; row++)
3324                 {
3325                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3326                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3327                 }
3328                 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3329                 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3330 
3331                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3332 
3333                 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3334                 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3335             }
3336 
3337         }
3338     }
3339 
3340 
3341 
3342 
3343 /* If no loop filter is enabled copy the backed up values */
3344     {
3345         /* Luma */
3346         if(no_loop_filter_enabled_luma)
3347         {
3348             UWORD32 u4_no_loop_filter_flag;
3349             WORD32 loop_filter_bit_pos;
3350             WORD32 log2_min_cu = 3;
3351             WORD32 min_cu = (1 << log2_min_cu);
3352             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3353             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3354             WORD32 sao_blk_wd = ctb_size;
3355             WORD32 remaining_rows;
3356             WORD32 remaining_cols;
3357 
3358             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3359             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3360             if(remaining_rows <= SAO_SHIFT_CTB)
3361                 sao_blk_ht += remaining_rows;
3362             if(remaining_cols <= SAO_SHIFT_CTB)
3363                 sao_blk_wd += remaining_cols;
3364 
3365             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3366             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3367 
3368             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3369 
3370             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3371                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3372             if(ps_sao_ctxt->i4_ctb_x > 0)
3373                 loop_filter_bit_pos -= 1;
3374 
3375             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3376                             (loop_filter_bit_pos >> 3);
3377 
3378             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3379                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3380             {
3381                 WORD32 tmp_wd = sao_blk_wd;
3382 
3383                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3384                                 (loop_filter_bit_pos & 7);
3385                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3386 
3387                 if(u4_no_loop_filter_flag)
3388                 {
3389                     while(tmp_wd > 0)
3390                     {
3391                         if(CTZ(u4_no_loop_filter_flag))
3392                         {
3393                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3394                             pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3395                             tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3396                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3397                         }
3398                         else
3399                         {
3400                             for(row = 0; row < min_cu; row++)
3401                             {
3402                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3403                                 {
3404                                     pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3405                                 }
3406                             }
3407                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3408                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3409                             tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3410                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3411                         }
3412                     }
3413 
3414                     pu1_src_tmp_luma -= sao_blk_wd;
3415                     pu1_src_backup_luma -= sao_blk_wd;
3416                 }
3417 
3418                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
3419                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
3420             }
3421         }
3422 
3423         /* Chroma */
3424         if(no_loop_filter_enabled_chroma)
3425         {
3426             UWORD32 u4_no_loop_filter_flag;
3427             WORD32 loop_filter_bit_pos;
3428             WORD32 log2_min_cu = 3;
3429             WORD32 min_cu = (1 << log2_min_cu);
3430             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3431             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3432             WORD32 sao_blk_wd = ctb_size;
3433             WORD32 remaining_rows;
3434             WORD32 remaining_cols;
3435 
3436             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3437             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3438             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3439                 sao_blk_ht += remaining_rows;
3440             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3441                 sao_blk_wd += remaining_cols;
3442 
3443             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3444             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3445 
3446             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3447 
3448             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3449                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3450             if(ps_sao_ctxt->i4_ctb_x > 0)
3451                 loop_filter_bit_pos -= 2;
3452 
3453             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3454                             (loop_filter_bit_pos >> 3);
3455 
3456             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3457                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3458             {
3459                 WORD32 tmp_wd = sao_blk_wd;
3460 
3461                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3462                                 (loop_filter_bit_pos & 7);
3463                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3464 
3465                 if(u4_no_loop_filter_flag)
3466                 {
3467                     while(tmp_wd > 0)
3468                     {
3469                         if(CTZ(u4_no_loop_filter_flag))
3470                         {
3471                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3472                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3473                             tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3474                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3475                         }
3476                         else
3477                         {
3478                             for(row = 0; row < min_cu / 2; row++)
3479                             {
3480                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3481                                 {
3482                                     pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3483                                 }
3484                             }
3485 
3486                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3487                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3488                             tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3489                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3490                         }
3491                     }
3492 
3493                     pu1_src_tmp_chroma -= sao_blk_wd;
3494                     pu1_src_backup_chroma -= sao_blk_wd;
3495                 }
3496 
3497                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3498                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3499             }
3500         }
3501     }
3502 
3503 }
3504 
3505