1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevc_inter_pred.c
22 *
23 * @brief
24 * Calculates the prediction samples for a given CTB
25 *
26 * @author
27 * Srinivas T
28 *
29 * @par List of Functions:
30 * - ihevcd_inter_pred_ctb()
31 *
32 * @remarks
33 * None
34 *
35 *******************************************************************************
36 */
37 #include <stdio.h>
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <assert.h>
42
43 #include "ihevc_typedefs.h"
44 #include "iv.h"
45 #include "ivd.h"
46 #include "ihevcd_cxa.h"
47 #include "ithread.h"
48
49 #include "ihevc_defs.h"
50 #include "ihevc_debug.h"
51 #include "ihevc_structs.h"
52 #include "ihevc_macros.h"
53 #include "ihevc_platform_macros.h"
54 #include "ihevc_cabac_tables.h"
55 #include "ihevc_weighted_pred.h"
56
57 #include "ihevc_error.h"
58 #include "ihevc_common_tables.h"
59
60 #include "ihevcd_trace.h"
61 #include "ihevcd_defs.h"
62 #include "ihevcd_function_selector.h"
63 #include "ihevcd_structs.h"
64 #include "ihevcd_error.h"
65 #include "ihevcd_nal.h"
66 #include "ihevcd_bitstream.h"
67 #include "ihevcd_job_queue.h"
68 #include "ihevcd_utils.h"
69
70 #include "ihevc_inter_pred.h"
71 #include "ihevcd_profile.h"
72
73 static WORD8 gai1_luma_filter[4][NTAPS_LUMA] =
74 {
75 { 0, 0, 0, 64, 0, 0, 0, 0 },
76 { -1, 4, -10, 58, 17, -5, 1, 0 },
77 { -1, 4, -11, 40, 40, -11, 4, -1 },
78 { 0, 1, -5, 17, 58, -10, 4, -1 } };
79
80 /* The filter uses only the first four elements in each array */
81 static WORD8 gai1_chroma_filter[8][NTAPS_LUMA] =
82 {
83 { 0, 64, 0, 0, 0, 0, 0, 0 },
84 { -2, 58, 10, -2, 0, 0, 0, 0 },
85 { -4, 54, 16, -2, 0, 0, 0, 0 },
86 { -6, 46, 28, -4, 0, 0, 0, 0 },
87 { -4, 36, 36, -4, 0, 0, 0, 0 },
88 { -4, 28, 46, -6, 0, 0, 0, 0 },
89 { -2, 16, 54, -4, 0, 0, 0, 0 },
90 { -2, 10, 58, -2, 0, 0, 0, 0 } };
91
92 /**
93 *******************************************************************************
94 *
95 * @brief
96 * Inter prediction CTB level function
97 *
98 * @par Description:
99 * For a given CTB, Inter prediction followed by weighted prediction is
100 * done for all the PUs present in the CTB
101 *
102 * @param[in] ps_ctb
103 * Pointer to the CTB context
104 *
105 * @returns
106 *
107 * @remarks
108 *
109 *
110 *******************************************************************************
111 */
112
ihevcd_inter_pred_ctb(process_ctxt_t * ps_proc)113 void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc)
114 {
115 UWORD8 *ref_pic_luma_l0, *ref_pic_chroma_l0;
116 UWORD8 *ref_pic_luma_l1, *ref_pic_chroma_l1;
117
118 UWORD8 *ref_pic_l0 = NULL, *ref_pic_l1 = NULL;
119
120 slice_header_t *ps_slice_hdr;
121 sps_t *ps_sps;
122 pps_t *ps_pps;
123 pu_t *ps_pu;
124 codec_t *ps_codec;
125 WORD32 pu_indx;
126 WORD32 pu_x, pu_y;
127 WORD32 pu_wd, pu_ht;
128 WORD32 i4_pu_cnt;
129 WORD32 cur_ctb_idx;
130
131 WORD32 clr_indx;
132 WORD32 ntaps;
133
134
135
136 WORD32 ai2_xint[2] = { 0, 0 }, ai2_yint[2] = { 0, 0 };
137 WORD32 ai2_xfrac[2] = { 0, 0 }, ai2_yfrac[2] = { 0, 0 };
138
139 WORD32 weighted_pred, bi_pred;
140
141 WORD32 ref_strd;
142 UWORD8 *pu1_dst_luma, *pu1_dst_chroma;
143
144 UWORD8 *pu1_dst;
145
146 WORD16 *pi2_tmp1, *pi2_tmp2;
147
148 WORD32 luma_weight_l0, luma_weight_l1;
149 WORD32 chroma_weight_l0_cb, chroma_weight_l1_cb, chroma_weight_l0_cr, chroma_weight_l1_cr;
150 WORD32 luma_offset_l0, luma_offset_l1;
151 WORD32 chroma_offset_l0_cb, chroma_offset_l1_cb, chroma_offset_l0_cr, chroma_offset_l1_cr;
152 WORD32 shift, lvl_shift1, lvl_shift2;
153
154 pf_inter_pred func_ptr1, func_ptr2, func_ptr3, func_ptr4;
155 WORD32 func_indx1, func_indx2, func_indx3, func_indx4;
156 void *func_src;
157 void *func_dst;
158 WORD32 func_src_strd;
159 WORD32 func_dst_strd;
160 WORD8 *func_coeff;
161 WORD32 func_wd;
162 WORD32 func_ht;
163 WORD32 next_ctb_idx;
164 WORD8(*coeff)[8];
165 WORD32 chroma_yuv420sp_vu;
166
167 PROFILE_DISABLE_INTER_PRED();
168 ps_codec = ps_proc->ps_codec;
169 ps_slice_hdr = ps_proc->ps_slice_hdr;
170 ps_pps = ps_proc->ps_pps;
171 ps_sps = ps_proc->ps_sps;
172 cur_ctb_idx = ps_proc->i4_ctb_x
173 + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
174 /*
175 * In case of tiles, the next ctb belonging to the same tile must be used to get the PU index
176 */
177
178 next_ctb_idx = ps_proc->i4_next_pu_ctb_cnt;
179 i4_pu_cnt = ps_proc->pu4_pic_pu_idx[next_ctb_idx] - ps_proc->pu4_pic_pu_idx[cur_ctb_idx];
180
181 ps_pu = ps_proc->ps_pu;
182 ref_strd = ps_codec->i4_strd;
183 pi2_tmp1 = ps_proc->pi2_inter_pred_tmp_buf1;
184 pi2_tmp2 = ps_proc->pi2_inter_pred_tmp_buf2;
185 pu1_dst_luma = ps_proc->pu1_cur_pic_luma;
186 pu1_dst_chroma = ps_proc->pu1_cur_pic_chroma;
187
188 chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU);
189
190 ASSERT(PSLICE == ps_slice_hdr->i1_slice_type || BSLICE == ps_slice_hdr->i1_slice_type);
191
192 ref_pic_luma_l0 = NULL;
193 ref_pic_chroma_l0 = NULL;
194
195 luma_weight_l0 = 0;
196 chroma_weight_l0_cb = 0;
197 chroma_weight_l0_cr = 0;
198
199 luma_offset_l0 = 0;
200 chroma_offset_l0_cb = 0;
201 chroma_offset_l0_cr = 0;
202
203 ref_pic_luma_l1 = NULL;
204 ref_pic_chroma_l1 = NULL;
205
206 luma_weight_l1 = 0;
207 chroma_weight_l1_cb = 0;
208 chroma_weight_l1_cr = 0;
209
210 luma_offset_l1 = 0;
211 chroma_offset_l1_cb = 0;
212 chroma_offset_l1_cr = 0;
213
214 for(pu_indx = 0; pu_indx < i4_pu_cnt; pu_indx++, ps_pu++)
215 {
216 /* If the PU is intra then proceed to the next */
217 if(1 == ps_pu->b1_intra_flag)
218 continue;
219 pu_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (ps_pu->b4_pos_x << 2);
220 pu_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (ps_pu->b4_pos_y << 2);
221
222 pu_wd = (ps_pu->b4_wd + 1) << 2;
223 pu_ht = (ps_pu->b4_ht + 1) << 2;
224
225 weighted_pred = (ps_slice_hdr->i1_slice_type == PSLICE) ? ps_pps->i1_weighted_pred_flag :
226 ps_pps->i1_weighted_bipred_flag;
227 bi_pred = (ps_pu->b2_pred_mode == PRED_BI);
228
229 if(ps_pu->b2_pred_mode != PRED_L1)
230 {
231 pic_buf_t *ps_pic_buf_l0;
232
233 ps_pic_buf_l0 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list0[ps_pu->mv.i1_l0_ref_idx].pv_pic_buf));
234
235 ref_pic_luma_l0 = ps_pic_buf_l0->pu1_luma;
236 ref_pic_chroma_l0 = ps_pic_buf_l0->pu1_chroma;
237
238 luma_weight_l0 = ps_slice_hdr->s_wt_ofst.i2_luma_weight_l0[ps_pu->mv.i1_l0_ref_idx];
239 chroma_weight_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cb[ps_pu->mv.i1_l0_ref_idx];
240 chroma_weight_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cr[ps_pu->mv.i1_l0_ref_idx];
241
242 luma_offset_l0 = ps_slice_hdr->s_wt_ofst.i2_luma_offset_l0[ps_pu->mv.i1_l0_ref_idx];
243 chroma_offset_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cb[ps_pu->mv.i1_l0_ref_idx];
244 chroma_offset_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cr[ps_pu->mv.i1_l0_ref_idx];
245 }
246
247 if(ps_pu->b2_pred_mode != PRED_L0)
248 {
249 pic_buf_t *ps_pic_buf_l1;
250 ps_pic_buf_l1 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list1[ps_pu->mv.i1_l1_ref_idx].pv_pic_buf));
251 ref_pic_luma_l1 = ps_pic_buf_l1->pu1_luma;
252 ref_pic_chroma_l1 = ps_pic_buf_l1->pu1_chroma;
253
254 luma_weight_l1 = ps_slice_hdr->s_wt_ofst.i2_luma_weight_l1[ps_pu->mv.i1_l1_ref_idx];
255 chroma_weight_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cb[ps_pu->mv.i1_l1_ref_idx];
256 chroma_weight_l1_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cr[ps_pu->mv.i1_l1_ref_idx];
257
258 luma_offset_l1 = ps_slice_hdr->s_wt_ofst.i2_luma_offset_l1[ps_pu->mv.i1_l1_ref_idx];
259 chroma_offset_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cb[ps_pu->mv.i1_l1_ref_idx];
260 chroma_offset_l1_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cr[ps_pu->mv.i1_l1_ref_idx];
261 }
262
263 /*luma and chroma components*/
264 for(clr_indx = 0; clr_indx < 2; clr_indx++)
265 {
266 PROFILE_DISABLE_INTER_PRED_LUMA(clr_indx);
267 PROFILE_DISABLE_INTER_PRED_CHROMA(clr_indx);
268
269 if(clr_indx == 0)
270 {
271 WORD32 mv;
272 if(ps_pu->b2_pred_mode != PRED_L1)
273 {
274 mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
275 ai2_xint[0] = pu_x + (mv >> 2);
276 ai2_xfrac[0] = mv & 3;
277
278 mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
279 ai2_yint[0] = pu_y + (mv >> 2);
280 ai2_yfrac[0] = mv & 3;
281
282 ai2_xfrac[0] &= ps_codec->i4_mv_frac_mask;
283 ai2_yfrac[0] &= ps_codec->i4_mv_frac_mask;
284
285
286 ref_pic_l0 = ref_pic_luma_l0 + ai2_yint[0] * ref_strd
287 + ai2_xint[0];
288 }
289
290 if(ps_pu->b2_pred_mode != PRED_L0)
291 {
292
293 mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
294 ai2_xint[1] = pu_x + (mv >> 2);
295 ai2_xfrac[1] = mv & 3;
296
297 mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
298 ai2_yint[1] = pu_y + (mv >> 2);
299 ai2_yfrac[1] = mv & 3;
300
301 ref_pic_l1 = ref_pic_luma_l1 + ai2_yint[1] * ref_strd
302 + ai2_xint[1];
303 ai2_xfrac[1] &= ps_codec->i4_mv_frac_mask;
304 ai2_yfrac[1] &= ps_codec->i4_mv_frac_mask;
305
306 }
307
308 pu1_dst = pu1_dst_luma + pu_y * ref_strd + pu_x;
309
310 ntaps = NTAPS_LUMA;
311 coeff = gai1_luma_filter;
312 }
313
314 else
315 {
316 WORD32 mv;
317 /* xint is upshifted by 1 because the chroma components are */
318 /* interleaved which is not the assumption made by standard */
319 if(ps_pu->b2_pred_mode != PRED_L1)
320 {
321 mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
322 ai2_xint[0] = (pu_x / 2 + (mv >> 3)) << 1;
323 ai2_xfrac[0] = mv & 7;
324
325 mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
326 ai2_yint[0] = pu_y / 2 + (mv >> 3);
327 ai2_yfrac[0] = mv & 7;
328
329 ref_pic_l0 = ref_pic_chroma_l0 + ai2_yint[0] * ref_strd
330 + ai2_xint[0];
331
332 ai2_xfrac[0] &= ps_codec->i4_mv_frac_mask;
333 ai2_yfrac[0] &= ps_codec->i4_mv_frac_mask;
334
335 }
336
337 if(ps_pu->b2_pred_mode != PRED_L0)
338 {
339 mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
340 ai2_xint[1] = (pu_x / 2 + (mv >> 3)) << 1;
341 ai2_xfrac[1] = mv & 7;
342
343 mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
344 ai2_yint[1] = pu_y / 2 + (mv >> 3);
345 ai2_yfrac[1] = mv & 7;
346
347 ref_pic_l1 = ref_pic_chroma_l1 + ai2_yint[1] * ref_strd
348 + ai2_xint[1];
349 ai2_xfrac[1] &= ps_codec->i4_mv_frac_mask;
350 ai2_yfrac[1] &= ps_codec->i4_mv_frac_mask;
351
352 }
353
354 pu1_dst = pu1_dst_chroma + pu_y * ref_strd / 2 + pu_x;
355
356 ntaps = NTAPS_CHROMA;
357 coeff = gai1_chroma_filter;
358 }
359
360 if(ps_pu->b2_pred_mode != PRED_L1)
361 {
362 func_indx1 = 4 * (weighted_pred || bi_pred) + 1 + 11 * clr_indx;
363 func_indx1 += ai2_xfrac[0] ? 2 : 0;
364 func_indx1 += ai2_yfrac[0] ? 1 : 0;
365
366 func_indx2 = (ai2_xfrac[0] && ai2_yfrac[0])
367 * (9 + (weighted_pred || bi_pred)) + 11 * clr_indx;
368
369 func_ptr1 = ps_codec->apf_inter_pred[func_indx1];
370 func_ptr2 = ps_codec->apf_inter_pred[func_indx2];
371 }
372 else
373 {
374 func_ptr1 = NULL;
375 func_ptr2 = NULL;
376 }
377 if(ps_pu->b2_pred_mode != PRED_L0)
378 {
379 func_indx3 = 4 * (weighted_pred || bi_pred) + 1 + 11 * clr_indx;
380 func_indx3 += ai2_xfrac[1] ? 2 : 0;
381 func_indx3 += ai2_yfrac[1] ? 1 : 0;
382
383 func_indx4 = (ai2_xfrac[1] && ai2_yfrac[1])
384 * (9 + (weighted_pred || bi_pred)) + 11 * clr_indx;
385
386 func_ptr3 = ps_codec->apf_inter_pred[func_indx3];
387 func_ptr4 = ps_codec->apf_inter_pred[func_indx4];
388 }
389 else
390 {
391 func_ptr3 = NULL;
392 func_ptr4 = NULL;
393 }
394
395 /*Function 1*/
396 if(func_ptr1 != NULL)
397 {
398 func_src_strd = ref_strd;
399 func_src = (ai2_xfrac[0] && ai2_yfrac[0]) ?
400 ref_pic_l0 - (ntaps / 2 - 1) * func_src_strd :
401 ref_pic_l0;
402 func_dst = (weighted_pred || bi_pred) ?
403 (void *)pi2_tmp1 : (void *)pu1_dst;
404 if(ai2_xfrac[0] && ai2_yfrac[0])
405 {
406 func_dst = pi2_tmp1;
407 }
408
409 func_dst_strd = (weighted_pred || bi_pred
410 || (ai2_xfrac[0] && ai2_yfrac[0])) ?
411 pu_wd : ref_strd;
412 func_coeff = ai2_xfrac[0] ?
413 coeff[ai2_xfrac[0]] : coeff[ai2_yfrac[0]];
414 func_wd = pu_wd >> clr_indx;
415 func_ht = pu_ht >> clr_indx;
416 func_ht += (ai2_xfrac[0] && ai2_yfrac[0]) ? ntaps - 1 : 0;
417 func_ptr1(func_src, func_dst, func_src_strd, func_dst_strd,
418 func_coeff, func_ht, func_wd);
419 }
420
421 /*Function 2*/
422 if(func_ptr2 != NULL)
423 {
424 func_src_strd = pu_wd;
425 func_src = pi2_tmp1 + (ntaps / 2 - 1) * func_src_strd;
426 func_dst = (weighted_pred || bi_pred) ?
427 (void *)pi2_tmp1 : (void *)pu1_dst;
428
429 func_dst_strd = (weighted_pred || bi_pred) ?
430 pu_wd : ref_strd;
431 func_coeff = coeff[ai2_yfrac[0]];
432 func_wd = pu_wd >> clr_indx;
433 func_ht = pu_ht >> clr_indx;
434 func_ptr2(func_src, func_dst, func_src_strd, func_dst_strd,
435 func_coeff, func_ht, func_wd);
436 }
437
438 if(func_ptr3 != NULL)
439 {
440 func_src_strd = ref_strd;
441 func_src = (ai2_xfrac[1] && ai2_yfrac[1]) ?
442 ref_pic_l1 - (ntaps / 2 - 1) * func_src_strd :
443 ref_pic_l1;
444
445 func_dst = (weighted_pred || bi_pred) ?
446 (void *)pi2_tmp2 : (void *)pu1_dst;
447 if(ai2_xfrac[1] && ai2_yfrac[1])
448 {
449 func_dst = pi2_tmp2;
450 }
451 func_dst_strd = (weighted_pred || bi_pred
452 || (ai2_xfrac[1] && ai2_yfrac[1])) ?
453 pu_wd : ref_strd;
454 func_coeff = ai2_xfrac[1] ?
455 coeff[ai2_xfrac[1]] : coeff[ai2_yfrac[1]];
456 func_wd = pu_wd >> clr_indx;
457 func_ht = pu_ht >> clr_indx;
458 func_ht += (ai2_xfrac[1] && ai2_yfrac[1]) ? ntaps - 1 : 0;
459 func_ptr3(func_src, func_dst, func_src_strd, func_dst_strd,
460 func_coeff, func_ht, func_wd);
461
462 }
463
464 if(func_ptr4 != NULL)
465 {
466 func_src_strd = pu_wd;
467 func_src = pi2_tmp2 + (ntaps / 2 - 1) * func_src_strd;
468
469 func_dst = (weighted_pred || bi_pred) ?
470 (void *)pi2_tmp2 : (void *)pu1_dst;
471 func_dst_strd = (weighted_pred || bi_pred) ?
472 pu_wd : ref_strd;
473 func_coeff = coeff[ai2_yfrac[1]];
474 func_wd = pu_wd >> clr_indx;
475 func_ht = pu_ht >> clr_indx;
476 func_ptr4(func_src, func_dst, func_src_strd, func_dst_strd,
477 func_coeff, func_ht, func_wd);
478
479 }
480
481 PROFILE_DISABLE_INTER_PRED_LUMA_AVERAGING(clr_indx);
482 PROFILE_DISABLE_INTER_PRED_CHROMA_AVERAGING(clr_indx);
483
484
485 if((weighted_pred != 0) && (bi_pred != 0))
486 {
487 lvl_shift1 = 0;
488 lvl_shift2 = 0;
489 if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0]))
490 lvl_shift1 = (1 << 13);
491
492 if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1]))
493 lvl_shift2 = (1 << 13);
494
495
496 if(0 == clr_indx)
497 {
498 shift = ps_slice_hdr->s_wt_ofst.i1_luma_log2_weight_denom
499 + SHIFT_14_MINUS_BIT_DEPTH + 1;
500
501 ps_codec->s_func_selector.ihevc_weighted_pred_bi_fptr(pi2_tmp1,
502 pi2_tmp2,
503 pu1_dst,
504 pu_wd,
505 pu_wd,
506 ref_strd,
507 luma_weight_l0,
508 luma_offset_l0,
509 luma_weight_l1,
510 luma_offset_l1,
511 shift,
512 lvl_shift1,
513 lvl_shift2,
514 pu_ht,
515 pu_wd);
516 }
517 else
518 {
519 shift = ps_slice_hdr->s_wt_ofst.i1_chroma_log2_weight_denom
520 + SHIFT_14_MINUS_BIT_DEPTH + 1;
521
522 if(chroma_yuv420sp_vu)
523 {
524 ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr(pi2_tmp1,
525 pi2_tmp2,
526 pu1_dst,
527 pu_wd,
528 pu_wd,
529 ref_strd,
530 chroma_weight_l0_cr,
531 chroma_weight_l0_cb,
532 chroma_offset_l0_cr,
533 chroma_offset_l0_cb,
534 chroma_weight_l1_cr,
535 chroma_weight_l1_cb,
536 chroma_offset_l1_cr,
537 chroma_offset_l1_cb,
538 shift,
539 lvl_shift1,
540 lvl_shift2,
541 pu_ht >> 1,
542 pu_wd >> 1);
543 }
544 else
545 {
546 ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr(pi2_tmp1,
547 pi2_tmp2,
548 pu1_dst,
549 pu_wd,
550 pu_wd,
551 ref_strd,
552 chroma_weight_l0_cb,
553 chroma_weight_l0_cr,
554 chroma_offset_l0_cb,
555 chroma_offset_l0_cr,
556 chroma_weight_l1_cb,
557 chroma_weight_l1_cr,
558 chroma_offset_l1_cb,
559 chroma_offset_l1_cr,
560 shift,
561 lvl_shift1,
562 lvl_shift2,
563 pu_ht >> 1,
564 pu_wd >> 1);
565 }
566 }
567 }
568
569 else if((weighted_pred != 0) && (bi_pred == 0))
570 {
571 lvl_shift1 = 0;
572 if(ps_pu->b2_pred_mode == PRED_L0)
573 {
574 if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0]))
575 lvl_shift1 = (1 << 13);
576 }
577 else
578 {
579 if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1]))
580 lvl_shift1 = (1 << 13);
581 }
582
583 if(0 == clr_indx)
584 {
585 shift = ps_slice_hdr->s_wt_ofst.i1_luma_log2_weight_denom
586 + SHIFT_14_MINUS_BIT_DEPTH;
587
588 ps_codec->s_func_selector.ihevc_weighted_pred_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2,
589 pu1_dst,
590 pu_wd,
591 ref_strd,
592 ps_pu->b2_pred_mode == PRED_L0 ? luma_weight_l0 : luma_weight_l1,
593 ps_pu->b2_pred_mode == PRED_L0 ? luma_offset_l0 : luma_offset_l1,
594 shift,
595 lvl_shift1,
596 pu_ht,
597 pu_wd);
598 }
599 else
600 {
601 shift = ps_slice_hdr->s_wt_ofst.i1_chroma_log2_weight_denom
602 + SHIFT_14_MINUS_BIT_DEPTH;
603
604 if(chroma_yuv420sp_vu)
605 {
606 ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2,
607 pu1_dst,
608 pu_wd,
609 ref_strd,
610 ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cr : chroma_weight_l1_cr,
611 ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cb : chroma_weight_l1_cb,
612 ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cr : chroma_offset_l1_cr,
613 ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cb : chroma_offset_l1_cb,
614 shift,
615 lvl_shift1,
616 pu_ht >> 1,
617 pu_wd >> 1);
618 }
619 else
620 {
621 ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2,
622 pu1_dst,
623 pu_wd,
624 ref_strd,
625 ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cb : chroma_weight_l1_cb,
626 ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cr : chroma_weight_l1_cr,
627 ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cb : chroma_offset_l1_cb,
628 ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cr : chroma_offset_l1_cr,
629 shift,
630 lvl_shift1,
631 pu_ht >> 1,
632 pu_wd >> 1);
633 }
634 }
635 }
636
637 else if((weighted_pred == 0) && (bi_pred != 0))
638 {
639 lvl_shift1 = 0;
640 lvl_shift2 = 0;
641 if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0]))
642 lvl_shift1 = (1 << 13);
643
644 if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1]))
645 lvl_shift2 = (1 << 13);
646
647 if(clr_indx != 0)
648 {
649 pu_ht = (pu_ht >> 1);
650 }
651 ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr(pi2_tmp1,
652 pi2_tmp2,
653 pu1_dst,
654 pu_wd,
655 pu_wd,
656 ref_strd,
657 lvl_shift1,
658 lvl_shift2,
659 pu_ht,
660 pu_wd);
661
662 }
663 }
664 }
665 }
666