1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ihevce_inter_pred.c
25 *
26 * @brief
27 * Contains functions for giving out prediction samples for a given pu
28 *
29 * @author
30 * Ittiam
31 *
32 * @par List of Functions:
33 * - ihevc_inter_pred()
34 *
35 *
36 *******************************************************************************
37 */
38 /* System include files */
39 #include <stdio.h>
40 #include <string.h>
41 #include <stdlib.h>
42 #include <assert.h>
43 #include <stdarg.h>
44 #include <math.h>
45
46 /* User include files */
47 #include "ihevc_typedefs.h"
48 #include "itt_video_api.h"
49 #include "ihevce_api.h"
50
51 #include "rc_cntrl_param.h"
52 #include "rc_frame_info_collector.h"
53 #include "rc_look_ahead_params.h"
54
55 #include "ihevc_debug.h"
56 #include "ihevc_defs.h"
57 #include "ihevc_structs.h"
58 #include "ihevc_platform_macros.h"
59 #include "ihevc_deblk.h"
60 #include "ihevc_itrans_recon.h"
61 #include "ihevc_chroma_itrans_recon.h"
62 #include "ihevc_chroma_intra_pred.h"
63 #include "ihevc_intra_pred.h"
64 #include "ihevc_inter_pred.h"
65 #include "ihevc_mem_fns.h"
66 #include "ihevc_padding.h"
67 #include "ihevc_weighted_pred.h"
68 #include "ihevc_sao.h"
69 #include "ihevc_resi_trans.h"
70 #include "ihevc_quant_iquant_ssd.h"
71 #include "ihevc_cabac_tables.h"
72
73 #include "ihevce_defs.h"
74 #include "ihevce_lap_enc_structs.h"
75 #include "ihevce_multi_thrd_structs.h"
76 #include "ihevce_me_common_defs.h"
77 #include "ihevce_had_satd.h"
78 #include "ihevce_error_codes.h"
79 #include "ihevce_bitstream.h"
80 #include "ihevce_cabac.h"
81 #include "ihevce_rdoq_macros.h"
82 #include "ihevce_function_selector.h"
83 #include "ihevce_enc_structs.h"
84 #include "ihevce_entropy_structs.h"
85 #include "ihevce_cmn_utils_instr_set_router.h"
86 #include "ihevce_enc_loop_structs.h"
87 #include "ihevce_inter_pred.h"
88 #include "ihevc_weighted_pred.h"
89
90 /*****************************************************************************/
91 /* Global tables */
92 /*****************************************************************************/
93
94 /**
95 ******************************************************************************
96 * @brief Table of filter tap coefficients for HEVC luma inter prediction
97 * input : sub pel mv position (dx/dy = 0:3)
98 * output : filter coeffs to be used for that position
99 *
100 * @remarks See section 8.5.2.2.2.1 Luma sample interpolation process of HEVC
101 ******************************************************************************
102 */
103 WORD8 gai1_hevc_luma_filter_taps[4][NTAPS_LUMA] = { { 0, 0, 0, 64, 0, 0, 0, 0 },
104 { -1, 4, -10, 58, 17, -5, 1, 0 },
105 { -1, 4, -11, 40, 40, -11, 4, -1 },
106 { 0, 1, -5, 17, 58, -10, 4, -1 } };
107
108 /**
109 ******************************************************************************
110 * @brief Table of filter tap coefficients for HEVC chroma inter prediction
111 * input : chroma sub pel mv position (dx/dy = 0:7)
112 * output : filter coeffs to be used for that position
113 *
114 * @remarks See section 8.5.2.2.2.2 Chroma sample interpolation process of HEVC
115 The filter uses only the first four elements in each array
116 ******************************************************************************
117 */
118 WORD8 gai1_hevc_chroma_filter_taps[8][NTAPS_CHROMA] = { { 0, 64, 0, 0 }, { -2, 58, 10, -2 },
119 { -4, 54, 16, -2 }, { -6, 46, 28, -4 },
120 { -4, 36, 36, -4 }, { -4, 28, 46, -6 },
121 { -2, 16, 54, -4 }, { -2, 10, 58, -2 } };
122
123 /*****************************************************************************/
124 /* Function Definitions */
125 /*****************************************************************************/
126
127 /**
128 *******************************************************************************
129 *
130 * @brief
131 * Performs Luma inter pred based on sub pel position dxdy and store the result
132 * in a 16 bit destination buffer
133 *
134 * @param[in] pu1_src
135 * pointer to the source correspoding to integer pel position of a mv (left and
136 * top justified integer position)
137 *
138 * @param[out] pi2_dst
139 * WORD16 pointer to the destination
140 *
141 * @param[in] src_strd
142 * source buffer stride
143 *
144 * @param[in] dst_strd
145 * destination buffer stride
146 *
147 * @param[in] pi2_hdst_scratch
148 * scratch buffer for intermediate storage of horizontal filter output; used as
149 * input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
150 *
151 * Max scratch buffer required is w * (h + 7) * sizeof(WORD16)
152 *
153 * @param[in] ht
154 * width of the prediction unit
155 *
156 * @param[in] wd
157 * width of the prediction unit
158 *
159 * @param[in] dx
160 * qpel position[0:3] of mv in x direction
161 *
162 * @param[in] dy
163 * qpel position[0:3] of mv in y direction
164 *
165 * @returns
166 * none
167 *
168 * @remarks
169 *
170 *******************************************************************************
171 */
ihevce_luma_interpolate_16bit_dxdy(UWORD8 * pu1_src,WORD16 * pi2_dst,WORD32 src_strd,WORD32 dst_strd,WORD16 * pi2_hdst_scratch,WORD32 ht,WORD32 wd,WORD32 dy,WORD32 dx,func_selector_t * ps_func_selector)172 void ihevce_luma_interpolate_16bit_dxdy(
173 UWORD8 *pu1_src,
174 WORD16 *pi2_dst,
175 WORD32 src_strd,
176 WORD32 dst_strd,
177 WORD16 *pi2_hdst_scratch,
178 WORD32 ht,
179 WORD32 wd,
180 WORD32 dy,
181 WORD32 dx,
182 func_selector_t *ps_func_selector)
183 {
184 if((0 == dx) && (0 == dy))
185 {
186 /*--------- full pel position : copy input by upscaling-------*/
187
188 ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr(
189 pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[0][0], ht, wd);
190 }
191 else if((0 != dx) && (0 != dy))
192 {
193 /*----------sub pel in both x and y direction---------*/
194
195 UWORD8 *pu1_horz_src = pu1_src - (3 * src_strd);
196 WORD32 hdst_buf_stride = wd;
197 WORD16 *pi2_vert_src = pi2_hdst_scratch + (3 * hdst_buf_stride);
198
199 /* horizontal filtering of source done in a scratch buffer first */
200 ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
201 pu1_horz_src,
202 pi2_hdst_scratch,
203 src_strd,
204 hdst_buf_stride,
205 &gai1_hevc_luma_filter_taps[dx][0],
206 (ht + NTAPS_LUMA - 1),
207 wd);
208
209 /* vertical filtering on scratch buffer and stored in desitnation */
210 ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr(
211 pi2_vert_src,
212 pi2_dst,
213 hdst_buf_stride,
214 dst_strd,
215 &gai1_hevc_luma_filter_taps[dy][0],
216 ht,
217 wd);
218 }
219 else if(0 == dy)
220 {
221 /*----------sub pel in x direction only ---------*/
222
223 ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
224 pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dx][0], ht, wd);
225 }
226 else /* if (0 == dx) */
227 {
228 /*----------sub pel in y direction only ---------*/
229
230 ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr(
231 pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dy][0], ht, wd);
232 }
233 }
234
235 /**
236 *******************************************************************************
237 *
238 * @brief
239 * Performs Luma inter pred based on sub pel position dxdy and store the result
240 * in a 8 bit destination buffer
241 *
242 * @param[in] pu1_src
243 * pointer to the source correspoding to integer pel position of a mv (left and
244 * top justified integer position)
245 *
246 * @param[out] pu1_dst
247 * UWORD8 pointer to the destination
248 *
249 * @param[in] src_strd
250 * source buffer stride
251 *
252 * @param[in] dst_strd
253 * destination buffer stride
254 *
255 * @param[in] pi2_hdst_scratch
256 * scratch buffer for intermediate storage of horizontal filter output; used as
257 * input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
258 *
259 * Max scratch buffer required is w * (h + 7) * sizeof(WORD16)
260 *
261 * @param[in] ht
262 * width of the prediction unit
263 *
264 * @param[in] wd
265 * width of the prediction unit
266 *
267 * @param[in] dx
268 * qpel position[0:3] of mv in x direction
269 *
270 * @param[in] dy
271 * qpel position[0:3] of mv in y direction
272 *
273 * @returns
274 * none
275 *
276 * @remarks
277 *
278 *******************************************************************************
279 */
ihevce_luma_interpolate_8bit_dxdy(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD16 * pi2_hdst_scratch,WORD32 ht,WORD32 wd,WORD32 dy,WORD32 dx,func_selector_t * ps_func_selector)280 void ihevce_luma_interpolate_8bit_dxdy(
281 UWORD8 *pu1_src,
282 UWORD8 *pu1_dst,
283 WORD32 src_strd,
284 WORD32 dst_strd,
285 WORD16 *pi2_hdst_scratch,
286 WORD32 ht,
287 WORD32 wd,
288 WORD32 dy,
289 WORD32 dx,
290 func_selector_t *ps_func_selector)
291 {
292 if((0 == dx) && (0 == dy))
293 {
294 /*--------- full pel position : copy input as is -------*/
295
296 ps_func_selector->ihevc_inter_pred_luma_copy_fptr(
297 pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[0][0], ht, wd);
298 }
299 else if((0 != dx) && (0 != dy))
300 {
301 /*----------sub pel in both x and y direction---------*/
302
303 UWORD8 *pu1_horz_src = pu1_src - (3 * src_strd);
304 WORD32 hdst_buf_stride = wd;
305 WORD16 *pi2_vert_src = pi2_hdst_scratch + (3 * hdst_buf_stride);
306
307 /* horizontal filtering of source done in a scratch buffer first */
308 ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
309 pu1_horz_src,
310 pi2_hdst_scratch,
311 src_strd,
312 hdst_buf_stride,
313 &gai1_hevc_luma_filter_taps[dx][0],
314 (ht + NTAPS_LUMA - 1),
315 wd);
316
317 /* vertical filtering on scratch buffer and stored in desitnation */
318 ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr(
319 pi2_vert_src,
320 pu1_dst,
321 hdst_buf_stride,
322 dst_strd,
323 &gai1_hevc_luma_filter_taps[dy][0],
324 ht,
325 wd);
326 }
327 else if(0 == dy)
328 {
329 /*----------sub pel in x direction only ---------*/
330
331 ps_func_selector->ihevc_inter_pred_luma_horz_fptr(
332 pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dx][0], ht, wd);
333 }
334 else /* if (0 == dx) */
335 {
336 /*----------sub pel in y direction only ---------*/
337
338 ps_func_selector->ihevc_inter_pred_luma_vert_fptr(
339 pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dy][0], ht, wd);
340 }
341 }
342
343 /**
344 *******************************************************************************
345 *
346 * @brief
347 * Performs Luma prediction for a inter prediction unit(PU)
348 *
349 * @par Description:
350 * For a given PU, Inter prediction followed by weighted prediction (if
351 * required)
352 *
353 * @param[in] ps_inter_pred_ctxt
354 * context for inter prediction; contains ref list, weight offsets, ctb offsets
355 *
356 * @param[in] ps_pu
357 * pointer to PU structure whose inter prediction needs to be done
358 *
359 * @param[in] pu1_dst_buf
360 * pointer to destination buffer where the inter prediction is done
361 *
362 * @param[in] dst_stride
363 * pitch of the destination buffer
364 *
365 * @returns
366 * IV_FAIL for mvs going outside ref frame padded limits
367 * IV_SUCCESS after completing mc for given inter pu
368 *
369 * @remarks
370 *
371 *******************************************************************************
372 */
ihevce_luma_inter_pred_pu(void * pv_inter_pred_ctxt,pu_t * ps_pu,void * pv_dst_buf,WORD32 dst_stride,WORD32 i4_flag_inter_pred_source)373 IV_API_CALL_STATUS_T ihevce_luma_inter_pred_pu(
374 void *pv_inter_pred_ctxt,
375 pu_t *ps_pu,
376 void *pv_dst_buf,
377 WORD32 dst_stride,
378 WORD32 i4_flag_inter_pred_source)
379 {
380 inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt;
381 func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector;
382
383 WORD32 inter_pred_idc = ps_pu->b2_pred_mode;
384 UWORD8 *pu1_dst_buf = (UWORD8 *)pv_dst_buf;
385 WORD32 pu_wd = (ps_pu->b4_wd + 1) << 2;
386 WORD32 pu_ht = (ps_pu->b4_ht + 1) << 2;
387
388 WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag ||
389 ps_inter_pred_ctxt->i1_weighted_bipred_flag;
390
391 /* 16bit dest required for interpolate if weighted pred is on or bipred */
392 WORD32 store_16bit_output;
393
394 recon_pic_buf_t *ps_ref_pic_l0, *ps_ref_pic_l1;
395 UWORD8 *pu1_ref_pic, *pu1_ref_int_pel;
396 WORD32 ref_pic_stride;
397
398 /* offset of reference block in integer pel units */
399 WORD32 frm_x_ofst, frm_y_ofst;
400 WORD32 frm_x_pu, frm_y_pu;
401
402 /* scratch 16 bit buffers for interpolation in l0 and l1 direction */
403 WORD16 *pi2_scr_buf_l0 = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0];
404 WORD16 *pi2_scr_buf_l1 = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0];
405
406 /* scratch buffer for horizontal interpolation destination */
407 WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0];
408
409 WORD32 wgt0, wgt1, off0, off1, shift, lvl_shift0, lvl_shift1;
410
411 /* get PU's frm x and frm y offset */
412 frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2);
413 frm_y_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_y + (ps_pu->b4_pos_y << 2);
414
415 /* sanity checks */
416 ASSERT((wp_flag == 0) || (wp_flag == 1));
417 ASSERT(dst_stride >= pu_wd);
418 ASSERT(ps_pu->b1_intra_flag == 0);
419
420 lvl_shift0 = 0;
421 lvl_shift1 = 0;
422
423 if(wp_flag)
424 {
425 UWORD8 u1_is_wgt_pred_L0, u1_is_wgt_pred_L1;
426
427 if(inter_pred_idc != PRED_L1)
428 {
429 ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx];
430 u1_is_wgt_pred_L0 = ps_ref_pic_l0->s_weight_offset.u1_luma_weight_enable_flag;
431 }
432 if(inter_pred_idc != PRED_L0)
433 {
434 ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx];
435 u1_is_wgt_pred_L1 = ps_ref_pic_l1->s_weight_offset.u1_luma_weight_enable_flag;
436 }
437 if(inter_pred_idc == PRED_BI)
438 {
439 wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1);
440 }
441 else if(inter_pred_idc == PRED_L0)
442 {
443 wp_flag = u1_is_wgt_pred_L0;
444 }
445 else if(inter_pred_idc == PRED_L1)
446 {
447 wp_flag = u1_is_wgt_pred_L1;
448 }
449 else
450 {
451 /*other values are not allowed*/
452 assert(0);
453 }
454 }
455 store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag);
456
457 if(inter_pred_idc != PRED_L1)
458 {
459 /*****************************************************/
460 /* L0 inter prediction */
461 /*****************************************************/
462
463 /* motion vecs in qpel precision */
464 WORD32 mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
465 WORD32 mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
466
467 /* sub pel offsets in x and y direction w.r.t integer pel */
468 WORD32 dx = mv_x & 0x3;
469 WORD32 dy = mv_y & 0x3;
470
471 /* ref idx is currently stored in the lower 4bits */
472 WORD32 ref_idx = (ps_pu->mv.i1_l0_ref_idx);
473
474 /* x and y integer offsets w.r.t frame start */
475 frm_x_ofst = (frm_x_pu + (mv_x >> 2));
476 frm_y_ofst = (frm_y_pu + (mv_y >> 2));
477
478 ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ref_idx];
479
480 /* picture buffer start and stride */
481 if(i4_flag_inter_pred_source == 1)
482 {
483 pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc_src.pv_y_buf;
484 }
485 else
486 {
487 pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc.pv_y_buf;
488 }
489 ref_pic_stride = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_strd;
490
491 /* Error check for mvs going out of ref frame padded limits */
492 {
493 WORD32 min_x, max_x = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_wd;
494 WORD32 min_y, max_y = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_ht;
495
496 min_x =
497 -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT]
498 ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4)
499 : (PAD_HORZ - 4));
500
501 max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT]
502 ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4)
503 : (PAD_HORZ - 4);
504
505 min_y =
506 -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP]
507 ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4)
508 : (PAD_VERT - 4));
509
510 max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT]
511 ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4)
512 : (PAD_VERT - 4);
513
514 if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x)
515 //ASSERT(0);
516 return (IV_FAIL);
517
518 if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y)
519 //ASSERT(0);
520 return (IV_FAIL);
521 }
522
523 /* point to reference start location in ref frame */
524 /* Assuming clipping of mv is not required here as ME would */
525 /* take care of mv access not going beyond padded data */
526 pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
527
528 /* level shifted for subpel with both x and y componenet being non 0 */
529 /* this is because the interpolate function subtract this to contain */
530 /* the resulting data in 16 bits */
531 lvl_shift0 = (dx != 0) && (dy != 0) ? OFFSET14 : 0;
532
533 if(store_16bit_output)
534 {
535 /* do interpolation in 16bit L0 scratch buffer */
536 ihevce_luma_interpolate_16bit_dxdy(
537 pu1_ref_int_pel,
538 pi2_scr_buf_l0,
539 ref_pic_stride,
540 pu_wd,
541 pi2_horz_scratch,
542 pu_ht,
543 pu_wd,
544 dy,
545 dx,
546 ps_func_selector);
547 }
548 else
549 {
550 /* do interpolation in 8bit destination buffer and return */
551 ihevce_luma_interpolate_8bit_dxdy(
552 pu1_ref_int_pel,
553 pu1_dst_buf,
554 ref_pic_stride,
555 dst_stride,
556 pi2_horz_scratch,
557 pu_ht,
558 pu_wd,
559 dy,
560 dx,
561 ps_func_selector);
562
563 return (IV_SUCCESS);
564 }
565 }
566
567 if(inter_pred_idc != PRED_L0)
568 {
569 /*****************************************************/
570 /* L1 inter prediction */
571 /*****************************************************/
572
573 /* motion vecs in qpel precision */
574 WORD32 mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
575 WORD32 mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
576
577 /* sub pel offsets in x and y direction w.r.t integer pel */
578 WORD32 dx = mv_x & 0x3;
579 WORD32 dy = mv_y & 0x3;
580
581 /* ref idx is currently stored in the lower 4bits */
582 WORD32 ref_idx = (ps_pu->mv.i1_l1_ref_idx);
583
584 /* x and y integer offsets w.r.t frame start */
585 frm_x_ofst = (frm_x_pu + (mv_x >> 2));
586 frm_y_ofst = (frm_y_pu + (mv_y >> 2));
587
588 ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ref_idx];
589
590 /* picture buffer start and stride */
591
592 if(i4_flag_inter_pred_source == 1)
593 {
594 pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc_src.pv_y_buf;
595 }
596 else
597 {
598 pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc.pv_y_buf;
599 }
600 ref_pic_stride = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_strd;
601
602 /* Error check for mvs going out of ref frame padded limits */
603 {
604 WORD32 min_x, max_x = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_wd;
605 WORD32 min_y, max_y = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_ht;
606
607 min_x =
608 -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT]
609 ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4)
610 : (PAD_HORZ - 4));
611
612 max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT]
613 ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4)
614 : (PAD_HORZ - 4);
615
616 min_y =
617 -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP]
618 ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4)
619 : (PAD_VERT - 4));
620
621 max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT]
622 ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4)
623 : (PAD_VERT - 4);
624
625 if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x)
626 //ASSERT(0);
627 return (IV_FAIL);
628
629 if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y)
630 //ASSERT(0);
631 return (IV_FAIL);
632 }
633
634 /* point to reference start location in ref frame */
635 /* Assuming clipping of mv is not required here as ME would */
636 /* take care of mv access not going beyond padded data */
637 pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
638
639 /* level shifted for subpel with both x and y componenet being non 0 */
640 /* this is because the interpolate function subtract this to contain */
641 /* the resulting data in 16 bits */
642 lvl_shift1 = (dx != 0) && (dy != 0) ? OFFSET14 : 0;
643
644 if(store_16bit_output)
645 {
646 /* do interpolation in 16bit L1 scratch buffer */
647 ihevce_luma_interpolate_16bit_dxdy(
648 pu1_ref_int_pel,
649 pi2_scr_buf_l1,
650 ref_pic_stride,
651 pu_wd,
652 pi2_horz_scratch,
653 pu_ht,
654 pu_wd,
655 dy,
656 dx,
657 ps_func_selector);
658 }
659 else
660 {
661 /* do interpolation in 8bit destination buffer and return */
662 ihevce_luma_interpolate_8bit_dxdy(
663 pu1_ref_int_pel,
664 pu1_dst_buf,
665 ref_pic_stride,
666 dst_stride,
667 pi2_horz_scratch,
668 pu_ht,
669 pu_wd,
670 dy,
671 dx,
672 ps_func_selector);
673
674 return (IV_SUCCESS);
675 }
676 }
677
678 if((inter_pred_idc != PRED_BI) && wp_flag)
679 {
680 /*****************************************************/
681 /* unidirection weighted prediction */
682 /*****************************************************/
683 ihevce_wght_offst_t *ps_weight_offset;
684 WORD16 *pi2_src;
685 WORD32 lvl_shift;
686
687 /* intialize the weight, offsets and ref based on l0/l1 mode */
688 if(inter_pred_idc == PRED_L0)
689 {
690 pi2_src = pi2_scr_buf_l0;
691 ps_weight_offset = &ps_ref_pic_l0->s_weight_offset;
692 lvl_shift = lvl_shift0;
693 }
694 else
695 {
696 pi2_src = pi2_scr_buf_l1;
697 ps_weight_offset = &ps_ref_pic_l1->s_weight_offset;
698 lvl_shift = lvl_shift1;
699 }
700
701 wgt0 = ps_weight_offset->i2_luma_weight;
702 off0 = ps_weight_offset->i2_luma_offset;
703 shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH;
704
705 /* do the uni directional weighted prediction */
706 ps_func_selector->ihevc_weighted_pred_uni_fptr(
707 pi2_src, pu1_dst_buf, pu_wd, dst_stride, wgt0, off0, shift, lvl_shift, pu_ht, pu_wd);
708 }
709 else
710 {
711 /*****************************************************/
712 /* Bipred prediction */
713 /*****************************************************/
714
715 if(wp_flag)
716 {
717 /*****************************************************/
718 /* Bi pred weighted prediction */
719 /*****************************************************/
720 wgt0 = ps_ref_pic_l0->s_weight_offset.i2_luma_weight;
721 off0 = ps_ref_pic_l0->s_weight_offset.i2_luma_offset;
722
723 wgt1 = ps_ref_pic_l1->s_weight_offset.i2_luma_weight;
724 off1 = ps_ref_pic_l1->s_weight_offset.i2_luma_offset;
725
726 shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1;
727
728 ps_func_selector->ihevc_weighted_pred_bi_fptr(
729 pi2_scr_buf_l0,
730 pi2_scr_buf_l1,
731 pu1_dst_buf,
732 pu_wd,
733 pu_wd,
734 dst_stride,
735 wgt0,
736 off0,
737 wgt1,
738 off1,
739 shift,
740 lvl_shift0,
741 lvl_shift1,
742 pu_ht,
743 pu_wd);
744 }
745 else
746 {
747 /*****************************************************/
748 /* Default Bi pred prediction */
749 /*****************************************************/
750 ps_func_selector->ihevc_weighted_pred_bi_default_fptr(
751 pi2_scr_buf_l0,
752 pi2_scr_buf_l1,
753 pu1_dst_buf,
754 pu_wd,
755 pu_wd,
756 dst_stride,
757 lvl_shift0,
758 lvl_shift1,
759 pu_ht,
760 pu_wd);
761 }
762 }
763
764 return (IV_SUCCESS);
765 }
766
767 /**
768 *******************************************************************************
769 *
770 * @brief
771 * Performs Chroma inter pred based on sub pel position dxdy and store the
772 * result in a 16 bit destination buffer
773 *
774 * @param[in] pu1_src
775 * pointer to the source correspoding to integer pel position of a mv (left and
776 * top justified integer position)
777 *
778 * @param[out] pi2_dst
779 * WORD16 pointer to the destination
780 *
781 * @param[in] src_strd
782 * source buffer stride
783 *
784 * @param[in] dst_strd
785 * destination buffer stride
786 *
787 * @param[in] pi2_hdst_scratch
788 * scratch buffer for intermediate storage of horizontal filter output; used as
789 * input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
790 *
791 * Max scratch buffer required is w * (h + 3) * sizeof(WORD16)
792 *
793 * @param[in] ht
794 * width of the prediction unit
795 *
796 * @param[in] wd
797 * width of the prediction unit
798 *
799 * @param[in] dx
800 * 1/8th pel position[0:7] of mv in x direction
801 *
802 * @param[in] dy
803 * 1/8th pel position[0:7] of mv in y direction
804 *
805 * @returns
806 * none
807 *
808 * @remarks
809 *
810 *******************************************************************************
811 */
ihevce_chroma_interpolate_16bit_dxdy(UWORD8 * pu1_src,WORD16 * pi2_dst,WORD32 src_strd,WORD32 dst_strd,WORD16 * pi2_hdst_scratch,WORD32 ht,WORD32 wd,WORD32 dy,WORD32 dx,func_selector_t * ps_func_selector)812 void ihevce_chroma_interpolate_16bit_dxdy(
813 UWORD8 *pu1_src,
814 WORD16 *pi2_dst,
815 WORD32 src_strd,
816 WORD32 dst_strd,
817 WORD16 *pi2_hdst_scratch,
818 WORD32 ht,
819 WORD32 wd,
820 WORD32 dy,
821 WORD32 dx,
822 func_selector_t *ps_func_selector)
823 {
824 if((0 == dx) && (0 == dy))
825 {
826 /*--------- full pel position : copy input by upscaling-------*/
827
828 ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr(
829 pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[0][0], ht, wd);
830 }
831 else if((0 != dx) && (0 != dy))
832 {
833 /*----------sub pel in both x and y direction---------*/
834
835 UWORD8 *pu1_horz_src = pu1_src - src_strd;
836 WORD32 hdst_buf_stride = (wd << 1); /* uv interleave */
837 WORD16 *pi2_vert_src = pi2_hdst_scratch + hdst_buf_stride;
838
839 /* horizontal filtering of source done in a scratch buffer first */
840 ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
841 pu1_horz_src,
842 pi2_hdst_scratch,
843 src_strd,
844 hdst_buf_stride,
845 &gai1_hevc_chroma_filter_taps[dx][0],
846 (ht + NTAPS_CHROMA - 1),
847 wd);
848
849 /* vertical filtering on scratch buffer and stored in desitnation */
850 ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr(
851 pi2_vert_src,
852 pi2_dst,
853 hdst_buf_stride,
854 dst_strd,
855 &gai1_hevc_chroma_filter_taps[dy][0],
856 ht,
857 wd);
858 }
859 else if(0 == dy)
860 {
861 /*----------sub pel in x direction only ---------*/
862
863 ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
864 pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dx][0], ht, wd);
865 }
866 else /* if (0 == dx) */
867 {
868 /*----------sub pel in y direction only ---------*/
869
870 ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr(
871 pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dy][0], ht, wd);
872 }
873 }
874
875 /**
876 *******************************************************************************
877 *
878 * @brief
879 * Performs Chroma inter pred based on sub pel position dxdy and store the
880 * result in a 8 bit destination buffer
881 *
882 * @param[in] pu1_src
883 * pointer to the source correspoding to integer pel position of a mv (left and
884 * top justified integer position)
885 *
886 * @param[out] pu1_dst
887 * UWORD8 pointer to the destination
888 *
889 * @param[in] src_strd
890 * source buffer stride
891 *
892 * @param[in] dst_strd
893 * destination buffer stride
894 *
895 * @param[in] pi2_hdst_scratch
896 * scratch buffer for intermediate storage of horizontal filter output; used as
897 * input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
898 *
899 * Max scratch buffer required is w * (h + 3) * sizeof(WORD16)
900 *
901 * @param[in] ht
902 * width of the prediction unit
903 *
904 * @param[in] wd
905 * width of the prediction unit
906 *
907 * @param[in] dx
908 * 1/8th pel position[0:7] of mv in x direction
909 *
910 * @param[in] dy
911 * 1/8th pel position[0:7] of mv in y direction
912 *
913 * @returns
914 * none
915 *
916 * @remarks
917 *
918 *******************************************************************************
919 */
ihevce_chroma_interpolate_8bit_dxdy(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD16 * pi2_hdst_scratch,WORD32 ht,WORD32 wd,WORD32 dy,WORD32 dx,func_selector_t * ps_func_selector)920 void ihevce_chroma_interpolate_8bit_dxdy(
921 UWORD8 *pu1_src,
922 UWORD8 *pu1_dst,
923 WORD32 src_strd,
924 WORD32 dst_strd,
925 WORD16 *pi2_hdst_scratch,
926 WORD32 ht,
927 WORD32 wd,
928 WORD32 dy,
929 WORD32 dx,
930 func_selector_t *ps_func_selector)
931 {
932 if((0 == dx) && (0 == dy))
933 {
934 /*--------- full pel position : copy input as is -------*/
935 ps_func_selector->ihevc_inter_pred_chroma_copy_fptr(
936 pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[0][0], ht, wd);
937 }
938 else if((0 != dx) && (0 != dy))
939 {
940 /*----------sub pel in both x and y direction---------*/
941 UWORD8 *pu1_horz_src = pu1_src - src_strd;
942 WORD32 hdst_buf_stride = (wd << 1); /* uv interleave */
943 WORD16 *pi2_vert_src = pi2_hdst_scratch + hdst_buf_stride;
944
945 /* horizontal filtering of source done in a scratch buffer first */
946 ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
947 pu1_horz_src,
948 pi2_hdst_scratch,
949 src_strd,
950 hdst_buf_stride,
951 &gai1_hevc_chroma_filter_taps[dx][0],
952 (ht + NTAPS_CHROMA - 1),
953 wd);
954
955 /* vertical filtering on scratch buffer and stored in desitnation */
956 ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr(
957 pi2_vert_src,
958 pu1_dst,
959 hdst_buf_stride,
960 dst_strd,
961 &gai1_hevc_chroma_filter_taps[dy][0],
962 ht,
963 wd);
964 }
965 else if(0 == dy)
966 {
967 /*----------sub pel in x direction only ---------*/
968 ps_func_selector->ihevc_inter_pred_chroma_horz_fptr(
969 pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dx][0], ht, wd);
970 }
971 else /* if (0 == dx) */
972 {
973 /*----------sub pel in y direction only ---------*/
974 ps_func_selector->ihevc_inter_pred_chroma_vert_fptr(
975 pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dy][0], ht, wd);
976 }
977 }
978
979 /**
980 *******************************************************************************
981 *
982 * @brief
983 * Performs Chroma prediction for an inter prediction unit (PU)
984 *
985 * @par Description:
986 * For a given PU, Inter prediction followed by weighted prediction (if
987 * required). The reference and destination buffers are uv interleaved
988 *
989 * @param[in] pv_inter_pred_ctxt
990 * context for inter prediction; contains ref list, weight offsets, ctb offsets
991 *
992 * @param[in] ps_pu
993 * pointer to PU structure whose inter prediction needs to be done
994 *
995 * @param[in] pu1_dst_buf
996 * pointer to destination buffer where the inter prediction is done
997 *
998 * @param[in] dst_stride
999 * pitch of the destination buffer
1000 *
1001 * @returns
1002 * none
1003 *
1004 * @remarks
1005 *
1006 *******************************************************************************
1007 */
ihevce_chroma_inter_pred_pu(void * pv_inter_pred_ctxt,pu_t * ps_pu,UWORD8 * pu1_dst_buf,WORD32 dst_stride)1008 void ihevce_chroma_inter_pred_pu(
1009 void *pv_inter_pred_ctxt, pu_t *ps_pu, UWORD8 *pu1_dst_buf, WORD32 dst_stride)
1010 {
1011 inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt;
1012 func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector;
1013
1014 WORD32 inter_pred_idc = ps_pu->b2_pred_mode;
1015 UWORD8 u1_is_422 = (ps_inter_pred_ctxt->u1_chroma_array_type == 2);
1016 /* chroma width and height are half of luma width and height */
1017 WORD32 pu_wd_chroma = (ps_pu->b4_wd + 1) << 1;
1018 WORD32 pu_ht_chroma = (ps_pu->b4_ht + 1) << (u1_is_422 + 1);
1019
1020 WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag ||
1021 ps_inter_pred_ctxt->i1_weighted_bipred_flag;
1022
1023 /* 16bit dest required for interpolate if weighted pred is on or bipred */
1024 WORD32 store_16bit_output;
1025
1026 recon_pic_buf_t *ps_ref_pic_l0, *ps_ref_pic_l1;
1027 UWORD8 *pu1_ref_pic, *pu1_ref_int_pel;
1028 WORD32 ref_pic_stride;
1029
1030 /* offset of reference block in integer pel units */
1031 WORD32 frm_x_ofst, frm_y_ofst;
1032 WORD32 frm_x_pu, frm_y_pu;
1033
1034 /* scratch 16 bit buffers for interpolation in l0 and l1 direction */
1035 WORD16 *pi2_scr_buf_l0 = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0];
1036 WORD16 *pi2_scr_buf_l1 = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0];
1037
1038 /* scratch buffer for horizontal interpolation destination */
1039 WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0];
1040
1041 /* get PU's frm x and frm y offset : Note uv is interleaved */
1042 frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2);
1043 frm_y_pu = (ps_inter_pred_ctxt->i4_ctb_frm_pos_y >> (u1_is_422 == 0)) +
1044 (ps_pu->b4_pos_y << (u1_is_422 + 1));
1045
1046 /* sanity checks */
1047 ASSERT((wp_flag == 0) || (wp_flag == 1));
1048 ASSERT(dst_stride >= (pu_wd_chroma << 1)); /* uv interleaved */
1049 ASSERT(ps_pu->b1_intra_flag == 0);
1050
1051 if(wp_flag)
1052 {
1053 UWORD8 u1_is_wgt_pred_L0, u1_is_wgt_pred_L1;
1054
1055 if(inter_pred_idc != PRED_L1)
1056 {
1057 ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx];
1058 u1_is_wgt_pred_L0 = ps_ref_pic_l0->s_weight_offset.u1_chroma_weight_enable_flag;
1059 }
1060 if(inter_pred_idc != PRED_L0)
1061 {
1062 ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx];
1063 u1_is_wgt_pred_L1 = ps_ref_pic_l1->s_weight_offset.u1_chroma_weight_enable_flag;
1064 }
1065 if(inter_pred_idc == PRED_BI)
1066 {
1067 wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1);
1068 }
1069 else if(inter_pred_idc == PRED_L0)
1070 {
1071 wp_flag = u1_is_wgt_pred_L0;
1072 }
1073 else if(inter_pred_idc == PRED_L1)
1074 {
1075 wp_flag = u1_is_wgt_pred_L1;
1076 }
1077 else
1078 {
1079 /*other values are not allowed*/
1080 assert(0);
1081 }
1082 }
1083 store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag);
1084
1085 if(inter_pred_idc != PRED_L1)
1086 {
1087 /*****************************************************/
1088 /* L0 inter prediction(Chroma ) */
1089 /*****************************************************/
1090
1091 /* motion vecs in qpel precision */
1092 WORD32 mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
1093 WORD32 mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
1094
1095 /* sub pel offsets in x and y direction w.r.t integer pel */
1096 WORD32 dx = mv_x & 0x7;
1097 WORD32 dy = (mv_y & ((1 << (!u1_is_422 + 2)) - 1)) << u1_is_422;
1098
1099 /* ref idx is currently stored in the lower 4bits */
1100 WORD32 ref_idx = (ps_pu->mv.i1_l0_ref_idx);
1101
1102 /* x and y integer offsets w.r.t frame start */
1103
1104 frm_x_ofst = (frm_x_pu + ((mv_x >> 3) << 1)); /* uv interleaved */
1105 frm_y_ofst = (frm_y_pu + ((mv_y >> (3 - u1_is_422))));
1106
1107 ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ref_idx];
1108
1109 /* picture buffer start and stride */
1110 pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc.pv_u_buf;
1111 ref_pic_stride = ps_ref_pic_l0->s_yuv_buf_desc.i4_uv_strd;
1112
1113 /* point to reference start location in ref frame */
1114 /* Assuming clipping of mv is not required here as ME would */
1115 /* take care of mv access not going beyond padded data */
1116 pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
1117
1118 if(store_16bit_output)
1119 {
1120 /* do interpolation in 16bit L0 scratch buffer */
1121 ihevce_chroma_interpolate_16bit_dxdy(
1122 pu1_ref_int_pel,
1123 pi2_scr_buf_l0,
1124 ref_pic_stride,
1125 (pu_wd_chroma << 1),
1126 pi2_horz_scratch,
1127 pu_ht_chroma,
1128 pu_wd_chroma,
1129 dy,
1130 dx,
1131 ps_func_selector);
1132 }
1133 else
1134 {
1135 /* do interpolation in 8bit destination buffer and return */
1136 ihevce_chroma_interpolate_8bit_dxdy(
1137 pu1_ref_int_pel,
1138 pu1_dst_buf,
1139 ref_pic_stride,
1140 dst_stride,
1141 pi2_horz_scratch,
1142 pu_ht_chroma,
1143 pu_wd_chroma,
1144 dy,
1145 dx,
1146 ps_func_selector);
1147
1148 return;
1149 }
1150 }
1151
1152 if(inter_pred_idc != PRED_L0)
1153 {
1154 /*****************************************************/
1155 /* L1 inter prediction(Chroma) */
1156 /*****************************************************/
1157
1158 /* motion vecs in qpel precision */
1159 WORD32 mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
1160 WORD32 mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
1161
1162 /* sub pel offsets in x and y direction w.r.t integer pel */
1163 WORD32 dx = mv_x & 0x7;
1164 WORD32 dy = (mv_y & ((1 << (!u1_is_422 + 2)) - 1)) << u1_is_422;
1165
1166 /* ref idx is currently stored in the lower 4bits */
1167 WORD32 ref_idx = (ps_pu->mv.i1_l1_ref_idx);
1168
1169 /* x and y integer offsets w.r.t frame start */
1170 frm_x_ofst = (frm_x_pu + ((mv_x >> 3) << 1)); /* uv interleaved */
1171 frm_y_ofst = (frm_y_pu + ((mv_y >> (3 - u1_is_422))));
1172
1173 ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ref_idx];
1174
1175 /* picture buffer start and stride */
1176 pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc.pv_u_buf;
1177 ref_pic_stride = ps_ref_pic_l1->s_yuv_buf_desc.i4_uv_strd;
1178
1179 /* point to reference start location in ref frame */
1180 /* Assuming clipping of mv is not required here as ME would */
1181 /* take care of mv access not going beyond padded data */
1182 pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
1183
1184 if(store_16bit_output)
1185 {
1186 /* do interpolation in 16bit L1 scratch buffer */
1187 ihevce_chroma_interpolate_16bit_dxdy(
1188 pu1_ref_int_pel,
1189 pi2_scr_buf_l1,
1190 ref_pic_stride,
1191 (pu_wd_chroma << 1),
1192 pi2_horz_scratch,
1193 pu_ht_chroma,
1194 pu_wd_chroma,
1195 dy,
1196 dx,
1197 ps_func_selector);
1198 }
1199 else
1200 {
1201 /* do interpolation in 8bit destination buffer and return */
1202 ihevce_chroma_interpolate_8bit_dxdy(
1203 pu1_ref_int_pel,
1204 pu1_dst_buf,
1205 ref_pic_stride,
1206 dst_stride,
1207 pi2_horz_scratch,
1208 pu_ht_chroma,
1209 pu_wd_chroma,
1210 dy,
1211 dx,
1212 ps_func_selector);
1213
1214 return;
1215 }
1216 }
1217
1218 if((inter_pred_idc != PRED_BI) && wp_flag)
1219 {
1220 /*****************************************************/
1221 /* unidirection weighted prediction(Chroma) */
1222 /*****************************************************/
1223 ihevce_wght_offst_t *ps_weight_offset;
1224 WORD16 *pi2_src;
1225 WORD32 lvl_shift = 0;
1226 WORD32 wgt_cb, wgt_cr, off_cb, off_cr;
1227 WORD32 shift;
1228
1229 /* intialize the weight, offsets and ref based on l0/l1 mode */
1230 if(inter_pred_idc == PRED_L0)
1231 {
1232 pi2_src = pi2_scr_buf_l0;
1233 ps_weight_offset = &ps_ref_pic_l0->s_weight_offset;
1234 }
1235 else
1236 {
1237 pi2_src = pi2_scr_buf_l1;
1238 ps_weight_offset = &ps_ref_pic_l1->s_weight_offset;
1239 }
1240
1241 wgt_cb = ps_weight_offset->i2_cb_weight;
1242 off_cb = ps_weight_offset->i2_cb_offset;
1243 wgt_cr = ps_weight_offset->i2_cr_weight;
1244 off_cr = ps_weight_offset->i2_cr_offset;
1245
1246 shift = ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH;
1247
1248 /* do the uni directional weighted prediction */
1249 ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr(
1250 pi2_src,
1251 pu1_dst_buf,
1252 (pu_wd_chroma << 1),
1253 dst_stride,
1254 wgt_cb,
1255 wgt_cr,
1256 off_cb,
1257 off_cr,
1258 shift,
1259 lvl_shift,
1260 pu_ht_chroma,
1261 pu_wd_chroma);
1262 }
1263 else
1264 {
1265 /*****************************************************/
1266 /* Bipred prediction(Chroma) */
1267 /*****************************************************/
1268 if(wp_flag)
1269 {
1270 WORD32 wgt0_cb, wgt1_cb, wgt0_cr, wgt1_cr;
1271 WORD32 off0_cb, off1_cb, off0_cr, off1_cr;
1272 WORD32 shift;
1273
1274 /*****************************************************/
1275 /* Bi pred weighted prediction (Chroma) */
1276 /*****************************************************/
1277 wgt0_cb = ps_ref_pic_l0->s_weight_offset.i2_cb_weight;
1278 off0_cb = ps_ref_pic_l0->s_weight_offset.i2_cb_offset;
1279
1280 wgt0_cr = ps_ref_pic_l0->s_weight_offset.i2_cr_weight;
1281 off0_cr = ps_ref_pic_l0->s_weight_offset.i2_cr_offset;
1282
1283 wgt1_cb = ps_ref_pic_l1->s_weight_offset.i2_cb_weight;
1284 off1_cb = ps_ref_pic_l1->s_weight_offset.i2_cb_offset;
1285
1286 wgt1_cr = ps_ref_pic_l1->s_weight_offset.i2_cr_weight;
1287 off1_cr = ps_ref_pic_l1->s_weight_offset.i2_cr_offset;
1288
1289 shift = ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1;
1290
1291 ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr(
1292 pi2_scr_buf_l0,
1293 pi2_scr_buf_l1,
1294 pu1_dst_buf,
1295 (pu_wd_chroma << 1),
1296 (pu_wd_chroma << 1),
1297 dst_stride,
1298 wgt0_cb,
1299 wgt0_cr,
1300 off0_cb,
1301 off0_cr,
1302 wgt1_cb,
1303 wgt1_cr,
1304 off1_cb,
1305 off1_cr,
1306 shift,
1307 0,
1308 0,
1309 pu_ht_chroma,
1310 pu_wd_chroma);
1311 }
1312 else
1313 {
1314 /*****************************************************/
1315 /* Default Bi pred prediction (Chroma) */
1316 /*****************************************************/
1317 ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr(
1318 pi2_scr_buf_l0,
1319 pi2_scr_buf_l1,
1320 pu1_dst_buf,
1321 (pu_wd_chroma << 1),
1322 (pu_wd_chroma << 1),
1323 dst_stride,
1324 0,
1325 0,
1326 pu_ht_chroma,
1327 pu_wd_chroma);
1328 }
1329 }
1330 }
1331