1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*!
21 **************************************************************************
22 * \file ih264d_inter_pred.c
23 *
24 * \brief
25 * This file contains routines to perform MotionCompensation tasks
26 *
27 * Detailed_description
28 *
29 * \date
30 * 20/11/2002
31 *
32 * \author Arvind Raman
33 **************************************************************************
34 */
35
36 #include <string.h>
37 #include "ih264d_defs.h"
38 #include "ih264d_mvpred.h"
39 #include "ih264d_error_handler.h"
40 #include "ih264d_structs.h"
41 #include "ih264d_defs.h"
42 #include "ih264d_inter_pred.h"
43 #include "ih264_typedefs.h"
44 #include "ih264_macros.h"
45 #include "ih264_platform_macros.h"
46 #include "ih264d_debug.h"
47 #include "ih264d_tables.h"
48 #include "ih264d_mb_utils.h"
49
50
51 void ih264d_pad_on_demand(pred_info_t *ps_pred, UWORD8 lum_chrom_blk);
52
53
54
/*!
 **************************************************************************
 * \if Function name : ih264d_copy_multiplex_data \endif
 *
 * \brief
 *    Copies a block of uc_h rows, uc_w bytes each, between two strided
 *    buffers.
 *
 * \param puc_Source       : first byte of the source block
 * \param puc_To           : first byte of the destination block
 * \param uc_w             : number of bytes copied per row
 * \param uc_h             : number of rows copied
 * \param ui16_sourceWidth : byte step between consecutive source rows
 * \param ui16_toWidth     : byte step between consecutive destination rows
 *
 * \return
 *    None
 *
 * \note
 *    Rows are copied with memcpy, so a source row must not overlap the
 *    destination row it is copied to.
 **************************************************************************
 */
void ih264d_copy_multiplex_data(UWORD8 *puc_Source,
                                UWORD8 *puc_To,
                                UWORD32 uc_w,
                                UWORD32 uc_h,
                                UWORD32 ui16_sourceWidth,
                                UWORD32 ui16_toWidth)
{
    /* The counter has the same width as uc_h: an 8-bit counter would wrap */
    /* at 256 and never reach a larger row count.                          */
    UWORD32 u4_row;

    for(u4_row = 0; u4_row < uc_h; u4_row++)
    {
        memcpy(puc_To, puc_Source, uc_w);
        puc_To += ui16_toWidth;
        puc_Source += ui16_sourceWidth;
    }
}
71
72
73 /*!
74 **************************************************************************
75 * \if Function name : dma_2d1d \endif
76 *
77 * \brief
78 * 2D -> 1D linear DMA into the reference buffers
79 *
80 * \return
81 * None
82 **************************************************************************
83 */
ih264d_copy_2d1d(UWORD8 * puc_src,UWORD8 * puc_dest,UWORD16 ui16_srcWidth,UWORD16 ui16_widthToFill,UWORD16 ui16_heightToFill)84 void ih264d_copy_2d1d(UWORD8 *puc_src,
85 UWORD8 *puc_dest,
86 UWORD16 ui16_srcWidth,
87 UWORD16 ui16_widthToFill,
88 UWORD16 ui16_heightToFill)
89 {
90 UWORD32 uc_w, uc_h;
91 for(uc_h = ui16_heightToFill; uc_h != 0; uc_h--)
92 {
93 memcpy(puc_dest, puc_src, ui16_widthToFill);
94 puc_dest += ui16_widthToFill;
95 puc_src += ui16_srcWidth;
96 }
97 }
98
99 /*!
100 **************************************************************************
101 * \if Function name : ih264d_fill_pred_info \endif
102 *
103 * \brief
104 * Fills inter prediction related info
105 *
106 * \return
107 * None
108 **************************************************************************
109 */
ih264d_fill_pred_info(WORD16 * pi2_mv,WORD32 part_width,WORD32 part_height,WORD32 sub_mb_num,WORD32 pred_dir,pred_info_pkd_t * ps_pred_pkd,WORD8 i1_buf_id,WORD8 i1_ref_idx,UWORD32 * pu4_wt_offset,UWORD8 u1_pic_type)110 void ih264d_fill_pred_info(WORD16 *pi2_mv,WORD32 part_width,WORD32 part_height, WORD32 sub_mb_num,
111 WORD32 pred_dir,pred_info_pkd_t *ps_pred_pkd,WORD8 i1_buf_id,
112 WORD8 i1_ref_idx,UWORD32 *pu4_wt_offset,UWORD8 u1_pic_type)
113 {
114 WORD32 insert_bits;
115
116 ps_pred_pkd->i2_mv[0] = pi2_mv[0];
117 ps_pred_pkd->i2_mv[1] = pi2_mv[1];
118
119 insert_bits = sub_mb_num & 3; /*sub mb x*/
120 ps_pred_pkd->i1_size_pos_info = insert_bits;
121 insert_bits = sub_mb_num >> 2;/*sub mb y*/
122 ps_pred_pkd->i1_size_pos_info |= insert_bits << 2;
123 insert_bits = part_width >> 1;
124 ps_pred_pkd->i1_size_pos_info |= insert_bits << 4;
125 insert_bits = part_height >> 1;
126 ps_pred_pkd->i1_size_pos_info |= insert_bits << 6;
127
128 ps_pred_pkd->i1_ref_idx_info = i1_ref_idx;
129 ps_pred_pkd->i1_ref_idx_info |= (pred_dir << 6);
130 ps_pred_pkd->i1_buf_id = i1_buf_id;
131 ps_pred_pkd->pu4_wt_offst = pu4_wt_offset;
132 ps_pred_pkd->u1_pic_type = u1_pic_type;
133
134
135 }
136
137
138
139
140
141
142
143 /*****************************************************************************/
144 /* \if Function name : formMbPartInfo \endif */
145 /* */
146 /* \brief */
147 /* Form the Mb partition information structure, to be used by the MC */
148 /* routine */
149 /* */
150 /* \return */
151 /* None */
152 /* \note */
153 /* c_bufx is used to select PredBuffer, */
154 /* if it's only Forward/Backward prediction always buffer used is */
155 /* puc_MbLumaPredBuffer[0 to X1],pu1_mb_cb_pred_buffer[0 to X1] and */
156 /* pu1_mb_cr_pred_buffer[0 to X1] */
157 /* */
158 /* if it's bidirect for forward ..PredBuffer[0 to X1] buffer is used and */
159 /* ..PredBuffer[X2 to X3] for backward prediction. and */
160 /* */
161 /* Final predicted samples values are the average of ..PredBuffer[0 to X1]*/
162 /* and ..PredBuffer[X2 to X3] */
163 /* */
164 /* X1 is 255 for Luma and 63 for Chroma */
165 /* X2 is 256 for Luma and 64 for Chroma */
166 /* X3 is 511 for Luma and 127 for Chroma */
167 /* */
168 /* DD MM YYYY Author(s) Changes (Describe the changes made) */
169 /* 11 05 2005 SWRN Modified to handle pod */
170 /*****************************************************************************/
171
ih264d_form_mb_part_info_bp(pred_info_pkd_t * ps_pred_pkd,dec_struct_t * ps_dec,UWORD16 u2_mb_x,UWORD16 u2_mb_y,WORD32 mb_index,dec_mb_info_t * ps_cur_mb_info)172 WORD32 ih264d_form_mb_part_info_bp(pred_info_pkd_t *ps_pred_pkd,
173 dec_struct_t * ps_dec,
174 UWORD16 u2_mb_x,
175 UWORD16 u2_mb_y,
176 WORD32 mb_index,
177 dec_mb_info_t *ps_cur_mb_info)
178 {
179 /* The reference buffer pointer */
180 WORD32 i2_frm_x, i2_frm_y;
181 WORD32 i2_tmp_mv_x, i2_tmp_mv_y;
182 WORD32 i2_rec_x, i2_rec_y;
183
184 WORD32 u2_pic_ht;
185 WORD32 u2_frm_wd;
186 WORD32 u2_rec_wd;
187 UWORD8 u1_sub_x = 0,u1_sub_y=0 ;
188 UWORD8 u1_part_wd = 0,u1_part_ht = 0;
189 WORD16 i2_mv_x,i2_mv_y;
190
191 /********************************************/
192 /* i1_mc_wd width reqd for mcomp */
193 /* u1_dma_ht height reqd for mcomp */
194 /* u1_dma_wd width aligned to 4 bytes */
195 /* u1_dx fractional part of width */
196 /* u1_dx fractional part of height */
197 /********************************************/
198 UWORD32 i1_mc_wd;
199
200 WORD32 u1_dma_ht;
201
202 UWORD32 u1_dma_wd;
203 UWORD32 u1_dx;
204 UWORD32 u1_dy;
205 pred_info_t * ps_pred = ps_dec->ps_pred + ps_dec->u4_pred_info_idx;
206 dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice;
207 tfr_ctxt_t *ps_frame_buf;
208 struct pic_buffer_t *ps_ref_frm;
209 UWORD8 u1_scale_ref,u1_mbaff,u1_field;
210 pic_buffer_t **pps_ref_frame;
211 WORD8 i1_size_pos_info,i1_buf_id;
212
213 PROFILE_DISABLE_MB_PART_INFO()
214
215 UNUSED(ps_cur_mb_info);
216 i1_size_pos_info = ps_pred_pkd->i1_size_pos_info;
217 GET_XPOS_PRED(u1_sub_x,i1_size_pos_info);
218 GET_YPOS_PRED(u1_sub_y,i1_size_pos_info);
219 GET_WIDTH_PRED(u1_part_wd,i1_size_pos_info);
220 GET_HEIGHT_PRED(u1_part_ht,i1_size_pos_info);
221 i2_mv_x = ps_pred_pkd->i2_mv[0];
222 i2_mv_y = ps_pred_pkd->i2_mv[1];
223 i1_buf_id = ps_pred_pkd->i1_buf_id;
224
225
226 ps_ref_frm = ps_dec->apv_buf_id_pic_buf_map[i1_buf_id];
227
228
229 {
230 ps_frame_buf = &ps_dec->s_tran_addrecon;
231 }
232
233
234 /* Transfer Setup Y */
235 {
236 UWORD8 *pu1_pred, *pu1_rec;
237
238 /* calculating rounded motion vectors and fractional components */
239 i2_tmp_mv_x = i2_mv_x;
240 i2_tmp_mv_y = i2_mv_y;
241 u1_dx = i2_tmp_mv_x & 0x3;
242 u1_dy = i2_tmp_mv_y & 0x3;
243 i2_tmp_mv_x >>= 2;
244 i2_tmp_mv_y >>= 2;
245 i1_mc_wd = u1_part_wd << 2;
246 u1_dma_ht = u1_part_ht << 2;
247 if(u1_dx)
248 {
249 i2_tmp_mv_x -= 2;
250 i1_mc_wd += 5;
251 }
252 if(u1_dy)
253 {
254 i2_tmp_mv_y -= 2;
255 u1_dma_ht += 5;
256 }
257
258 /********************************************************************/
259 /* Calulating the horizontal and the vertical u4_ofst from top left */
260 /* edge of the reference frame, and subsequent clipping */
261 /********************************************************************/
262 u2_pic_ht = ps_dec->u2_pic_ht;
263 u2_frm_wd = ps_dec->u2_frm_wd_y;
264 i2_rec_x = u1_sub_x << 2;
265 i2_rec_y = u1_sub_y << 2;
266
267 i2_frm_x = (u2_mb_x << 4) + i2_rec_x + i2_tmp_mv_x;
268 i2_frm_y = (u2_mb_y << 4) + i2_rec_y + i2_tmp_mv_y;
269
270 i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_X_FRM, (ps_dec->u2_pic_wd - 1),
271 i2_frm_x);
272 i2_frm_y = CLIP3(((1 - u1_dma_ht)), (u2_pic_ht - (1)), i2_frm_y);
273
274 pu1_pred = ps_ref_frm->pu1_buf1 + i2_frm_y * u2_frm_wd + i2_frm_x;
275
276 u1_dma_wd = (i1_mc_wd + 3) & 0xFC;
277
278 /********************************************************************/
279 /* Calulating the horizontal and the vertical u4_ofst from top left */
280 /* edge of the recon buffer */
281 /********************************************************************/
282 u2_rec_wd = MB_SIZE;
283 {
284 u2_rec_wd = ps_dec->u2_frm_wd_y;
285 i2_rec_x += (mb_index << 4);
286 pu1_rec = ps_frame_buf->pu1_dest_y + i2_rec_y * u2_rec_wd
287 + i2_rec_x;
288 }
289
290 /* filling the pred and dma structures for Y */
291 u2_frm_wd = ps_dec->u2_frm_wd_y;
292
293 ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
294 ps_pred->i1_dma_ht = u1_dma_ht;
295 ps_pred->i1_mc_wd = i1_mc_wd;
296 ps_pred->u2_frm_wd = u2_frm_wd;
297 ps_pred->pu1_rec_y_u = pu1_rec;
298 ps_pred->u2_dst_stride = u2_rec_wd;
299
300 ps_pred->i1_mb_partwidth = u1_part_wd << 2;
301 ps_pred->i1_mb_partheight = u1_part_ht << 2;
302 ps_pred->u1_dydx = (u1_dy << 2) + u1_dx;
303
304 ps_pred->pu1_y_ref = pu1_pred;
305
306 }
307
308 /* Increment ps_pred index */
309 ps_pred++;
310
311 /* Transfer Setup U & V */
312 {
313 WORD32 i4_ref_offset, i4_rec_offset;
314 UWORD8 *pu1_pred_u, *pu1_pred_v;
315
316
317 /* calculating rounded motion vectors and fractional components */
318 i2_tmp_mv_x = i2_mv_x;
319 i2_tmp_mv_y = i2_mv_y;
320
321 /************************************************************************/
322 /* Table 8-9: Derivation of the vertical component of the chroma vector */
323 /* in field coding mode */
324 /************************************************************************/
325
326 /* Eighth sample of the chroma MV */
327 u1_dx = i2_tmp_mv_x & 0x7;
328 u1_dy = i2_tmp_mv_y & 0x7;
329
330 /********************************************************************/
331 /* Calculating the full pel MV for chroma which is 1/2 of the Luma */
332 /* MV in full pel units */
333 /********************************************************************/
334 i2_mv_x = i2_tmp_mv_x;
335 i2_mv_y = i2_tmp_mv_y;
336 i2_tmp_mv_x = SIGN_POW2_DIV(i2_tmp_mv_x, 3);
337 i2_tmp_mv_y = SIGN_POW2_DIV(i2_tmp_mv_y, 3);
338 i1_mc_wd = u1_part_wd << 1;
339 u1_dma_ht = u1_part_ht << 1;
340 if(u1_dx)
341 {
342 i2_tmp_mv_x -= (i2_mv_x < 0);
343 i1_mc_wd++;
344 }
345 if(u1_dy != 0)
346 {
347 i2_tmp_mv_y -= (i2_mv_y < 0);
348 u1_dma_ht++;
349 }
350
351 /********************************************************************/
352 /* Calulating the horizontal and the vertical u4_ofst from top left */
353 /* edge of the reference frame, and subsequent clipping */
354 /********************************************************************/
355 u2_pic_ht >>= 1;
356 u2_frm_wd = ps_dec->u2_frm_wd_uv;
357 i2_rec_x = u1_sub_x << 1;
358 i2_rec_y = u1_sub_y << 1;
359
360 i2_frm_x = (u2_mb_x << 3) + i2_rec_x + i2_tmp_mv_x;
361 i2_frm_y = (u2_mb_y << 3) + i2_rec_y + i2_tmp_mv_y;
362
363 i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_UV_FRM,
364 ((ps_dec->u2_pic_wd >> 1) - 1), i2_frm_x);
365 i2_frm_y = CLIP3(((1 - u1_dma_ht)), (u2_pic_ht - (1)), i2_frm_y);
366
367 i4_ref_offset = i2_frm_y * u2_frm_wd + i2_frm_x * YUV420SP_FACTOR;
368 u1_dma_wd = (i1_mc_wd + 3) & 0xFC;
369
370 /********************************************************************/
371 /* Calulating the horizontal and the vertical u4_ofst from top left */
372 /* edge of the recon buffer */
373 /********************************************************************/
374 /* CHANGED CODE */
375 u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR;
376 i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
377
378 {
379 u2_rec_wd = ps_dec->u2_frm_wd_uv;
380 i2_rec_x += (mb_index << 3);
381 i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
382 ps_pred->pu1_rec_y_u = ps_frame_buf->pu1_dest_u + i4_rec_offset;
383 ps_pred->u1_pi1_wt_ofst_rec_v = ps_frame_buf->pu1_dest_v
384 + i4_rec_offset;
385 }
386
387 /* CHANGED CODE */
388
389 /* filling the common pred structures for U */
390 u2_frm_wd = ps_dec->u2_frm_wd_uv;
391
392 ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
393 ps_pred->i1_dma_ht = u1_dma_ht;
394 ps_pred->i1_mc_wd = i1_mc_wd;
395
396 ps_pred->u2_frm_wd = u2_frm_wd;
397 ps_pred->u2_dst_stride = u2_rec_wd;
398
399 ps_pred->i1_mb_partwidth = u1_part_wd << 1;
400 ps_pred->i1_mb_partheight = u1_part_ht << 1;
401 ps_pred->u1_dydx = (u1_dy << 3) + u1_dx;
402
403 pu1_pred_u = ps_ref_frm->pu1_buf2 + i4_ref_offset;
404 pu1_pred_v = ps_ref_frm->pu1_buf3 + i4_ref_offset;
405
406 /* Copy U & V partitions */
407 ps_pred->pu1_u_ref = pu1_pred_u;
408
409 /* Increment the reference buffer Index */
410 ps_pred->pu1_v_ref = pu1_pred_v;
411 }
412
413 /* Increment ps_pred index */
414 ps_dec->u4_pred_info_idx += 2;
415
416 return OK;
417
418 }
419
420
421 /*****************************************************************************/
422 /* \if Function name : formMbPartInfo \endif */
423 /* */
424 /* \brief */
425 /* Form the Mb partition information structure, to be used by the MC */
426 /* routine */
427 /* */
428 /* \return */
429 /* None */
430 /* \note */
431 /* c_bufx is used to select PredBuffer, */
432 /* if it's only Forward/Backward prediction always buffer used is */
433 /* puc_MbLumaPredBuffer[0 to X1],pu1_mb_cb_pred_buffer[0 to X1] and */
434 /* pu1_mb_cr_pred_buffer[0 to X1] */
435 /* */
436 /* if it's bidirect for forward ..PredBuffer[0 to X1] buffer is used and */
437 /* ..PredBuffer[X2 to X3] for backward prediction. and */
438 /* */
439 /* Final predicted samples values are the average of ..PredBuffer[0 to X1]*/
440 /* and ..PredBuffer[X2 to X3] */
441 /* */
442 /* X1 is 255 for Luma and 63 for Chroma */
443 /* X2 is 256 for Luma and 64 for Chroma */
444 /* X3 is 511 for Luma and 127 for Chroma */
445 /* */
446 /* DD MM YYYY Author(s) Changes (Describe the changes made) */
447 /* 11 05 2005 SWRN Modified to handle pod */
448 /*****************************************************************************/
ih264d_form_mb_part_info_mp(pred_info_pkd_t * ps_pred_pkd,dec_struct_t * ps_dec,UWORD16 u2_mb_x,UWORD16 u2_mb_y,WORD32 mb_index,dec_mb_info_t * ps_cur_mb_info)449 WORD32 ih264d_form_mb_part_info_mp(pred_info_pkd_t *ps_pred_pkd,
450 dec_struct_t * ps_dec,
451 UWORD16 u2_mb_x,
452 UWORD16 u2_mb_y,
453 WORD32 mb_index,
454 dec_mb_info_t *ps_cur_mb_info)
455 {
456 /* The reference buffer pointer */
457 UWORD8 *pu1_ref_buf;
458 WORD16 i2_frm_x, i2_frm_y, i2_tmp_mv_x, i2_tmp_mv_y, i2_pod_ht;
459 WORD16 i2_rec_x, i2_rec_y;
460 UWORD16 u2_pic_ht, u2_frm_wd, u2_rec_wd;
461 UWORD8 u1_wght_pred_type, u1_wted_bipred_idc;
462 UWORD16 u2_tot_ref_scratch_size;
463 UWORD8 u1_sub_x = 0;
464 UWORD8 u1_sub_y = 0;
465 UWORD8 u1_is_bi_dir = 0;
466
467 /********************************************/
468 /* i1_mc_wd width reqd for mcomp */
469 /* u1_dma_ht height reqd for mcomp */
470 /* u1_dma_wd width aligned to 4 bytes */
471 /* u1_dx fractional part of width */
472 /* u1_dx fractional part of height */
473 /********************************************/
474 UWORD8 i1_mc_wd, u1_dma_ht, u1_dma_wd, u1_dx, u1_dy;
475 pred_info_t * ps_pred ;
476 dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice;
477 const UWORD8 u1_slice_type = ps_dec->ps_decode_cur_slice->slice_type;
478 UWORD8 u1_pod_bot, u1_pod_top;
479
480 /* load the pictype for pod u4_flag & chroma motion vector derivation */
481 UWORD8 u1_ref_pic_type ;
482
483 /* set default value to flags specifying field nature of picture & mb */
484 UWORD32 u1_mb_fld = 0, u1_mb_or_pic_fld;
485 UWORD32 u1_mb_bot = 0, u1_pic_bot = 0, u1_mb_or_pic_bot;
486 tfr_ctxt_t *ps_frame_buf;
487 /* calculate flags specifying field nature of picture & mb */
488 const UWORD32 u1_pic_fld = ps_cur_slice->u1_field_pic_flag;
489 WORD8 i1_pred;
490 WORD8 i1_size_pos_info,i1_buf_id,i1_ref_idx;
491 UWORD8 u1_part_wd,u1_part_ht;
492 WORD16 i2_mv_x,i2_mv_y;
493 struct pic_buffer_t *ps_ref_frm;
494 UWORD32 *pu4_wt_offset;
495 UWORD8 *pu1_buf1,*pu1_buf2,*pu1_buf3;
496
497
498 PROFILE_DISABLE_MB_PART_INFO()
499
500 ps_pred = ps_dec->ps_pred + ps_dec->u4_pred_info_idx;
501
502
503 i1_size_pos_info = ps_pred_pkd->i1_size_pos_info;
504 GET_XPOS_PRED(u1_sub_x,i1_size_pos_info);
505 GET_YPOS_PRED(u1_sub_y,i1_size_pos_info);
506 GET_WIDTH_PRED(u1_part_wd,i1_size_pos_info);
507 GET_HEIGHT_PRED(u1_part_ht,i1_size_pos_info);
508 i2_mv_x = ps_pred_pkd->i2_mv[0];
509 i2_mv_y = ps_pred_pkd->i2_mv[1];
510 i1_ref_idx = ps_pred_pkd->i1_ref_idx_info & 0x3f;
511 i1_buf_id = ps_pred_pkd->i1_buf_id;
512 ps_ref_frm = ps_dec->apv_buf_id_pic_buf_map[i1_buf_id];
513
514 i1_pred = (ps_pred_pkd->i1_ref_idx_info & 0xC0) >> 6;
515 u1_is_bi_dir = (i1_pred == BI_PRED);
516
517
518 u1_ref_pic_type = ps_pred_pkd->u1_pic_type & PIC_MASK;
519
520 pu1_buf1 = ps_ref_frm->pu1_buf1;
521 pu1_buf2 = ps_ref_frm->pu1_buf2;
522 pu1_buf3 = ps_ref_frm->pu1_buf3;
523
524 if(u1_ref_pic_type == BOT_FLD)
525 {
526 pu1_buf1 += ps_ref_frm->u2_frm_wd_y;
527 pu1_buf2 += ps_ref_frm->u2_frm_wd_uv;
528 pu1_buf3 += ps_ref_frm->u2_frm_wd_uv;
529
530 }
531
532
533
534 if(ps_dec->ps_cur_pps->u1_wted_pred_flag)
535 {
536 pu4_wt_offset = (UWORD32*)&ps_dec->pu4_wt_ofsts[2
537 * X3(i1_ref_idx)];
538 }
539
540
541 pu4_wt_offset = ps_pred_pkd->pu4_wt_offst;
542
543
544 /* Pointer to the frame buffer */
545 {
546 ps_frame_buf = &ps_dec->s_tran_addrecon;
547 /* CHANGED CODE */
548 }
549
550 if(!u1_pic_fld)
551 {
552 u1_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag;
553 u1_mb_bot = 1 - ps_cur_mb_info->u1_topmb;
554 }
555 else
556 u1_pic_bot = ps_cur_slice->u1_bottom_field_flag;
557
558 /****************************************************************/
559 /* calculating the flags the tell whether to use frame-padding */
560 /* or use software pad-on-demand */
561 /****************************************************************/
562 u1_mb_or_pic_bot = u1_mb_bot | u1_pic_bot;
563 u1_mb_or_pic_fld = u1_mb_fld | u1_pic_fld;
564 u1_pod_bot = u1_mb_or_pic_fld && (u1_ref_pic_type == TOP_FLD);
565 u1_pod_top = u1_mb_or_pic_fld && (u1_ref_pic_type == BOT_FLD);
566
567 /* Weighted Pred additions */
568 u1_wted_bipred_idc = ps_dec->ps_cur_pps->u1_wted_bipred_idc;
569
570 if((u1_slice_type == P_SLICE) || (u1_slice_type == SP_SLICE))
571 {
572 /* P Slice only */
573 u1_wght_pred_type = ps_dec->ps_cur_pps->u1_wted_pred_flag;
574
575 }
576 else
577 {
578 /* B Slice only */
579 u1_wght_pred_type = 1 + u1_is_bi_dir;
580 if(u1_wted_bipred_idc == 0)
581 u1_wght_pred_type = 0;
582 if((u1_wted_bipred_idc == 2) && (!u1_is_bi_dir))
583 u1_wght_pred_type = 0;
584 }
585 /* load the scratch reference buffer index */
586 pu1_ref_buf = ps_dec->pu1_ref_buff + ps_dec->u4_dma_buf_idx;
587 u2_tot_ref_scratch_size = 0;
588
589
590 /* Transfer Setup Y */
591 {
592 UWORD8 *pu1_pred, *pu1_rec;
593 /* calculating rounded motion vectors and fractional components */
594 i2_tmp_mv_x = i2_mv_x;
595 i2_tmp_mv_y = i2_mv_y;
596
597 u1_dx = i2_tmp_mv_x & 0x3;
598 u1_dy = i2_tmp_mv_y & 0x3;
599 i2_tmp_mv_x >>= 2;
600 i2_tmp_mv_y >>= 2;
601 i1_mc_wd = u1_part_wd << 2;
602 u1_dma_ht = u1_part_ht << 2;
603 if(u1_dx)
604 {
605 i2_tmp_mv_x -= 2;
606 i1_mc_wd += 5;
607 }
608 if(u1_dy)
609 {
610 i2_tmp_mv_y -= 2;
611 u1_dma_ht += 5;
612 }
613
614 /********************************************************************/
615 /* Calulating the horizontal and the vertical u4_ofst from top left */
616 /* edge of the reference frame, and subsequent clipping */
617 /********************************************************************/
618 u2_pic_ht = ps_dec->u2_pic_ht >> u1_pic_fld;
619 u2_frm_wd = ps_dec->u2_frm_wd_y << u1_pic_fld;
620 i2_frm_x = (u2_mb_x << 4) + (u1_sub_x << 2) + i2_tmp_mv_x;
621 i2_frm_y = ((u2_mb_y + (u1_mb_bot && !u1_mb_fld)) << 4)
622 + (((u1_sub_y << 2) + i2_tmp_mv_y) << u1_mb_fld);
623
624 i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_X_FRM, (ps_dec->u2_pic_wd - 1),
625 i2_frm_x);
626 i2_frm_y = CLIP3(((1 - u1_dma_ht) << u1_mb_fld),
627 (u2_pic_ht - (1 << u1_mb_fld)), i2_frm_y);
628
629 pu1_pred = pu1_buf1 + i2_frm_y * u2_frm_wd + i2_frm_x;
630 u1_dma_wd = (i1_mc_wd + 3) & 0xFC;
631 /********************************************************************/
632 /* Calulating the horizontal and the vertical u4_ofst from top left */
633 /* edge of the recon buffer */
634 /********************************************************************/
635 /* CHANGED CODE */
636 u2_rec_wd = MB_SIZE;
637 i2_rec_x = u1_sub_x << 2;
638 i2_rec_y = u1_sub_y << 2;
639 {
640 u2_rec_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld;
641 i2_rec_x += (mb_index << 4);
642 pu1_rec = ps_frame_buf->pu1_dest_y + i2_rec_y * u2_rec_wd
643 + i2_rec_x;
644 if(u1_mb_bot)
645 pu1_rec += ps_dec->u2_frm_wd_y << ((u1_mb_fld) ? 0 : 4);
646 }
647
648 /* CHANGED CODE */
649
650 /* filling the pred and dma structures for Y */
651 u2_frm_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld;
652
653 ps_pred->pu1_dma_dest_addr = pu1_ref_buf;
654 ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
655 ps_pred->u2_frm_wd = u2_frm_wd;
656 ps_pred->i1_dma_ht = u1_dma_ht;
657 ps_pred->i1_mc_wd = i1_mc_wd;
658 ps_pred->pu1_rec_y_u = pu1_rec;
659 ps_pred->u2_dst_stride = u2_rec_wd;
660
661 ps_pred->i1_mb_partwidth = u1_part_wd << 2;
662 ps_pred->i1_mb_partheight = u1_part_ht << 2;
663 ps_pred->u1_dydx = (u1_dy << 2) + u1_dx;
664 ps_pred->u1_is_bi_direct = u1_is_bi_dir;
665 ps_pred->u1_pi1_wt_ofst_rec_v = (UWORD8 *)pu4_wt_offset;
666 ps_pred->u1_wght_pred_type = u1_wght_pred_type;
667 ps_pred->i1_pod_ht = 0;
668
669 /* Increment the Reference buffer Indices */
670 pu1_ref_buf += u1_dma_wd * u1_dma_ht;
671 u2_tot_ref_scratch_size += u1_dma_wd * u1_dma_ht;
672
673 /* unrestricted field motion comp for top region outside frame */
674 i2_pod_ht = (-i2_frm_y) >> u1_mb_fld;
675 if((i2_pod_ht > 0) && u1_pod_top)
676 {
677 ps_pred->i1_pod_ht = (WORD8)(-i2_pod_ht);
678 u1_dma_ht -= i2_pod_ht;
679 pu1_pred += i2_pod_ht * u2_frm_wd;
680 }
681 /* unrestricted field motion comp for bottom region outside frame */
682 else if(u1_pod_bot)
683 {
684 i2_pod_ht = u1_dma_ht + ((i2_frm_y - u2_pic_ht) >> u1_mb_fld);
685 if(i2_pod_ht > 0)
686 {
687 u1_dma_ht -= i2_pod_ht;
688 ps_pred->i1_pod_ht = (WORD8)i2_pod_ht;
689 }
690 }
691
692 /* Copy Y partition */
693
694 /*
695 * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data
696 * is copied in the Scrath buffer so that the padding_on_demand doesnot corrupt the frame data
697 */
698 if(ps_pred->i1_pod_ht)
699 {
700 ps_pred->pu1_pred = pu1_pred;
701 ps_pred->u1_dma_ht_y = u1_dma_ht;
702 ps_pred->u1_dma_wd_y = u1_dma_wd;
703 }
704 ps_pred->pu1_y_ref = pu1_pred;
705 }
706
707
708
709 /* Increment ps_pred index */
710 ps_pred++;
711
712 /* Transfer Setup U & V */
713 {
714 WORD32 i4_ref_offset, i4_rec_offset;
715 UWORD8 *pu1_pred_u, *pu1_pred_v, u1_tmp_dma_ht;
716 /* CHANGED CODE */
717 UWORD8 u1_chroma_cbp = (UWORD8)(ps_cur_mb_info->u1_cbp >> 4);
718 /* CHANGED CODE */
719
720 /* calculating rounded motion vectors and fractional components */
721 i2_tmp_mv_x = i2_mv_x;
722 i2_tmp_mv_y = i2_mv_y;
723
724 /************************************************************************/
725 /* Table 8-9: Derivation of the vertical component of the chroma vector */
726 /* in field coding mode */
727 /************************************************************************/
728 if(u1_pod_bot && u1_mb_or_pic_bot)
729 i2_tmp_mv_y += 2;
730 if(u1_pod_top && !u1_mb_or_pic_bot)
731 i2_tmp_mv_y -= 2;
732
733 /* Eighth sample of the chroma MV */
734 u1_dx = i2_tmp_mv_x & 0x7;
735 u1_dy = i2_tmp_mv_y & 0x7;
736
737 /********************************************************************/
738 /* Calculating the full pel MV for chroma which is 1/2 of the Luma */
739 /* MV in full pel units */
740 /********************************************************************/
741 i2_mv_x = i2_tmp_mv_x;
742 i2_mv_y = i2_tmp_mv_y;
743 i2_tmp_mv_x = SIGN_POW2_DIV(i2_tmp_mv_x, 3);
744 i2_tmp_mv_y = SIGN_POW2_DIV(i2_tmp_mv_y, 3);
745 i1_mc_wd = u1_part_wd << 1;
746 u1_dma_ht = u1_part_ht << 1;
747 if(u1_dx)
748 {
749 if(i2_mv_x < 0)
750 i2_tmp_mv_x -= 1;
751 i1_mc_wd++;
752 }
753 if(u1_dy != 0)
754 {
755 if(i2_mv_y < 0)
756 i2_tmp_mv_y -= 1;
757 u1_dma_ht++;
758 }
759
760 /********************************************************************/
761 /* Calulating the horizontal and the vertical u4_ofst from top left */
762 /* edge of the reference frame, and subsequent clipping */
763 /********************************************************************/
764 u2_pic_ht >>= 1;
765 u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_pic_fld;
766 i2_frm_x = (u2_mb_x << 3) + (u1_sub_x << 1) + i2_tmp_mv_x;
767 i2_frm_y = ((u2_mb_y + (u1_mb_bot && !u1_mb_fld)) << 3)
768 + (((u1_sub_y << 1) + i2_tmp_mv_y) << u1_mb_fld);
769
770 i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_UV_FRM,
771 ((ps_dec->u2_pic_wd >> 1) - 1), i2_frm_x);
772 i2_frm_y = CLIP3(((1 - u1_dma_ht) << u1_mb_fld),
773 (u2_pic_ht - (1 << u1_mb_fld)), i2_frm_y);
774
775 i4_ref_offset = i2_frm_y * u2_frm_wd + i2_frm_x * YUV420SP_FACTOR;
776 u1_dma_wd = (i1_mc_wd + 3) & 0xFC;
777
778 /********************************************************************/
779 /* Calulating the horizontal and the vertical u4_ofst from top left */
780 /* edge of the recon buffer */
781 /********************************************************************/
782 /* CHANGED CODE */
783 u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR;
784 i2_rec_x = u1_sub_x << 1;
785 i2_rec_y = u1_sub_y << 1;
786 i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
787 {
788 u2_rec_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
789
790 i2_rec_x += (mb_index << 3);
791 i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
792 if(u1_mb_bot)
793 i4_rec_offset += ps_dec->u2_frm_wd_uv << ((u1_mb_fld) ? 0 : 3);
794 ps_pred->pu1_rec_y_u = ps_frame_buf->pu1_dest_u + i4_rec_offset;
795 ps_pred->u1_pi1_wt_ofst_rec_v = ps_frame_buf->pu1_dest_v
796 + i4_rec_offset;
797
798 }
799
800 /* CHANGED CODE */
801
802 /* filling the common pred structures for U */
803 u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
804 u1_tmp_dma_ht = u1_dma_ht;
805 ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
806 ps_pred->u2_frm_wd = u2_frm_wd;
807 ps_pred->i1_dma_ht = u1_dma_ht;
808 ps_pred->i1_mc_wd = i1_mc_wd;
809 ps_pred->u2_dst_stride = u2_rec_wd;
810
811 ps_pred->i1_mb_partwidth = u1_part_wd << 1;
812 ps_pred->i1_mb_partheight = u1_part_ht << 1;
813 ps_pred->u1_dydx = (u1_dy << 3) + u1_dx;
814 ps_pred->u1_is_bi_direct = u1_is_bi_dir;
815 ps_pred->u1_wght_pred_type = u1_wght_pred_type;
816 ps_pred->i1_pod_ht = 0;
817
818 ps_pred->pu1_dma_dest_addr = pu1_ref_buf;
819
820 /* unrestricted field motion comp for top region outside frame */
821 i2_pod_ht = (-i2_frm_y) >> u1_mb_fld;
822 if((i2_pod_ht > 0) && u1_pod_top)
823 {
824 i4_ref_offset += i2_pod_ht * u2_frm_wd;
825 u1_dma_ht -= i2_pod_ht;
826 ps_pred->i1_pod_ht = (WORD8)(-i2_pod_ht);
827 }
828 /* unrestricted field motion comp for bottom region outside frame */
829 else if(u1_pod_bot)
830 {
831 i2_pod_ht = u1_dma_ht + ((i2_frm_y - u2_pic_ht) >> u1_mb_fld);
832 if(i2_pod_ht > 0)
833 {
834 u1_dma_ht -= i2_pod_ht;
835 ps_pred->i1_pod_ht = (WORD8)i2_pod_ht;
836 }
837 }
838
839 pu1_pred_u = pu1_buf2 + i4_ref_offset;
840 pu1_pred_v = pu1_buf3 + i4_ref_offset;
841
842 /* Copy U & V partitions */
843 if(ps_pred->i1_pod_ht)
844 {
845 ps_pred->pu1_pred_u = pu1_pred_u;
846 ps_pred->u1_dma_ht_uv = u1_dma_ht;
847 ps_pred->u1_dma_wd_uv = u1_dma_wd;
848
849 }
850 ps_pred->pu1_u_ref = pu1_pred_u;
851
852 /* Increment the reference buffer Index */
853 u2_tot_ref_scratch_size += (u1_dma_wd * u1_tmp_dma_ht) << 1;
854
855 if(ps_pred->i1_pod_ht)
856 {
857 ps_pred->pu1_pred_v = pu1_pred_v;
858 ps_pred->u1_dma_ht_uv = u1_dma_ht;
859 ps_pred->u1_dma_wd_uv = u1_dma_wd;
860 }
861
862 ps_pred->pu1_v_ref = pu1_pred_v;
863 }
864
865 /* Increment ps_pred index */
866 ps_dec->u4_pred_info_idx += 2;
867
868
869 /* Increment the reference buffer Index */
870 ps_dec->u4_dma_buf_idx += u2_tot_ref_scratch_size;
871
872 if(ps_dec->u4_dma_buf_idx > MAX_REF_BUF_SIZE)
873 return ERROR_NUM_MV;
874
875 return OK;
876
877
878
879 }
880
881
882 /*!
883 **************************************************************************
884 * \if Function name : MotionCompensate \endif
885 *
886 * \brief
887 * The routine forms predictor blocks for the entire MB and stores it in
888 * predictor buffers.This function works only for BASELINE profile
889 *
890 * \param ps_dec: Pointer to the structure decStruct. This is used to get
891 * pointers to the current and the reference frame and to the MbParams
892 * structure.
893 *
894 * \return
895 * None
896 *
897 * \note
898 * The routine forms predictors for all the luma and the chroma MB
899 * partitions.
900 **************************************************************************
901 */
902
ih264d_motion_compensate_bp(dec_struct_t * ps_dec,dec_mb_info_t * ps_cur_mb_info)903 void ih264d_motion_compensate_bp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info)
904 {
905 pred_info_t *ps_pred ;
906 UWORD8 *puc_ref, *pu1_dest_y;
907 UWORD8 *pu1_dest_u;
908 UWORD32 u2_num_pels, u2_ref_wd_y, u2_ref_wd_uv, u2_dst_wd;
909
910 UWORD32 u4_wd_y, u4_ht_y, u4_wd_uv;
911 UWORD32 u4_ht_uv;
912 UWORD8 *puc_pred0 = (UWORD8 *)(ps_dec->pi2_pred1);
913
914
915 PROFILE_DISABLE_INTER_PRED()
916 UNUSED(ps_cur_mb_info);
917 ps_pred = ps_dec->ps_pred ;
918
919 for(u2_num_pels = 0; u2_num_pels < 256;)
920 {
921 UWORD32 uc_dx, uc_dy;
922 /* Pointer to the destination buffer. If the CBPs of all 8x8 blocks in
923 the MB partition are zero then it would be better to copy the
924 predictor valus directly to the current frame buffer */
925 /*
926 * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data
927 * is copied in the Scrath buffer so that the padding_on_demand doesnot corrupt the frame data
928 */
929
930 u2_ref_wd_y = ps_pred->u2_frm_wd;
931 puc_ref = ps_pred->pu1_y_ref;
932 if(ps_pred->u1_dydx & 0x3)
933 puc_ref += 2;
934 if(ps_pred->u1_dydx >> 2)
935 puc_ref += 2 * u2_ref_wd_y;
936
937 u4_wd_y = ps_pred->i1_mb_partwidth;
938 u4_ht_y = ps_pred->i1_mb_partheight;
939 uc_dx = ps_pred->u1_dydx;
940 uc_dy = uc_dx >> 2;
941 uc_dx &= 0x3;
942
943 pu1_dest_y = ps_pred->pu1_rec_y_u;
944 u2_dst_wd = ps_pred->u2_dst_stride;
945
946 ps_dec->apf_inter_pred_luma[ps_pred->u1_dydx](puc_ref, pu1_dest_y,
947 u2_ref_wd_y,
948 u2_dst_wd,
949 u4_ht_y,
950 u4_wd_y, puc_pred0,
951 ps_pred->u1_dydx);
952
953 ps_pred++;
954
955 /* Interpolate samples for the chroma components */
956 {
957 UWORD8 *pu1_ref_u;
958
959 u2_ref_wd_uv = ps_pred->u2_frm_wd;
960 pu1_ref_u = ps_pred->pu1_u_ref;
961
962 u4_wd_uv = ps_pred->i1_mb_partwidth;
963 u4_ht_uv = ps_pred->i1_mb_partheight;
964 uc_dx = ps_pred->u1_dydx; /* 8*dy + dx */
965 uc_dy = uc_dx >> 3;
966 uc_dx &= 0x7;
967
968 pu1_dest_u = ps_pred->pu1_rec_y_u;
969 u2_dst_wd = ps_pred->u2_dst_stride;
970
971 ps_pred++;
972 ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_dest_u, u2_ref_wd_uv,
973 u2_dst_wd, uc_dx, uc_dy,
974 u4_ht_uv, u4_wd_uv);
975
976 }
977
978 u2_num_pels += (UWORD8)u4_wd_y * (UWORD8)u4_ht_y;
979
980 }
981 }
982
983
984 /*
985 **************************************************************************
986 * \if Function name : MotionCompensateB \endif
987 *
988 * \brief
989 * The routine forms predictor blocks for the entire MB and stores it in
990 * predictor buffers.
991 *
992 * \param ps_dec: Pointer to the structure decStruct. This is used to get
993 * pointers to the current and the reference frame and to the MbParams
994 * structure.
995 *
996 * \return
997 * None
998 *
999 * \note
1000 * The routine forms predictors for all the luma and the chroma MB
1001 * partitions.
1002 **************************************************************************
1003 */
1004
ih264d_motion_compensate_mp(dec_struct_t * ps_dec,dec_mb_info_t * ps_cur_mb_info)1005 void ih264d_motion_compensate_mp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info)
1006 {
1007 pred_info_t *ps_pred ;
1008 pred_info_t *ps_pred_y_forw, *ps_pred_y_back, *ps_pred_cr_forw;
1009 UWORD8 *puc_ref, *pu1_dest_y, *puc_pred0, *puc_pred1;
1010 UWORD8 *pu1_dest_u, *pu1_dest_v;
1011 WORD16 *pi16_intm;
1012 UWORD32 u2_num_pels, u2_ref_wd_y, u2_ref_wd_uv, u2_dst_wd;
1013 UWORD32 u2_dest_wd_y, u2_dest_wd_uv;
1014 UWORD32 u2_row_buf_wd_y = 0;
1015 UWORD32 u2_row_buf_wd_uv = 0;
1016 UWORD32 u2_log2Y_crwd;
1017 UWORD32 u4_wd_y, u4_ht_y, u1_dir, u4_wd_uv;
1018 UWORD32 u4_ht_uv;
1019 UWORD8 *pu1_temp_mc_buffer = ps_dec->pu1_temp_mc_buffer;
1020 WORD32 i2_pod_ht;
1021 UWORD32 u2_pic_ht, u2_frm_wd, u2_rec_wd;
1022 UWORD32 u1_pod_bot, u1_pod_top;
1023 UWORD8 *pu1_pred, *pu1_dma_dst;
1024 UWORD32 u1_dma_wd, u1_dma_ht;
1025
1026 dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice;
1027
1028 /* set default value to flags specifying field nature of picture & mb */
1029 UWORD32 u1_mb_fld = 0, u1_mb_or_pic_fld;
1030 UWORD32 u1_mb_or_pic_bot;
1031 /* calculate flags specifying field nature of picture & mb */
1032 const UWORD8 u1_pic_fld = ps_cur_slice->u1_field_pic_flag;
1033
1034 PROFILE_DISABLE_INTER_PRED()
1035 ps_pred = ps_dec->ps_pred ;
1036 /* Initialize both ps_pred_y_forw, ps_pred_cr_forw and ps_pred_y_back
1037 * to avoid static analysis warnings */
1038 ps_pred_y_forw = ps_pred;
1039 ps_pred_y_back = ps_pred;
1040 ps_pred_cr_forw = ps_pred;
1041
1042 u2_log2Y_crwd = ps_dec->ps_decode_cur_slice->u2_log2Y_crwd;
1043
1044 if(!u1_pic_fld)
1045 {
1046 u1_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag;
1047 }
1048
1049 u1_mb_or_pic_fld = u1_mb_fld | u1_pic_fld;
1050
1051 pi16_intm = ps_dec->pi2_pred1;
1052 puc_pred0 = (UWORD8 *)pi16_intm;
1053 puc_pred1 = puc_pred0 + PRED_BUFFER_WIDTH * PRED_BUFFER_HEIGHT * sizeof(WORD16);
1054
1055 for(u2_num_pels = 0; u2_num_pels < 256;)
1056 {
1057 UWORD8 uc_dx, uc_dy;
1058 const UWORD8 u1_is_bi_direct = ps_pred->u1_is_bi_direct;
1059 for(u1_dir = 0; u1_dir <= u1_is_bi_direct; u1_dir++)
1060 {
1061 /* Pointer to the destination buffer. If the CBPs of all 8x8 blocks in
1062 the MB partition are zero then it would be better to copy the
1063 predictor valus directly to the current frame buffer */
1064 /*
1065 * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data
1066 * is copied in the Scrath buffer so that the padding_on_demand doesnot corrupt the frame data
1067 */
1068
1069 if(ps_pred->i1_pod_ht)
1070 {
1071 u2_ref_wd_y = ps_pred->u2_u1_ref_buf_wd;
1072 puc_ref = ps_pred->pu1_dma_dest_addr;
1073 }
1074 else
1075 {
1076 u2_ref_wd_y = ps_pred->u2_frm_wd;
1077 puc_ref = ps_pred->pu1_y_ref;
1078
1079 }
1080
1081 if(ps_pred->u1_dydx & 0x3)
1082 puc_ref += 2;
1083 if(ps_pred->u1_dydx >> 2)
1084 puc_ref += 2 * u2_ref_wd_y;
1085 u4_wd_y = ps_pred->i1_mb_partwidth;
1086 u4_ht_y = ps_pred->i1_mb_partheight;
1087
1088 uc_dx = ps_pred->u1_dydx;
1089 uc_dy = uc_dx >> 2;
1090 uc_dx &= 0x3;
1091 if(u1_dir == 0)
1092 {
1093 pu1_dest_y = ps_pred->pu1_rec_y_u;
1094 u2_row_buf_wd_y = ps_pred->u2_dst_stride;
1095 u2_dst_wd = ps_pred->u2_dst_stride;
1096 u2_dest_wd_y = u2_dst_wd;
1097 ps_pred_y_forw = ps_pred;
1098 }
1099 else
1100 {
1101 pu1_dest_y = pu1_temp_mc_buffer;
1102 u2_dst_wd = MB_SIZE;
1103 u2_dest_wd_y = u2_dst_wd;
1104 ps_pred_y_back = ps_pred;
1105 ps_pred_y_back->pu1_rec_y_u = pu1_dest_y;
1106 }
1107
1108 /* padding on demand (POD) for y done here */
1109
1110 if(ps_pred->i1_pod_ht)
1111 {
1112 pu1_pred = ps_pred->pu1_pred;
1113 pu1_dma_dst = ps_pred->pu1_dma_dest_addr;
1114 u1_dma_wd = ps_pred->u1_dma_wd_y;
1115 u1_dma_ht = ps_pred->u1_dma_ht_y;
1116 u2_frm_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld;
1117 if(ps_pred->i1_pod_ht < 0)
1118 {
1119 pu1_dma_dst = pu1_dma_dst - (ps_pred->i1_pod_ht * ps_pred->u2_u1_ref_buf_wd);
1120 }
1121 ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd, u1_dma_wd,
1122 u1_dma_ht);
1123 ih264d_pad_on_demand(ps_pred, LUM_BLK);
1124 }
1125 ps_dec->apf_inter_pred_luma[ps_pred->u1_dydx](puc_ref, pu1_dest_y,
1126 u2_ref_wd_y,
1127 u2_dst_wd,
1128 u4_ht_y,
1129 u4_wd_y,
1130 puc_pred0,
1131 ps_pred->u1_dydx);
1132 ps_pred++;
1133
1134 /* Interpolate samples for the chroma components */
1135 {
1136 UWORD8 *pu1_ref_u;
1137 UWORD32 u1_dma_ht;
1138
1139 /* padding on demand (POD) for U and V done here */
1140 u1_dma_ht = ps_pred->i1_dma_ht;
1141
1142 if(ps_pred->i1_pod_ht)
1143 {
1144 pu1_pred = ps_pred->pu1_pred_u;
1145 pu1_dma_dst = ps_pred->pu1_dma_dest_addr;
1146 u1_dma_ht = ps_pred->u1_dma_ht_uv;
1147 u1_dma_wd = ps_pred->u1_dma_wd_uv * YUV420SP_FACTOR;
1148 u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
1149 if(ps_pred->i1_pod_ht < 0)
1150 {
1151 /*Top POD*/
1152 pu1_dma_dst -= (ps_pred->i1_pod_ht
1153 * ps_pred->u2_u1_ref_buf_wd
1154 * YUV420SP_FACTOR);
1155 }
1156
1157 ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd,
1158 u1_dma_wd, u1_dma_ht);
1159
1160 pu1_dma_dst += (ps_pred->i1_dma_ht
1161 * ps_pred->u2_u1_ref_buf_wd);
1162 pu1_pred = ps_pred->pu1_pred_v;
1163
1164 ih264d_pad_on_demand(ps_pred, CHROM_BLK);
1165 }
1166
1167 if(ps_pred->i1_pod_ht)
1168 {
1169 pu1_ref_u = ps_pred->pu1_dma_dest_addr;
1170
1171 u2_ref_wd_uv = ps_pred->u2_u1_ref_buf_wd
1172 * YUV420SP_FACTOR;
1173 }
1174 else
1175 {
1176 u2_ref_wd_uv = ps_pred->u2_frm_wd;
1177 pu1_ref_u = ps_pred->pu1_u_ref;
1178
1179 }
1180
1181 u4_wd_uv = ps_pred->i1_mb_partwidth;
1182 u4_ht_uv = ps_pred->i1_mb_partheight;
1183 uc_dx = ps_pred->u1_dydx; /* 8*dy + dx */
1184 uc_dy = uc_dx >> 3;
1185 uc_dx &= 0x7;
1186 if(u1_dir == 0)
1187 {
1188 pu1_dest_u = ps_pred->pu1_rec_y_u;
1189
1190 pu1_dest_v = ps_pred->u1_pi1_wt_ofst_rec_v;
1191 u2_row_buf_wd_uv = ps_pred->u2_dst_stride;
1192 u2_dst_wd = ps_pred->u2_dst_stride;
1193 u2_dest_wd_uv = u2_dst_wd;
1194 ps_pred_cr_forw = ps_pred;
1195 }
1196 else
1197 {
1198 pu1_dest_u = puc_pred0;
1199
1200 pu1_dest_v = puc_pred1;
1201 u2_dest_wd_uv = BUFFER_WIDTH;
1202 u2_dst_wd = BUFFER_WIDTH;
1203 ps_pred->pu1_rec_y_u = pu1_dest_u;
1204 ps_pred->u1_pi1_wt_ofst_rec_v = pu1_dest_v;
1205 }
1206
1207 ps_pred++;
1208 ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_dest_u,
1209 u2_ref_wd_uv, u2_dst_wd,
1210 uc_dx, uc_dy, u4_ht_uv,
1211 u4_wd_uv);
1212
1213 if(ps_cur_mb_info->u1_Mux == 1)
1214 {
1215 /******************************************************************/
1216 /* padding on demand (POD) for U and V done here */
1217 /* ps_pred now points to the Y entry of the 0,0 component */
1218 /* Y need not be checked for POD because Y lies within */
1219 /* the picture((0,0) mv for Y doesnot get changed. But (0,0) for */
1220 /* U and V can need POD beacause of cross-field mv adjustments */
1221 /* (Table 8-9 of standard) */
1222 /******************************************************************/
1223 if((ps_pred + 1)->i1_pod_ht)
1224 {
1225 pu1_pred = (ps_pred + 1)->pu1_pred_u;
1226 pu1_dma_dst = (ps_pred + 1)->pu1_dma_dest_addr;
1227 u1_dma_ht = (ps_pred + 1)->u1_dma_ht_uv;
1228 u1_dma_wd = (ps_pred + 1)->u1_dma_wd_uv
1229 * YUV420SP_FACTOR;
1230 u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
1231 if((ps_pred + 1)->i1_pod_ht < 0)
1232 {
1233 /*Top POD*/
1234 pu1_dma_dst -= ((ps_pred + 1)->i1_pod_ht
1235 * (ps_pred + 1)->u2_u1_ref_buf_wd
1236 * YUV420SP_FACTOR);
1237 }
1238 ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd,
1239 u1_dma_wd, u1_dma_ht);
1240 pu1_dma_dst += ((ps_pred + 1)->i1_dma_ht
1241 * (ps_pred + 1)->u2_u1_ref_buf_wd); //(u1_dma_ht * u1_dma_wd);//
1242 pu1_pred = (ps_pred + 1)->pu1_pred_v;
1243 ih264d_pad_on_demand(ps_pred + 1, CHROM_BLK);
1244
1245 }
1246
1247 ih264d_multiplex_ref_data(ps_dec, ps_pred, pu1_dest_y,
1248 pu1_dest_u, ps_cur_mb_info,
1249 u2_dest_wd_y, u2_dest_wd_uv,
1250 u1_dir);
1251 ps_pred += 2;
1252 }
1253 }
1254 }
1255 if(u1_dir != 0)
1256 u2_ref_wd_y = MB_SIZE;
1257
1258 u2_num_pels += u4_wd_y * u4_ht_y;
1259 /* if BI_DIRECT, average the two pred's, and put in ..PredBuffer[0] */
1260 if((u1_is_bi_direct != 0) || (ps_pred_y_forw->u1_wght_pred_type != 0))
1261 {
1262
1263 switch(ps_pred_y_forw->u1_wght_pred_type)
1264 {
1265 case 0:
1266 ps_dec->pf_default_weighted_pred_luma(
1267 ps_pred_y_forw->pu1_rec_y_u, pu1_dest_y,
1268 ps_pred_y_forw->pu1_rec_y_u,
1269 u2_row_buf_wd_y, u2_ref_wd_y,
1270 u2_row_buf_wd_y, u4_ht_uv * 2,
1271 u4_wd_uv * 2);
1272
1273 ps_dec->pf_default_weighted_pred_chroma(
1274 ps_pred_cr_forw->pu1_rec_y_u, pu1_dest_u,
1275 ps_pred_cr_forw->pu1_rec_y_u,
1276 u2_row_buf_wd_uv, u2_dst_wd,
1277 u2_row_buf_wd_uv, u4_ht_uv,
1278 u4_wd_uv);
1279
1280 break;
1281 case 1:
1282 {
1283 UWORD32 *pu4_weight_ofst =
1284 (UWORD32*)ps_pred_y_forw->u1_pi1_wt_ofst_rec_v;
1285 UWORD32 u4_wt_ofst_u, u4_wt_ofst_v;
1286 UWORD32 u4_wt_ofst_y =
1287 (UWORD32)(pu4_weight_ofst[0]);
1288 WORD32 weight = (WORD16)(u4_wt_ofst_y & 0xffff);
1289 WORD32 ofst = (WORD8)(u4_wt_ofst_y >> 16);
1290
1291 ps_dec->pf_weighted_pred_luma(ps_pred_y_forw->pu1_rec_y_u,
1292 ps_pred_y_forw->pu1_rec_y_u,
1293 u2_row_buf_wd_y,
1294 u2_row_buf_wd_y,
1295 (u2_log2Y_crwd & 0x0ff),
1296 weight, ofst, u4_ht_y,
1297 u4_wd_y);
1298
1299 u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[2]);
1300 u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[4]);
1301 weight = ((u4_wt_ofst_v & 0xffff) << 16)
1302 | (u4_wt_ofst_u & 0xffff);
1303 ofst = ((u4_wt_ofst_v >> 16) << 8)
1304 | ((u4_wt_ofst_u >> 16) & 0xFF);
1305
1306 ps_dec->pf_weighted_pred_chroma(
1307 ps_pred_cr_forw->pu1_rec_y_u,
1308 ps_pred_cr_forw->pu1_rec_y_u,
1309 u2_row_buf_wd_uv, u2_row_buf_wd_uv,
1310 (u2_log2Y_crwd >> 8), weight, ofst,
1311 u4_ht_y >> 1, u4_wd_y >> 1);
1312 }
1313
1314 break;
1315 case 2:
1316 {
1317 UWORD32 *pu4_weight_ofst =
1318 (UWORD32*)ps_pred_y_forw->u1_pi1_wt_ofst_rec_v;
1319 UWORD32 u4_wt_ofst_u, u4_wt_ofst_v;
1320 UWORD32 u4_wt_ofst_y;
1321 WORD32 weight1, weight2;
1322 WORD32 ofst1, ofst2;
1323
1324 u4_wt_ofst_y = (UWORD32)(pu4_weight_ofst[0]);
1325
1326 weight1 = (WORD16)(u4_wt_ofst_y & 0xffff);
1327 ofst1 = (WORD8)(u4_wt_ofst_y >> 16);
1328
1329 u4_wt_ofst_y = (UWORD32)(pu4_weight_ofst[1]);
1330 weight2 = (WORD16)(u4_wt_ofst_y & 0xffff);
1331 ofst2 = (WORD8)(u4_wt_ofst_y >> 16);
1332
1333 ps_dec->pf_weighted_bi_pred_luma(ps_pred_y_forw->pu1_rec_y_u,
1334 ps_pred_y_back->pu1_rec_y_u,
1335 ps_pred_y_forw->pu1_rec_y_u,
1336 u2_row_buf_wd_y,
1337 u2_ref_wd_y,
1338 u2_row_buf_wd_y,
1339 (u2_log2Y_crwd & 0x0ff),
1340 weight1, weight2, ofst1,
1341 ofst2, u4_ht_y,
1342 u4_wd_y);
1343
1344 u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[2]);
1345 u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[4]);
1346 weight1 = ((u4_wt_ofst_v & 0xffff) << 16)
1347 | (u4_wt_ofst_u & 0xffff);
1348 ofst1 = ((u4_wt_ofst_v >> 16) << 8)
1349 | ((u4_wt_ofst_u >> 16) & 0xFF);
1350
1351 u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[3]);
1352 u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[5]);
1353 weight2 = ((u4_wt_ofst_v & 0xffff) << 16)
1354 | (u4_wt_ofst_u & 0xffff);
1355 ofst2 = ((u4_wt_ofst_v >> 16) << 8)
1356 | ((u4_wt_ofst_u >> 16) & 0xFF);
1357
1358 ps_dec->pf_weighted_bi_pred_chroma(
1359 (ps_pred_y_forw + 1)->pu1_rec_y_u,
1360 (ps_pred_y_back + 1)->pu1_rec_y_u,
1361 (ps_pred_y_forw + 1)->pu1_rec_y_u,
1362 u2_row_buf_wd_uv, u2_dst_wd,
1363 u2_row_buf_wd_uv, (u2_log2Y_crwd >> 8),
1364 weight1, weight2, ofst1, ofst2,
1365 u4_ht_y >> 1, u4_wd_y >> 1);
1366 }
1367
1368 break;
1369 }
1370
1371 }
1372 }
1373 }
1374
1375
1376 /*!
1377 **************************************************************************
1378 * \if Function name : ih264d_multiplex_ref_data \endif
1379 *
1380 * \brief
1381 * Initializes forward and backward refernce lists for B slice decoding.
1382 *
1383 *
1384 * \return
1385 * 0 on Success and Error code otherwise
1386 **************************************************************************
1387 */
1388
ih264d_multiplex_ref_data(dec_struct_t * ps_dec,pred_info_t * ps_pred,UWORD8 * pu1_dest_y,UWORD8 * pu1_dest_u,dec_mb_info_t * ps_cur_mb_info,UWORD16 u2_dest_wd_y,UWORD16 u2_dest_wd_uv,UWORD8 u1_dir)1389 void ih264d_multiplex_ref_data(dec_struct_t * ps_dec,
1390 pred_info_t *ps_pred,
1391 UWORD8* pu1_dest_y,
1392 UWORD8* pu1_dest_u,
1393 dec_mb_info_t *ps_cur_mb_info,
1394 UWORD16 u2_dest_wd_y,
1395 UWORD16 u2_dest_wd_uv,
1396 UWORD8 u1_dir)
1397 {
1398 UWORD16 u2_mask = ps_cur_mb_info->u2_mask[u1_dir];
1399 UWORD8 *pu1_ref_y, *pu1_ref_u;
1400 UWORD8 uc_cond, i, j, u1_dydx;
1401 UWORD16 u2_ref_wd_y, u2_ref_wd_uv;
1402
1403 PROFILE_DISABLE_INTER_PRED()
1404
1405 if(ps_pred->i1_pod_ht)
1406 {
1407 pu1_ref_y = ps_pred->pu1_dma_dest_addr;
1408
1409 u2_ref_wd_y = ps_pred->u2_u1_ref_buf_wd;
1410 }
1411 else
1412 {
1413 pu1_ref_y = ps_pred->pu1_y_ref;
1414 u2_ref_wd_y = ps_pred->u2_frm_wd;
1415 }
1416
1417 ps_pred++;
1418 if(ps_pred->i1_pod_ht)
1419 {
1420 pu1_ref_u = ps_pred->pu1_dma_dest_addr;
1421 u2_ref_wd_uv = ps_pred->u2_u1_ref_buf_wd * YUV420SP_FACTOR;
1422
1423 }
1424 else
1425 {
1426 pu1_ref_u = ps_pred->pu1_u_ref;
1427 u2_ref_wd_uv = ps_pred->u2_frm_wd;
1428
1429 }
1430
1431 u1_dydx = ps_pred->u1_dydx;
1432
1433 {
1434 UWORD8 uc_dx, uc_dy;
1435 UWORD8 *pu1_scratch_u;
1436
1437 uc_dx = u1_dydx & 0x3;
1438 uc_dy = u1_dydx >> 3;
1439 if(u1_dydx != 0)
1440 {
1441 pred_info_t * ps_prv_pred = ps_pred - 2;
1442 pu1_scratch_u = ps_prv_pred->pu1_dma_dest_addr;
1443 ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_scratch_u,
1444 u2_ref_wd_uv, 16, uc_dx, uc_dy, 8,
1445 8);
1446
1447 /* Modify ref pointer and refWidth to point to scratch */
1448 /* buffer to be used below in ih264d_copy_multiplex_data functions */
1449 /* CHANGED CODE */
1450 pu1_ref_u = pu1_scratch_u;
1451 u2_ref_wd_uv = 8 * YUV420SP_FACTOR;
1452 }
1453 }
1454 {
1455 for(i = 0; i < 4; i++)
1456 {
1457 for(j = 0; j < 4; j++)
1458 {
1459 uc_cond = u2_mask & 1;
1460 u2_mask >>= 1;
1461 if(uc_cond)
1462 {
1463 *(UWORD32 *)(pu1_dest_y + u2_dest_wd_y) =
1464 *(UWORD32 *)(pu1_ref_y + u2_ref_wd_y);
1465 *(UWORD32 *)(pu1_dest_y + 2 * u2_dest_wd_y) =
1466 *(UWORD32 *)(pu1_ref_y + 2 * u2_ref_wd_y);
1467 *(UWORD32 *)(pu1_dest_y + 3 * u2_dest_wd_y) =
1468 *(UWORD32 *)(pu1_ref_y + 3 * u2_ref_wd_y);
1469 {
1470 UWORD32 *dst, *src;
1471 dst = (UWORD32 *)pu1_dest_y;
1472 src = (UWORD32 *)pu1_ref_y;
1473 *dst = *src;
1474 dst++;
1475 src++;
1476 pu1_dest_y = (UWORD8 *)dst;
1477 pu1_ref_y = (UWORD8 *)src;
1478 }
1479 *(UWORD32 *)(pu1_dest_u + u2_dest_wd_uv) =
1480 *(UWORD32 *)(pu1_ref_u + u2_ref_wd_uv);
1481 {
1482 UWORD32 *dst, *src;
1483 dst = (UWORD32 *)pu1_dest_u;
1484 src = (UWORD32 *)pu1_ref_u;
1485 *dst = *src;
1486 dst++;
1487 src++;
1488 pu1_dest_u = (UWORD8 *)dst;
1489 pu1_ref_u = (UWORD8 *)src;
1490 }
1491
1492 }
1493 else
1494 {
1495 pu1_dest_y += 4;
1496 pu1_ref_y += 4;
1497 pu1_dest_u += 2 * YUV420SP_FACTOR;
1498 pu1_ref_u += 2 * YUV420SP_FACTOR;
1499 }
1500 }
1501 pu1_ref_y += 4 * (u2_ref_wd_y - 4);
1502 pu1_ref_u += 2 * (u2_ref_wd_uv - 4 * YUV420SP_FACTOR);
1503 pu1_dest_y += 4 * (u2_dest_wd_y - 4);
1504 pu1_dest_u += 2 * (u2_dest_wd_uv - 4 * YUV420SP_FACTOR);
1505 }
1506 }
1507 }
1508
/*!
 **************************************************************************
 * \if Function name : ih264d_pad_on_demand \endif
 *
 * \brief
 *    Pads the scratch reference block at ps_pred->pu1_dma_dest_addr by
 *    replicating one row of it into |i1_pod_ht| adjacent rows.
 *
 *    i1_pod_ht < 0 (top POD)    : the row at index |i1_pod_ht| is copied
 *                                 into rows 0 .. |i1_pod_ht| - 1.
 *    i1_pod_ht > 0 (bottom POD) : the row at index
 *                                 i1_dma_ht - i1_pod_ht - 1 is copied into
 *                                 the i1_pod_ht rows that follow it.
 *    i1_pod_ht == 0             : nothing is done.
 *
 *    One row is (u2_u1_ref_buf_wd >> 2) 32-bit words wide for luma and
 *    YUV420SP_FACTOR times that for chroma, so a single pass over the
 *    chroma rows covers both interleaved components.
 *
 * \param ps_pred       : Prediction entry describing the scratch block.
 * \param lum_chrom_blk : CHROM_BLK for chroma; any other value is treated
 *                        as luma.
 *
 * \return
 *    None
 **************************************************************************
 */
void ih264d_pad_on_demand(pred_info_t *ps_pred, UWORD8 lum_chrom_blk)
{
    UWORD8 *pu1_base = ps_pred->pu1_dma_dest_addr;
    UWORD8 *pu1_src_row, *pu1_dst_row;
    WORD32 i4_row_bytes, i4_num_rows, i;
    const WORD32 i4_pod_ht = ps_pred->i1_pod_ht;
    const WORD32 i4_dma_ht = ps_pred->i1_dma_ht;

    if(0 == i4_pod_ht)
    {
        return;
    }

    /* Row width is counted in whole 32-bit words, then converted to bytes */
    i4_row_bytes = ps_pred->u2_u1_ref_buf_wd >> 2;
    if(CHROM_BLK == lum_chrom_blk)
    {
        i4_row_bytes *= YUV420SP_FACTOR;
    }
    i4_row_bytes *= (WORD32)sizeof(UWORD32);

    if(i4_pod_ht < 0)
    {
        /* Top POD: the first valid row sits just below the padded rows */
        i4_num_rows = -i4_pod_ht;
        pu1_dst_row = pu1_base;
        pu1_src_row = pu1_base + i4_num_rows * i4_row_bytes;
    }
    else
    {
        /* Bottom POD: the last valid row sits just above the padded rows */
        i4_num_rows = i4_pod_ht;
        pu1_dst_row = pu1_base + (i4_dma_ht - i4_pod_ht) * i4_row_bytes;
        pu1_src_row = pu1_dst_row - i4_row_bytes;
    }

    /* Source and destination rows are always distinct rows of the block,
     * so memcpy is safe here */
    for(i = 0; i < i4_num_rows; i++)
    {
        memcpy(pu1_dst_row, pu1_src_row, (size_t)i4_row_bytes);
        pu1_dst_row += i4_row_bytes;
    }
}
1574
1575