1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264e_mc.c
25 *
26 * @brief
27 * Contains definition of functions for motion compensation
28 *
29 * @author
30 * ittiam
31 *
32 * @par List of Functions:
33 * - ih264e_motion_comp_luma()
34 * - ih264e_motion_comp_chroma()
35 *
36 * @remarks
37 * None
38 *
39 *******************************************************************************
40 */
41
42 /*****************************************************************************/
43 /* File Includes */
44 /*****************************************************************************/
45
46 /* System include files */
47 #include <stdio.h>
48
49 /* User include files */
50 #include "ih264_typedefs.h"
51 #include "ih264_defs.h"
52 #include "iv2.h"
53 #include "ive2.h"
54 #include "ime_distortion_metrics.h"
55 #include "ime_defs.h"
56 #include "ime_structs.h"
57 #include "ih264_structs.h"
58 #include "ih264_inter_pred_filters.h"
59 #include "ih264_mem_fns.h"
60 #include "ih264_padding.h"
61 #include "ih264_intra_pred_filters.h"
62 #include "ih264_deblk_edge_filters.h"
63 #include "ih264_trans_quant_itrans_iquant.h"
64 #include "ih264_cabac_tables.h"
65 #include "ih264e_defs.h"
66 #include "ih264e_error.h"
67 #include "ih264e_bitstream.h"
68 #include "irc_cntrl_param.h"
69 #include "irc_frame_info_collector.h"
70 #include "ih264e_rate_control.h"
71 #include "ih264e_cabac_structs.h"
72 #include "ih264e_structs.h"
73 #include "ih264e_mc.h"
74 #include "ih264e_half_pel.h"
75
76 /*****************************************************************************/
77 /* Function Definitions */
78 /*****************************************************************************/
79
80 /**
81 ******************************************************************************
82 *
83 * @brief
84 * performs motion compensation for a luma mb for the given mv.
85 *
86 * @par Description
87 * This routine performs motion compensation of an inter mb. When the inter
88 * mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
89 * to pred buffer. In this case the function returns pointer and stride of the
90 * ref. buffer and this info is used in place of pred buffer else where.
91 * In other cases, the pred buffer is populated via copy / filtering + copy
92 * (q pel cases) and returned.
93 *
94 * @param[in] ps_proc
95 * pointer to current proc ctxt
96 *
97 * @param[out] pu1_pseudo_pred
98 * pseudo prediction buffer
99 *
100 * @param[out] u4_pseudo_pred_strd
101 * pseudo pred buffer stride
102 *
103 * @return none
104 *
105 * @remarks Assumes half pel buffers for the entire frame are populated.
106 *
107 ******************************************************************************
108 */
ih264e_motion_comp_luma(process_ctxt_t * ps_proc,UWORD8 ** pu1_pseudo_pred,WORD32 * pi4_pseudo_pred_strd)109 void ih264e_motion_comp_luma(process_ctxt_t *ps_proc, UWORD8 **pu1_pseudo_pred,
110 WORD32 *pi4_pseudo_pred_strd)
111 {
112 /* codec context */
113 codec_t *ps_codec = ps_proc->ps_codec;
114
115 /* me ctxt */
116 me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
117
118 /* Pointer to the structure having motion vectors, size and position of curr partitions */
119 enc_pu_t *ps_curr_pu;
120
121 /* pointers to full pel, half pel x, half pel y, half pel xy reference buffer */
122 UWORD8 *pu1_ref[4];
123
124 /* pred buffer ptr */
125 UWORD8 *pu1_pred;
126
127 /* strides of full pel, half pel x, half pel y, half pel xy reference buffer */
128 WORD32 i4_ref_strd[4];
129
130 /* pred buffer stride */
131 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
132
133 /* full pel motion vectors */
134 WORD32 u4_mv_x_full, u4_mv_y_full;
135
136 /* half pel motion vectors */
137 WORD32 u4_mv_x_hpel, u4_mv_y_hpel;
138
139 /* quarter pel motion vectors */
140 WORD32 u4_mv_x_qpel, u4_mv_y_qpel;
141
142 /* width & height of the partition */
143 UWORD32 wd, ht;
144
145 /* partition idx */
146 UWORD32 u4_num_prtn;
147
148 /* half / qpel coefficient */
149 UWORD32 u4_subpel_factor;
150
151 /* BIPRED Flag */
152 WORD32 i4_bipred_flag;
153
154 /* temp var */
155 UWORD32 u4_lkup_idx1;
156
157 /* Init */
158 i4_ref_strd[0] = ps_proc->i4_rec_strd;
159
160 i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] =
161 ps_me_ctxt->u4_subpel_buf_strd;
162
163 for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions;
164 u4_num_prtn++)
165 {
166 mv_t *ps_curr_mv;
167
168 /* update ptr to curr partition */
169 ps_curr_pu = ps_proc->ps_pu + u4_num_prtn;
170
171 /* Set no no bipred */
172 i4_bipred_flag = 0;
173
174 switch (ps_curr_pu->b2_pred_mode)
175 {
176 case PRED_L0:
177 ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
178 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
179 break;
180
181 case PRED_L1:
182 ps_curr_mv = &ps_curr_pu->s_me_info[1].s_mv;
183 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[1];
184 break;
185
186 case PRED_BI:
187 /*
188 * In case of PRED_BI, we only need to ensure that
189 * the reference buffer that gets selected is
190 * ps_proc->pu1_best_subpel_buf
191 */
192
193 /* Dummy */
194 ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
195 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
196
197 i4_bipred_flag = 1;
198 break;
199
200 default:
201 ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
202 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
203 break;
204
205 }
206
207 /* get full pel mv's (full pel units) */
208 u4_mv_x_full = ps_curr_mv->i2_mvx >> 2;
209 u4_mv_y_full = ps_curr_mv->i2_mvy >> 2;
210
211 /* get half pel mv's */
212 u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
213 u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
214
215 /* get quarter pel mv's */
216 u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
217 u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
218
219 /* width and height of partition */
220 wd = (ps_curr_pu->b4_wd + 1) << 2;
221 ht = (ps_curr_pu->b4_ht + 1) << 2;
222
223 /* decision ? qpel/hpel, fpel */
224 u4_subpel_factor = (u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2)
225 + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel);
226
227 /* Move ref to position given by MV */
228 pu1_ref[0] += ((u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full);
229
230 /* Sub pel ptrs/ Biperd pointers init */
231 pu1_ref[1] = ps_proc->pu1_best_subpel_buf;
232 i4_ref_strd[1] = ps_proc->u4_bst_spel_buf_strd;
233
234 /* update pred buff ptr */
235 pu1_pred = ps_proc->pu1_pred_mb
236 + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
237 + 4 * ps_curr_pu->b4_pos_x;
238
239 /* u4_lkup_idx1 will be non zero for half pel and bipred */
240 u4_lkup_idx1 = ((u4_subpel_factor >> 2) != 0) || i4_bipred_flag;
241
242 {
243 /********************************************************************/
244 /* if the block is P16x16 MB and mv are not quarter pel motion */
245 /* vectors, there is no need to copy 16x16 unit from reference frame*/
246 /* to pred buffer. We might as well send the reference frame buffer */
247 /* pointer as pred buffer (ofc with updated stride) to fwd transform*/
248 /* and inverse transform unit. */
249 /********************************************************************/
250 if (ps_proc->u4_num_sub_partitions == 1)
251 {
252 *pu1_pseudo_pred = pu1_ref[u4_lkup_idx1];
253 *pi4_pseudo_pred_strd = i4_ref_strd[u4_lkup_idx1];
254
255 }
256 /*
257 * Copying half pel or full pel to prediction buffer
258 * Currently ps_proc->u4_num_sub_partitions will always be 1 as we only support 16x16 in P mbs
259 */
260 else
261 {
262 ps_codec->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1],
263 pu1_pred,
264 i4_ref_strd[u4_lkup_idx1],
265 i4_pred_strd, ht, wd, NULL,
266 0);
267 }
268
269 }
270 }
271 }
272
273 /**
274 ******************************************************************************
275 *
276 * @brief
277 * performs motion compensation for chroma mb
278 *
279 * @par Description
280 * Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
281 * according to the motion vectors given
282 *
283 * @param[in] ps_proc
284 * pointer to current proc ctxt
285 *
286 * @return none
287 *
288 * @remarks Assumes half pel and quarter pel buffers for the entire frame are
289 * populated.
290 ******************************************************************************
291 */
ih264e_motion_comp_chroma(process_ctxt_t * ps_proc)292 void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc)
293 {
294 /* codec context */
295 codec_t *ps_codec = ps_proc->ps_codec;
296
297 /* Pointer to the structure having motion vectors, size and position of curr partitions */
298 enc_pu_t *ps_curr_pu;
299
300 /* pointers to full pel, half pel x, half pel y, half pel xy reference buffer */
301 UWORD8 *pu1_ref;
302
303 /* pred buffer ptr */
304 UWORD8 *pu1_pred;
305
306 /* strides of full pel reference buffer */
307 WORD32 i4_ref_strd = ps_proc->i4_rec_strd;
308
309 /* pred buffer stride */
310 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
311
312 /* full pel motion vectors */
313 WORD32 u4_mv_x_full, u4_mv_y_full;
314
315 /* half pel motion vectors */
316 WORD32 u4_mv_x_hpel, u4_mv_y_hpel;
317
318 /* quarter pel motion vectors */
319 WORD32 u4_mv_x_qpel, u4_mv_y_qpel;
320
321 /* width & height of the partition */
322 UWORD32 wd, ht;
323
324 /* partition idx */
325 UWORD32 u4_num_prtn;
326
327 WORD32 u4_mv_x;
328 WORD32 u4_mv_y;
329 UWORD8 u1_dx, u1_dy;
330
331 for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions;
332 u4_num_prtn++)
333 {
334 mv_t *ps_curr_mv;
335
336 ps_curr_pu = ps_proc->ps_pu + u4_num_prtn;
337
338 if (ps_curr_pu->b2_pred_mode != PRED_BI)
339 {
340 ps_curr_mv = &ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv;
341 pu1_ref = ps_proc->apu1_ref_buf_chroma[ps_curr_pu->b2_pred_mode];
342
343 u4_mv_x = ps_curr_mv->i2_mvx >> 3;
344 u4_mv_y = ps_curr_mv->i2_mvy >> 3;
345
346 /* corresponds to full pel motion vector in luma, but in chroma corresponds to pel formed wiith dx, dy =4 */
347 u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
348 u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
349
350 /* get half pel mv's */
351 u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
352 u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
353
354 /* get quarter pel mv's */
355 u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
356 u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
357
358 /* width and height of sub macro block */
359 wd = (ps_curr_pu->b4_wd + 1) << 1;
360 ht = (ps_curr_pu->b4_ht + 1) << 1;
361
362 /* move the pointers so that they point to the motion compensated locations */
363 pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
364
365 pu1_pred = ps_proc->pu1_pred_mb
366 + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
367 + 2 * ps_curr_pu->b4_pos_x;
368
369 u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel);
370 u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel);
371
372 /* cases where u1_dx = 0 or u1_dy = 0 are dealt separately in neon with
373 * separate functions for better performance
374 *
375 * ih264_inter_pred_chroma_dx_zero_a9q
376 * and
377 * ih264_inter_pred_chroma_dy_zero_a9q
378 */
379
380 ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd,
381 i4_pred_strd, u1_dx, u1_dy, ht, wd);
382 }
383 else /* If the pred mode is PRED_BI */
384 {
385 /*
386 * We need to interpolate the L0 and L1 ref pics with the chorma MV
387 * then use them to average for bilinrar interpred
388 */
389 WORD32 i4_predmode;
390 UWORD8 *pu1_ref_buf[2];
391
392 /* Temporary buffers to store the interpolated value from L0 and L1 */
393 pu1_ref_buf[PRED_L0] = ps_proc->apu1_subpel_buffs[0];
394 pu1_ref_buf[PRED_L1] = ps_proc->apu1_subpel_buffs[1];
395
396
397 for (i4_predmode = 0; i4_predmode < PRED_BI; i4_predmode++)
398 {
399 ps_curr_mv = &ps_curr_pu->s_me_info[i4_predmode].s_mv;
400 pu1_ref = ps_proc->apu1_ref_buf_chroma[i4_predmode];
401
402 u4_mv_x = ps_curr_mv->i2_mvx >> 3;
403 u4_mv_y = ps_curr_mv->i2_mvy >> 3;
404
405 /*
406 * corresponds to full pel motion vector in luma, but in chroma
407 * corresponds to pel formed wiith dx, dy =4
408 */
409 u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
410 u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
411
412 /* get half pel mv's */
413 u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
414 u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
415
416 /* get quarter pel mv's */
417 u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
418 u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
419
420 /* width and height of sub macro block */
421 wd = (ps_curr_pu->b4_wd + 1) << 1;
422 ht = (ps_curr_pu->b4_ht + 1) << 1;
423
424 /* move the pointers so that they point to the motion compensated locations */
425 pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
426
427 pu1_pred = ps_proc->pu1_pred_mb
428 + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
429 + 2 * ps_curr_pu->b4_pos_x;
430
431 u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1)
432 + (u4_mv_x_qpel);
433 u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1)
434 + (u4_mv_y_qpel);
435
436 ps_codec->pf_inter_pred_chroma(pu1_ref,
437 pu1_ref_buf[i4_predmode],
438 i4_ref_strd, MB_SIZE, u1_dx,
439 u1_dy, ht, wd);
440 }
441
442 ps_codec->pf_inter_pred_luma_bilinear(pu1_ref_buf[PRED_L0],
443 pu1_ref_buf[PRED_L1], pu1_pred,
444 MB_SIZE, MB_SIZE,
445 i4_pred_strd, MB_SIZE >> 1,
446 MB_SIZE);
447 }
448 }
449 }
450