1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 *******************************************************************************
22 * @file
23 * ih264_inter_pred_filters.c
24 *
25 * @brief
26 * Contains function definitions for inter prediction interpolation filters
27 *
28 * @author
29 * Ittiam
30 *
31 * @par List of Functions:
32 * - ih264_inter_pred_luma_copy
33 * - ih264_interleave_copy
34 * - ih264_inter_pred_luma_horz
35 * - ih264_inter_pred_luma_vert
36 * - ih264_inter_pred_luma_horz_hpel_vert_hpel
37 * - ih264_inter_pred_luma_horz_qpel
38 * - ih264_inter_pred_luma_vert_qpel
39 * - ih264_inter_pred_luma_horz_qpel_vert_qpel
40 * - ih264_inter_pred_luma_horz_hpel_vert_qpel
41 * - ih264_inter_pred_luma_horz_qpel_vert_hpel
42 * - ih264_inter_pred_luma_bilinear
43 * - ih264_inter_pred_chroma
44 *
45 * @remarks
46 * None
47 *
48 *******************************************************************************
49 */
50
51 /*****************************************************************************/
52 /* File Includes */
53 /*****************************************************************************/
54
55 /* User include files */
56 #include "ih264_typedefs.h"
57 #include "ih264_macros.h"
58 #include "ih264_platform_macros.h"
59 #include "ih264_inter_pred_filters.h"
60
61
62 /*****************************************************************************/
63 /* Constant Data variables */
64 /*****************************************************************************/
65
66 /* coefficients for 6 tap filtering*/
67 const WORD32 ih264_g_six_tap[3] ={1,-5,20};
68
69
70 /*****************************************************************************/
71 /* Function definitions . */
72 /*****************************************************************************/
73 /**
74 *******************************************************************************
75 *
76 * @brief
77 * Interprediction luma function for copy
78 *
79 * @par Description:
80 * Copies the array of width 'wd' and height 'ht' from the location pointed
81 * by 'src' to the location pointed by 'dst'
82 *
83 * @param[in] pu1_src
84 * UWORD8 pointer to the source
85 *
86 * @param[out] pu1_dst
87 * UWORD8 pointer to the destination
88 *
89 * @param[in] src_strd
90 * integer source stride
91 *
92 * @param[in] dst_strd
93 * integer destination stride
94 *
95 *
96 * @param[in] ht
97 * integer height of the array
98 *
99 * @param[in] wd
100 * integer width of the array
101 *
102 * @returns
103 *
104 * @remarks
105 * None
106 *
107 *******************************************************************************
108 */
109
ih264_inter_pred_luma_copy(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD32 ht,WORD32 wd,UWORD8 * pu1_tmp,WORD32 dydx)110 void ih264_inter_pred_luma_copy(UWORD8 *pu1_src,
111 UWORD8 *pu1_dst,
112 WORD32 src_strd,
113 WORD32 dst_strd,
114 WORD32 ht,
115 WORD32 wd,
116 UWORD8* pu1_tmp,
117 WORD32 dydx)
118 {
119 WORD32 row, col;
120 UNUSED(pu1_tmp);
121 UNUSED(dydx);
122 for(row = 0; row < ht; row++)
123 {
124 for(col = 0; col < wd; col++)
125 {
126 pu1_dst[col] = pu1_src[col];
127 }
128
129 pu1_src += src_strd;
130 pu1_dst += dst_strd;
131 }
132 }
133
134 /**
135 *******************************************************************************
136 *
137 * @brief
138 * Fucntion for copying to an interleaved destination
139 *
140 * @par Description:
141 * Copies the array of width 'wd' and height 'ht' from the location pointed
142 * by 'src' to the location pointed by 'dst'
143 *
144 * @param[in] pu1_src
145 * UWORD8 pointer to the source
146 *
147 * @param[out] pu1_dst
148 * UWORD8 pointer to the destination
149 *
150 * @param[in] src_strd
151 * integer source stride
152 *
153 * @param[in] dst_strd
154 * integer destination stride
155 *
156 * @param[in] ht
157 * integer height of the array
158 *
159 * @param[in] wd
160 * integer width of the array
161 *
162 * @returns
163 *
164 * @remarks
165 * The alternate elements of src will be copied to alternate locations in dsr
166 * Other locations are not touched
167 *
168 *******************************************************************************
169 */
ih264_interleave_copy(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD32 ht,WORD32 wd)170 void ih264_interleave_copy(UWORD8 *pu1_src,
171 UWORD8 *pu1_dst,
172 WORD32 src_strd,
173 WORD32 dst_strd,
174 WORD32 ht,
175 WORD32 wd)
176 {
177 WORD32 row, col;
178 wd *= 2;
179
180 for(row = 0; row < ht; row++)
181 {
182 for(col = 0; col < wd; col+=2)
183 {
184 pu1_dst[col] = pu1_src[col];
185 }
186
187 pu1_src += src_strd;
188 pu1_dst += dst_strd;
189 }
190 }
191
192 /**
193 *******************************************************************************
194 *
195 * @brief
196 * Interprediction luma filter for horizontal input
197 *
198 * @par Description:
199 * Applies a 6 tap horizontal filter .The output is clipped to 8 bits
200 * sec 8.4.2.2.1 titled "Luma sample interpolation process"
201 *
202 * @param[in] pu1_src
203 * UWORD8 pointer to the source
204 *
205 * @param[out] pu1_dst
206 * UWORD8 pointer to the destination
207 *
208 * @param[in] src_strd
209 * integer source stride
210 *
211 * @param[in] dst_strd
212 * integer destination stride
213 *
214 * @param[in] ht
215 * integer height of the array
216 *
217 * @param[in] wd
218 * integer width of the array
219 *
220 * @returns
221 *
222 * @remarks
223 * None
224 *
225 *******************************************************************************
226 */
ih264_inter_pred_luma_horz(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD32 ht,WORD32 wd,UWORD8 * pu1_tmp,WORD32 dydx)227 void ih264_inter_pred_luma_horz(UWORD8 *pu1_src,
228 UWORD8 *pu1_dst,
229 WORD32 src_strd,
230 WORD32 dst_strd,
231 WORD32 ht,
232 WORD32 wd,
233 UWORD8* pu1_tmp,
234 WORD32 dydx)
235 {
236 WORD32 row, col;
237 WORD16 i2_tmp;
238 UNUSED(pu1_tmp);
239 UNUSED(dydx);
240
241 for(row = 0; row < ht; row++)
242 {
243 for(col = 0; col < wd; col++)
244 {
245 i2_tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
246 i2_tmp = ih264_g_six_tap[0] *
247 (pu1_src[col - 2] + pu1_src[col + 3])
248 + ih264_g_six_tap[1] *
249 (pu1_src[col - 1] + pu1_src[col + 2])
250 + ih264_g_six_tap[2] *
251 (pu1_src[col] + pu1_src[col + 1]);
252 i2_tmp = (i2_tmp + 16) >> 5;
253 pu1_dst[col] = CLIP_U8(i2_tmp);
254 }
255
256 pu1_src += src_strd;
257 pu1_dst += dst_strd;
258 }
259
260 }
261
262 /**
263 *******************************************************************************
264 *
265 * @brief
266 * Interprediction luma filter for vertical input
267 *
268 * @par Description:
269 * Applies a 6 tap vertical filter.The output is clipped to 8 bits
270 * sec 8.4.2.2.1 titled "Luma sample interpolation process"
271 *
272 * @param[in] pu1_src
273 * UWORD8 pointer to the source
274 *
275 * @param[out] pu1_dst
276 * UWORD8 pointer to the destination
277 *
278 * @param[in] src_strd
279 * integer source stride
280 *
281 * @param[in] dst_strd
282 * integer destination stride
283 *
284 * @param[in] ht
285 * integer height of the array
286 *
287 * @param[in] wd
288 * integer width of the array
289 *
290 * @returns
291 *
292 * @remarks
293 * None
294 *
295 *******************************************************************************
296 */
ih264_inter_pred_luma_vert(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD32 ht,WORD32 wd,UWORD8 * pu1_tmp,WORD32 dydx)297 void ih264_inter_pred_luma_vert(UWORD8 *pu1_src,
298 UWORD8 *pu1_dst,
299 WORD32 src_strd,
300 WORD32 dst_strd,
301 WORD32 ht,
302 WORD32 wd,
303 UWORD8* pu1_tmp,
304 WORD32 dydx)
305 {
306 WORD32 row, col;
307 WORD16 i2_tmp;
308 UNUSED(pu1_tmp);
309 UNUSED(dydx);
310
311 for(row = 0; row < ht; row++)
312 {
313 for(col = 0; col < wd; col++)
314 {
315 i2_tmp = 0; /*ih264_g_six_tap[] is the array containing the filter coeffs*/
316 i2_tmp = ih264_g_six_tap[0] *
317 (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
318 + ih264_g_six_tap[1] *
319 (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
320 + ih264_g_six_tap[2] *
321 (pu1_src[col] + pu1_src[col + 1 * src_strd]);
322 i2_tmp = (i2_tmp + 16) >> 5;
323 pu1_dst[col] = CLIP_U8(i2_tmp);
324 }
325 pu1_src += src_strd;
326 pu1_dst += dst_strd;
327 }
328 }
329
330 /*!
331 **************************************************************************
332 * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_hpel \endif
333 *
334 * \brief
335 * This function implements a two stage cascaded six tap filter. It
336 * applies the six tap filter in the horizontal direction on the
337 * predictor values, followed by applying the same filter in the
338 * vertical direction on the output of the first stage. The six tap
339 * filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
340 * interpolation process"
341 *
342 * \param pu1_src: Pointer to the buffer containing the predictor values.
343 * pu1_src could point to the frame buffer or the predictor buffer.
344 * \param pu1_dst: Pointer to the destination buffer where the output of
345 * the six tap filter is stored.
346 * \param ht: Height of the rectangular pixel grid to be interpolated
347 * \param wd: Width of the rectangular pixel grid to be interpolated
348 * \param src_strd: Width of the buffer pointed to by pu1_src.
349 * \param dst_strd: Width of the destination buffer
350 * \param pu1_tmp: temporary buffer.
351 * \param dydx: x and y reference offset for qpel calculations: UNUSED in this function.
352 *
353 * \return
354 * None.
355 *
356 * \note
357 * This function takes the 8 bit predictor values, applies the six tap
358 * filter in the horizontal direction and outputs the result clipped to
359 * 8 bit precision. The input is stored in the buffer pointed to by
360 * pu1_src while the output is stored in the buffer pointed by pu1_dst.
361 * Both pu1_src and pu1_dst could point to the same buffer i.e. the
362 * six tap filter could be done in place.
363 *
364 **************************************************************************
365 */
ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD32 ht,WORD32 wd,UWORD8 * pu1_tmp,WORD32 dydx)366 void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src,
367 UWORD8 *pu1_dst,
368 WORD32 src_strd,
369 WORD32 dst_strd,
370 WORD32 ht,
371 WORD32 wd,
372 UWORD8* pu1_tmp,
373 WORD32 dydx)
374 {
375 WORD32 row, col;
376 WORD32 tmp;
377 WORD16* pi2_pred1_temp;
378 WORD16* pi2_pred1;
379 UNUSED(dydx);
380 pi2_pred1_temp = (WORD16*)pu1_tmp;
381 pi2_pred1_temp += 2;
382 pi2_pred1 = pi2_pred1_temp;
383 for(row = 0; row < ht; row++)
384 {
385 for(col = -2; col < wd + 3; col++)
386 {
387 tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
388 tmp = ih264_g_six_tap[0] *
389 (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
390 + ih264_g_six_tap[1] *
391 (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
392 + ih264_g_six_tap[2] *
393 (pu1_src[col] + pu1_src[col + 1 * src_strd]);
394 pi2_pred1_temp[col] = tmp;
395 }
396 pu1_src += src_strd;
397 pi2_pred1_temp = pi2_pred1_temp + wd + 5;
398 }
399
400 for(row = 0; row < ht; row++)
401 {
402 for(col = 0; col < wd; col++)
403 {
404 tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
405 tmp = ih264_g_six_tap[0] *
406 (pi2_pred1[col - 2] + pi2_pred1[col + 3])
407 + ih264_g_six_tap[1] *
408 (pi2_pred1[col - 1] + pi2_pred1[col + 2])
409 + ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1]);
410 tmp = (tmp + 512) >> 10;
411 pu1_dst[col] = CLIP_U8(tmp);
412 }
413 pi2_pred1 += (wd + 5);
414 pu1_dst += dst_strd;
415 }
416 }
417
418 /*!
419 **************************************************************************
420 * \if Function name : ih264_inter_pred_luma_horz_qpel \endif
421 *
422 * \brief
423 * This routine applies the six tap filter to the predictors in the
424 * horizontal direction. The six tap filtering operation is described in
425 * sec 8.4.2.2.1 titled "Luma sample interpolation process"
426 *
427 * \param pu1_src: Pointer to the buffer containing the predictor values.
428 * pu1_src could point to the frame buffer or the predictor buffer.
429 * \param pu1_dst: Pointer to the destination buffer where the output of
430 * the six tap filter is stored.
431 * \param ht: Height of the rectangular pixel grid to be interpolated
432 * \param wd: Width of the rectangular pixel grid to be interpolated
433 * \param src_strd: Width of the buffer pointed to by pu1_src.
434 * \param dst_strd: Width of the destination buffer
435 * \param pu1_tmp: temporary buffer: UNUSED in this function
436 * \param dydx: x and y reference offset for qpel calculations.
437 *
438 * \return
439 * None.
440 *
441 * \note
442 * This function takes the 8 bit predictor values, applies the six tap
443 * filter in the horizontal direction and outputs the result clipped to
444 * 8 bit precision. The input is stored in the buffer pointed to by
445 * pu1_src while the output is stored in the buffer pointed by pu1_dst.
446 * Both pu1_src and pu1_dst could point to the same buffer i.e. the
447 * six tap filter could be done in place.
448 *
449 **************************************************************************
450 */
ih264_inter_pred_luma_horz_qpel(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD32 ht,WORD32 wd,UWORD8 * pu1_tmp,WORD32 dydx)451 void ih264_inter_pred_luma_horz_qpel(UWORD8 *pu1_src,
452 UWORD8 *pu1_dst,
453 WORD32 src_strd,
454 WORD32 dst_strd,
455 WORD32 ht,
456 WORD32 wd,
457 UWORD8* pu1_tmp,
458 WORD32 dydx)
459 {
460 WORD32 row, col;
461 UWORD8 *pu1_pred1;
462 WORD32 x_offset = dydx & 0x3;
463 UNUSED(pu1_tmp);
464 pu1_pred1 = pu1_src + (x_offset >> 1);
465
466 for(row = 0; row < ht; row++)
467 {
468 for(col = 0; col < wd; col++, pu1_src++, pu1_dst++)
469 {
470 WORD16 i2_temp;
471 /* The logic below implements the following equation
472 i2_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) +
473 20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */
474 i2_temp = pu1_src[-2] + pu1_src[3]
475 - (pu1_src[-1] + pu1_src[2])
476 + ((pu1_src[0] + pu1_src[1] - pu1_src[-1] - pu1_src[2]) << 2)
477 + ((pu1_src[0] + pu1_src[1]) << 4);
478 i2_temp = (i2_temp + 16) >> 5;
479 i2_temp = CLIP_U8(i2_temp);
480 *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1;
481
482 pu1_pred1++;
483 }
484 pu1_dst += dst_strd - wd;
485 pu1_src += src_strd - wd;
486 pu1_pred1 += src_strd - wd;
487 }
488 }
489
490 /*!
491 **************************************************************************
492 * \if Function name : ih264_inter_pred_luma_vert_qpel \endif
493 *
494 * \brief
495 * This routine applies the six tap filter to the predictors in the
496 * vertical direction and interpolates them to obtain pixels at quarter vertical
497 * positions (0, 1/4) and (0, 3/4). The six tap filtering operation is
498 * described in sec 8.4.2.2.1 titled "Luma sample interpolation process"
499 *
500 * \param pu1_src: Pointer to the buffer containing the predictor values.
501 * pu1_src could point to the frame buffer or the predictor buffer.
502 * \param pu1_dst: Pointer to the destination buffer where the output of
503 * the six tap filter is stored.
504 * \param ht: Height of the rectangular pixel grid to be interpolated
505 * \param wd: Width of the rectangular pixel grid to be interpolated
506 * \param src_strd: Width of the buffer pointed to by puc_pred.
507 * \param dst_strd: Width of the destination buffer
508 * \param pu1_tmp: temporary buffer: UNUSED in this function
509 * \param dydx: x and y reference offset for qpel calculations.
510 *
511 * \return
512 * void
513 *
514 * \note
515 * This function takes the 8 bit predictor values, applies the six tap
516 * filter in the vertical direction and outputs the result clipped to
517 * 8 bit precision. The input is stored in the buffer pointed to by
518 * puc_pred while the output is stored in the buffer pointed by puc_dest.
519 * Both puc_pred and puc_dest could point to the same buffer i.e. the
520 * six tap filter could be done in place.
521 *
522 * \para <title>
523 * <paragraph>
524 * ...
525 **************************************************************************
526 */
ih264_inter_pred_luma_vert_qpel(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD32 ht,WORD32 wd,UWORD8 * pu1_tmp,WORD32 dydx)527 void ih264_inter_pred_luma_vert_qpel(UWORD8 *pu1_src,
528 UWORD8 *pu1_dst,
529 WORD32 src_strd,
530 WORD32 dst_strd,
531 WORD32 ht,
532 WORD32 wd,
533 UWORD8* pu1_tmp,
534 WORD32 dydx)
535 {
536 WORD32 row, col;
537 WORD32 y_offset = dydx >> 2;
538 WORD32 off1, off2, off3;
539 UWORD8 *pu1_pred1;
540 UNUSED(pu1_tmp);
541 y_offset = y_offset & 0x3;
542
543 off1 = src_strd;
544 off2 = src_strd << 1;
545 off3 = off1 + off2;
546
547 pu1_pred1 = pu1_src + (y_offset >> 1) * src_strd;
548
549 for(row = 0; row < ht; row++)
550 {
551 for(col = 0; col < wd; col++, pu1_dst++, pu1_src++, pu1_pred1++)
552 {
553 WORD16 i2_temp;
554 /* The logic below implements the following equation
555 i16_temp = puc_pred[-2*src_strd] + puc_pred[3*src_strd] -
556 5 * (puc_pred[-1*src_strd] + puc_pred[2*src_strd]) +
557 20 * (puc_pred[0] + puc_pred[src_strd]); */
558 i2_temp = pu1_src[-off2] + pu1_src[off3]
559 - (pu1_src[-off1] + pu1_src[off2])
560 + ((pu1_src[0] + pu1_src[off1] - pu1_src[-off1] - pu1_src[off2]) << 2)
561 + ((pu1_src[0] + pu1_src[off1]) << 4);
562 i2_temp = (i2_temp + 16) >> 5;
563 i2_temp = CLIP_U8(i2_temp);
564
565 *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1;
566 }
567 pu1_src += src_strd - wd;
568 pu1_pred1 += src_strd - wd;
569 pu1_dst += dst_strd - wd;
570 }
571 }
572
573 /*!
574 **************************************************************************
575 * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_qpel \endif
576 *
577 * \brief
578 * This routine applies the six tap filter to the predictors in the
579 * vertical and horizontal direction and averages them to get pixels at locations
580 * (1/4,1/4), (1/4, 3/4), (3/4, 1/4) & (3/4, 3/4). The six tap filtering operation
581 * is described in sec 8.4.2.2.1 titled "Luma sample interpolation process"
582 *
583 * \param pu1_src: Pointer to the buffer containing the predictor values.
584 * pu1_src could point to the frame buffer or the predictor buffer.
585 * \param pu1_dst: Pointer to the destination buffer where the output of
586 * the six tap filter is stored.
587 * \param wd: Width of the rectangular pixel grid to be interpolated
588 * \param ht: Height of the rectangular pixel grid to be interpolated
589 * \param src_strd: Width of the buffer pointed to by puc_pred.
590 * \param dst_strd: Width of the destination buffer
591 * \param pu1_tmp: temporary buffer, UNUSED in this function
592 * \param dydx: x and y reference offset for qpel calculations.
593 *
594 * \return
595 * void
596 *
597 * \note
598 * This function takes the 8 bit predictor values, applies the six tap
599 * filter in the vertical direction and outputs the result clipped to
600 * 8 bit precision. The input is stored in the buffer pointed to by
601 * puc_pred while the output is stored in the buffer pointed by puc_dest.
602 * Both puc_pred and puc_dest could point to the same buffer i.e. the
603 * six tap filter could be done in place.
604 *
605 * \para <title>
606 * <paragraph>
607 * ...
608 **************************************************************************
609 */
ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD32 ht,WORD32 wd,UWORD8 * pu1_tmp,WORD32 dydx)610 void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src,
611 UWORD8 *pu1_dst,
612 WORD32 src_strd,
613 WORD32 dst_strd,
614 WORD32 ht,
615 WORD32 wd,
616 UWORD8* pu1_tmp,
617 WORD32 dydx)
618 {
619 WORD32 row, col;
620 WORD32 x_offset = dydx & 0x3;
621 WORD32 y_offset = dydx >> 2;
622
623 WORD32 off1, off2, off3;
624 UWORD8* pu1_pred_vert, *pu1_pred_horz;
625 UNUSED(pu1_tmp);
626 y_offset = y_offset & 0x3;
627
628 off1 = src_strd;
629 off2 = src_strd << 1;
630 off3 = off1 + off2;
631
632 pu1_pred_horz = pu1_src + (y_offset >> 1) * src_strd;
633 pu1_pred_vert = pu1_src + (x_offset >> 1);
634
635 for(row = 0; row < ht; row++)
636 {
637 for(col = 0; col < wd;
638 col++, pu1_dst++, pu1_pred_vert++, pu1_pred_horz++)
639 {
640 WORD16 i2_temp_vert, i2_temp_horz;
641 /* The logic below implements the following equation
642 i2_temp = puc_pred[-2*src_strd] + puc_pred[3*src_strd] -
643 5 * (puc_pred[-1*src_strd] + puc_pred[2*src_strd]) +
644 20 * (puc_pred[0] + puc_pred[src_strd]); */
645 i2_temp_vert = pu1_pred_vert[-off2] + pu1_pred_vert[off3]
646 - (pu1_pred_vert[-off1] + pu1_pred_vert[off2])
647 + ((pu1_pred_vert[0] + pu1_pred_vert[off1]
648 - pu1_pred_vert[-off1]
649 - pu1_pred_vert[off2]) << 2)
650 + ((pu1_pred_vert[0] + pu1_pred_vert[off1]) << 4);
651 i2_temp_vert = (i2_temp_vert + 16) >> 5;
652 i2_temp_vert = CLIP_U8(i2_temp_vert);
653
654 /* The logic below implements the following equation
655 i16_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) +
656 20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */
657 i2_temp_horz = pu1_pred_horz[-2] + pu1_pred_horz[3]
658 - (pu1_pred_horz[-1] + pu1_pred_horz[2])
659 + ((pu1_pred_horz[0] + pu1_pred_horz[1]
660 - pu1_pred_horz[-1]
661 - pu1_pred_horz[2]) << 2)
662 + ((pu1_pred_horz[0] + pu1_pred_horz[1]) << 4);
663 i2_temp_horz = (i2_temp_horz + 16) >> 5;
664 i2_temp_horz = CLIP_U8(i2_temp_horz);
665 *pu1_dst = (i2_temp_vert + i2_temp_horz + 1) >> 1;
666 }
667 pu1_pred_vert += (src_strd - wd);
668 pu1_pred_horz += (src_strd - wd);
669 pu1_dst += (dst_strd - wd);
670 }
671 }
672
673 /*!
674 **************************************************************************
675 * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_hpel \endif
676 *
677 * \brief
678 * This routine applies the six tap filter to the predictors in the vertical
679 * and horizontal direction to obtain the pixel at (1/2,1/2). It then interpolates
680 * pixel at (0,1/2) and (1/2,1/2) to obtain pixel at (1/4,1/2). Similarly for (3/4,1/2).
681 * The six tap filtering operation is described in sec 8.4.2.2.1 titled
682 * "Luma sample interpolation process"
683 *
684 * \param pu1_src: Pointer to the buffer containing the predictor values.
685 * pu1_src could point to the frame buffer or the predictor buffer.
686 * \param pu1_dst: Pointer to the destination buffer where the output of
687 * the six tap filter followed by interpolation is stored.
688 * \param wd: Width of the rectangular pixel grid to be interpolated
689 * \param ht: Height of the rectangular pixel grid to be interpolated
690 * \param src_strd: Width of the buffer pointed to by puc_pred.
691 * \param dst_strd: Width of the destination buffer
692 * \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter.
693 * \param dydx: x and y reference offset for qpel calculations.
694 *
695 * \return
696 * void
697 *
698 * \note
699 * This function takes the 8 bit predictor values, applies the six tap
700 * filter in the vertical direction and outputs the result clipped to
701 * 8 bit precision. The input is stored in the buffer pointed to by
702 * puc_pred while the output is stored in the buffer pointed by puc_dest.
703 * Both puc_pred and puc_dest could point to the same buffer i.e. the
704 * six tap filter could be done in place.
705 *
706 * \para <title>
707 * <paragraph>
708 * ...
709 **************************************************************************
710 */
ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD32 ht,WORD32 wd,UWORD8 * pu1_tmp,WORD32 dydx)711 void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src,
712 UWORD8 *pu1_dst,
713 WORD32 src_strd,
714 WORD32 dst_strd,
715 WORD32 ht,
716 WORD32 wd,
717 UWORD8* pu1_tmp,
718 WORD32 dydx)
719 {
720 WORD32 row, col;
721 WORD32 tmp;
722 WORD16* pi2_pred1_temp, *pi2_pred1;
723 UWORD8* pu1_dst_tmp;
724 WORD32 x_offset = dydx & 0x3;
725 WORD16 i2_macro;
726
727 pi2_pred1_temp = (WORD16*)pu1_tmp;
728 pi2_pred1_temp += 2;
729 pi2_pred1 = pi2_pred1_temp;
730 pu1_dst_tmp = pu1_dst;
731
732 for(row = 0; row < ht; row++)
733 {
734 for(col = -2; col < wd + 3; col++)
735 {
736 tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
737 tmp = ih264_g_six_tap[0] *
738 (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
739 + ih264_g_six_tap[1] *
740 (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
741 + ih264_g_six_tap[2] *
742 (pu1_src[col] + pu1_src[col + 1 * src_strd]);
743 pi2_pred1_temp[col] = tmp;
744 }
745
746 pu1_src += src_strd;
747 pi2_pred1_temp = pi2_pred1_temp + wd + 5;
748 }
749
750 pi2_pred1_temp = pi2_pred1;
751 for(row = 0; row < ht; row++)
752 {
753 for(col = 0; col < wd; col++)
754 {
755 tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
756 tmp = ih264_g_six_tap[0] *
757 (pi2_pred1[col - 2] + pi2_pred1[col + 3])
758 + ih264_g_six_tap[1] *
759 (pi2_pred1[col - 1] + pi2_pred1[col + 2])
760 + ih264_g_six_tap[2] *
761 (pi2_pred1[col] + pi2_pred1[col + 1]);
762 tmp = (tmp + 512) >> 10;
763 pu1_dst[col] = CLIP_U8(tmp);
764 }
765 pi2_pred1 += (wd + 5);
766 pu1_dst += dst_strd;
767 }
768
769 pu1_dst = pu1_dst_tmp;
770 pi2_pred1_temp += (x_offset >> 1);
771 for(row = ht; row != 0; row--)
772 {
773 for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++)
774 {
775 UWORD8 uc_temp;
776 /* Clipping the output of the six tap filter obtained from the
777 first stage of the 2d filter stage */
778 *pi2_pred1_temp = (*pi2_pred1_temp + 16) >> 5;
779 i2_macro = (*pi2_pred1_temp);
780 uc_temp = CLIP_U8(i2_macro);
781 *pu1_dst = (*pu1_dst + uc_temp + 1) >> 1;
782 }
783 pi2_pred1_temp += 5;
784 pu1_dst += dst_strd - wd;
785 }
786 }
787
788 /*!
789 **************************************************************************
790 * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_qpel \endif
791 *
792 * \brief
793 * This routine applies the six tap filter to the predictors in the horizontal
794 * and vertical direction to obtain the pixel at (1/2,1/2). It then interpolates
795 * pixel at (1/2,0) and (1/2,1/2) to obtain pixel at (1/2,1/4). Similarly for (1/2,3/4).
796 * The six tap filtering operation is described in sec 8.4.2.2.1 titled
797 * "Luma sample interpolation process"
798 *
799 * \param pu1_src: Pointer to the buffer containing the predictor values.
800 * pu1_src could point to the frame buffer or the predictor buffer.
801 * \param pu1_dst: Pointer to the destination buffer where the output of
802 * the six tap filter followed by interpolation is stored.
803 * \param wd: Width of the rectangular pixel grid to be interpolated
804 * \param ht: Height of the rectangular pixel grid to be interpolated
805 * \param src_strd: Width of the buffer pointed to by puc_pred.
806 * \param dst_strd: Width of the destination buffer
807 * \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter.
808 * \param dydx: x and y reference offset for qpel calculations.
809 *
810 * \return
811 * void
812 *
813 * \note
814 * This function takes the 8 bit predictor values, applies the six tap
815 * filter in the vertical direction and outputs the result clipped to
816 * 8 bit precision. The input is stored in the buffer pointed to by
817 * puc_pred while the output is stored in the buffer pointed by puc_dest.
818 * Both puc_pred and puc_dest could point to the same buffer i.e. the
819 * six tap filter could be done in place.
820 *
821 * \para <title>
822 * <paragraph>
823 * ...
824 **************************************************************************
825 */
ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD32 ht,WORD32 wd,UWORD8 * pu1_tmp,WORD32 dydx)826 void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src,
827 UWORD8 *pu1_dst,
828 WORD32 src_strd,
829 WORD32 dst_strd,
830 WORD32 ht,
831 WORD32 wd,
832 UWORD8* pu1_tmp,
833 WORD32 dydx)
834 {
835
836 WORD32 row, col;
837 WORD32 tmp;
838 WORD32 y_offset = dydx >> 2;
839 WORD16* pi2_pred1_temp, *pi2_pred1;
840 UWORD8* pu1_dst_tmp;
841 //WORD32 x_offset = dydx & 0x3;
842 WORD16 i2_macro;
843
844 y_offset = y_offset & 0x3;
845
846 pi2_pred1_temp = (WORD16*)pu1_tmp;
847 pi2_pred1_temp += 2 * wd;
848 pi2_pred1 = pi2_pred1_temp;
849 pu1_dst_tmp = pu1_dst;
850 pu1_src -= 2 * src_strd;
851 for(row = -2; row < ht + 3; row++)
852 {
853 for(col = 0; col < wd; col++)
854 {
855 tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
856 tmp = ih264_g_six_tap[0] * (pu1_src[col - 2] + pu1_src[col + 3])
857 + ih264_g_six_tap[1] * (pu1_src[col - 1] + pu1_src[col + 2])
858 + ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1]);
859 pi2_pred1_temp[col - 2 * wd] = tmp;
860 }
861
862 pu1_src += src_strd;
863 pi2_pred1_temp += wd;
864 }
865 pi2_pred1_temp = pi2_pred1;
866 for(row = 0; row < ht; row++)
867 {
868 for(col = 0; col < wd; col++)
869 {
870 tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
871 tmp = ih264_g_six_tap[0] * (pi2_pred1[col - 2 * wd] + pi2_pred1[col + 3 * wd])
872 + ih264_g_six_tap[1] * (pi2_pred1[col - 1 * wd] + pi2_pred1[col + 2 * wd])
873 + ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1 * wd]);
874 tmp = (tmp + 512) >> 10;
875 pu1_dst[col] = CLIP_U8(tmp);
876 }
877 pi2_pred1 += wd;
878 pu1_dst += dst_strd;
879 }
880 pu1_dst = pu1_dst_tmp;
881 pi2_pred1_temp += (y_offset >> 1) * wd;
882 for(row = ht; row != 0; row--)
883
884 {
885 for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++)
886 {
887 UWORD8 u1_temp;
888 /* Clipping the output of the six tap filter obtained from the
889 first stage of the 2d filter stage */
890 *pi2_pred1_temp = (*pi2_pred1_temp + 16) >> 5;
891 i2_macro = (*pi2_pred1_temp);
892 u1_temp = CLIP_U8(i2_macro);
893 *pu1_dst = (*pu1_dst + u1_temp + 1) >> 1;
894 }
895 //pi16_pred1_temp += wd;
896 pu1_dst += dst_strd - wd;
897 }
898 }
899
900 /**
901 *******************************************************************************
902 * function:ih264_inter_pred_luma_bilinear
903 *
904 * @brief
905 * This routine applies the bilinear filter to the predictors .
906 * The filtering operation is described in
907 * sec 8.4.2.2.1 titled "Luma sample interpolation process"
908 *
909 * @par Description:
910 \note
911 * This function is called to obtain pixels lying at the following
912 * locations (1/4,1), (3/4,1),(1,1/4), (1,3/4) ,(1/4,1/2), (3/4,1/2),(1/2,1/4), (1/2,3/4),(3/4,1/4),(1/4,3/4),(3/4,3/4)&& (1/4,1/4) .
913 * The function averages the two adjacent values from the two input arrays in horizontal direction.
914 *
915 *
916 * @param[in] pu1_src1:
917 * UWORD8 Pointer to the buffer containing the first input array.
918 *
919 * @param[in] pu1_src2:
920 * UWORD8 Pointer to the buffer containing the second input array.
921 *
922 * @param[out] pu1_dst
923 * UWORD8 pointer to the destination where the output of bilinear filter is stored.
924 *
925 * @param[in] src_strd1
926 * Stride of the first input buffer
927 *
928 * @param[in] src_strd2
929 * Stride of the second input buffer
930 *
931 * @param[in] dst_strd
932 * integer destination stride of pu1_dst
933 *
934 * @param[in] ht
935 * integer height of the array
936 *
937 * @param[in] wd
938 * integer width of the array
939 *
940 * @returns
941 *
942 * @remarks
943 * None
944 *
945 *******************************************************************************
946 */
ih264_inter_pred_luma_bilinear(UWORD8 * pu1_src1,UWORD8 * pu1_src2,UWORD8 * pu1_dst,WORD32 src_strd1,WORD32 src_strd2,WORD32 dst_strd,WORD32 ht,WORD32 wd)947 void ih264_inter_pred_luma_bilinear(UWORD8 *pu1_src1,
948 UWORD8 *pu1_src2,
949 UWORD8 *pu1_dst,
950 WORD32 src_strd1,
951 WORD32 src_strd2,
952 WORD32 dst_strd,
953 WORD32 ht,
954 WORD32 wd)
955 {
956 WORD32 row, col;
957 WORD16 i2_tmp;
958
959 for(row = 0; row < ht; row++)
960 {
961 for(col = 0; col < wd; col++)
962 {
963 i2_tmp = pu1_src1[col] + pu1_src2[col];
964 i2_tmp = (i2_tmp + 1) >> 1;
965 pu1_dst[col] = CLIP_U8(i2_tmp);
966 }
967 pu1_src1 += src_strd1;
968 pu1_src2 += src_strd2;
969 pu1_dst += dst_strd;
970 }
971
972 }
973
974 /**
975 *******************************************************************************
976 *
977 * @brief
978 * Interprediction chroma filter
979 *
980 * @par Description:
981 * Applies filtering to chroma samples as mentioned in
982 * sec 8.4.2.2.2 titled "chroma sample interpolation process"
983 *
984 * @param[in] pu1_src
985 * UWORD8 pointer to the source containing alternate U and V samples
986 *
987 * @param[out] pu1_dst
988 * UWORD8 pointer to the destination
989 *
990 * @param[in] src_strd
991 * integer source stride
992 *
993 * @param[in] dst_strd
994 * integer destination stride
995 *
996 * @param[in] u1_dx
997 * dx value where the sample is to be produced(refer sec 8.4.2.2.2 )
998 *
999 * @param[in] u1_dy
1000 * dy value where the sample is to be produced(refer sec 8.4.2.2.2 )
1001 *
1002 * @param[in] ht
1003 * integer height of the array
1004 *
1005 * @param[in] wd
1006 * integer width of the array
1007 *
1008 * @returns
1009 *
1010 * @remarks
1011 * None
1012 *
1013 *******************************************************************************
1014 */
ih264_inter_pred_chroma(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD32 dx,WORD32 dy,WORD32 ht,WORD32 wd)1015 void ih264_inter_pred_chroma(UWORD8 *pu1_src,
1016 UWORD8 *pu1_dst,
1017 WORD32 src_strd,
1018 WORD32 dst_strd,
1019 WORD32 dx,
1020 WORD32 dy,
1021 WORD32 ht,
1022 WORD32 wd)
1023 {
1024 WORD32 row, col;
1025 WORD16 i2_tmp;
1026
1027 for(row = 0; row < ht; row++)
1028 {
1029 for(col = 0; col < 2 * wd; col++)
1030 {
1031 i2_tmp = 0; /* applies equation (8-266) in section 8.4.2.2.2 */
1032 i2_tmp = (8 - dx) * (8 - dy) * pu1_src[col]
1033 + (dx) * (8 - dy) * pu1_src[col + 2]
1034 + (8 - dx) * (dy) * (pu1_src + src_strd)[col]
1035 + (dx) * (dy) * (pu1_src + src_strd)[col + 2];
1036 i2_tmp = (i2_tmp + 32) >> 6;
1037 pu1_dst[col] = CLIP_U8(i2_tmp);
1038 }
1039 pu1_src += src_strd;
1040 pu1_dst += dst_strd;
1041 }
1042 }
1043