• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include <stdlib.h>
13 #include "filter.h"
14 #include "vpx_ports/mem.h"
15 
16 DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
17 {
18     { 128,   0 },
19     { 112,  16 },
20     {  96,  32 },
21     {  80,  48 },
22     {  64,  64 },
23     {  48,  80 },
24     {  32,  96 },
25     {  16, 112 }
26 };
27 
28 DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
29 {
30 
31     { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
32     { 0, -6,  123,   12,  -1,  0 },
33     { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
34     { 0, -9,   93,   50,  -6,  0 },
35     { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
36     { 0, -6,   50,   93,  -9,  0 },
37     { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
38     { 0, -1,   12,  123,  -6,  0 },
39 };
40 
filter_block2d_first_pass(unsigned char * src_ptr,int * output_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const short * vp8_filter)41 static void filter_block2d_first_pass
42 (
43     unsigned char *src_ptr,
44     int *output_ptr,
45     unsigned int src_pixels_per_line,
46     unsigned int pixel_step,
47     unsigned int output_height,
48     unsigned int output_width,
49     const short *vp8_filter
50 )
51 {
52     unsigned int i, j;
53     int  Temp;
54 
55     for (i = 0; i < output_height; i++)
56     {
57         for (j = 0; j < output_width; j++)
58         {
59             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
60                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
61                    ((int)src_ptr[0]                 * vp8_filter[2]) +
62                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
63                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
64                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
65                    (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
66 
67             /* Normalize back to 0-255 */
68             Temp = Temp >> VP8_FILTER_SHIFT;
69 
70             if (Temp < 0)
71                 Temp = 0;
72             else if (Temp > 255)
73                 Temp = 255;
74 
75             output_ptr[j] = Temp;
76             src_ptr++;
77         }
78 
79         /* Next row... */
80         src_ptr    += src_pixels_per_line - output_width;
81         output_ptr += output_width;
82     }
83 }
84 
filter_block2d_second_pass(int * src_ptr,unsigned char * output_ptr,int output_pitch,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const short * vp8_filter)85 static void filter_block2d_second_pass
86 (
87     int *src_ptr,
88     unsigned char *output_ptr,
89     int output_pitch,
90     unsigned int src_pixels_per_line,
91     unsigned int pixel_step,
92     unsigned int output_height,
93     unsigned int output_width,
94     const short *vp8_filter
95 )
96 {
97     unsigned int i, j;
98     int  Temp;
99 
100     for (i = 0; i < output_height; i++)
101     {
102         for (j = 0; j < output_width; j++)
103         {
104             /* Apply filter */
105             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
106                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
107                    ((int)src_ptr[0]                 * vp8_filter[2]) +
108                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
109                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
110                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
111                    (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
112 
113             /* Normalize back to 0-255 */
114             Temp = Temp >> VP8_FILTER_SHIFT;
115 
116             if (Temp < 0)
117                 Temp = 0;
118             else if (Temp > 255)
119                 Temp = 255;
120 
121             output_ptr[j] = (unsigned char)Temp;
122             src_ptr++;
123         }
124 
125         /* Start next row */
126         src_ptr    += src_pixels_per_line - output_width;
127         output_ptr += output_pitch;
128     }
129 }
130 
131 
/*
 * 2-D 6-tap filtering of a 4x4 block: a horizontal pass into a temporary
 * int buffer followed by a vertical pass into output_ptr.
 *
 *  src_ptr             : top-left sample of the 4x4 source block.
 *  output_ptr          : destination block.
 *  src_pixels_per_line : source stride.
 *  output_pitch        : destination stride.
 *  HFilter             : six taps for the horizontal pass.
 *  VFilter             : six taps for the vertical pass.
 */
static void filter_block2d
(
    unsigned char  *src_ptr,
    unsigned char  *output_ptr,
    unsigned int src_pixels_per_line,
    int output_pitch,
    const short  *HFilter,
    const short  *VFilter
)
{
    /*
     * The vertical taps reach 2 rows above and 3 rows below each output row,
     * so 4 + 2 + 3 = 9 horizontally filtered rows of 4 samples are needed.
     */
    enum
    {
        BLOCK_SIZE = 4,
        ROWS_ABOVE = 2,
        ROWS_BELOW = 3,
        TEMP_ROWS  = BLOCK_SIZE + ROWS_ABOVE + ROWS_BELOW
    };
    int FData[TEMP_ROWS * BLOCK_SIZE];  /* Temp data buffer used in filtering */

    /* First filter 1-D horizontally, starting two rows above the block... */
    filter_block2d_first_pass(src_ptr - (ROWS_ABOVE * src_pixels_per_line), FData,
                              src_pixels_per_line, 1, TEMP_ROWS, BLOCK_SIZE, HFilter);

    /* ...then vertically, beginning at the first row that belongs to the block. */
    filter_block2d_second_pass(FData + ROWS_ABOVE * BLOCK_SIZE, output_ptr, output_pitch,
                               BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE, VFilter);
}
150 
151 
vp8_sixtap_predict_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)152 void vp8_sixtap_predict_c
153 (
154     unsigned char  *src_ptr,
155     int   src_pixels_per_line,
156     int  xoffset,
157     int  yoffset,
158     unsigned char *dst_ptr,
159     int dst_pitch
160 )
161 {
162     const short  *HFilter;
163     const short  *VFilter;
164 
165     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
166     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
167 
168     filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
169 }
vp8_sixtap_predict8x8_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)170 void vp8_sixtap_predict8x8_c
171 (
172     unsigned char  *src_ptr,
173     int  src_pixels_per_line,
174     int  xoffset,
175     int  yoffset,
176     unsigned char *dst_ptr,
177     int  dst_pitch
178 )
179 {
180     const short  *HFilter;
181     const short  *VFilter;
182     int FData[13*16];   /* Temp data buffer used in filtering */
183 
184     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
185     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
186 
187     /* First filter 1-D horizontally... */
188     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
189 
190 
191     /* then filter verticaly... */
192     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
193 
194 }
195 
vp8_sixtap_predict8x4_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)196 void vp8_sixtap_predict8x4_c
197 (
198     unsigned char  *src_ptr,
199     int  src_pixels_per_line,
200     int  xoffset,
201     int  yoffset,
202     unsigned char *dst_ptr,
203     int  dst_pitch
204 )
205 {
206     const short  *HFilter;
207     const short  *VFilter;
208     int FData[13*16];   /* Temp data buffer used in filtering */
209 
210     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
211     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
212 
213     /* First filter 1-D horizontally... */
214     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
215 
216 
217     /* then filter verticaly... */
218     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
219 
220 }
221 
vp8_sixtap_predict16x16_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)222 void vp8_sixtap_predict16x16_c
223 (
224     unsigned char  *src_ptr,
225     int  src_pixels_per_line,
226     int  xoffset,
227     int  yoffset,
228     unsigned char *dst_ptr,
229     int  dst_pitch
230 )
231 {
232     const short  *HFilter;
233     const short  *VFilter;
234     int FData[21*24];   /* Temp data buffer used in filtering */
235 
236 
237     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
238     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
239 
240     /* First filter 1-D horizontally... */
241     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
242 
243     /* then filter verticaly... */
244     filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
245 
246 }
247 
248 
249 /****************************************************************************
250  *
251  *  ROUTINE       : filter_block2d_bil_first_pass
252  *
253  *  INPUTS        : UINT8  *src_ptr    : Pointer to source block.
254  *                  UINT32  src_stride : Stride of source block.
255  *                  UINT32  height     : Block height.
256  *                  UINT32  width      : Block width.
257  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
258  *
259  *  OUTPUTS       : INT32  *dst_ptr    : Pointer to filtered block.
260  *
261  *  RETURNS       : void
262  *
263  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
264  *                  in the horizontal direction to produce the filtered output
265  *                  block. Used to implement first-pass of 2-D separable filter.
266  *
267  *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
268  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
269  *
270  ****************************************************************************/
filter_block2d_bil_first_pass(unsigned char * src_ptr,unsigned short * dst_ptr,unsigned int src_stride,unsigned int height,unsigned int width,const short * vp8_filter)271 static void filter_block2d_bil_first_pass
272 (
273     unsigned char  *src_ptr,
274     unsigned short *dst_ptr,
275     unsigned int    src_stride,
276     unsigned int    height,
277     unsigned int    width,
278     const short    *vp8_filter
279 )
280 {
281     unsigned int i, j;
282 
283     for (i = 0; i < height; i++)
284     {
285         for (j = 0; j < width; j++)
286         {
287             /* Apply bilinear filter */
288             dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
289                           ((int)src_ptr[1] * vp8_filter[1]) +
290                           (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
291             src_ptr++;
292         }
293 
294         /* Next row... */
295         src_ptr += src_stride - width;
296         dst_ptr += width;
297     }
298 }
299 
300 /****************************************************************************
301  *
302  *  ROUTINE       : filter_block2d_bil_second_pass
303  *
304  *  INPUTS        : INT32  *src_ptr    : Pointer to source block.
305  *                  UINT32  dst_pitch  : Destination block pitch.
306  *                  UINT32  height     : Block height.
307  *                  UINT32  width      : Block width.
308  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
309  *
310  *  OUTPUTS       : UINT16 *dst_ptr    : Pointer to filtered block.
311  *
312  *  RETURNS       : void
313  *
314  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
315  *                  in the vertical direction to produce the filtered output
316  *                  block. Used to implement second-pass of 2-D separable filter.
317  *
318  *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
319  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
320  *
321  ****************************************************************************/
filter_block2d_bil_second_pass(unsigned short * src_ptr,unsigned char * dst_ptr,int dst_pitch,unsigned int height,unsigned int width,const short * vp8_filter)322 static void filter_block2d_bil_second_pass
323 (
324     unsigned short *src_ptr,
325     unsigned char  *dst_ptr,
326     int             dst_pitch,
327     unsigned int    height,
328     unsigned int    width,
329     const short    *vp8_filter
330 )
331 {
332     unsigned int  i, j;
333     int  Temp;
334 
335     for (i = 0; i < height; i++)
336     {
337         for (j = 0; j < width; j++)
338         {
339             /* Apply filter */
340             Temp = ((int)src_ptr[0]     * vp8_filter[0]) +
341                    ((int)src_ptr[width] * vp8_filter[1]) +
342                    (VP8_FILTER_WEIGHT / 2);
343             dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
344             src_ptr++;
345         }
346 
347         /* Next row... */
348         dst_ptr += dst_pitch;
349     }
350 }
351 
352 
/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil
 *
 *  INPUTS        : unsigned char *src_ptr   : Pointer to source block.
 *                  unsigned int src_pitch   : Stride of source block.
 *                  unsigned int dst_pitch   : Stride of destination block.
 *                  const short *HFilter     : Array of 2 horizontal filter taps.
 *                  const short *VFilter     : Array of 2 vertical filter taps.
 *                  int Width                : Block width
 *                  int Height               : Block height
 *
 *  OUTPUTS       : unsigned char *dst_ptr   : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : 2-D filters an input block by applying a 2-tap
 *                  bi-linear filter horizontally followed by a 2-tap
 *                  bi-linear filter vertically on the result.
 *
 *  SPECIAL NOTES : The largest block size that can be handled here is 16x16:
 *                  the temporary buffer holds 17 * 16 samples and the
 *                  horizontal pass writes (Height + 1) * Width of them.
 *                  Width and Height are not checked against that limit.
 *
 ****************************************************************************/
static void filter_block2d_bil
(
    unsigned char *src_ptr,
    unsigned char *dst_ptr,
    unsigned int   src_pitch,
    unsigned int   dst_pitch,
    const short   *HFilter,
    const short   *VFilter,
    int            Width,
    int            Height
)
{

    unsigned short FData[17*16];    /* Temp data buffer used in filtering */

    /* First filter 1-D horizontally; one extra row feeds the vertical taps... */
    filter_block2d_bil_first_pass(src_ptr, FData, src_pitch,
                                  (unsigned int)(Height + 1), (unsigned int)Width, HFilter);

    /* then 1-D vertically... */
    filter_block2d_bil_second_pass(FData, dst_ptr, (int)dst_pitch,
                                   (unsigned int)Height, (unsigned int)Width, VFilter);
}
397 
398 
vp8_bilinear_predict4x4_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)399 void vp8_bilinear_predict4x4_c
400 (
401     unsigned char  *src_ptr,
402     int   src_pixels_per_line,
403     int  xoffset,
404     int  yoffset,
405     unsigned char *dst_ptr,
406     int dst_pitch
407 )
408 {
409     const short *HFilter;
410     const short *VFilter;
411 
412     HFilter = vp8_bilinear_filters[xoffset];
413     VFilter = vp8_bilinear_filters[yoffset];
414 #if 0
415     {
416         int i;
417         unsigned char temp1[16];
418         unsigned char temp2[16];
419 
420         bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
421         filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
422 
423         for (i = 0; i < 16; i++)
424         {
425             if (temp1[i] != temp2[i])
426             {
427                 bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
428                 filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
429             }
430         }
431     }
432 #endif
433     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
434 
435 }
436 
vp8_bilinear_predict8x8_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)437 void vp8_bilinear_predict8x8_c
438 (
439     unsigned char  *src_ptr,
440     int  src_pixels_per_line,
441     int  xoffset,
442     int  yoffset,
443     unsigned char *dst_ptr,
444     int  dst_pitch
445 )
446 {
447     const short *HFilter;
448     const short *VFilter;
449 
450     HFilter = vp8_bilinear_filters[xoffset];
451     VFilter = vp8_bilinear_filters[yoffset];
452 
453     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
454 
455 }
456 
vp8_bilinear_predict8x4_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)457 void vp8_bilinear_predict8x4_c
458 (
459     unsigned char  *src_ptr,
460     int  src_pixels_per_line,
461     int  xoffset,
462     int  yoffset,
463     unsigned char *dst_ptr,
464     int  dst_pitch
465 )
466 {
467     const short *HFilter;
468     const short *VFilter;
469 
470     HFilter = vp8_bilinear_filters[xoffset];
471     VFilter = vp8_bilinear_filters[yoffset];
472 
473     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
474 
475 }
476 
vp8_bilinear_predict16x16_c(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)477 void vp8_bilinear_predict16x16_c
478 (
479     unsigned char  *src_ptr,
480     int  src_pixels_per_line,
481     int  xoffset,
482     int  yoffset,
483     unsigned char *dst_ptr,
484     int  dst_pitch
485 )
486 {
487     const short *HFilter;
488     const short *VFilter;
489 
490     HFilter = vp8_bilinear_filters[xoffset];
491     VFilter = vp8_bilinear_filters[yoffset];
492 
493     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
494 }
495