• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include "vpx_ports/config.h"
13 #include "vpx_ports/mem.h"
14 #include "subpixel.h"
15 
/*
 * Coefficient tables defined outside this file.  The six-tap wrappers
 * below pick a row of vp8_six_tap_mmx using the x/y sub-pixel offset.
 */
extern const short vp8_six_tap_mmx[8][6*8];
extern const short vp8_bilinear_filters_mmx[8][2*8];

/*
 * Filter kernels defined outside this file (MMX and SSE2 variants).
 * The "h6" routines are used by the wrappers as the first pass and the
 * "v6" routines as the second pass; the "_only" variants write straight
 * to the destination when just one pass is required.
 */
extern void vp8_filter_block1d_h6_mmx(
    unsigned char  *src_ptr,
    unsigned short *output_ptr,
    unsigned int    src_pixels_per_line,
    unsigned int    pixel_step,
    unsigned int    output_height,
    unsigned int    output_width,
    const short    *vp8_filter
);

extern void vp8_filter_block1dc_v6_mmx(
    unsigned short *src_ptr,
    unsigned char  *output_ptr,
    int             output_pitch,
    unsigned int    pixels_per_line,
    unsigned int    pixel_step,
    unsigned int    output_height,
    unsigned int    output_width,
    const short    *vp8_filter
);

extern void vp8_filter_block1d8_h6_sse2(
    unsigned char  *src_ptr,
    unsigned short *output_ptr,
    unsigned int    src_pixels_per_line,
    unsigned int    pixel_step,
    unsigned int    output_height,
    unsigned int    output_width,
    const short    *vp8_filter
);

extern void vp8_filter_block1d16_h6_sse2(
    unsigned char  *src_ptr,
    unsigned short *output_ptr,
    unsigned int    src_pixels_per_line,
    unsigned int    pixel_step,
    unsigned int    output_height,
    unsigned int    output_width,
    const short    *vp8_filter
);

extern void vp8_filter_block1d8_v6_sse2(
    unsigned short *src_ptr,
    unsigned char  *output_ptr,
    int             dst_pitch,
    unsigned int    pixels_per_line,
    unsigned int    pixel_step,
    unsigned int    output_height,
    unsigned int    output_width,
    const short    *vp8_filter
);

extern void vp8_filter_block1d16_v6_sse2(
    unsigned short *src_ptr,
    unsigned char  *output_ptr,
    int             dst_pitch,
    unsigned int    pixels_per_line,
    unsigned int    pixel_step,
    unsigned int    output_height,
    unsigned int    output_width,
    const short    *vp8_filter
);

extern void vp8_unpack_block1d16_h6_sse2(
    unsigned char  *src_ptr,
    unsigned short *output_ptr,
    unsigned int    src_pixels_per_line,
    unsigned int    output_height,
    unsigned int    output_width
);

extern void vp8_filter_block1d8_h6_only_sse2(
    unsigned char  *src_ptr,
    unsigned int    src_pixels_per_line,
    unsigned char  *output_ptr,
    int             dst_pitch,
    unsigned int    output_height,
    const short    *vp8_filter
);

extern void vp8_filter_block1d16_h6_only_sse2(
    unsigned char  *src_ptr,
    unsigned int    src_pixels_per_line,
    unsigned char  *output_ptr,
    int             dst_pitch,
    unsigned int    output_height,
    const short    *vp8_filter
);

extern void vp8_filter_block1d8_v6_only_sse2(
    unsigned char  *src_ptr,
    unsigned int    src_pixels_per_line,
    unsigned char  *output_ptr,
    int             dst_pitch,
    unsigned int    output_height,
    const short    *vp8_filter
);

/* 8x8 bilinear predictor, reused four times by the 16x16 wrapper below. */
extern prototype_subpixel_predict(vp8_bilinear_predict8x8_mmx);
118 
119 
120 #if HAVE_MMX
vp8_sixtap_predict4x4_mmx(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)121 void vp8_sixtap_predict4x4_mmx
122 (
123     unsigned char  *src_ptr,
124     int   src_pixels_per_line,
125     int  xoffset,
126     int  yoffset,
127     unsigned char *dst_ptr,
128     int dst_pitch
129 )
130 {
131     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16);  /* Temp data bufffer used in filtering */
132     const short *HFilter, *VFilter;
133     HFilter = vp8_six_tap_mmx[xoffset];
134     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter);
135     VFilter = vp8_six_tap_mmx[yoffset];
136     vp8_filter_block1dc_v6_mmx(FData2 + 8, dst_ptr, dst_pitch, 8, 4 , 4, 4, VFilter);
137 
138 }
139 
140 
vp8_sixtap_predict16x16_mmx(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)141 void vp8_sixtap_predict16x16_mmx
142 (
143     unsigned char  *src_ptr,
144     int   src_pixels_per_line,
145     int  xoffset,
146     int  yoffset,
147     unsigned char *dst_ptr,
148     int dst_pitch
149 )
150 {
151 
152     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);  /* Temp data bufffer used in filtering */
153 
154     const short *HFilter, *VFilter;
155 
156 
157     HFilter = vp8_six_tap_mmx[xoffset];
158 
159     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),    FData2,   src_pixels_per_line, 1, 21, 32, HFilter);
160     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,  FData2 + 4, src_pixels_per_line, 1, 21, 32, HFilter);
161     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 8,  FData2 + 8, src_pixels_per_line, 1, 21, 32, HFilter);
162     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 12, FData2 + 12, src_pixels_per_line, 1, 21, 32, HFilter);
163 
164     VFilter = vp8_six_tap_mmx[yoffset];
165     vp8_filter_block1dc_v6_mmx(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, 16, VFilter);
166     vp8_filter_block1dc_v6_mmx(FData2 + 36, dst_ptr + 4, dst_pitch, 32, 16 , 16, 16, VFilter);
167     vp8_filter_block1dc_v6_mmx(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter);
168     vp8_filter_block1dc_v6_mmx(FData2 + 44, dst_ptr + 12, dst_pitch, 32, 16 , 16, 16, VFilter);
169 
170 }
171 
172 
vp8_sixtap_predict8x8_mmx(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)173 void vp8_sixtap_predict8x8_mmx
174 (
175     unsigned char  *src_ptr,
176     int   src_pixels_per_line,
177     int  xoffset,
178     int  yoffset,
179     unsigned char *dst_ptr,
180     int dst_pitch
181 )
182 {
183 
184     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    /* Temp data bufffer used in filtering */
185 
186     const short *HFilter, *VFilter;
187 
188     HFilter = vp8_six_tap_mmx[xoffset];
189     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),    FData2,   src_pixels_per_line, 1, 13, 16, HFilter);
190     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,  FData2 + 4, src_pixels_per_line, 1, 13, 16, HFilter);
191 
192     VFilter = vp8_six_tap_mmx[yoffset];
193     vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 8, 8, VFilter);
194     vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 8, 8, VFilter);
195 
196 }
197 
198 
vp8_sixtap_predict8x4_mmx(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)199 void vp8_sixtap_predict8x4_mmx
200 (
201     unsigned char  *src_ptr,
202     int   src_pixels_per_line,
203     int  xoffset,
204     int  yoffset,
205     unsigned char *dst_ptr,
206     int dst_pitch
207 )
208 {
209 
210     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    /* Temp data bufffer used in filtering */
211 
212     const short *HFilter, *VFilter;
213 
214     HFilter = vp8_six_tap_mmx[xoffset];
215     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),    FData2,   src_pixels_per_line, 1, 9, 16, HFilter);
216     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,  FData2 + 4, src_pixels_per_line, 1, 9, 16, HFilter);
217 
218     VFilter = vp8_six_tap_mmx[yoffset];
219     vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 4, 8, VFilter);
220     vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 4, 8, VFilter);
221 
222 }
223 
224 
225 
/*
 * 16x16 bilinear prediction, MMX path.
 *
 * Delegates to vp8_bilinear_predict8x8_mmx once per 8x8 quadrant, in the
 * order top-left, top-right, bottom-left, bottom-right.  The same
 * xoffset / yoffset are forwarded unchanged to every call.
 */
void vp8_bilinear_predict16x16_mmx(
    unsigned char *src_ptr,
    int            src_pixels_per_line,
    int            xoffset,
    int            yoffset,
    unsigned char *dst_ptr,
    int            dst_pitch
)
{
    int row, col;

    for (row = 0; row < 16; row += 8)
    {
        for (col = 0; col < 16; col += 8)
        {
            vp8_bilinear_predict8x8_mmx(src_ptr + row * src_pixels_per_line + col,
                                        src_pixels_per_line,
                                        xoffset,
                                        yoffset,
                                        dst_ptr + dst_pitch * row + col,
                                        dst_pitch);
        }
    }
}
241 #endif
242 
243 
244 #if HAVE_SSE2
vp8_sixtap_predict16x16_sse2(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)245 void vp8_sixtap_predict16x16_sse2
246 (
247     unsigned char  *src_ptr,
248     int   src_pixels_per_line,
249     int  xoffset,
250     int  yoffset,
251     unsigned char *dst_ptr,
252     int dst_pitch
253 
254 )
255 {
256     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);    /* Temp data bufffer used in filtering */
257 
258     const short *HFilter, *VFilter;
259 
260     if (xoffset)
261     {
262         if (yoffset)
263         {
264             HFilter = vp8_six_tap_mmx[xoffset];
265             vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 21, 32, HFilter);
266             VFilter = vp8_six_tap_mmx[yoffset];
267             vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
268         }
269         else
270         {
271             /* First-pass only */
272             HFilter = vp8_six_tap_mmx[xoffset];
273             vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter);
274         }
275     }
276     else
277     {
278         /* Second-pass only */
279         VFilter = vp8_six_tap_mmx[yoffset];
280         vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 21, 32);
281         vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
282     }
283 }
284 
285 
vp8_sixtap_predict8x8_sse2(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)286 void vp8_sixtap_predict8x8_sse2
287 (
288     unsigned char  *src_ptr,
289     int   src_pixels_per_line,
290     int  xoffset,
291     int  yoffset,
292     unsigned char *dst_ptr,
293     int dst_pitch
294 )
295 {
296     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  /* Temp data bufffer used in filtering */
297     const short *HFilter, *VFilter;
298 
299     if (xoffset)
300     {
301         if (yoffset)
302         {
303             HFilter = vp8_six_tap_mmx[xoffset];
304             vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 13, 16, HFilter);
305             VFilter = vp8_six_tap_mmx[yoffset];
306             vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
307         }
308         else
309         {
310             /* First-pass only */
311             HFilter = vp8_six_tap_mmx[xoffset];
312             vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter);
313         }
314     }
315     else
316     {
317         /* Second-pass only */
318         VFilter = vp8_six_tap_mmx[yoffset];
319         vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter);
320     }
321 }
322 
323 
vp8_sixtap_predict8x4_sse2(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)324 void vp8_sixtap_predict8x4_sse2
325 (
326     unsigned char  *src_ptr,
327     int   src_pixels_per_line,
328     int  xoffset,
329     int  yoffset,
330     unsigned char *dst_ptr,
331     int dst_pitch
332 )
333 {
334     DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  /* Temp data bufffer used in filtering */
335     const short *HFilter, *VFilter;
336 
337     if (xoffset)
338     {
339         if (yoffset)
340         {
341             HFilter = vp8_six_tap_mmx[xoffset];
342             vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 9, 16, HFilter);
343             VFilter = vp8_six_tap_mmx[yoffset];
344             vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
345         }
346         else
347         {
348             /* First-pass only */
349             HFilter = vp8_six_tap_mmx[xoffset];
350             vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter);
351         }
352     }
353     else
354     {
355         /* Second-pass only */
356         VFilter = vp8_six_tap_mmx[yoffset];
357         vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter);
358     }
359 }
360 
361 #endif
362 
363 #if HAVE_SSSE3
364 
/*
 * SSSE3 filter kernels defined outside this file.  Unlike the MMX/SSE2
 * kernels they take a filter index (the wrappers pass xoffset or yoffset
 * directly) rather than a pointer to a coefficient row, and both passes
 * read and write 8-bit samples.
 */
extern void vp8_filter_block1d8_h6_ssse3(
    unsigned char  *src_ptr,
    unsigned int    src_pixels_per_line,
    unsigned char  *output_ptr,
    unsigned int    output_pitch,
    unsigned int    output_height,
    unsigned int    vp8_filter_index
);

extern void vp8_filter_block1d16_h6_ssse3(
    unsigned char  *src_ptr,
    unsigned int    src_pixels_per_line,
    unsigned char  *output_ptr,
    unsigned int    output_pitch,
    unsigned int    output_height,
    unsigned int    vp8_filter_index
);

extern void vp8_filter_block1d16_v6_ssse3(
    unsigned char  *src_ptr,
    unsigned int    src_pitch,
    unsigned char  *output_ptr,
    unsigned int    out_pitch,
    unsigned int    output_height,
    unsigned int    vp8_filter_index
);

extern void vp8_filter_block1d8_v6_ssse3(
    unsigned char  *src_ptr,
    unsigned int    src_pitch,
    unsigned char  *output_ptr,
    unsigned int    out_pitch,
    unsigned int    output_height,
    unsigned int    vp8_filter_index
);

extern void vp8_filter_block1d4_h6_ssse3(
    unsigned char  *src_ptr,
    unsigned int    src_pixels_per_line,
    unsigned char  *output_ptr,
    unsigned int    output_pitch,
    unsigned int    output_height,
    unsigned int    vp8_filter_index
);

extern void vp8_filter_block1d4_v6_ssse3(
    unsigned char  *src_ptr,
    unsigned int    src_pitch,
    unsigned char  *output_ptr,
    unsigned int    out_pitch,
    unsigned int    output_height,
    unsigned int    vp8_filter_index
);
424 
vp8_sixtap_predict16x16_ssse3(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)425 void vp8_sixtap_predict16x16_ssse3
426 (
427     unsigned char  *src_ptr,
428     int   src_pixels_per_line,
429     int  xoffset,
430     int  yoffset,
431     unsigned char *dst_ptr,
432     int dst_pitch
433 
434 )
435 {
436     DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
437 
438     if (xoffset)
439     {
440         if (yoffset)
441         {
442             vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset);
443             vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset);
444         }
445         else
446         {
447             /* First-pass only */
448             vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
449         }
450     }
451     else
452     {
453         /* Second-pass only */
454         vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
455     }
456 }
457 
vp8_sixtap_predict8x8_ssse3(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)458 void vp8_sixtap_predict8x8_ssse3
459 (
460     unsigned char  *src_ptr,
461     int   src_pixels_per_line,
462     int  xoffset,
463     int  yoffset,
464     unsigned char *dst_ptr,
465     int dst_pitch
466 )
467 {
468     DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
469 
470     if (xoffset)
471     {
472         if (yoffset)
473         {
474             vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset);
475             vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset);
476         }
477         else
478         {
479             vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset);
480         }
481     }
482     else
483     {
484         /* Second-pass only */
485         vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
486     }
487 }
488 
489 
vp8_sixtap_predict8x4_ssse3(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)490 void vp8_sixtap_predict8x4_ssse3
491 (
492     unsigned char  *src_ptr,
493     int   src_pixels_per_line,
494     int  xoffset,
495     int  yoffset,
496     unsigned char *dst_ptr,
497     int dst_pitch
498 )
499 {
500     DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
501 
502     if (xoffset)
503     {
504         if (yoffset)
505         {
506             vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset);
507             vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset);
508         }
509         else
510         {
511             /* First-pass only */
512             vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
513         }
514     }
515     else
516     {
517         /* Second-pass only */
518         vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
519     }
520 }
521 
vp8_sixtap_predict4x4_ssse3(unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,unsigned char * dst_ptr,int dst_pitch)522 void vp8_sixtap_predict4x4_ssse3
523 (
524     unsigned char  *src_ptr,
525     int   src_pixels_per_line,
526     int  xoffset,
527     int  yoffset,
528     unsigned char *dst_ptr,
529     int dst_pitch
530 )
531 {
532   DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
533 
534   if (xoffset)
535   {
536       if (yoffset)
537       {
538           vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset);
539           vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset);
540       }
541       else
542       {
543           vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
544       }
545   }
546   else
547   {
548       vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
549   }
550 
551 }
552 
553 #endif
554