• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 
12 #include "vp8/encoder/variance.h"
13 #include "vp8/common/pragmas.h"
14 #include "vpx_ports/mem.h"
15 
/*
 * Prototypes for routines whose bodies are not in this file.
 * NOTE(review): presumably these are the x86 MMX/SSE2 assembly
 * implementations -- confirm against the build files.
 */

/*
 * 6-tap filter passes.  Declared here but not called by any wrapper
 * in this file.
 */
extern void filter_block1d_h6_mmx(const unsigned char *src_ptr,
                                  unsigned short *output_ptr,
                                  unsigned int src_pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *vp7_filter);
extern void filter_block1d_v6_mmx(const short *src_ptr,
                                  unsigned char *output_ptr,
                                  unsigned int pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *vp7_filter);
extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr,
                                    unsigned short *output_ptr,
                                    unsigned int src_pixels_per_line,
                                    unsigned int pixel_step,
                                    unsigned int output_height,
                                    unsigned int output_width,
                                    short *vp7_filter);
extern void filter_block1d8_v6_sse2(const short *src_ptr,
                                    unsigned char *output_ptr,
                                    unsigned int pixels_per_line,
                                    unsigned int pixel_step,
                                    unsigned int output_height,
                                    unsigned int output_width,
                                    short *vp7_filter);

/*
 * Used by vp8_sub_pixel_variance4x4_wmt.  Takes two filter rows and
 * reports its results through *sum and *sumsquared.
 */
extern void vp8_filter_block2d_bil4x4_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);

/*
 * Used by vp8_variance4x4_wmt.  Reports its results through *SSE and
 * *Sum; the return value is ignored by that caller.
 */
extern unsigned int vp8_get4x4var_mmx
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *SSE,
    int *Sum
);

/* Not called in this file. */
unsigned int vp8_get_mb_ss_sse2
(
    const short *src_ptr
);

/*
 * Used by vp8_variance16x16_wmt and vp8_mse16x16_wmt.  Reports its
 * results through *SSE and *Sum.
 */
unsigned int vp8_get16x16var_sse2
(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *SSE,
    int *Sum
);

/* Not called in this file. */
unsigned int vp8_get16x16pred_error_sse2
(
    const unsigned char *src_ptr,
    int src_stride,
    const unsigned char *ref_ptr,
    int ref_stride
);

/*
 * Used by the 8x8, 16x8 and 8x16 variance wrappers.  Reports its
 * results through *SSE and *Sum.
 */
unsigned int vp8_get8x8var_sse2
(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *SSE,
    int *Sum
);

/*
 * General sub-pixel path of the 8- and 16-wide wrappers.  The 16-wide
 * wrappers call it twice, the second time with both pointers advanced
 * by 8.
 */
void vp8_filter_block2d_bil_var_sse2
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int  xoffset,
    int  yoffset,
    int *sum,
    unsigned int *sumsquared
);

/*
 * Special-case routines selected by the sub-pixel wrappers when
 * xoffset and/or yoffset equals 4.  The 8x_h routines are used by the
 * 8-wide wrappers and the 16x_h routines by the 16-wide wrappers;
 * Height is supplied by the caller.
 */
void vp8_half_horiz_vert_variance8x_h_sse2
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared
);
void vp8_half_horiz_vert_variance16x_h_sse2
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared
);
void vp8_half_horiz_variance8x_h_sse2
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared
);
void vp8_half_horiz_variance16x_h_sse2
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared
);
void vp8_half_vert_variance8x_h_sse2
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared
);
void vp8_half_vert_variance16x_h_sse2
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared
);
144 
145 DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx[8][8]);
146 
/*
 * Variance-style metric for a 4x4 block.
 *
 * Fetches two accumulators from vp8_get4x4var_mmx (implemented elsewhere)
 * and returns  sse - ((sum * sum) >> 4).  The shift by 4 corresponds to a
 * division by 16, the number of samples in a 4x4 block.
 *
 * Unlike the 16x16/16x8/8x16 variants in this file, this function has no
 * output parameter for the raw squared-error accumulator.
 */
unsigned int vp8_variance4x4_wmt(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride)
{
    unsigned int sq_err;
    int diff_sum;
    int mean_term;

    vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                      &sq_err, &diff_sum);

    mean_term = (diff_sum * diff_sum) >> 4;
    return sq_err - mean_term;
}
160 
161 
162 
/*
 * Variance-style metric for an 8x8 block.
 *
 * Fetches two accumulators from vp8_get8x8var_sse2 (implemented elsewhere)
 * and returns  sse - ((sum * sum) >> 6).  The shift by 6 corresponds to a
 * division by 64, the number of samples in an 8x8 block.
 *
 * Unlike the 16x16/16x8/8x16 variants in this file, this function has no
 * output parameter for the raw squared-error accumulator.
 */
unsigned int vp8_variance8x8_wmt
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride)
{
    unsigned int sq_err;
    int diff_sum;
    int mean_term;

    vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                       &sq_err, &diff_sum);

    mean_term = (diff_sum * diff_sum) >> 6;
    return sq_err - mean_term;
}
178 
179 
/*
 * Variance-style metric for a 16x16 block.
 *
 * Fetches two accumulators from vp8_get16x16var_sse2 (implemented
 * elsewhere), stores the squared-error accumulator in *sse and returns
 * sse - ((sum * sum) >> 8).  The shift by 8 corresponds to a division by
 * 256, the number of samples in a 16x16 block.
 *
 * src_ptr / source_stride  first buffer and its stride, passed through
 * ref_ptr / recon_stride   second buffer and its stride, passed through
 * sse                      out: squared-error accumulator (must not be NULL)
 */
unsigned int vp8_variance16x16_wmt
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0;
    int sum0;

    vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    *sse = sse0;

    /*
     * The square is computed in unsigned arithmetic on purpose.  Assuming
     * the helper sums 256 differences of 8-bit samples, |sum0| can reach
     * 256 * 255 = 65280, whose square (4261478400) exceeds INT_MAX but
     * still fits in a 32-bit unsigned int.  A signed multiply would be
     * undefined behaviour and in practice yields a wrong result.
     */
    return sse0 - (((unsigned int)sum0 * sum0) >> 8);
}
/*
 * Squared-error metric for a 16x16 block.
 *
 * Calls vp8_get16x16var_sse2 (implemented elsewhere) and reports the
 * squared-error accumulator both through *sse and as the return value.
 * The sum accumulator produced by the helper is not used.
 */
unsigned int vp8_mse16x16_wmt(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    int ignored_sum;    /* required by the helper's signature only */

    vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                         &sq_err, &ignored_sum);

    *sse = sq_err;
    return sq_err;
}
211 
212 
/*
 * Variance-style metric for a block 16 wide and 8 high.
 *
 * Built from two vp8_get8x8var_sse2 calls: one at the given pointers and
 * one with both pointers advanced by 8 bytes.  The two squared-error
 * accumulators and the two sum accumulators are each added together;
 * the squared-error total is stored in *sse and the function returns
 * sse - ((sum * sum) >> 7)  (shift by 7 == division by 128 samples).
 */
unsigned int vp8_variance16x8_wmt
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err_left, sq_err_right, sq_err_total;
    int sum_left, sum_right, sum_total;

    /* Left 8 columns. */
    vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                       &sq_err_left, &sum_left);
    /* Right 8 columns. */
    vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride,
                       &sq_err_right, &sum_right);

    sq_err_total = sq_err_left + sq_err_right;
    sum_total = sum_left + sum_right;

    *sse = sq_err_total;
    return sq_err_total - ((sum_total * sum_total) >> 7);
}
233 
/*
 * Variance-style metric for a block 8 wide and 16 high.
 *
 * Built from two vp8_get8x8var_sse2 calls: one at the given pointers and
 * one with each pointer advanced by 8 rows (8 * its stride).  The two
 * squared-error accumulators and the two sum accumulators are each added
 * together; the squared-error total is stored in *sse and the function
 * returns  sse - ((sum * sum) >> 7)  (shift by 7 == division by 128
 * samples).
 */
unsigned int vp8_variance8x16_wmt
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err_top, sq_err_bottom, sq_err_total;
    int sum_top, sum_bottom, sum_total;

    /* Upper 8 rows. */
    vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                       &sq_err_top, &sum_top);
    /* Lower 8 rows. */
    vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride,
                       ref_ptr + 8 * recon_stride, recon_stride,
                       &sq_err_bottom, &sum_bottom);

    sq_err_total = sq_err_top + sq_err_bottom;
    sum_total = sum_top + sum_bottom;

    *sse = sq_err_total;
    return sq_err_total - ((sum_total * sum_total) >> 7);
}
254 
vp8_sub_pixel_variance4x4_wmt(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)255 unsigned int vp8_sub_pixel_variance4x4_wmt
256 (
257     const unsigned char  *src_ptr,
258     int  src_pixels_per_line,
259     int  xoffset,
260     int  yoffset,
261     const unsigned char *dst_ptr,
262     int dst_pixels_per_line,
263     unsigned int *sse
264 )
265 {
266     int xsum;
267     unsigned int xxsum;
268     vp8_filter_block2d_bil4x4_var_mmx(
269         src_ptr, src_pixels_per_line,
270         dst_ptr, dst_pixels_per_line,
271         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
272         &xsum, &xxsum
273     );
274     *sse = xxsum;
275     return (xxsum - ((xsum * xsum) >> 4));
276 }
277 
278 
/*
 * Sub-pixel variance-style metric for an 8x8 block.
 *
 * Picks one of four externally implemented routines based on the offsets:
 *   xoffset == 4, yoffset == 0  ->  vp8_half_horiz_variance8x_h_sse2
 *   xoffset == 0, yoffset == 4  ->  vp8_half_vert_variance8x_h_sse2
 *   xoffset == 4, yoffset == 4  ->  vp8_half_horiz_vert_variance8x_h_sse2
 *   anything else               ->  vp8_filter_block2d_bil_var_sse2
 * NOTE(review): offset 4 presumably denotes the half-sample position --
 * confirm against the filter tables.
 *
 * Every routine is asked for a height of 8.  The squared-error
 * accumulator is stored in *sse and the function returns
 * sse - ((sum * sum) >> 6).
 */
unsigned int vp8_sub_pixel_variance8x8_wmt
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);
    int diff_sum;
    unsigned int sq_err;

    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                              dst_ptr, dst_pixels_per_line, 8,
                                              &diff_sum, &sq_err);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line, 8,
                                         &diff_sum, &sq_err);
    }
    else if (y_is_half && xoffset == 0)
    {
        vp8_half_vert_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line, 8,
                                        &diff_sum, &sq_err);
    }
    else
    {
        vp8_filter_block2d_bil_var_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line, 8,
                                        xoffset, yoffset,
                                        &diff_sum, &sq_err);
    }

    *sse = sq_err;
    return sq_err - ((diff_sum * diff_sum) >> 6);
}
326 
/*
 * Sub-pixel variance-style metric for a 16x16 block.
 *
 * Picks an externally implemented routine based on the offsets:
 *   xoffset == 4, yoffset == 0  ->  vp8_half_horiz_variance16x_h_sse2
 *   xoffset == 0, yoffset == 4  ->  vp8_half_vert_variance16x_h_sse2
 *   xoffset == 4, yoffset == 4  ->  vp8_half_horiz_vert_variance16x_h_sse2
 *   anything else               ->  vp8_filter_block2d_bil_var_sse2, called
 *                                   twice (second call with both pointers
 *                                   advanced by 8) and the results added
 * NOTE(review): offset 4 presumably denotes the half-sample position --
 * confirm against the filter tables.
 *
 * Every routine is asked for a height of 16.  The squared-error
 * accumulator is stored in *sse (must not be NULL) and the function
 * returns  sse - ((sum * sum) >> 8).
 */
unsigned int vp8_sub_pixel_variance16x16_wmt
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;

    /* This dispatch could be avoided if callers invoked the specialised
     * routines directly. */
    if (xoffset == 4 && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 16,
            &xsum0, &xxsum0);
    }
    else if (xoffset == 0 && yoffset == 4)
    {
        vp8_half_vert_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 16,
            &xsum0, &xxsum0);
    }
    else if (xoffset == 4 && yoffset == 4)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 16,
            &xsum0, &xxsum0);
    }
    else
    {
        /* First call at the given pointers... */
        vp8_filter_block2d_bil_var_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 16,
            xoffset, yoffset,
            &xsum0, &xxsum0
        );

        /* ...second call 8 bytes further along each row. */
        vp8_filter_block2d_bil_var_sse2(
            src_ptr + 8, src_pixels_per_line,
            dst_ptr + 8, dst_pixels_per_line, 16,
            xoffset, yoffset,
            &xsum1, &xxsum1
        );
        xsum0 += xsum1;
        xxsum0 += xxsum1;
    }

    *sse = xxsum0;

    /*
     * The square is computed in unsigned arithmetic on purpose.  Assuming
     * the sum covers 256 differences of 8-bit samples, |xsum0| can reach
     * 256 * 255 = 65280, whose square exceeds INT_MAX but still fits in a
     * 32-bit unsigned int.  A signed multiply would be undefined behaviour.
     */
    return xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8);
}
387 
/*
 * Sub-pixel squared-error metric for a 16x16 block.
 *
 * Delegates to vp8_sub_pixel_variance16x16_wmt, which fills in *sse, and
 * returns the value stored there.  The delegate's own return value is
 * discarded.
 */
unsigned int vp8_sub_pixel_mse16x16_wmt(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    (void)vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line,
                                          xoffset, yoffset,
                                          dst_ptr, dst_pixels_per_line,
                                          sse);
    return *sse;
}
401 
/*
 * Sub-pixel variance-style metric for a block 16 wide and 8 high.
 *
 * Picks an externally implemented routine based on the offsets:
 *   xoffset == 4, yoffset == 0  ->  vp8_half_horiz_variance16x_h_sse2
 *   xoffset == 0, yoffset == 4  ->  vp8_half_vert_variance16x_h_sse2
 *   xoffset == 4, yoffset == 4  ->  vp8_half_horiz_vert_variance16x_h_sse2
 *   anything else               ->  vp8_filter_block2d_bil_var_sse2, called
 *                                   twice (second call with both pointers
 *                                   advanced by 8) and the results added
 * NOTE(review): offset 4 presumably denotes the half-sample position --
 * confirm against the filter tables.
 *
 * Every routine is asked for a height of 8.  The squared-error
 * accumulator is stored in *sse and the function returns
 * sse - ((sum * sum) >> 7).
 */
unsigned int vp8_sub_pixel_variance16x8_wmt
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse

)
{
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);
    int diff_sum;
    unsigned int sq_err;

    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line, 8,
                                               &diff_sum, &sq_err);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line, 8,
                                          &diff_sum, &sq_err);
    }
    else if (y_is_half && xoffset == 0)
    {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line, 8,
                                         &diff_sum, &sq_err);
    }
    else
    {
        int right_sum;
        unsigned int right_sq_err;

        /* First call at the given pointers... */
        vp8_filter_block2d_bil_var_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line, 8,
                                        xoffset, yoffset,
                                        &diff_sum, &sq_err);

        /* ...second call 8 bytes further along each row. */
        vp8_filter_block2d_bil_var_sse2(src_ptr + 8, src_pixels_per_line,
                                        dst_ptr + 8, dst_pixels_per_line, 8,
                                        xoffset, yoffset,
                                        &right_sum, &right_sq_err);

        diff_sum += right_sum;
        sq_err += right_sq_err;
    }

    *sse = sq_err;
    return sq_err - ((diff_sum * diff_sum) >> 7);
}
458 
/*
 * Sub-pixel variance-style metric for a block 8 wide and 16 high.
 *
 * Picks one of four externally implemented routines based on the offsets:
 *   xoffset == 4, yoffset == 0  ->  vp8_half_horiz_variance8x_h_sse2
 *   xoffset == 0, yoffset == 4  ->  vp8_half_vert_variance8x_h_sse2
 *   xoffset == 4, yoffset == 4  ->  vp8_half_horiz_vert_variance8x_h_sse2
 *   anything else               ->  vp8_filter_block2d_bil_var_sse2
 * NOTE(review): offset 4 presumably denotes the half-sample position --
 * confirm against the filter tables.
 *
 * Every routine is asked for a height of 16.  The squared-error
 * accumulator is stored in *sse and the function returns
 * sse - ((sum * sum) >> 7).
 */
unsigned int vp8_sub_pixel_variance8x16_wmt
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);
    int diff_sum;
    unsigned int sq_err;

    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                              dst_ptr, dst_pixels_per_line, 16,
                                              &diff_sum, &sq_err);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line, 16,
                                         &diff_sum, &sq_err);
    }
    else if (y_is_half && xoffset == 0)
    {
        vp8_half_vert_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line, 16,
                                        &diff_sum, &sq_err);
    }
    else
    {
        vp8_filter_block2d_bil_var_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line, 16,
                                        xoffset, yoffset,
                                        &diff_sum, &sq_err);
    }

    *sse = sq_err;
    return sq_err - ((diff_sum * diff_sum) >> 7);
}
506 
507 
/*
 * 16x16 variance-style metric using vp8_half_horiz_variance16x_h_sse2
 * (implemented elsewhere) with a height of 16.
 *
 * The squared-error accumulator is stored in *sse (must not be NULL) and
 * the function returns  sse - ((sum * sum) >> 8).
 */
unsigned int vp8_variance_halfpixvar16x16_h_wmt(
    const unsigned char *src_ptr,
    int  src_pixels_per_line,
    const unsigned char *dst_ptr,
    int  dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum0;
    unsigned int xxsum0;

    vp8_half_horiz_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &xsum0, &xxsum0);

    *sse = xxsum0;

    /*
     * Square in unsigned arithmetic: assuming 256 differences of 8-bit
     * samples, |xsum0| can reach 65280 and its square exceeds INT_MAX
     * (signed overflow is undefined) while still fitting in 32 bits
     * unsigned.
     */
    return xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8);
}
526 
527 
/*
 * 16x16 variance-style metric using vp8_half_vert_variance16x_h_sse2
 * (implemented elsewhere) with a height of 16.
 *
 * The squared-error accumulator is stored in *sse (must not be NULL) and
 * the function returns  sse - ((sum * sum) >> 8).
 */
unsigned int vp8_variance_halfpixvar16x16_v_wmt(
    const unsigned char *src_ptr,
    int  src_pixels_per_line,
    const unsigned char *dst_ptr,
    int  dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum0;
    unsigned int xxsum0;

    vp8_half_vert_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &xsum0, &xxsum0);

    *sse = xxsum0;

    /*
     * Square in unsigned arithmetic: assuming 256 differences of 8-bit
     * samples, |xsum0| can reach 65280 and its square exceeds INT_MAX
     * (signed overflow is undefined) while still fitting in 32 bits
     * unsigned.
     */
    return xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8);
}
545 
546 
/*
 * 16x16 variance-style metric using vp8_half_horiz_vert_variance16x_h_sse2
 * (implemented elsewhere) with a height of 16.
 *
 * The squared-error accumulator is stored in *sse (must not be NULL) and
 * the function returns  sse - ((sum * sum) >> 8).
 */
unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
    const unsigned char *src_ptr,
    int  src_pixels_per_line,
    const unsigned char *dst_ptr,
    int  dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum0;
    unsigned int xxsum0;

    vp8_half_horiz_vert_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &xsum0, &xxsum0);

    *sse = xxsum0;

    /*
     * Square in unsigned arithmetic: assuming 256 differences of 8-bit
     * samples, |xsum0| can reach 65280 and its square exceeds INT_MAX
     * (signed overflow is undefined) while still fitting in 32 bits
     * unsigned.
     */
    return xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8);
}
565