/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "variance.h"
#include "pragmas.h"
#include "vpx_ports/mem.h"

extern void filter_block1d_h6_mmx
(
    const unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *vp7_filter
);
extern void filter_block1d_v6_mmx
(
    const short *src_ptr,
    unsigned char *output_ptr,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *vp7_filter
);

extern unsigned int vp8_get_mb_ss_mmx(short *src_ptr);
extern unsigned int vp8_get8x8var_mmx
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *SSE,
    int *Sum
);
extern unsigned int vp8_get4x4var_mmx
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *SSE,
    int *Sum
);
extern unsigned int vp8_get4x4sse_cs_mmx
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride
);
extern void vp8_filter_block2d_bil4x4_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);
extern void vp8_filter_block2d_bil_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);
extern unsigned int vp8_get16x16pred_error_mmx
(
    unsigned char *src_ptr,
    int src_stride,
    unsigned char *ref_ptr,
    int ref_stride
);

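/* Debug helper: runs vp8_get_mb_ss_mmx() on a known 16x16 block and
 * recomputes the sum of squares in plain C so the two results can be
 * compared in a debugger. */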
void vp8_test_get_mb_ss(void)
{
    short zz[] =
    {
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -2, -2, -2, -2, 2, 2, 2, 2, -2, -2, -2, -2, 2, 2, 2, 2,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -3, -3, -3, -3, 3, 3, 3, 3, -3, -3, -3, -3, 3, 3, 3, 3,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    };
    int s = 0, x = vp8_get_mb_ss_mmx(zz);
    {
        int y;

        for (y = 0; y < 256; y++)
            s += (zz[y] * zz[y]);
    }

    x += 0;
}


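/* 16x16 variance: accumulate SSE and Sum over the four 8x8 quadrants, then
 * return SSE - Sum^2/256 (the >> 8 is the division by the 256 pixels). */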
unsigned int vp8_get16x16var_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned *SSE,
    unsigned *SUM
)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;


    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;

    *SSE = var;
    *SUM = avg;
    return (var - ((avg * avg) >> 8));

}


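/* The fixed-size variance functions below all follow the same pattern:
 * variance = SSE - Sum^2 / N, where N is the number of pixels in the block,
 * so the shift is 4 for 4x4, 6 for 8x8, 7 for 8x16/16x8 and 8 for 16x16. */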
unsigned int vp8_variance4x4_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
    *sse = var;
    return (var - ((avg * avg) >> 4));

}

unsigned int vp8_variance8x8_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
    *sse = var;

    return (var - ((avg * avg) >> 6));

}

unsigned int vp8_mse16x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3;


    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    *sse = var;
    return var;
}


unsigned int vp8_variance16x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;


    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;
    *sse = var;
    return (var - ((avg * avg) >> 8));
}

unsigned int vp8_variance16x8_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    avg = sum0 + sum1;
    *sse = var;
    return (var - ((avg * avg) >> 7));

}


unsigned int vp8_variance8x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    avg = sum0 + sum1;
    *sse = var;

    return (var - ((avg * avg) >> 7));

}



///////////////////////////////////////////////////////////////////////////
// the mmx function that does the bilinear filtering and var calculation //
// in one pass                                                           //
///////////////////////////////////////////////////////////////////////////
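// Each row of the table holds the horizontal/vertical tap pairs for one of
// the eight 1/8-pel offsets; the two taps in every row sum to 128, i.e. the
// filters are 7-bit bilinear weights.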
DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
{
    { 128, 128, 128, 128,  0,  0,  0,  0 },
    { 112, 112, 112, 112, 16, 16, 16, 16 },
    {  96, 96, 96, 96, 32, 32, 32, 32 },
    {  80, 80, 80, 80, 48, 48, 48, 48 },
    {  64, 64, 64, 64, 64, 64, 64, 64 },
    {  48, 48, 48, 48, 80, 80, 80, 80 },
    {  32, 32, 32, 32, 96, 96, 96, 96 },
    {  16, 16, 16, 16, 112, 112, 112, 112 }
};

unsigned int vp8_sub_pixel_variance4x4_mmx
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)

{
    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil4x4_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum, &xxsum
    );
    *sse = xxsum;
    return (xxsum - ((xsum * xsum) >> 4));
}


unsigned int vp8_sub_pixel_variance8x8_mmx
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{

    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum, &xxsum
    );
    *sse = xxsum;
    return (xxsum - ((xsum * xsum) >> 6));
}

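/* 16-wide sub-pixel variance: the bilinear-filter helper works on 8-pixel
 * columns, so the block is processed as a left and a right 8-wide half and
 * the partial sums are combined before the final variance is formed. */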
unsigned int vp8_sub_pixel_variance16x16_mmx
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{

    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;


    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum0, &xxsum0
    );


    vp8_filter_block2d_bil_var_mmx(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 16,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    *sse = xxsum0;
    return (xxsum0 - ((xsum0 * xsum0) >> 8));


}

unsigned int vp8_sub_pixel_mse16x16_mmx(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
    return *sse;
}

unsigned int vp8_sub_pixel_variance16x8_mmx
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;


    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum0, &xxsum0
    );


    vp8_filter_block2d_bil_var_mmx(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    *sse = xxsum0;
    return (xxsum0 - ((xsum0 * xsum0) >> 7));
}

unsigned int vp8_sub_pixel_variance8x16_mmx
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    int *sse
)
{
    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum, &xxsum
    );
    *sse = xxsum;
    return (xxsum - ((xsum * xsum) >> 7));
}

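/* The vp8_i_* variants cover the same block sizes but address the lower
 * half of the block at src_ptr + (stride >> 1) rather than a full 8 rows
 * down the stride. */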
unsigned int vp8_i_variance16x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;


    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + (source_stride >> 1) + 8, source_stride, ref_ptr + (recon_stride >> 1) + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;
    *sse = var;
    return (var - ((avg * avg) >> 8));

}

unsigned int vp8_i_variance8x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, avg;
    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    avg = sum0 + sum1;

    *sse = var;
    return (var - ((avg * avg) >> 7));

}

unsigned int vp8_i_sub_pixel_variance16x16_mmx
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;
    int f2soffset = (src_pixels_per_line >> 1);
    int f2doffset = (dst_pixels_per_line >> 1);


    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum0, &xxsum0
    );


    vp8_filter_block2d_bil_var_mmx(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    vp8_filter_block2d_bil_var_mmx(
        src_ptr + f2soffset, src_pixels_per_line,
        dst_ptr + f2doffset, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    vp8_filter_block2d_bil_var_mmx(
        src_ptr + f2soffset + 8, src_pixels_per_line,
        dst_ptr + f2doffset + 8, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;
    *sse = xxsum0;
    return (xxsum0 - ((xsum0 * xsum0) >> 8));
}


unsigned int vp8_i_sub_pixel_variance8x16_mmx
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;
    int f2soffset = (src_pixels_per_line >> 1);
    int f2doffset = (dst_pixels_per_line >> 1);


    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum0, &xxsum0
    );


    vp8_filter_block2d_bil_var_mmx(
        src_ptr + f2soffset, src_pixels_per_line,
        dst_ptr + f2doffset, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;
    *sse = xxsum0;
    return (xxsum0 - ((xsum0 * xsum0) >> 7));
}


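/* Half-pixel helpers: offset 4 selects the {64, 64} bilinear taps, i.e. the
 * exact half-pel position, in the horizontal, vertical or both directions. */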
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
                                           ref_ptr, recon_stride, sse);
}


unsigned int vp8_variance_halfpixvar16x16_v_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
                                           ref_ptr, recon_stride, sse);
}


unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
                                           ref_ptr, recon_stride, sse);
}