• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include "variance.h"
13 #include "pragmas.h"
14 #include "vpx_ports/mem.h"
15 
/*
 * Six-tap filter routines in MMX and SSE2 flavours.  They are only declared
 * here; their definitions live outside this file, and nothing in the visible
 * part of this file calls them.  The "h6" variants read unsigned char input
 * and write unsigned short output; the "v6" variants read short input and
 * write unsigned char output.
 */
extern void filter_block1d_h6_mmx(
    const unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *vp7_filter);

extern void filter_block1d_v6_mmx(
    const short *src_ptr,
    unsigned char *output_ptr,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *vp7_filter);

extern void filter_block1d8_h6_sse2(
    const unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *vp7_filter);

extern void filter_block1d8_v6_sse2(
    const short *src_ptr,
    unsigned char *output_ptr,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *vp7_filter);
20 
/*
 * MMX helpers used by the 4x4 wrappers in this file.  Both are defined
 * outside this file and hand their results back through the trailing
 * pointer arguments.
 */

/* Bilinear-filtered 4x4 helper: writes *sum and *sumsquared. */
extern void vp8_filter_block2d_bil4x4_var_mmx(const unsigned char *ref_ptr,
                                              int ref_pixels_per_line,
                                              const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              const short *HFilter,
                                              const short *VFilter,
                                              int *sum,
                                              unsigned int *sumsquared);

/* Plain 4x4 helper: writes *SSE and *Sum. */
extern unsigned int vp8_get4x4var_mmx(const unsigned char *src_ptr,
                                      int source_stride,
                                      const unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *SSE,
                                      int *Sum);
42 
43 unsigned int vp8_get_mb_ss_sse2
44 (
45     const short *src_ptr
46 );
47 unsigned int vp8_get16x16var_sse2
48 (
49     const unsigned char *src_ptr,
50     int source_stride,
51     const unsigned char *ref_ptr,
52     int recon_stride,
53     unsigned int *SSE,
54     int *Sum
55 );
56 unsigned int vp8_get16x16pred_error_sse2
57 (
58     const unsigned char *src_ptr,
59     int src_stride,
60     const unsigned char *ref_ptr,
61     int ref_stride
62 );
63 unsigned int vp8_get8x8var_sse2
64 (
65     const unsigned char *src_ptr,
66     int source_stride,
67     const unsigned char *ref_ptr,
68     int recon_stride,
69     unsigned int *SSE,
70     int *Sum
71 );
72 void vp8_filter_block2d_bil_var_sse2
73 (
74     const unsigned char *ref_ptr,
75     int ref_pixels_per_line,
76     const unsigned char *src_ptr,
77     int src_pixels_per_line,
78     unsigned int Height,
79     const short *HFilter,
80     const short *VFilter,
81     int *sum,
82     unsigned int *sumsquared
83 );
84 void vp8_half_horiz_vert_variance16x_h_sse2
85 (
86     const unsigned char *ref_ptr,
87     int ref_pixels_per_line,
88     const unsigned char *src_ptr,
89     int src_pixels_per_line,
90     unsigned int Height,
91     int *sum,
92     unsigned int *sumsquared
93 );
94 void vp8_half_horiz_variance16x_h_sse2
95 (
96     const unsigned char *ref_ptr,
97     int ref_pixels_per_line,
98     const unsigned char *src_ptr,
99     int src_pixels_per_line,
100     unsigned int Height,
101     int *sum,
102     unsigned int *sumsquared
103 );
104 void vp8_half_vert_variance16x_h_sse2
105 (
106     const unsigned char *ref_ptr,
107     int ref_pixels_per_line,
108     const unsigned char *src_ptr,
109     int src_pixels_per_line,
110     unsigned int Height,
111     int *sum,
112     unsigned int *sumsquared
113 );
114 
115 DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx[8][8]);
116 
/*
 * 4x4 variance wrapper.
 *
 * Obtains the SSE and Sum outputs of vp8_get4x4var_mmx() for the given
 * source/reference blocks and returns SSE - (Sum * Sum) / 16.  Unlike the
 * larger block sizes in this file, no SSE is reported back to the caller.
 */
unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride)
{
    unsigned int sum_sq;
    int sum;

    vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                      &sum_sq, &sum);

    /* 16 samples in a 4x4 block, hence the shift by 4. */
    return sum_sq - ((sum * sum) >> 4);
}
130 
131 
132 
/*
 * 8x8 variance wrapper.
 *
 * Obtains the SSE and Sum outputs of vp8_get8x8var_sse2() and returns
 * SSE - (Sum * Sum) / 64.  No SSE is reported back to the caller.
 */
unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride)
{
    unsigned int sum_sq;
    int sum;

    vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                       &sum_sq, &sum);

    /* 64 samples in an 8x8 block, hence the shift by 6. */
    return sum_sq - ((sum * sum) >> 6);
}
148 
149 
/*
 * 16x16 variance wrapper.
 *
 * Obtains the SSE and Sum outputs of vp8_get16x16var_sse2(), stores the SSE
 * in *sse and returns SSE - (Sum * Sum) / 256.
 *
 * src_ptr / source_stride : source block and its stride
 * ref_ptr / recon_stride  : reference block and its stride
 * sse                     : receives the SSE reported by the helper
 */
unsigned int vp8_variance16x16_wmt
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0;
    int sum0;

    vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    *sse = sse0;

    /*
     * Square in unsigned arithmetic.  Assuming Sum is the sum of 256
     * per-pixel differences of 8-bit samples, |sum0| can reach 255 * 256 =
     * 65280; its square exceeds INT_MAX (signed overflow is undefined) but
     * still fits in 32 unsigned bits, so the unsigned product is exact.
     */
    return (sse0 - (((unsigned int)sum0 * (unsigned int)sum0) >> 8));
}
/*
 * 16x16 SSE wrapper.
 *
 * Runs vp8_get16x16var_sse2() and hands back only its SSE output, both
 * through *sse and as the return value.  The Sum output is ignored.
 */
unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr,
                              int source_stride,
                              const unsigned char *ref_ptr,
                              int recon_stride,
                              unsigned int *sse)
{
    unsigned int sum_sq;
    int ignored_sum;   /* the helper requires a destination for Sum */

    vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                         &sum_sq, &ignored_sum);

    *sse = sum_sq;
    return sum_sq;
}
181 
182 
/*
 * 16x8 variance wrapper.
 *
 * Built from two vp8_get8x8var_sse2() calls placed side by side (column
 * offsets 0 and 8).  The combined SSE is stored in *sse and the function
 * returns SSE - (Sum * Sum) / 128.
 */
unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse)
{
    unsigned int left_sse, right_sse, total_sse;
    int left_sum, right_sum, total_sum;

    /* left 8x8 half */
    vp8_get8x8var_sse2(src_ptr, source_stride,
                       ref_ptr, recon_stride,
                       &left_sse, &left_sum);

    /* right 8x8 half, eight columns further on */
    vp8_get8x8var_sse2(src_ptr + 8, source_stride,
                       ref_ptr + 8, recon_stride,
                       &right_sse, &right_sum);

    total_sse = left_sse + right_sse;
    total_sum = left_sum + right_sum;

    *sse = total_sse;

    /* 128 samples in total, hence the shift by 7. */
    return total_sse - ((total_sum * total_sum) >> 7);
}
203 
/*
 * 8x16 variance wrapper.
 *
 * Built from two vp8_get8x8var_sse2() calls stacked vertically (row offsets
 * 0 and 8).  The combined SSE is stored in *sse and the function returns
 * SSE - (Sum * Sum) / 128.
 */
unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse)
{
    unsigned int top_sse, bottom_sse, total_sse;
    int top_sum, bottom_sum, total_sum;

    /* upper 8x8 half */
    vp8_get8x8var_sse2(src_ptr, source_stride,
                       ref_ptr, recon_stride,
                       &top_sse, &top_sum);

    /* lower 8x8 half, eight rows further down each buffer */
    vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride,
                       ref_ptr + 8 * recon_stride, recon_stride,
                       &bottom_sse, &bottom_sum);

    total_sse = top_sse + bottom_sse;
    total_sum = top_sum + bottom_sum;

    *sse = total_sse;

    /* 128 samples in total, hence the shift by 7. */
    return total_sse - ((total_sum * total_sum) >> 7);
}
224 
225 ///////////////////////////////////////////////////////////////////////////
226 // the mmx function that does the bilinear filtering and var calculation //
227 // int one pass                                                          //
228 ///////////////////////////////////////////////////////////////////////////
229 DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_xmm[8][16]) =
230 {
231     { 128, 128, 128, 128, 128, 128, 128, 128,  0,  0,  0,  0,  0,  0,  0,  0 },
232     { 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 },
233     {  96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
234     {  80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
235     {  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
236     {  48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
237     {  32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
238     {  16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 }
239 };
vp8_sub_pixel_variance4x4_wmt(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)240 unsigned int vp8_sub_pixel_variance4x4_wmt
241 (
242     const unsigned char  *src_ptr,
243     int  src_pixels_per_line,
244     int  xoffset,
245     int  yoffset,
246     const unsigned char *dst_ptr,
247     int dst_pixels_per_line,
248     unsigned int *sse
249 )
250 {
251     int xsum;
252     unsigned int xxsum;
253     vp8_filter_block2d_bil4x4_var_mmx(
254         src_ptr, src_pixels_per_line,
255         dst_ptr, dst_pixels_per_line,
256         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
257         &xsum, &xxsum
258     );
259     *sse = xxsum;
260     return (xxsum - ((xsum * xsum) >> 4));
261 }
262 
263 
vp8_sub_pixel_variance8x8_wmt(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)264 unsigned int vp8_sub_pixel_variance8x8_wmt
265 (
266     const unsigned char  *src_ptr,
267     int  src_pixels_per_line,
268     int  xoffset,
269     int  yoffset,
270     const unsigned char *dst_ptr,
271     int dst_pixels_per_line,
272     unsigned int *sse
273 )
274 {
275 
276     int xsum;
277     unsigned int xxsum;
278     vp8_filter_block2d_bil_var_sse2(
279         src_ptr, src_pixels_per_line,
280         dst_ptr, dst_pixels_per_line, 8,
281         vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
282         &xsum, &xxsum
283     );
284 
285     *sse = xxsum;
286     return (xxsum - ((xsum * xsum) >> 6));
287 }
288 
vp8_sub_pixel_variance16x16_wmt(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)289 unsigned int vp8_sub_pixel_variance16x16_wmt
290 (
291     const unsigned char  *src_ptr,
292     int  src_pixels_per_line,
293     int  xoffset,
294     int  yoffset,
295     const unsigned char *dst_ptr,
296     int dst_pixels_per_line,
297     unsigned int *sse
298 )
299 {
300     int xsum0, xsum1;
301     unsigned int xxsum0, xxsum1;
302 
303 
304     // note we could avoid these if statements if the calling function
305     // just called the appropriate functions inside.
306     if (xoffset == 4 && yoffset == 0)
307     {
308         vp8_half_horiz_variance16x_h_sse2(
309             src_ptr, src_pixels_per_line,
310             dst_ptr, dst_pixels_per_line, 16,
311             &xsum0, &xxsum0);
312 
313         vp8_half_horiz_variance16x_h_sse2(
314             src_ptr + 8, src_pixels_per_line,
315             dst_ptr + 8, dst_pixels_per_line, 16,
316             &xsum1, &xxsum1);
317     }
318     else if (xoffset == 0 && yoffset == 4)
319     {
320         vp8_half_vert_variance16x_h_sse2(
321             src_ptr, src_pixels_per_line,
322             dst_ptr, dst_pixels_per_line, 16,
323             &xsum0, &xxsum0);
324 
325         vp8_half_vert_variance16x_h_sse2(
326             src_ptr + 8, src_pixels_per_line,
327             dst_ptr + 8, dst_pixels_per_line, 16,
328             &xsum1, &xxsum1);
329     }
330     else if (xoffset == 4 && yoffset == 4)
331     {
332         vp8_half_horiz_vert_variance16x_h_sse2(
333             src_ptr, src_pixels_per_line,
334             dst_ptr, dst_pixels_per_line, 16,
335             &xsum0, &xxsum0);
336 
337         vp8_half_horiz_vert_variance16x_h_sse2(
338             src_ptr + 8, src_pixels_per_line,
339             dst_ptr + 8, dst_pixels_per_line, 16,
340             &xsum1, &xxsum1);
341     }
342     else
343     {
344         vp8_filter_block2d_bil_var_sse2(
345             src_ptr, src_pixels_per_line,
346             dst_ptr, dst_pixels_per_line, 16,
347             vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
348             &xsum0, &xxsum0
349         );
350 
351 
352         vp8_filter_block2d_bil_var_sse2(
353             src_ptr + 8, src_pixels_per_line,
354             dst_ptr + 8, dst_pixels_per_line, 16,
355             vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
356             &xsum1, &xxsum1
357         );
358     }
359 
360     xsum0 += xsum1;
361     xxsum0 += xxsum1;
362     *sse = xxsum0;
363     return (xxsum0 - ((xsum0 * xsum0) >> 8));
364 }
365 
/*
 * 16x16 sub-pixel SSE wrapper.
 *
 * Delegates to vp8_sub_pixel_variance16x16_wmt() purely for the value it
 * writes to *sse; the variance it returns is discarded, and the stored SSE
 * is returned instead.
 */
unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr,
                                        int src_pixels_per_line,
                                        int xoffset,
                                        int yoffset,
                                        const unsigned char *dst_ptr,
                                        int dst_pixels_per_line,
                                        unsigned int *sse)
{
    /* Only the side effect on *sse is wanted. */
    (void)vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line,
                                          xoffset, yoffset,
                                          dst_ptr, dst_pixels_per_line,
                                          sse);

    return *sse;
}
379 
vp8_sub_pixel_variance16x8_wmt(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)380 unsigned int vp8_sub_pixel_variance16x8_wmt
381 (
382     const unsigned char  *src_ptr,
383     int  src_pixels_per_line,
384     int  xoffset,
385     int  yoffset,
386     const unsigned char *dst_ptr,
387     int dst_pixels_per_line,
388     unsigned int *sse
389 
390 )
391 {
392     int xsum0, xsum1;
393     unsigned int xxsum0, xxsum1;
394 
395 
396     vp8_filter_block2d_bil_var_sse2(
397         src_ptr, src_pixels_per_line,
398         dst_ptr, dst_pixels_per_line, 8,
399         vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
400         &xsum0, &xxsum0
401     );
402 
403 
404     vp8_filter_block2d_bil_var_sse2(
405         src_ptr + 8, src_pixels_per_line,
406         dst_ptr + 8, dst_pixels_per_line, 8,
407         vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
408         &xsum1, &xxsum1
409     );
410 
411     xsum0 += xsum1;
412     xxsum0 += xxsum1;
413 
414     *sse = xxsum0;
415     return (xxsum0 - ((xsum0 * xsum0) >> 7));
416 }
417 
vp8_sub_pixel_variance8x16_wmt(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)418 unsigned int vp8_sub_pixel_variance8x16_wmt
419 (
420     const unsigned char  *src_ptr,
421     int  src_pixels_per_line,
422     int  xoffset,
423     int  yoffset,
424     const unsigned char *dst_ptr,
425     int dst_pixels_per_line,
426     unsigned int *sse
427 )
428 {
429     int xsum;
430     unsigned int xxsum;
431     vp8_filter_block2d_bil_var_sse2(
432         src_ptr, src_pixels_per_line,
433         dst_ptr, dst_pixels_per_line, 16,
434         vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
435         &xsum, &xxsum
436     );
437 
438     *sse = xxsum;
439     return (xxsum - ((xsum * xsum) >> 7));
440 }
441 
/*
 * "i" flavour of the 16x16 variance wrapper.
 *
 * Built from four vp8_get8x8var_sse2() calls.  Two start at column offsets
 * 0 and 8 of the given pointers; the other two start half a stride further
 * on (source_stride >> 1 / recon_stride >> 1), again at columns 0 and 8.
 * Every call is given the full, unhalved strides.
 * NOTE(review): this looks like an interlaced/field layout where the caller
 * passes a doubled stride - confirm against the callers.
 *
 * The combined SSE is stored in *sse and the function returns
 * SSE - (Sum * Sum) / 256.
 */
unsigned int vp8_i_variance16x16_wmt(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;

    vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_sse2(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse2, &sum2);
    vp8_get8x8var_sse2(src_ptr + (source_stride >> 1) + 8, source_stride, ref_ptr + (recon_stride >> 1) + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;

    *sse = var;

    /*
     * Square in unsigned arithmetic.  Assuming each Sum is a sum of
     * per-pixel differences of 8-bit samples, |avg| can reach 255 * 256 =
     * 65280 over 256 samples; its square exceeds INT_MAX (signed overflow is
     * undefined) but still fits in 32 unsigned bits.
     */
    return (var - (((unsigned int)avg * (unsigned int)avg) >> 8));
}
465 
/*
 * "i" flavour of the 8x16 variance wrapper.
 *
 * Built from two vp8_get8x8var_sse2() calls: one at the given pointers and
 * one starting half a stride further on (source_stride >> 1 /
 * recon_stride >> 1).  Both calls are given the full, unhalved strides.
 * The combined SSE is stored in *sse and the function returns
 * SSE - (Sum * Sum) / 128.
 */
unsigned int vp8_i_variance8x16_wmt(const unsigned char *src_ptr,
                                    int source_stride,
                                    const unsigned char *ref_ptr,
                                    int recon_stride,
                                    unsigned int *sse)
{
    unsigned int first_sse, second_sse, total_sse;
    int first_sum, second_sum, total_sum;

    vp8_get8x8var_sse2(src_ptr, source_stride,
                       ref_ptr, recon_stride,
                       &first_sse, &first_sum);

    /* second 8x8 starts half a stride into each buffer */
    vp8_get8x8var_sse2(src_ptr + (source_stride >> 1), source_stride,
                       ref_ptr + (recon_stride >> 1), recon_stride,
                       &second_sse, &second_sum);

    total_sse = first_sse + second_sse;
    total_sum = first_sum + second_sum;

    *sse = total_sse;

    /* 128 samples in total, hence the shift by 7. */
    return total_sse - ((total_sum * total_sum) >> 7);
}
485 
486 
/*
 * "i" flavour of the 16x16 sub-pixel variance.
 *
 * Forwards to vp8_sub_pixel_variance16x16_wmt() with both strides halved
 * (arithmetic shift right by one); every other argument is passed through
 * unchanged, and the callee's return value and *sse are returned as-is.
 */
unsigned int vp8_i_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr,
                                               int src_pixels_per_line,
                                               int xoffset,
                                               int yoffset,
                                               const unsigned char *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse)
{
    const int src_half_stride = src_pixels_per_line >> 1;
    const int dst_half_stride = dst_pixels_per_line >> 1;

    return vp8_sub_pixel_variance16x16_wmt(src_ptr, src_half_stride,
                                           xoffset, yoffset,
                                           dst_ptr, dst_half_stride,
                                           sse);
}
500 
501 
/*
 * "i" flavour of the 8x16 sub-pixel variance.
 *
 * Forwards to vp8_sub_pixel_variance8x16_wmt() with both strides halved
 * (arithmetic shift right by one); every other argument is passed through
 * unchanged, and the callee's return value and *sse are returned as-is.
 */
unsigned int vp8_i_sub_pixel_variance8x16_wmt(const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              int xoffset,
                                              int yoffset,
                                              const unsigned char *dst_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse)
{
    const int src_half_stride = src_pixels_per_line >> 1;
    const int dst_half_stride = dst_pixels_per_line >> 1;

    return vp8_sub_pixel_variance8x16_wmt(src_ptr, src_half_stride,
                                          xoffset, yoffset,
                                          dst_ptr, dst_half_stride,
                                          sse);
}
516 
517 
/*
 * 16x16 variance through the horizontal half-pixel helper.
 *
 * Calls vp8_half_horiz_variance16x_h_sse2() twice with a height of 16, at
 * column offsets 0 and 8, and adds the two results.  The combined sum of
 * squares is stored in *sse and the function returns
 * sumsquared - (sum * sum) / 256.
 */
unsigned int vp8_variance_halfpixvar16x16_h_wmt(
    const unsigned char *src_ptr,
    int  src_pixels_per_line,
    const unsigned char *dst_ptr,
    int  dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;

    vp8_half_horiz_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &xsum0, &xxsum0);

    vp8_half_horiz_variance16x_h_sse2(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 16,
        &xsum1, &xxsum1);

    xsum0 += xsum1;
    xxsum0 += xxsum1;
    *sse = xxsum0;

    /*
     * Square in unsigned arithmetic.  Assuming the helper sums per-pixel
     * differences of 8-bit samples, |xsum0| can reach 255 * 256 = 65280 over
     * 256 samples; its square exceeds INT_MAX (signed overflow is undefined)
     * but still fits in 32 unsigned bits.
     */
    return (xxsum0 - (((unsigned int)xsum0 * (unsigned int)xsum0) >> 8));
}
543 
544 
/*
 * 16x16 variance through the vertical half-pixel helper.
 *
 * Calls vp8_half_vert_variance16x_h_sse2() twice with a height of 16, at
 * column offsets 0 and 8, and adds the two results.  The combined sum of
 * squares is stored in *sse and the function returns
 * sumsquared - (sum * sum) / 256.
 */
unsigned int vp8_variance_halfpixvar16x16_v_wmt(
    const unsigned char *src_ptr,
    int  src_pixels_per_line,
    const unsigned char *dst_ptr,
    int  dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;

    vp8_half_vert_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &xsum0, &xxsum0);

    vp8_half_vert_variance16x_h_sse2(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 16,
        &xsum1, &xxsum1);

    xsum0 += xsum1;
    xxsum0 += xxsum1;
    *sse = xxsum0;

    /*
     * Square in unsigned arithmetic.  Assuming the helper sums per-pixel
     * differences of 8-bit samples, |xsum0| can reach 255 * 256 = 65280 over
     * 256 samples; its square exceeds INT_MAX (signed overflow is undefined)
     * but still fits in 32 unsigned bits.
     */
    return (xxsum0 - (((unsigned int)xsum0 * (unsigned int)xsum0) >> 8));
}
570 
571 
/*
 * 16x16 variance through the combined horizontal+vertical half-pixel helper.
 *
 * Calls vp8_half_horiz_vert_variance16x_h_sse2() twice with a height of 16,
 * at column offsets 0 and 8, and adds the two results.  The combined sum of
 * squares is stored in *sse and the function returns
 * sumsquared - (sum * sum) / 256.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
    const unsigned char *src_ptr,
    int  src_pixels_per_line,
    const unsigned char *dst_ptr,
    int  dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;

    vp8_half_horiz_vert_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &xsum0, &xxsum0);

    vp8_half_horiz_vert_variance16x_h_sse2(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 16,
        &xsum1, &xxsum1);

    xsum0 += xsum1;
    xxsum0 += xxsum1;
    *sse = xxsum0;

    /*
     * Square in unsigned arithmetic.  Assuming the helper sums per-pixel
     * differences of 8-bit samples, |xsum0| can reach 255 * 256 = 65280 over
     * 256 samples; its square exceeds INT_MAX (signed overflow is undefined)
     * but still fits in 32 unsigned bits.
     */
    return (xxsum0 - (((unsigned int)xsum0 * (unsigned int)xsum0) >> 8));
}
597