1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12 #include "variance.h"
13 #include "pragmas.h"
14 #include "vpx_ports/mem.h"
15
/* Prototypes for the MMX assembly kernels this file wraps; the bodies live
 * in the corresponding .asm files.  NOTE(review): parameter semantics below
 * are inferred from the call sites in this file — confirm against the asm. */

/* 6-tap horizontal filter producing 16-bit intermediate rows. */
extern void filter_block1d_h6_mmx
(
    const unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *vp7_filter
);
/* 6-tap vertical filter consuming the 16-bit intermediate rows. */
extern void filter_block1d_v6_mmx
(
    const short *src_ptr,
    unsigned char *output_ptr,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *vp7_filter
);

/* Sum of squares over a 256-entry (16x16) short buffer. */
extern unsigned int vp8_get_mb_ss_mmx(short *src_ptr);
/* SSE and Sum of the 8x8 difference block src - ref. */
extern unsigned int vp8_get8x8var_mmx
(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *SSE,
    int *Sum
);
/* SSE and Sum of the 4x4 difference block src - ref. */
extern unsigned int vp8_get4x4var_mmx
(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *SSE,
    int *Sum
);
/* 4x4 SSE only (no Sum). */
extern unsigned int vp8_get4x4sse_cs_mmx
(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride
);
/* Bilinear-filter a 4x4 block and accumulate sum/SSE against src_ptr. */
extern void vp8_filter_block2d_bil4x4_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);
/* Bilinear-filter an 8-wide column of Height rows and accumulate sum/SSE. */
extern void vp8_filter_block2d_bil_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);
/* 16x16 prediction error (declared here but not used in this file). */
extern unsigned int vp8_get16x16pred_error_mmx
(
    unsigned char *src_ptr,
    int src_stride,
    unsigned char *ref_ptr,
    int ref_stride
);
93
94
/* Ad-hoc debugging helper: runs the MMX sum-of-squares kernel on a fixed
 * 16x16 pattern and computes the same value in plain C.  Nothing is
 * asserted — the two results are only meant to be compared in a debugger. */
void vp8_test_get_mb_ss(void)
{
    short zz[] =
    {
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -2, -2, -2, -2, 2, 2, 2, 2, -2, -2, -2, -2, 2, 2, 2, 2,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -3, -3, -3, -3, 3, 3, 3, 3, -3, -3, -3, -3, 3, 3, 3, 3,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    };
    int mmx_result = vp8_get_mb_ss_mmx(zz);
    int reference = 0;
    int i;

    for (i = 0; i < 256; i++)
        reference += zz[i] * zz[i];

    /* keep mmx_result "used" so the compiler does not warn */
    mmx_result += 0;
}
126
127
/* Compute SSE, Sum and variance of a 16x16 block by combining the four
 * 8x8 quadrants from the MMX kernel.
 * Writes the total SSE through SSE and the total Sum through SUM;
 * returns variance = SSE - Sum^2/256. */
unsigned int vp8_get16x16var_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned *SSE,
    unsigned *SUM
)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;

    /* top-left, top-right, bottom-left, bottom-right 8x8 quadrants */
    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;

    *SSE = var;
    *SUM = avg;

    /* |avg| can reach 16*16*255 = 65280, so avg*avg in signed int would be
     * undefined behavior.  The unsigned product is exact since 65280^2 < 2^32
     * (and is correct for negative avg by modular arithmetic). */
    return (var - (((unsigned int)avg * avg) >> 8));
}
154
155
156
157
158
/* 4x4 variance: SSE and Sum come from the MMX kernel; writes SSE through
 * sse and returns variance = SSE - Sum^2/16. */
unsigned int vp8_variance4x4_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sum_sq;
    int sum;

    vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                      &sum_sq, &sum);

    *sse = sum_sq;
    return sum_sq - ((sum * sum) >> 4);
}
174
/* 8x8 variance: one MMX kernel call covers the whole block; writes SSE
 * through sse and returns variance = SSE - Sum^2/64. */
unsigned int vp8_variance8x8_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sum_sq;
    int sum;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                      &sum_sq, &sum);

    *sse = sum_sq;
    return sum_sq - ((sum * sum) >> 6);
}
191
/* 16x16 mean squared error: sums the SSE of the four 8x8 quadrants.
 * The per-quadrant sums are produced by the kernel interface but are
 * not needed for MSE.  Writes the total through sse and returns it. */
unsigned int vp8_mse16x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse_q[4];
    int sum_q[4];
    unsigned int total;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride, &sse_q[0], &sum_q[0]);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride,
                      ref_ptr + 8, recon_stride, &sse_q[1], &sum_q[1]);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
                      ref_ptr + 8 * recon_stride, recon_stride, &sse_q[2], &sum_q[2]);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride,
                      ref_ptr + 8 * recon_stride + 8, recon_stride, &sse_q[3], &sum_q[3]);

    total = sse_q[0] + sse_q[1] + sse_q[2] + sse_q[3];
    *sse = total;
    return total;
}
212
213
/* 16x16 variance from the four 8x8 quadrants.  Writes SSE through sse and
 * returns variance = SSE - Sum^2/256.
 * NOTE(review): sse is `int *` here while the sibling functions take
 * `unsigned int *`; left unchanged because callers/headers depend on it. */
unsigned int vp8_variance16x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;

    /* top-left, top-right, bottom-left, bottom-right 8x8 quadrants */
    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;
    *sse = var;

    /* |avg| can reach 65280; square in unsigned to avoid signed-overflow UB
     * (the result is exact since 65280^2 < 2^32). */
    return (var - (((unsigned int)avg * avg) >> 8));
}
235
/* 16x8 variance from the left and right 8x8 halves.  Writes SSE through
 * sse and returns variance = SSE - Sum^2/128. */
unsigned int vp8_variance16x8_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse_l, sse_r, total_sse;
    int sum_l, sum_r, total_sum;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride, &sse_l, &sum_l);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride,
                      ref_ptr + 8, recon_stride, &sse_r, &sum_r);

    total_sse = sse_l + sse_r;
    total_sum = sum_l + sum_r;
    *sse = total_sse;

    return total_sse - ((total_sum * total_sum) >> 7);
}
255
256
/* 8x16 variance from the top and bottom 8x8 halves.  Writes SSE through
 * sse and returns variance = SSE - Sum^2/128. */
unsigned int vp8_variance8x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse_top, sse_bot, total_sse;
    int sum_top, sum_bot, total_sum;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride, &sse_top, &sum_top);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
                      ref_ptr + 8 * recon_stride, recon_stride,
                      &sse_bot, &sum_bot);

    total_sse = sse_top + sse_bot;
    total_sum = sum_top + sum_bot;
    *sse = total_sse;

    return total_sse - ((total_sum * total_sum) >> 7);
}
277
278
279
280
281 ///////////////////////////////////////////////////////////////////////////
282 // the mmx function that does the bilinear filtering and var calculation //
283 // int one pass //
284 ///////////////////////////////////////////////////////////////////////////
/* Bilinear filter taps for the 8 eighth-pel offsets; each row holds the
 * first tap replicated 4x followed by the second tap replicated 4x, so a
 * row can be loaded directly into an MMX register pair.  Taps sum to 128
 * (7-bit precision).  Index 0 = full-pel (pass-through), index 4 = half-pel. */
DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
{
    { 128, 128, 128, 128,  0,  0,  0,  0 },
    { 112, 112, 112, 112, 16, 16, 16, 16 },
    {  96,  96,  96,  96, 32, 32, 32, 32 },
    {  80,  80,  80,  80, 48, 48, 48, 48 },
    {  64,  64,  64,  64, 64, 64, 64, 64 },
    {  48,  48,  48,  48, 80, 80, 80, 80 },
    {  32,  32,  32,  32, 96, 96, 96, 96 },
    {  16,  16,  16,  16, 112, 112, 112, 112 }
};
296
vp8_sub_pixel_variance4x4_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)297 unsigned int vp8_sub_pixel_variance4x4_mmx
298 (
299 const unsigned char *src_ptr,
300 int src_pixels_per_line,
301 int xoffset,
302 int yoffset,
303 const unsigned char *dst_ptr,
304 int dst_pixels_per_line,
305 unsigned int *sse)
306
307 {
308 int xsum;
309 unsigned int xxsum;
310 vp8_filter_block2d_bil4x4_var_mmx(
311 src_ptr, src_pixels_per_line,
312 dst_ptr, dst_pixels_per_line,
313 vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
314 &xsum, &xxsum
315 );
316 *sse = xxsum;
317 return (xxsum - ((xsum * xsum) >> 4));
318 }
319
320
vp8_sub_pixel_variance8x8_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)321 unsigned int vp8_sub_pixel_variance8x8_mmx
322 (
323 const unsigned char *src_ptr,
324 int src_pixels_per_line,
325 int xoffset,
326 int yoffset,
327 const unsigned char *dst_ptr,
328 int dst_pixels_per_line,
329 unsigned int *sse
330 )
331 {
332
333 int xsum;
334 unsigned int xxsum;
335 vp8_filter_block2d_bil_var_mmx(
336 src_ptr, src_pixels_per_line,
337 dst_ptr, dst_pixels_per_line, 8,
338 vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
339 &xsum, &xxsum
340 );
341 *sse = xxsum;
342 return (xxsum - ((xsum * xsum) >> 6));
343 }
344
vp8_sub_pixel_variance16x16_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)345 unsigned int vp8_sub_pixel_variance16x16_mmx
346 (
347 const unsigned char *src_ptr,
348 int src_pixels_per_line,
349 int xoffset,
350 int yoffset,
351 const unsigned char *dst_ptr,
352 int dst_pixels_per_line,
353 unsigned int *sse
354 )
355 {
356
357 int xsum0, xsum1;
358 unsigned int xxsum0, xxsum1;
359
360
361 vp8_filter_block2d_bil_var_mmx(
362 src_ptr, src_pixels_per_line,
363 dst_ptr, dst_pixels_per_line, 16,
364 vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
365 &xsum0, &xxsum0
366 );
367
368
369 vp8_filter_block2d_bil_var_mmx(
370 src_ptr + 8, src_pixels_per_line,
371 dst_ptr + 8, dst_pixels_per_line, 16,
372 vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
373 &xsum1, &xxsum1
374 );
375
376 xsum0 += xsum1;
377 xxsum0 += xxsum1;
378
379 *sse = xxsum0;
380 return (xxsum0 - ((xsum0 * xsum0) >> 8));
381
382
383 }
384
/* 16x16 sub-pixel MSE: the sub-pixel variance routine already stores the
 * raw SSE through *sse; its variance return value is discarded here. */
unsigned int vp8_sub_pixel_mse16x16_mmx(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    (void)vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line,
                                          xoffset, yoffset,
                                          dst_ptr, dst_pixels_per_line, sse);
    return *sse;
}
398
vp8_sub_pixel_variance16x8_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)399 unsigned int vp8_sub_pixel_variance16x8_mmx
400 (
401 const unsigned char *src_ptr,
402 int src_pixels_per_line,
403 int xoffset,
404 int yoffset,
405 const unsigned char *dst_ptr,
406 int dst_pixels_per_line,
407 unsigned int *sse
408 )
409 {
410 int xsum0, xsum1;
411 unsigned int xxsum0, xxsum1;
412
413
414 vp8_filter_block2d_bil_var_mmx(
415 src_ptr, src_pixels_per_line,
416 dst_ptr, dst_pixels_per_line, 8,
417 vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
418 &xsum0, &xxsum0
419 );
420
421
422 vp8_filter_block2d_bil_var_mmx(
423 src_ptr + 8, src_pixels_per_line,
424 dst_ptr + 8, dst_pixels_per_line, 8,
425 vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
426 &xsum1, &xxsum1
427 );
428
429 xsum0 += xsum1;
430 xxsum0 += xxsum1;
431
432 *sse = xxsum0;
433 return (xxsum0 - ((xsum0 * xsum0) >> 7));
434 }
435
vp8_sub_pixel_variance8x16_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,int * sse)436 unsigned int vp8_sub_pixel_variance8x16_mmx
437 (
438 const unsigned char *src_ptr,
439 int src_pixels_per_line,
440 int xoffset,
441 int yoffset,
442 const unsigned char *dst_ptr,
443 int dst_pixels_per_line,
444 int *sse
445 )
446 {
447 int xsum;
448 unsigned int xxsum;
449 vp8_filter_block2d_bil_var_mmx(
450 src_ptr, src_pixels_per_line,
451 dst_ptr, dst_pixels_per_line, 16,
452 vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
453 &xsum, &xxsum
454 );
455 *sse = xxsum;
456 return (xxsum - ((xsum * xsum) >> 7));
457 }
458
/* 16x16 variance variant whose lower quadrants are addressed at half the
 * stride (presumably for interlaced/field data — confirm against callers).
 * Writes SSE through sse and returns variance = SSE - Sum^2/256. */
unsigned int vp8_i_variance16x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + (source_stride >> 1) + 8, source_stride, ref_ptr + (recon_stride >> 1) + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;
    *sse = var;

    /* |avg| can reach 65280; square in unsigned to avoid signed-overflow UB
     * (exact since 65280^2 < 2^32). */
    return (var - (((unsigned int)avg * avg) >> 8));
}
481
/* 8x16 variance variant whose second half is addressed at half the stride
 * (presumably for interlaced/field data — confirm against callers).
 * Writes SSE through sse and returns variance = SSE - Sum^2/128. */
unsigned int vp8_i_variance8x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse_a, sse_b, total_sse;
    int sum_a, sum_b, total_sum;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride, &sse_a, &sum_a);
    vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride,
                      ref_ptr + (recon_stride >> 1), recon_stride,
                      &sse_b, &sum_b);

    total_sse = sse_a + sse_b;
    total_sum = sum_a + sum_b;

    *sse = total_sse;
    return total_sse - ((total_sum * total_sum) >> 7);
}
501
vp8_i_sub_pixel_variance16x16_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)502 unsigned int vp8_i_sub_pixel_variance16x16_mmx
503 (
504 const unsigned char *src_ptr,
505 int src_pixels_per_line,
506 int xoffset,
507 int yoffset,
508 const unsigned char *dst_ptr,
509 int dst_pixels_per_line,
510 unsigned int *sse
511 )
512 {
513 int xsum0, xsum1;
514 unsigned int xxsum0, xxsum1;
515 int f2soffset = (src_pixels_per_line >> 1);
516 int f2doffset = (dst_pixels_per_line >> 1);
517
518
519 vp8_filter_block2d_bil_var_mmx(
520 src_ptr, src_pixels_per_line,
521 dst_ptr, dst_pixels_per_line, 8,
522 vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
523 &xsum0, &xxsum0
524 );
525
526
527 vp8_filter_block2d_bil_var_mmx(
528 src_ptr + 8, src_pixels_per_line,
529 dst_ptr + 8, dst_pixels_per_line, 8,
530 vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
531 &xsum1, &xxsum1
532 );
533
534 xsum0 += xsum1;
535 xxsum0 += xxsum1;
536
537 vp8_filter_block2d_bil_var_mmx(
538 src_ptr + f2soffset, src_pixels_per_line,
539 dst_ptr + f2doffset, dst_pixels_per_line, 8,
540 vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
541 &xsum1, &xxsum1
542 );
543
544 xsum0 += xsum1;
545 xxsum0 += xxsum1;
546
547 vp8_filter_block2d_bil_var_mmx(
548 src_ptr + f2soffset + 8, src_pixels_per_line,
549 dst_ptr + f2doffset + 8, dst_pixels_per_line, 8,
550 vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
551 &xsum1, &xxsum1
552 );
553
554 xsum0 += xsum1;
555 xxsum0 += xxsum1;
556 *sse = xxsum0;
557 return (xxsum0 - ((xsum0 * xsum0) >> 8));
558 }
559
560
vp8_i_sub_pixel_variance8x16_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)561 unsigned int vp8_i_sub_pixel_variance8x16_mmx
562 (
563 const unsigned char *src_ptr,
564 int src_pixels_per_line,
565 int xoffset,
566 int yoffset,
567 const unsigned char *dst_ptr,
568 int dst_pixels_per_line,
569 unsigned int *sse
570 )
571 {
572 int xsum0, xsum1;
573 unsigned int xxsum0, xxsum1;
574 int f2soffset = (src_pixels_per_line >> 1);
575 int f2doffset = (dst_pixels_per_line >> 1);
576
577
578 vp8_filter_block2d_bil_var_mmx(
579 src_ptr, src_pixels_per_line,
580 dst_ptr, dst_pixels_per_line, 8,
581 vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
582 &xsum0, &xxsum0
583 );
584
585
586 vp8_filter_block2d_bil_var_mmx(
587 src_ptr + f2soffset, src_pixels_per_line,
588 dst_ptr + f2doffset, dst_pixels_per_line, 8,
589 vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
590 &xsum1, &xxsum1
591 );
592
593 xsum0 += xsum1;
594 xxsum0 += xxsum1;
595 *sse = xxsum0;
596 return (xxsum0 - ((xsum0 * xsum0) >> 7));
597 }
598
599
/* Half-pel horizontal 16x16 variance: filter index 4 is the half-pel tap
 * pair in vp8_vp7_bilinear_filters_mmx; vertical offset is 0. */
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int var = vp8_sub_pixel_variance16x16_mmx(
        src_ptr, source_stride, 4, 0, ref_ptr, recon_stride, sse);
    return var;
}
610
611
/* Half-pel vertical 16x16 variance: horizontal offset 0, vertical filter
 * index 4 (the half-pel tap pair). */
unsigned int vp8_variance_halfpixvar16x16_v_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int var = vp8_sub_pixel_variance16x16_mmx(
        src_ptr, source_stride, 0, 4, ref_ptr, recon_stride, sse);
    return var;
}
622
623
/* Half-pel diagonal 16x16 variance: half-pel filter (index 4) applied in
 * both dimensions. */
unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int var = vp8_sub_pixel_variance16x16_mmx(
        src_ptr, source_stride, 4, 4, ref_ptr, recon_stride, sse);
    return var;
}
634