• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include "vp8/encoder/variance.h"
13 #include "vp8/common/pragmas.h"
14 #include "vpx_ports/mem.h"
15 
/*
 * Prototypes for routines whose definitions are not in this file
 * (presumably the MMX assembly implementations -- confirm against the build).
 */

/* Not called anywhere in the code visible in this file. */
extern void filter_block1d_h6_mmx(
    const unsigned char *src_ptr, unsigned short *output_ptr,
    unsigned int src_pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
    short *vp7_filter);

/* Not called anywhere in the code visible in this file. */
extern void filter_block1d_v6_mmx(
    const short *src_ptr, unsigned char *output_ptr,
    unsigned int pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
    short *vp7_filter);

/* Not called anywhere in the code visible in this file. */
extern unsigned int vp8_get_mb_ss_mmx(short *src_ptr);

/* 8x8 primitive: the callers below read *SSE and *Sum after the call. */
extern unsigned int vp8_get8x8var_mmx(
    const unsigned char *src_ptr, int source_stride,
    const unsigned char *ref_ptr, int recon_stride,
    unsigned int *SSE, int *Sum);

/* 4x4 primitive: the caller below reads *SSE and *Sum after the call. */
extern unsigned int vp8_get4x4var_mmx(
    const unsigned char *src_ptr, int source_stride,
    const unsigned char *ref_ptr, int recon_stride,
    unsigned int *SSE, int *Sum);

/* 4x4 filter-and-accumulate primitive; outputs are *sum and *sumsquared. */
extern void vp8_filter_block2d_bil4x4_var_mmx(
    const unsigned char *ref_ptr, int ref_pixels_per_line,
    const unsigned char *src_ptr, int src_pixels_per_line,
    const short *HFilter, const short *VFilter,
    int *sum, unsigned int *sumsquared);

/*
 * Filter-and-accumulate primitive taking an explicit Height; the callers
 * below invoke it once per 8-column strip. Outputs are *sum and *sumsquared.
 */
extern void vp8_filter_block2d_bil_var_mmx(
    const unsigned char *ref_ptr, int ref_pixels_per_line,
    const unsigned char *src_ptr, int src_pixels_per_line,
    unsigned int Height,
    const short *HFilter, const short *VFilter,
    int *sum, unsigned int *sumsquared);

/* Not called anywhere in the code visible in this file. */
extern unsigned int vp8_get16x16pred_error_mmx(
    unsigned char *src_ptr, int src_stride,
    unsigned char *ref_ptr, int ref_stride);
86 
87 
/*
 * Combines four vp8_get8x8var_mmx calls, one per 8x8 quadrant (top-left,
 * top-right, bottom-left, bottom-right), into 16x16 totals.
 *
 * src_ptr / source_stride : first block and the step between its rows
 * ref_ptr / recon_stride  : second block and the step between its rows
 * SSE                     : receives the four per-quadrant SSE values added up
 * SUM                     : receives the four per-quadrant sums added up; the
 *                           signed total is stored through an unsigned
 *                           pointer, i.e. converted to unsigned
 *
 * Returns SSE - ((sum * sum) >> 8).
 */
unsigned int vp8_get16x16var_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned *SSE,
    unsigned *SUM
)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;

    *SSE = var;
    *SUM = avg;

    /*
     * Square in unsigned arithmetic. Assuming each 8x8 sum accumulates 64
     * differences of 8-bit samples, |avg| can reach 255 * 256 = 65280; its
     * square (4261478400) does not fit in a 32-bit signed int, so the signed
     * multiply would be undefined behavior, but it does fit in 32 unsigned
     * bits. For values that did not overflow the result is unchanged.
     */
    return (var - (((unsigned int)avg * (unsigned int)avg) >> 8));
}
114 
115 
116 
117 
118 
/*
 * 4x4 wrapper around vp8_get4x4var_mmx.
 *
 * Stores the SSE reported by the primitive in *sse and returns
 * SSE - ((sum * sum) >> 4), where sum is the primitive's second output.
 */
unsigned int vp8_variance4x4_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_total;
    int diff_total;
    int sum_sq_scaled;

    vp8_get4x4var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride,
                      &sq_total, &diff_total);

    *sse = sq_total;

    /* >> 4 divides the squared sum by 16 (= 4 * 4). */
    sum_sq_scaled = (diff_total * diff_total) >> 4;
    return sq_total - sum_sq_scaled;
}
134 
/*
 * 8x8 wrapper around vp8_get8x8var_mmx.
 *
 * Stores the SSE reported by the primitive in *sse and returns
 * SSE - ((sum * sum) >> 6), where sum is the primitive's second output.
 */
unsigned int vp8_variance8x8_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_total;
    int diff_total;
    int sum_sq_scaled;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride,
                      &sq_total, &diff_total);

    *sse = sq_total;

    /* >> 6 divides the squared sum by 64 (= 8 * 8). */
    sum_sq_scaled = (diff_total * diff_total) >> 6;
    return sq_total - sum_sq_scaled;
}
151 
/*
 * 16x16 sum of squared errors, built from four vp8_get8x8var_mmx calls.
 *
 * The per-quadrant SSE values are added together; the total is both stored
 * in *sse and returned. The per-quadrant sums are requested (the primitive
 * takes an output pointer for them) but not used.
 */
unsigned int vp8_mse16x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int total_sse = 0;
    int quadrant;

    /* Quadrant order: (row 0, col 0), (0, 8), (8, 0), (8, 8). */
    for (quadrant = 0; quadrant < 4; quadrant++)
    {
        const int col = (quadrant & 1) * 8;
        const int row = (quadrant >> 1) * 8;
        unsigned int quad_sse;
        int quad_sum;   /* written by the callee, ignored here */

        vp8_get8x8var_mmx(src_ptr + row * source_stride + col, source_stride,
                          ref_ptr + row * recon_stride + col, recon_stride,
                          &quad_sse, &quad_sum);

        total_sse += quad_sse;
    }

    *sse = total_sse;
    return total_sse;
}
172 
173 
/*
 * 16x16 wrapper built from four vp8_get8x8var_mmx calls, one per 8x8
 * quadrant (top-left, top-right, bottom-left, bottom-right).
 *
 * Stores the summed SSE in *sse and returns SSE - ((sum * sum) >> 8), where
 * sum is the total of the four per-quadrant sums.
 *
 * NOTE(review): sse is declared int * here while the sibling functions in
 * this file take unsigned int *; the type is left as-is so existing callers
 * and declarations keep matching.
 */
unsigned int vp8_variance16x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;
    *sse = var;

    /*
     * Square in unsigned arithmetic. Assuming each 8x8 sum accumulates 64
     * differences of 8-bit samples, |avg| can reach 255 * 256 = 65280; its
     * square (4261478400) does not fit in a 32-bit signed int, so the signed
     * multiply would be undefined behavior, but it does fit in 32 unsigned
     * bits. For values that did not overflow the result is unchanged.
     */
    return (var - (((unsigned int)avg * (unsigned int)avg) >> 8));
}
195 
/*
 * 16x8 wrapper built from two vp8_get8x8var_mmx calls: the left 8 columns
 * and the right 8 columns (pointers advanced by 8).
 *
 * Stores the summed SSE in *sse and returns SSE - ((sum * sum) >> 7), where
 * sum is the total of the two per-half sums.
 */
unsigned int vp8_variance16x8_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int left_sse, right_sse, total_sse;
    int left_sum, right_sum, total_sum;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride,
                      &left_sse, &left_sum);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride,
                      ref_ptr + 8, recon_stride,
                      &right_sse, &right_sum);

    total_sse = left_sse + right_sse;
    total_sum = left_sum + right_sum;

    *sse = total_sse;

    /* >> 7 divides the squared sum by 128 (= 16 * 8). */
    return total_sse - ((total_sum * total_sum) >> 7);
}
215 
216 
/*
 * 8x16 wrapper built from two vp8_get8x8var_mmx calls: the top 8 rows and
 * the bottom 8 rows (pointers advanced by 8 strides).
 *
 * Stores the summed SSE in *sse and returns SSE - ((sum * sum) >> 7), where
 * sum is the total of the two per-half sums.
 */
unsigned int vp8_variance8x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int top_sse, bottom_sse, total_sse;
    int top_sum, bottom_sum, total_sum;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride,
                      &top_sse, &top_sum);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
                      ref_ptr + 8 * recon_stride, recon_stride,
                      &bottom_sse, &bottom_sum);

    total_sse = top_sse + bottom_sse;
    total_sum = top_sum + bottom_sum;

    *sse = total_sse;

    /* >> 7 divides the squared sum by 128 (= 8 * 16). */
    return total_sse - ((total_sum * total_sum) >> 7);
}
237 
238 
239 
240 
///////////////////////////////////////////////////////////////////////////
// the mmx function that does the bilinear filtering and var calculation //
// in one pass                                                           //
///////////////////////////////////////////////////////////////////////////
245 DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
246 {
247     { 128, 128, 128, 128,  0,  0,  0,  0 },
248     { 112, 112, 112, 112, 16, 16, 16, 16 },
249     {  96, 96, 96, 96, 32, 32, 32, 32 },
250     {  80, 80, 80, 80, 48, 48, 48, 48 },
251     {  64, 64, 64, 64, 64, 64, 64, 64 },
252     {  48, 48, 48, 48, 80, 80, 80, 80 },
253     {  32, 32, 32, 32, 96, 96, 96, 96 },
254     {  16, 16, 16, 16, 112, 112, 112, 112 }
255 };
256 
vp8_sub_pixel_variance4x4_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)257 unsigned int vp8_sub_pixel_variance4x4_mmx
258 (
259     const unsigned char  *src_ptr,
260     int  src_pixels_per_line,
261     int  xoffset,
262     int  yoffset,
263     const unsigned char *dst_ptr,
264     int dst_pixels_per_line,
265     unsigned int *sse)
266 
267 {
268     int xsum;
269     unsigned int xxsum;
270     vp8_filter_block2d_bil4x4_var_mmx(
271         src_ptr, src_pixels_per_line,
272         dst_ptr, dst_pixels_per_line,
273         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
274         &xsum, &xxsum
275     );
276     *sse = xxsum;
277     return (xxsum - ((xsum * xsum) >> 4));
278 }
279 
280 
vp8_sub_pixel_variance8x8_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)281 unsigned int vp8_sub_pixel_variance8x8_mmx
282 (
283     const unsigned char  *src_ptr,
284     int  src_pixels_per_line,
285     int  xoffset,
286     int  yoffset,
287     const unsigned char *dst_ptr,
288     int dst_pixels_per_line,
289     unsigned int *sse
290 )
291 {
292 
293     int xsum;
294     unsigned int xxsum;
295     vp8_filter_block2d_bil_var_mmx(
296         src_ptr, src_pixels_per_line,
297         dst_ptr, dst_pixels_per_line, 8,
298         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
299         &xsum, &xxsum
300     );
301     *sse = xxsum;
302     return (xxsum - ((xsum * xsum) >> 6));
303 }
304 
vp8_sub_pixel_variance16x16_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)305 unsigned int vp8_sub_pixel_variance16x16_mmx
306 (
307     const unsigned char  *src_ptr,
308     int  src_pixels_per_line,
309     int  xoffset,
310     int  yoffset,
311     const unsigned char *dst_ptr,
312     int dst_pixels_per_line,
313     unsigned int *sse
314 )
315 {
316 
317     int xsum0, xsum1;
318     unsigned int xxsum0, xxsum1;
319 
320 
321     vp8_filter_block2d_bil_var_mmx(
322         src_ptr, src_pixels_per_line,
323         dst_ptr, dst_pixels_per_line, 16,
324         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
325         &xsum0, &xxsum0
326     );
327 
328 
329     vp8_filter_block2d_bil_var_mmx(
330         src_ptr + 8, src_pixels_per_line,
331         dst_ptr + 8, dst_pixels_per_line, 16,
332         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
333         &xsum1, &xxsum1
334     );
335 
336     xsum0 += xsum1;
337     xxsum0 += xxsum1;
338 
339     *sse = xxsum0;
340     return (xxsum0 - ((xsum0 * xsum0) >> 8));
341 
342 
343 }
344 
/*
 * 16x16 sub-pixel squared-error wrapper.
 *
 * Delegates to vp8_sub_pixel_variance16x16_mmx with the same arguments,
 * ignores the value it returns, and returns what it stored in *sse.
 */
unsigned int vp8_sub_pixel_mse16x16_mmx(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    /* Only the *sse side effect is wanted; the returned value is dropped. */
    (void)vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line,
                                          xoffset, yoffset,
                                          dst_ptr, dst_pixels_per_line,
                                          sse);

    return *sse;
}
358 
vp8_sub_pixel_variance16x8_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)359 unsigned int vp8_sub_pixel_variance16x8_mmx
360 (
361     const unsigned char  *src_ptr,
362     int  src_pixels_per_line,
363     int  xoffset,
364     int  yoffset,
365     const unsigned char *dst_ptr,
366     int dst_pixels_per_line,
367     unsigned int *sse
368 )
369 {
370     int xsum0, xsum1;
371     unsigned int xxsum0, xxsum1;
372 
373 
374     vp8_filter_block2d_bil_var_mmx(
375         src_ptr, src_pixels_per_line,
376         dst_ptr, dst_pixels_per_line, 8,
377         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
378         &xsum0, &xxsum0
379     );
380 
381 
382     vp8_filter_block2d_bil_var_mmx(
383         src_ptr + 8, src_pixels_per_line,
384         dst_ptr + 8, dst_pixels_per_line, 8,
385         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
386         &xsum1, &xxsum1
387     );
388 
389     xsum0 += xsum1;
390     xxsum0 += xxsum1;
391 
392     *sse = xxsum0;
393     return (xxsum0 - ((xsum0 * xsum0) >> 7));
394 }
395 
vp8_sub_pixel_variance8x16_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,int * sse)396 unsigned int vp8_sub_pixel_variance8x16_mmx
397 (
398     const unsigned char  *src_ptr,
399     int  src_pixels_per_line,
400     int  xoffset,
401     int  yoffset,
402     const unsigned char *dst_ptr,
403     int dst_pixels_per_line,
404     int *sse
405 )
406 {
407     int xsum;
408     unsigned int xxsum;
409     vp8_filter_block2d_bil_var_mmx(
410         src_ptr, src_pixels_per_line,
411         dst_ptr, dst_pixels_per_line, 16,
412         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
413         &xsum, &xxsum
414     );
415     *sse = xxsum;
416     return (xxsum - ((xsum * xsum) >> 7));
417 }
418 
419 
/*
 * 16x16 wrapper that forwards to vp8_sub_pixel_variance16x16_mmx with
 * xoffset = 4 and yoffset = 0, returning its result unchanged.
 */
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    /* In the filter table, row 4 has equal taps (64, 64) and row 0 is (128, 0). */
    const int x_off = 4;
    const int y_off = 0;

    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                           x_off, y_off,
                                           ref_ptr, recon_stride, sse);
}
430 
431 
/*
 * 16x16 wrapper that forwards to vp8_sub_pixel_variance16x16_mmx with
 * xoffset = 0 and yoffset = 4, returning its result unchanged.
 */
unsigned int vp8_variance_halfpixvar16x16_v_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    /* In the filter table, row 0 is (128, 0) and row 4 has equal taps (64, 64). */
    const int x_off = 0;
    const int y_off = 4;

    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                           x_off, y_off,
                                           ref_ptr, recon_stride, sse);
}
442 
443 
/*
 * 16x16 wrapper that forwards to vp8_sub_pixel_variance16x16_mmx with
 * xoffset = 4 and yoffset = 4, returning its result unchanged.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    /* In the filter table, row 4 has equal taps (64, 64); used on both axes. */
    const int x_off = 4;
    const int y_off = 4;

    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                           x_off, y_off,
                                           ref_ptr, recon_stride, sse);
}
454