• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "vpx_config.h"
12 #include "vp8/common/variance.h"
13 #include "vp8/common/pragmas.h"
14 #include "vpx_ports/mem.h"
15 #include "vp8/common/x86/filter_x86.h"
16 
/*
 * Filter passes implemented outside this file (the names suggest horizontal
 * and vertical 6-tap MMX routines -- confirm against the assembly sources).
 * Neither prototype is referenced by the code visible in this file.
 */
extern void filter_block1d_h6_mmx(const unsigned char *src_ptr,
                                  unsigned short *output_ptr,
                                  unsigned int src_pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *filter);

extern void filter_block1d_v6_mmx(const short *src_ptr,
                                  unsigned char *output_ptr,
                                  unsigned int pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *filter);
37 
/*
 * Block statistics helpers implemented outside this file.
 *
 * vp8_get8x8var_mmx / vp8_get4x4var_mmx write two results through their
 * out-parameters: *SSE and *Sum.  The wrappers below treat *SSE as the
 * squared-error total and *Sum as the signed difference total of the block;
 * they ignore the helpers' return values.
 *
 * vp8_get_mb_ss_mmx is declared here but not called by the code visible in
 * this file.
 */
extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr);

extern unsigned int vp8_get8x8var_mmx(const unsigned char *src_ptr,
                                      int source_stride,
                                      const unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *SSE,
                                      int *Sum);

extern unsigned int vp8_get4x4var_mmx(const unsigned char *src_ptr,
                                      int source_stride,
                                      const unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *SSE,
                                      int *Sum);
/*
 * Filter-and-accumulate helpers implemented outside this file.
 *
 * Each takes a horizontal and a vertical filter (HFilter / VFilter) and
 * reports its results through *sum and *sumsquared.  The sub-pixel wrappers
 * below pass rows of vp8_bilinear_filters_x86_4 as the filters.
 *
 * The 4x4 variant has no height argument; the general variant takes the
 * number of rows in Height (callers in this file pass 8 or 16) and is
 * invoked once per 8-pixel-wide column.
 */
extern void vp8_filter_block2d_bil4x4_var_mmx(const unsigned char *ref_ptr,
                                              int ref_pixels_per_line,
                                              const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              const short *HFilter,
                                              const short *VFilter,
                                              int *sum,
                                              unsigned int *sumsquared);

extern void vp8_filter_block2d_bil_var_mmx(const unsigned char *ref_ptr,
                                           int ref_pixels_per_line,
                                           const unsigned char *src_ptr,
                                           int src_pixels_per_line,
                                           unsigned int Height,
                                           const short *HFilter,
                                           const short *VFilter,
                                           int *sum,
                                           unsigned int *sumsquared);
80 
81 
/*
 * Variance of a 4x4 block of src_ptr against ref_ptr.
 *
 * Stores the squared-error total reported by vp8_get4x4var_mmx in *sse and
 * returns that total minus (sum * sum) / 16, where 16 is the pixel count of
 * the block.
 */
unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse)
{
    unsigned int sq_total;
    int diff_total;
    unsigned int mean_term;

    vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                      &sq_total, &diff_total);
    *sse = sq_total;

    /* The product is formed in unsigned arithmetic; >> 4 divides by 16. */
    mean_term = ((unsigned int)diff_total * diff_total) >> 4;

    return sq_total - mean_term;
}
97 
/*
 * Variance of an 8x8 block of src_ptr against ref_ptr.
 *
 * Stores the squared-error total reported by vp8_get8x8var_mmx in *sse and
 * returns that total minus (sum * sum) / 64, where 64 is the pixel count of
 * the block.
 */
unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse)
{
    unsigned int sq_total;
    int diff_total;
    unsigned int mean_term;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                      &sq_total, &diff_total);
    *sse = sq_total;

    /* The product is formed in unsigned arithmetic; >> 6 divides by 64. */
    mean_term = ((unsigned int)diff_total * diff_total) >> 6;

    return sq_total - mean_term;
}
114 
/*
 * Squared-error total of a 16x16 block of src_ptr against ref_ptr.
 *
 * The block is covered by four vp8_get8x8var_mmx calls, one per 8x8 quadrant
 * (top-left, top-right, bottom-left, bottom-right).  The four squared-error
 * results are added together, stored in *sse and also returned.  The sums
 * the helper reports are not used here.
 */
unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr,
                              int source_stride,
                              const unsigned char *ref_ptr,
                              int recon_stride,
                              unsigned int *sse)
{
    unsigned int total = 0;
    int quad;

    for (quad = 0; quad < 4; quad++)
    {
        /* Bit 0 selects the right half, bit 1 the bottom half. */
        const int col = (quad & 1) * 8;
        const int row = (quad >> 1) * 8;
        unsigned int quad_sse;
        int quad_sum; /* required by the helper's signature; unused here */

        vp8_get8x8var_mmx(src_ptr + row * source_stride + col, source_stride,
                          ref_ptr + row * recon_stride + col, recon_stride,
                          &quad_sse, &quad_sum);
        total += quad_sse;
    }

    *sse = total;
    return total;
}
135 
136 
/*
 * Variance of a 16x16 block of src_ptr against ref_ptr.
 *
 * The block is covered by four vp8_get8x8var_mmx calls, one per 8x8 quadrant
 * (top-left, top-right, bottom-left, bottom-right).  The combined
 * squared-error total is stored in *sse; the return value is that total
 * minus (sum * sum) / 256, where 256 is the pixel count of the block.
 */
unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr,
                                   int source_stride,
                                   const unsigned char *ref_ptr,
                                   int recon_stride,
                                   unsigned int *sse)
{
    unsigned int sq_total = 0;
    int diff_total = 0;
    int quad;

    for (quad = 0; quad < 4; quad++)
    {
        /* Bit 0 selects the right half, bit 1 the bottom half. */
        const int col = (quad & 1) * 8;
        const int row = (quad >> 1) * 8;
        unsigned int quad_sse;
        int quad_sum;

        vp8_get8x8var_mmx(src_ptr + row * source_stride + col, source_stride,
                          ref_ptr + row * recon_stride + col, recon_stride,
                          &quad_sse, &quad_sum);
        sq_total += quad_sse;
        diff_total += quad_sum;
    }

    *sse = sq_total;

    /* The product is formed in unsigned arithmetic; >> 8 divides by 256. */
    return sq_total - (((unsigned int)diff_total * diff_total) >> 8);
}
158 
/*
 * Variance of a 16-wide, 8-high block of src_ptr against ref_ptr.
 *
 * Two side-by-side 8x8 blocks (columns 0-7 and 8-15) are measured with
 * vp8_get8x8var_mmx.  The combined squared-error total is stored in *sse;
 * the return value is that total minus (sum * sum) / 128, where 128 is the
 * pixel count of the block.
 */
unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse)
{
    unsigned int left_sse, right_sse, sq_total;
    int left_sum, right_sum, diff_total;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride,
                      &left_sse, &left_sum);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride,
                      ref_ptr + 8, recon_stride,
                      &right_sse, &right_sum);

    sq_total = left_sse + right_sse;
    diff_total = left_sum + right_sum;
    *sse = sq_total;

    /* The product is formed in unsigned arithmetic; >> 7 divides by 128. */
    return sq_total - (((unsigned int)diff_total * diff_total) >> 7);
}
178 
179 
/*
 * Variance of an 8-wide, 16-high block of src_ptr against ref_ptr.
 *
 * Two stacked 8x8 blocks (rows 0-7 and 8-15) are measured with
 * vp8_get8x8var_mmx.  The combined squared-error total is stored in *sse;
 * the return value is that total minus (sum * sum) / 128, where 128 is the
 * pixel count of the block.
 */
unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse)
{
    unsigned int top_sse, bottom_sse, sq_total;
    int top_sum, bottom_sum, diff_total;

    vp8_get8x8var_mmx(src_ptr, source_stride,
                      ref_ptr, recon_stride,
                      &top_sse, &top_sum);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
                      ref_ptr + 8 * recon_stride, recon_stride,
                      &bottom_sse, &bottom_sum);

    sq_total = top_sse + bottom_sse;
    diff_total = top_sum + bottom_sum;
    *sse = sq_total;

    /* The product is formed in unsigned arithmetic; >> 7 divides by 128. */
    return sq_total - (((unsigned int)diff_total * diff_total) >> 7);
}
200 
201 
vp8_sub_pixel_variance4x4_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)202 unsigned int vp8_sub_pixel_variance4x4_mmx
203 (
204     const unsigned char  *src_ptr,
205     int  src_pixels_per_line,
206     int  xoffset,
207     int  yoffset,
208     const unsigned char *dst_ptr,
209     int dst_pixels_per_line,
210     unsigned int *sse)
211 
212 {
213     int xsum;
214     unsigned int xxsum;
215     vp8_filter_block2d_bil4x4_var_mmx(
216         src_ptr, src_pixels_per_line,
217         dst_ptr, dst_pixels_per_line,
218         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
219         &xsum, &xxsum
220     );
221     *sse = xxsum;
222     return (xxsum - (((unsigned int)xsum * xsum) >> 4));
223 }
224 
225 
vp8_sub_pixel_variance8x8_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)226 unsigned int vp8_sub_pixel_variance8x8_mmx
227 (
228     const unsigned char  *src_ptr,
229     int  src_pixels_per_line,
230     int  xoffset,
231     int  yoffset,
232     const unsigned char *dst_ptr,
233     int dst_pixels_per_line,
234     unsigned int *sse
235 )
236 {
237 
238     int xsum;
239     unsigned int xxsum;
240     vp8_filter_block2d_bil_var_mmx(
241         src_ptr, src_pixels_per_line,
242         dst_ptr, dst_pixels_per_line, 8,
243         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
244         &xsum, &xxsum
245     );
246     *sse = xxsum;
247     return (xxsum - (((unsigned int)xsum * xsum) >> 6));
248 }
249 
vp8_sub_pixel_variance16x16_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)250 unsigned int vp8_sub_pixel_variance16x16_mmx
251 (
252     const unsigned char  *src_ptr,
253     int  src_pixels_per_line,
254     int  xoffset,
255     int  yoffset,
256     const unsigned char *dst_ptr,
257     int dst_pixels_per_line,
258     unsigned int *sse
259 )
260 {
261 
262     int xsum0, xsum1;
263     unsigned int xxsum0, xxsum1;
264 
265 
266     vp8_filter_block2d_bil_var_mmx(
267         src_ptr, src_pixels_per_line,
268         dst_ptr, dst_pixels_per_line, 16,
269         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
270         &xsum0, &xxsum0
271     );
272 
273 
274     vp8_filter_block2d_bil_var_mmx(
275         src_ptr + 8, src_pixels_per_line,
276         dst_ptr + 8, dst_pixels_per_line, 16,
277         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
278         &xsum1, &xxsum1
279     );
280 
281     xsum0 += xsum1;
282     xxsum0 += xxsum1;
283 
284     *sse = xxsum0;
285     return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
286 
287 
288 }
289 
/*
 * Sub-pixel squared-error total for a 16x16 block.
 *
 * Delegates to vp8_sub_pixel_variance16x16_mmx, discards the variance it
 * returns, and returns the value that call stored in *sse.
 */
unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr,
                                        int src_pixels_per_line,
                                        int xoffset,
                                        int yoffset,
                                        const unsigned char *dst_ptr,
                                        int dst_pixels_per_line,
                                        unsigned int *sse)
{
    /* Only the *sse side effect is wanted; the variance result is dropped. */
    (void)vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line,
                                          xoffset, yoffset,
                                          dst_ptr, dst_pixels_per_line,
                                          sse);

    return *sse;
}
303 
vp8_sub_pixel_variance16x8_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)304 unsigned int vp8_sub_pixel_variance16x8_mmx
305 (
306     const unsigned char  *src_ptr,
307     int  src_pixels_per_line,
308     int  xoffset,
309     int  yoffset,
310     const unsigned char *dst_ptr,
311     int dst_pixels_per_line,
312     unsigned int *sse
313 )
314 {
315     int xsum0, xsum1;
316     unsigned int xxsum0, xxsum1;
317 
318 
319     vp8_filter_block2d_bil_var_mmx(
320         src_ptr, src_pixels_per_line,
321         dst_ptr, dst_pixels_per_line, 8,
322         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
323         &xsum0, &xxsum0
324     );
325 
326 
327     vp8_filter_block2d_bil_var_mmx(
328         src_ptr + 8, src_pixels_per_line,
329         dst_ptr + 8, dst_pixels_per_line, 8,
330         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
331         &xsum1, &xxsum1
332     );
333 
334     xsum0 += xsum1;
335     xxsum0 += xxsum1;
336 
337     *sse = xxsum0;
338     return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));
339 }
340 
vp8_sub_pixel_variance8x16_mmx(const unsigned char * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const unsigned char * dst_ptr,int dst_pixels_per_line,unsigned int * sse)341 unsigned int vp8_sub_pixel_variance8x16_mmx
342 (
343     const unsigned char  *src_ptr,
344     int  src_pixels_per_line,
345     int  xoffset,
346     int  yoffset,
347     const unsigned char *dst_ptr,
348     int dst_pixels_per_line,
349     unsigned int *sse
350 )
351 {
352     int xsum;
353     unsigned int xxsum;
354     vp8_filter_block2d_bil_var_mmx(
355         src_ptr, src_pixels_per_line,
356         dst_ptr, dst_pixels_per_line, 16,
357         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
358         &xsum, &xxsum
359     );
360     *sse = xxsum;
361     return (xxsum - (((unsigned int)xsum * xsum) >> 7));
362 }
363 
364 
/*
 * 16x16 variance with a fixed horizontal sub-pixel offset.
 *
 * Forwards to vp8_sub_pixel_variance16x16_mmx with xoffset = 4 and
 * yoffset = 0, and returns its result; *sse is written by that call.
 * NOTE(review): 4 is presumably the half-pixel entry of the filter table --
 * confirm against filter_x86.h.
 */
unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr,
                                                int source_stride,
                                                const unsigned char *ref_ptr,
                                                int recon_stride,
                                                unsigned int *sse)
{
    const int x_off = 4;
    const int y_off = 0;

    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                           x_off, y_off,
                                           ref_ptr, recon_stride, sse);
}
375 
376 
/*
 * 16x16 variance with a fixed vertical sub-pixel offset.
 *
 * Forwards to vp8_sub_pixel_variance16x16_mmx with xoffset = 0 and
 * yoffset = 4, and returns its result; *sse is written by that call.
 * NOTE(review): 4 is presumably the half-pixel entry of the filter table --
 * confirm against filter_x86.h.
 */
unsigned int vp8_variance_halfpixvar16x16_v_mmx(const unsigned char *src_ptr,
                                                int source_stride,
                                                const unsigned char *ref_ptr,
                                                int recon_stride,
                                                unsigned int *sse)
{
    const int x_off = 0;
    const int y_off = 4;

    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                           x_off, y_off,
                                           ref_ptr, recon_stride, sse);
}
387 
388 
/*
 * 16x16 variance with fixed horizontal and vertical sub-pixel offsets.
 *
 * Forwards to vp8_sub_pixel_variance16x16_mmx with xoffset = 4 and
 * yoffset = 4, and returns its result; *sse is written by that call.
 * NOTE(review): 4 is presumably the half-pixel entry of the filter table --
 * confirm against filter_x86.h.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_mmx(const unsigned char *src_ptr,
                                                 int source_stride,
                                                 const unsigned char *ref_ptr,
                                                 int recon_stride,
                                                 unsigned int *sse)
{
    const int x_off = 4;
    const int y_off = 4;

    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                           x_off, y_off,
                                           ref_ptr, recon_stride, sse);
}
399