• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vp9_rtcd.h"
12 
13 #include "vpx_ports/mem.h"
14 #include "vpx/vpx_integer.h"
15 
16 #include "vp9/common/vp9_common.h"
17 #include "vp9/common/vp9_filter.h"
18 
19 #include "vp9/encoder/vp9_variance.h"
20 
// Accumulates, over a w x h block, the sum of per-pixel differences
// (src - ref) into *sum and the sum of squared differences into *sse.
// Both outputs are zero when w or h is not positive.
void variance(const uint8_t *src_ptr,
              int  source_stride,
              const uint8_t *ref_ptr,
              int  recon_stride,
              int  w,
              int  h,
              unsigned int *sse,
              int *sum) {
  int total = 0;
  unsigned int sq_total = 0;
  int row, col;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      const int d = src_ptr[col] - ref_ptr[col];
      total += d;
      sq_total += d * d;
    }

    // Step both blocks down one row using their own strides.
    src_ptr += source_stride;
    ref_ptr += recon_stride;
  }

  *sum = total;
  *sse = sq_total;
}
46 
/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_first_pass
 *
 *  INPUTS        : const uint8_t *src_ptr     : Pointer to source block.
 *                  unsigned int src_pixels_per_line : Stride of input block.
 *                  int pixel_step             : Offset between filter input
 *                                               samples (see notes).
 *                  unsigned int output_height : Number of rows to produce.
 *                  unsigned int output_width  : Number of columns to produce.
 *                  const int16_t *vp9_filter  : Array of 2 bi-linear filter
 *                                               taps.
 *
 *  OUTPUTS       : uint16_t *output_ptr       : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
 *                  either horizontal or vertical direction to produce the
 *                  filtered output block. Used to implement first-pass
 *                  of 2-D separable filter.
 *
 *  SPECIAL NOTES : Produces uint16_t output, which is the input type of the
 *                  second pass.
 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step defines whether the filter is applied
 *                  horizontally (pixel_step=1) or vertically (pixel_step=
 *                  stride).
 *                  It defines the offset required to move from one input
 *                  to the next.
 *
 ****************************************************************************/
var_filter_block2d_bil_first_pass(const uint8_t * src_ptr,uint16_t * output_ptr,unsigned int src_pixels_per_line,int pixel_step,unsigned int output_height,unsigned int output_width,const int16_t * vp9_filter)78 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
79                                               uint16_t *output_ptr,
80                                               unsigned int src_pixels_per_line,
81                                               int pixel_step,
82                                               unsigned int output_height,
83                                               unsigned int output_width,
84                                               const int16_t *vp9_filter) {
85   unsigned int i, j;
86 
87   for (i = 0; i < output_height; i++) {
88     for (j = 0; j < output_width; j++) {
89       output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
90                           (int)src_ptr[pixel_step] * vp9_filter[1],
91                           FILTER_BITS);
92 
93       src_ptr++;
94     }
95 
96     // Next row...
97     src_ptr    += src_pixels_per_line - output_width;
98     output_ptr += output_width;
99   }
100 }
101 
/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_second_pass
 *
 *  INPUTS        : const uint16_t *src_ptr    : Pointer to source block.
 *                  unsigned int src_pixels_per_line : Stride of input block.
 *                  unsigned int pixel_step    : Offset between filter input
 *                                               samples (see notes).
 *                  unsigned int output_height : Number of rows to produce.
 *                  unsigned int output_width  : Number of columns to produce.
 *                  const int16_t *vp9_filter  : Array of 2 bi-linear filter
 *                                               taps.
 *
 *  OUTPUTS       : uint8_t *output_ptr        : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
 *                  either horizontal or vertical direction to produce the
 *                  filtered output block. Used to implement second-pass
 *                  of 2-D separable filter.
 *
 *  SPECIAL NOTES : Requires 16-bit input as produced by
 *                  var_filter_block2d_bil_first_pass.
 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step defines whether the filter is applied
 *                  horizontally (pixel_step=1) or vertically (pixel_step=
 *                  stride).
 *                  It defines the offset required to move from one input
 *                  to the next.
 *
 ****************************************************************************/
var_filter_block2d_bil_second_pass(const uint16_t * src_ptr,uint8_t * output_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const int16_t * vp9_filter)134 static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
135                                                uint8_t *output_ptr,
136                                                unsigned int src_pixels_per_line,
137                                                unsigned int pixel_step,
138                                                unsigned int output_height,
139                                                unsigned int output_width,
140                                                const int16_t *vp9_filter) {
141   unsigned int  i, j;
142 
143   for (i = 0; i < output_height; i++) {
144     for (j = 0; j < output_width; j++) {
145       output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
146                           (int)src_ptr[pixel_step] * vp9_filter[1],
147                           FILTER_BITS);
148       src_ptr++;
149     }
150 
151     src_ptr += src_pixels_per_line - output_width;
152     output_ptr += output_width;
153   }
154 }
155 
// Returns the sum of squares of the first 256 values at src_ptr
// (presumably one 16x16 macroblock of residuals -- confirm with callers).
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  const int16_t *const end = src_ptr + 256;
  unsigned int total = 0;

  while (src_ptr != end) {
    const int v = *src_ptr++;
    total += v * v;
  }

  return total;
}
165 
// 64x32 variance: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / 2048, the division done as a shift by 11.
unsigned int vp9_variance64x32_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square of the sum is formed in 64 bits.
  sum_sq = (int64_t)diff_sum * diff_sum;
  return (unsigned int)(sq_err - (sum_sq >> 11));
}
178 
// Bilinearly filters a 64x32 source block at sub-pixel offset
// (xoffset, yoffset) and returns its variance against dst_ptr.
unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz[65 * 64];    // output of the horizontal (first) pass
  uint8_t filtered[68 * 64];  // output of the vertical (second) pass
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // 33 rows are filtered horizontally: the vertical pass reads the row
  // below each of its 32 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line, 1,
                                    33, 64, hfilter);
  // Vertical pass: stride and pixel step both equal the block width.
  var_filter_block2d_bil_second_pass(horiz, filtered, 64, 64, 32, 64,
                                     vfilter);

  return vp9_variance64x32(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
199 
vp9_sub_pixel_avg_variance64x32_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)200 unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
201                                                int  src_pixels_per_line,
202                                                int  xoffset,
203                                                int  yoffset,
204                                                const uint8_t *dst_ptr,
205                                                int dst_pixels_per_line,
206                                                unsigned int *sse,
207                                                const uint8_t *second_pred) {
208   uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
209   uint8_t temp2[68 * 64];
210   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
211   const int16_t *hfilter, *vfilter;
212 
213   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
214   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
215 
216   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
217                                     1, 33, 64, hfilter);
218   var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
219   vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
220   return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
221 }
222 
// 32x64 variance: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / 2048, the division done as a shift by 11.
unsigned int vp9_variance32x64_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square of the sum is formed in 64 bits.
  sum_sq = (int64_t)diff_sum * diff_sum;
  return (unsigned int)(sq_err - (sum_sq >> 11));
}
235 
// Bilinearly filters a 32x64 source block at sub-pixel offset
// (xoffset, yoffset) and returns its variance against dst_ptr.
unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz[65 * 64];    // output of the horizontal (first) pass
  uint8_t filtered[68 * 64];  // output of the vertical (second) pass
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // 65 rows are filtered horizontally: the vertical pass reads the row
  // below each of its 64 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line, 1,
                                    65, 32, hfilter);
  // Vertical pass: stride and pixel step both equal the block width.
  var_filter_block2d_bil_second_pass(horiz, filtered, 32, 32, 64, 32,
                                     vfilter);

  return vp9_variance32x64(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
256 
vp9_sub_pixel_avg_variance32x64_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)257 unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
258                                                int  src_pixels_per_line,
259                                                int  xoffset,
260                                                int  yoffset,
261                                                const uint8_t *dst_ptr,
262                                                int dst_pixels_per_line,
263                                                unsigned int *sse,
264                                                const uint8_t *second_pred) {
265   uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
266   uint8_t temp2[68 * 64];
267   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);  // compound pred buffer
268   const int16_t *hfilter, *vfilter;
269 
270   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
271   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
272 
273   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
274                                     1, 65, 32, hfilter);
275   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
276   vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
277   return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
278 }
279 
// 32x16 variance: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / 512, the division done as a shift by 9.
unsigned int vp9_variance32x16_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square of the sum is formed in 64 bits.
  sum_sq = (int64_t)diff_sum * diff_sum;
  return (unsigned int)(sq_err - (sum_sq >> 9));
}
292 
// Bilinearly filters a 32x16 source block at sub-pixel offset
// (xoffset, yoffset) and returns its variance against dst_ptr.
unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz[33 * 32];    // output of the horizontal (first) pass
  uint8_t filtered[36 * 32];  // output of the vertical (second) pass
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // 17 rows are filtered horizontally: the vertical pass reads the row
  // below each of its 16 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line, 1,
                                    17, 32, hfilter);
  // Vertical pass: stride and pixel step both equal the block width.
  var_filter_block2d_bil_second_pass(horiz, filtered, 32, 32, 16, 32,
                                     vfilter);

  return vp9_variance32x16(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
313 
vp9_sub_pixel_avg_variance32x16_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)314 unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
315                                                int  src_pixels_per_line,
316                                                int  xoffset,
317                                                int  yoffset,
318                                                const uint8_t *dst_ptr,
319                                                int dst_pixels_per_line,
320                                                unsigned int *sse,
321                                                const uint8_t *second_pred) {
322   uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
323   uint8_t temp2[36 * 32];
324   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);  // compound pred buffer
325   const int16_t *hfilter, *vfilter;
326 
327   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
328   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
329 
330   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
331                                     1, 17, 32, hfilter);
332   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
333   vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
334   return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
335 }
336 
// 16x32 variance: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / 512, the division done as a shift by 9.
unsigned int vp9_variance16x32_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square of the sum is formed in 64 bits.
  sum_sq = (int64_t)diff_sum * diff_sum;
  return (unsigned int)(sq_err - (sum_sq >> 9));
}
349 
// Bilinearly filters a 16x32 source block at sub-pixel offset
// (xoffset, yoffset) and returns its variance against dst_ptr.
unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
                                           int  src_pixels_per_line,
                                           int  xoffset,
                                           int  yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz[33 * 32];    // output of the horizontal (first) pass
  uint8_t filtered[36 * 32];  // output of the vertical (second) pass
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // 33 rows are filtered horizontally: the vertical pass reads the row
  // below each of its 32 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line, 1,
                                    33, 16, hfilter);
  // Vertical pass: stride and pixel step both equal the block width.
  var_filter_block2d_bil_second_pass(horiz, filtered, 16, 16, 32, 16,
                                     vfilter);

  return vp9_variance16x32(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
370 
vp9_sub_pixel_avg_variance16x32_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)371 unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
372                                                int  src_pixels_per_line,
373                                                int  xoffset,
374                                                int  yoffset,
375                                                const uint8_t *dst_ptr,
376                                                int dst_pixels_per_line,
377                                                unsigned int *sse,
378                                                const uint8_t *second_pred) {
379   uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
380   uint8_t temp2[36 * 32];
381   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);  // compound pred buffer
382   const int16_t *hfilter, *vfilter;
383 
384   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
385   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
386 
387   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
388                                     1, 33, 16, hfilter);
389   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
390   vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
391   return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
392 }
393 
// 64x64 variance: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / 4096, the division done as a shift by 12.
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square of the sum is formed in 64 bits.
  sum_sq = (int64_t)diff_sum * diff_sum;
  return (unsigned int)(sq_err - (sum_sq >> 12));
}
406 
// 32x32 variance: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / 1024, the division done as a shift by 10.
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square of the sum is formed in 64 bits.
  sum_sq = (int64_t)diff_sum * diff_sum;
  return (unsigned int)(sq_err - (sum_sq >> 10));
}
419 
// Fixed-size entry point: forwards to variance() with a 16x16 block, writing
// the results straight into the caller's *sse and *sum.
void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride,
                             const uint8_t *ref_ptr, int ref_stride,
                             unsigned int *sse, int *sum) {
  const int block_w = 16;
  const int block_h = 16;

  variance(src_ptr, source_stride, ref_ptr, ref_stride, block_w, block_h,
           sse, sum);
}
425 
// 16x16 variance: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / 256, the division done as a shift by 8.
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
                                 int  source_stride,
                                 const uint8_t *ref_ptr,
                                 int  recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square of the sum is formed in unsigned 32-bit arithmetic.
  mean_term = ((unsigned int)diff_sum * diff_sum) >> 8;
  return sq_err - mean_term;
}
438 
// 8x16 variance: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / 128, the division done as a shift by 7.
unsigned int vp9_variance8x16_c(const uint8_t *src_ptr,
                                int  source_stride,
                                const uint8_t *ref_ptr,
                                int  recon_stride,
                                unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square of the sum is formed in unsigned 32-bit arithmetic.
  mean_term = ((unsigned int)diff_sum * diff_sum) >> 7;
  return sq_err - mean_term;
}
451 
// 16x8 variance: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / 128, the division done as a shift by 7.
unsigned int vp9_variance16x8_c(const uint8_t *src_ptr,
                                int  source_stride,
                                const uint8_t *ref_ptr,
                                int  recon_stride,
                                unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square of the sum is formed in unsigned 32-bit arithmetic.
  mean_term = ((unsigned int)diff_sum * diff_sum) >> 7;
  return sq_err - mean_term;
}
464 
// Fixed-size entry point: forwards to variance() with an 8x8 block, writing
// the results straight into the caller's *sse and *sum.
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int ref_stride,
                           unsigned int *sse, int *sum) {
  const int block_w = 8;
  const int block_h = 8;

  variance(src_ptr, source_stride, ref_ptr, ref_stride, block_w, block_h,
           sse, sum);
}
470 
// 8x8 variance: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / 64, the division done as a shift by 6.
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square of the sum is formed in unsigned 32-bit arithmetic.
  mean_term = ((unsigned int)diff_sum * diff_sum) >> 6;
  return sq_err - mean_term;
}
483 
// 8x4 variance: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / 32, the division done as a shift by 5.
unsigned int vp9_variance8x4_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square of the sum is formed in unsigned 32-bit arithmetic.
  mean_term = ((unsigned int)diff_sum * diff_sum) >> 5;
  return sq_err - mean_term;
}
496 
// 4x8 variance: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / 32, the division done as a shift by 5.
unsigned int vp9_variance4x8_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square of the sum is formed in unsigned 32-bit arithmetic.
  mean_term = ((unsigned int)diff_sum * diff_sum) >> 5;
  return sq_err - mean_term;
}
509 
// 4x4 variance: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / 16, the division done as a shift by 4.
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr,
                               int  source_stride,
                               const uint8_t *ref_ptr,
                               int  recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // The square of the sum is formed in unsigned 32-bit arithmetic.
  mean_term = ((unsigned int)diff_sum * diff_sum) >> 4;
  return sq_err - mean_term;
}
522 
523 
// Sum of squared differences over a 16x16 block; the value is both stored in
// *sse and returned. No mean correction is applied.
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr,
                            int  source_stride,
                            const uint8_t *ref_ptr,
                            int  recon_stride,
                            unsigned int *sse) {
  unsigned int sq_err;
  int unused_sum;  // variance() needs a sum sink; the value is ignored here

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_err, &unused_sum);
  return (*sse = sq_err);
}
536 
// Sum of squared differences over a 16x8 block; the value is both stored in
// *sse and returned. No mean correction is applied.
unsigned int vp9_mse16x8_c(const uint8_t *src_ptr,
                           int  source_stride,
                           const uint8_t *ref_ptr,
                           int  recon_stride,
                           unsigned int *sse) {
  unsigned int sq_err;
  int unused_sum;  // variance() needs a sum sink; the value is ignored here

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_err, &unused_sum);
  return (*sse = sq_err);
}
549 
// Sum of squared differences over an 8x16 block; the value is both stored in
// *sse and returned. No mean correction is applied.
unsigned int vp9_mse8x16_c(const uint8_t *src_ptr,
                           int  source_stride,
                           const uint8_t *ref_ptr,
                           int  recon_stride,
                           unsigned int *sse) {
  unsigned int sq_err;
  int unused_sum;  // variance() needs a sum sink; the value is ignored here

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_err, &unused_sum);
  return (*sse = sq_err);
}
562 
// Sum of squared differences over an 8x8 block; the value is both stored in
// *sse and returned. No mean correction is applied.
unsigned int vp9_mse8x8_c(const uint8_t *src_ptr,
                          int  source_stride,
                          const uint8_t *ref_ptr,
                          int  recon_stride,
                          unsigned int *sse) {
  unsigned int sq_err;
  int unused_sum;  // variance() needs a sum sink; the value is ignored here

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_err, &unused_sum);
  return (*sse = sq_err);
}
575 
576 
// Bilinearly filters a 4x4 source block at sub-pixel offset
// (xoffset, yoffset) and returns its variance against dst_ptr.
unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                         int  src_pixels_per_line,
                                         int  xoffset,
                                         int  yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t horiz[5 * 4];      // output of the horizontal (first) pass
  uint8_t filtered[20 * 16];  // output of the vertical (second) pass
  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal 1-D filter over 5 rows: the vertical pass reads the row
  // below each of its 4 output rows.
  var_filter_block2d_bil_first_pass(src_ptr, horiz, src_pixels_per_line, 1,
                                    5, 4, hfilter);

  // Vertical 1-D filter: stride and pixel step both equal the block width.
  var_filter_block2d_bil_second_pass(horiz, filtered, 4, 4, 4, 4, vfilter);

  return vp9_variance4x4(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
600 
vp9_sub_pixel_avg_variance4x4_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)601 unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
602                                              int  src_pixels_per_line,
603                                              int  xoffset,
604                                              int  yoffset,
605                                              const uint8_t *dst_ptr,
606                                              int dst_pixels_per_line,
607                                              unsigned int *sse,
608                                              const uint8_t *second_pred) {
609   uint8_t temp2[20 * 16];
610   const int16_t *hfilter, *vfilter;
611   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);  // compound pred buffer
612   uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
613 
614   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
615   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
616 
617   // First filter 1d Horizontal
618   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
619                                     1, 5, 4, hfilter);
620 
621   // Now filter Verticaly
622   var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
623   vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
624   return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
625 }
626 
// Sub-pixel variance for an 8x8 block.
//
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (first pass) and yoffset (second pass); the result of vp9_variance8x8()
// between the filtered block and dst_ptr is returned. sse is forwarded
// unchanged to vp9_variance8x8().
unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const horiz_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_kernel = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t hpass[9 * 8];   // output of the first filter pass
  uint8_t vpass[20 * 16];  // output of the second filter pass

  var_filter_block2d_bil_first_pass(src_ptr, hpass, src_pixels_per_line,
                                    1, 9, 8, horiz_kernel);
  var_filter_block2d_bil_second_pass(hpass, vpass, 8, 8, 8, 8,
                                     vert_kernel);

  return vp9_variance8x8(vpass, 8, dst_ptr, dst_pixels_per_line, sse);
}
647 
vp9_sub_pixel_avg_variance8x8_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)648 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
649                                              int  src_pixels_per_line,
650                                              int  xoffset,
651                                              int  yoffset,
652                                              const uint8_t *dst_ptr,
653                                              int dst_pixels_per_line,
654                                              unsigned int *sse,
655                                              const uint8_t *second_pred) {
656   uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
657   uint8_t temp2[20 * 16];
658   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);  // compound pred buffer
659   const int16_t *hfilter, *vfilter;
660 
661   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
662   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
663 
664   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
665                                     1, 9, 8, hfilter);
666   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
667   vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
668   return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
669 }
670 
// Sub-pixel variance for a 16x16 block.
//
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (first pass) and yoffset (second pass); the result of vp9_variance16x16()
// between the filtered block and dst_ptr is returned. sse is forwarded
// unchanged to vp9_variance16x16().
unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_kernel = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t hpass[17 * 16];  // output of the first filter pass
  uint8_t vpass[20 * 16];   // output of the second filter pass

  var_filter_block2d_bil_first_pass(src_ptr, hpass, src_pixels_per_line,
                                    1, 17, 16, horiz_kernel);
  var_filter_block2d_bil_second_pass(hpass, vpass, 16, 16, 16, 16,
                                     vert_kernel);

  return vp9_variance16x16(vpass, 16, dst_ptr, dst_pixels_per_line, sse);
}
691 
vp9_sub_pixel_avg_variance16x16_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)692 unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
693                                                int  src_pixels_per_line,
694                                                int  xoffset,
695                                                int  yoffset,
696                                                const uint8_t *dst_ptr,
697                                                int dst_pixels_per_line,
698                                                unsigned int *sse,
699                                                const uint8_t *second_pred) {
700   uint16_t fdata3[17 * 16];
701   uint8_t temp2[20 * 16];
702   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);  // compound pred buffer
703   const int16_t *hfilter, *vfilter;
704 
705   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
706   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
707 
708   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
709                                     1, 17, 16, hfilter);
710   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
711 
712   vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
713   return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
714 }
715 
// Sub-pixel variance for a 64x64 block.
//
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (first pass) and yoffset (second pass); the result of vp9_variance64x64()
// between the filtered block and dst_ptr is returned. sse is forwarded
// unchanged to vp9_variance64x64().
unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_kernel = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t hpass[65 * 64];  // output of the first filter pass
  uint8_t vpass[68 * 64];   // output of the second filter pass

  var_filter_block2d_bil_first_pass(src_ptr, hpass, src_pixels_per_line,
                                    1, 65, 64, horiz_kernel);
  var_filter_block2d_bil_second_pass(hpass, vpass, 64, 64, 64, 64,
                                     vert_kernel);

  return vp9_variance64x64(vpass, 64, dst_ptr, dst_pixels_per_line, sse);
}
736 
vp9_sub_pixel_avg_variance64x64_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)737 unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
738                                                int  src_pixels_per_line,
739                                                int  xoffset,
740                                                int  yoffset,
741                                                const uint8_t *dst_ptr,
742                                                int dst_pixels_per_line,
743                                                unsigned int *sse,
744                                                const uint8_t *second_pred) {
745   uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
746   uint8_t temp2[68 * 64];
747   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
748   const int16_t *hfilter, *vfilter;
749 
750   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
751   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
752 
753   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
754                                     1, 65, 64, hfilter);
755   var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
756   vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
757   return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
758 }
759 
// Sub-pixel variance for a 32x32 block.
//
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (first pass) and yoffset (second pass); the result of vp9_variance32x32()
// between the filtered block and dst_ptr is returned. sse is forwarded
// unchanged to vp9_variance32x32().
unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_kernel = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t hpass[33 * 32];  // output of the first filter pass
  uint8_t vpass[36 * 32];   // output of the second filter pass

  var_filter_block2d_bil_first_pass(src_ptr, hpass, src_pixels_per_line,
                                    1, 33, 32, horiz_kernel);
  var_filter_block2d_bil_second_pass(hpass, vpass, 32, 32, 32, 32,
                                     vert_kernel);

  return vp9_variance32x32(vpass, 32, dst_ptr, dst_pixels_per_line, sse);
}
780 
vp9_sub_pixel_avg_variance32x32_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)781 unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
782                                                int  src_pixels_per_line,
783                                                int  xoffset,
784                                                int  yoffset,
785                                                const uint8_t *dst_ptr,
786                                                int dst_pixels_per_line,
787                                                unsigned int *sse,
788                                                const uint8_t *second_pred) {
789   uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
790   uint8_t temp2[36 * 32];
791   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);  // compound pred buffer
792   const int16_t *hfilter, *vfilter;
793 
794   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
795   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
796 
797   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
798                                     1, 33, 32, hfilter);
799   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
800   vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
801   return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
802 }
803 
// 16x16 variance with a fixed horizontal-only sub-pixel offset: delegates to
// vp9_sub_pixel_variance16x16_c() with xoffset = 8 and yoffset = 0.
unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;  // presumably the half-pixel filter index - confirm
  const int yoffset = 0;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
812 
// 32x32 variance with a fixed horizontal-only sub-pixel offset: delegates to
// vp9_sub_pixel_variance32x32_c() with xoffset = 8 and yoffset = 0.
unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;  // presumably the half-pixel filter index - confirm
  const int yoffset = 0;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
821 
// 64x64 variance with a fixed horizontal-only sub-pixel offset: delegates to
// vp9_sub_pixel_variance64x64_c() with xoffset = 8 and yoffset = 0.
unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 8;  // presumably the half-pixel filter index - confirm
  const int yoffset = 0;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
830 
// 16x16 variance with a fixed vertical-only sub-pixel offset: delegates to
// vp9_sub_pixel_variance16x16_c() with xoffset = 0 and yoffset = 8.
unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;  // presumably the half-pixel filter index - confirm

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
839 
// 32x32 variance with a fixed vertical-only sub-pixel offset: delegates to
// vp9_sub_pixel_variance32x32_c() with xoffset = 0 and yoffset = 8.
unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;  // presumably the half-pixel filter index - confirm

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
848 
// 64x64 variance with a fixed vertical-only sub-pixel offset: delegates to
// vp9_sub_pixel_variance64x64_c() with xoffset = 0 and yoffset = 8.
unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;  // presumably the half-pixel filter index - confirm

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
857 
// 16x16 variance with a fixed sub-pixel offset in both directions: delegates
// to vp9_sub_pixel_variance16x16_c() with xoffset = 8 and yoffset = 8.
unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  // presumably the half-pixel filter index - confirm
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
866 
// 32x32 variance with a fixed sub-pixel offset in both directions: delegates
// to vp9_sub_pixel_variance32x32_c() with xoffset = 8 and yoffset = 8.
unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  // presumably the half-pixel filter index - confirm
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
875 
// 64x64 variance with a fixed sub-pixel offset in both directions: delegates
// to vp9_sub_pixel_variance64x64_c() with xoffset = 8 and yoffset = 8.
unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  // presumably the half-pixel filter index - confirm
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
884 
// Runs vp9_sub_pixel_variance16x16_c() with the given arguments and returns
// the value left in *sse afterwards instead of that function's return value.
unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // The callee's return value is intentionally discarded.
  (void)vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);
  return *sse;
}
897 
// Runs vp9_sub_pixel_variance32x32_c() with the given arguments and returns
// the value left in *sse afterwards instead of that function's return value.
unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // The callee's return value is intentionally discarded.
  (void)vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);
  return *sse;
}
910 
// Runs vp9_sub_pixel_variance64x64_c() with the given arguments and returns
// the value left in *sse afterwards instead of that function's return value.
unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // The callee's return value is intentionally discarded.
  (void)vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);
  return *sse;
}
923 
// Sub-pixel variance for a 16x8 block.
//
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (first pass) and yoffset (second pass); the result of vp9_variance16x8()
// between the filtered block and dst_ptr is returned. sse is forwarded
// unchanged to vp9_variance16x8().
unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *const horiz_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_kernel = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t hpass[16 * 9];  // output of the first filter pass
  uint8_t vpass[20 * 16];  // output of the second filter pass

  var_filter_block2d_bil_first_pass(src_ptr, hpass, src_pixels_per_line,
                                    1, 9, 16, horiz_kernel);
  var_filter_block2d_bil_second_pass(hpass, vpass, 16, 16, 8, 16,
                                     vert_kernel);

  return vp9_variance16x8(vpass, 16, dst_ptr, dst_pixels_per_line, sse);
}
944 
vp9_sub_pixel_avg_variance16x8_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)945 unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
946                                               int  src_pixels_per_line,
947                                               int  xoffset,
948                                               int  yoffset,
949                                               const uint8_t *dst_ptr,
950                                               int dst_pixels_per_line,
951                                               unsigned int *sse,
952                                               const uint8_t *second_pred) {
953   uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
954   uint8_t temp2[20 * 16];
955   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);  // compound pred buffer
956   const int16_t *hfilter, *vfilter;
957 
958   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
959   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
960 
961   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
962                                     1, 9, 16, hfilter);
963   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
964   vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
965   return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
966 }
967 
// Sub-pixel variance for an 8x16 block.
//
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (first pass) and yoffset (second pass); the result of vp9_variance8x16()
// between the filtered block and dst_ptr is returned. sse is forwarded
// unchanged to vp9_variance8x16().
unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *const horiz_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_kernel = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t hpass[9 * 16];  // output of the first filter pass
  uint8_t vpass[20 * 16];  // output of the second filter pass

  var_filter_block2d_bil_first_pass(src_ptr, hpass, src_pixels_per_line,
                                    1, 17, 8, horiz_kernel);
  var_filter_block2d_bil_second_pass(hpass, vpass, 8, 8, 16, 8,
                                     vert_kernel);

  return vp9_variance8x16(vpass, 8, dst_ptr, dst_pixels_per_line, sse);
}
988 
vp9_sub_pixel_avg_variance8x16_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)989 unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
990                                               int  src_pixels_per_line,
991                                               int  xoffset,
992                                               int  yoffset,
993                                               const uint8_t *dst_ptr,
994                                               int dst_pixels_per_line,
995                                               unsigned int *sse,
996                                               const uint8_t *second_pred) {
997   uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
998   uint8_t temp2[20 * 16];
999   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);  // compound pred buffer
1000   const int16_t *hfilter, *vfilter;
1001 
1002   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1003   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1004 
1005   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1006                                     1, 17, 8, hfilter);
1007   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
1008   vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
1009   return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1010 }
1011 
// Sub-pixel variance for an 8x4 block.
//
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (first pass) and yoffset (second pass); the result of vp9_variance8x4()
// between the filtered block and dst_ptr is returned. sse is forwarded
// unchanged to vp9_variance8x4().
unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const horiz_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_kernel = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t hpass[8 * 5];   // output of the first filter pass
  uint8_t vpass[20 * 16];  // output of the second filter pass

  var_filter_block2d_bil_first_pass(src_ptr, hpass, src_pixels_per_line,
                                    1, 5, 8, horiz_kernel);
  var_filter_block2d_bil_second_pass(hpass, vpass, 8, 8, 4, 8,
                                     vert_kernel);

  return vp9_variance8x4(vpass, 8, dst_ptr, dst_pixels_per_line, sse);
}
1032 
vp9_sub_pixel_avg_variance8x4_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)1033 unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
1034                                              int  src_pixels_per_line,
1035                                              int  xoffset,
1036                                              int  yoffset,
1037                                              const uint8_t *dst_ptr,
1038                                              int dst_pixels_per_line,
1039                                              unsigned int *sse,
1040                                              const uint8_t *second_pred) {
1041   uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
1042   uint8_t temp2[20 * 16];
1043   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);  // compound pred buffer
1044   const int16_t *hfilter, *vfilter;
1045 
1046   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1047   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1048 
1049   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1050                                     1, 5, 8, hfilter);
1051   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
1052   vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
1053   return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1054 }
1055 
// Sub-pixel variance for a 4x8 block.
//
// src_ptr is filtered with the 2-tap bilinear kernels selected by xoffset
// (first pass) and yoffset (second pass); the result of vp9_variance4x8()
// between the filtered block and dst_ptr is returned. sse is forwarded
// unchanged to vp9_variance4x8().
unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const horiz_kernel = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_kernel = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t hpass[5 * 8];  // output of the first filter pass
  // FIXME(jingning,rbultje): this second-pass buffer probably doesn't need
  // to be this big. The same issue appears in all other block size settings.
  uint8_t vpass[20 * 16];

  var_filter_block2d_bil_first_pass(src_ptr, hpass, src_pixels_per_line,
                                    1, 9, 4, horiz_kernel);
  var_filter_block2d_bil_second_pass(hpass, vpass, 4, 4, 8, 4,
                                     vert_kernel);

  return vp9_variance4x8(vpass, 4, dst_ptr, dst_pixels_per_line, sse);
}
1078 
vp9_sub_pixel_avg_variance4x8_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)1079 unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
1080                                              int  src_pixels_per_line,
1081                                              int  xoffset,
1082                                              int  yoffset,
1083                                              const uint8_t *dst_ptr,
1084                                              int dst_pixels_per_line,
1085                                              unsigned int *sse,
1086                                              const uint8_t *second_pred) {
1087   uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
1088   uint8_t temp2[20 * 16];
1089   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);  // compound pred buffer
1090   const int16_t *hfilter, *vfilter;
1091 
1092   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1093   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1094 
1095   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1096                                     1, 9, 4, hfilter);
1097   var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
1098   vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
1099   return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
1100 }
1101 
1102 
// Writes the per-pixel rounded-up average of two predictors to comp_pred.
//
//   comp_pred  : output, width * height bytes, rows packed at stride width.
//   pred       : first input, rows packed at stride width.
//   width      : number of pixels per row.
//   height     : number of rows.
//   ref        : second input, rows ref_stride bytes apart.
//   ref_stride : row stride of ref.
//
// Each output pixel is (pred + ref + 1) >> 1.
void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
                       int height, const uint8_t *ref, int ref_stride) {
  int row, col;

  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      // The sum of two bytes plus one halves to at most 255, so the cast
      // back to uint8_t never truncates.
      comp_pred[col] = (uint8_t)((pred[col] + ref[col] + 1) >> 1);
    }

    // comp_pred and pred are contiguous; only ref has its own stride.
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
1118