/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp9_rtcd.h"

#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_filter.h"

#include "vp9/encoder/vp9_variance.h"

void variance(const uint8_t *src_ptr,
              int source_stride,
              const uint8_t *ref_ptr,
              int recon_stride,
              int w,
              int h,
              unsigned int *sse,
              int *sum) {
  int i, j;
  int diff;

  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      diff = src_ptr[j] - ref_ptr[j];
      *sum += diff;
      *sse += diff * diff;
    }

    src_ptr += source_stride;
    ref_ptr += recon_stride;
  }
}

/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_first_pass
 *
 *  INPUTS        : uint8_t  *src_ptr             : Pointer to source block.
 *                  uint32_t  src_pixels_per_line : Stride of input block.
 *                  uint32_t  pixel_step          : Offset between filter input
 *                                                  samples (see notes).
 *                  uint32_t  output_height       : Input block height.
 *                  uint32_t  output_width        : Input block width.
 *                  int16_t  *vp9_filter          : Array of 2 bi-linear filter
 *                                                  taps.
 *
 *  OUTPUTS       : uint16_t *output_ptr          : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
 *                  in either horizontal or vertical direction to produce the
 *                  filtered output block. Used to implement the first pass
 *                  of a 2-D separable filter.
 *
 *  SPECIAL NOTES : Produces uint16_t output to retain precision for the next
 *                  pass. The two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step defines whether the filter is applied
 *                  horizontally (pixel_step = 1) or vertically (pixel_step =
 *                  stride). It defines the offset required to move from one
 *                  input sample to the next.
 *
 ****************************************************************************/
static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
                                              uint16_t *output_ptr,
                                              unsigned int src_pixels_per_line,
                                              int pixel_step,
                                              unsigned int output_height,
                                              unsigned int output_width,
                                              const int16_t *vp9_filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; i++) {
    for (j = 0; j < output_width; j++) {
      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
                                         (int)src_ptr[pixel_step] * vp9_filter[1],
                                         FILTER_BITS);

      src_ptr++;
    }

    // Next row...
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_second_pass
 *
 *  INPUTS        : uint16_t *src_ptr             : Pointer to source block.
 *                  uint32_t  src_pixels_per_line : Stride of input block.
 *                  uint32_t  pixel_step          : Offset between filter input
 *                                                  samples (see notes).
 *                  uint32_t  output_height       : Input block height.
 *                  uint32_t  output_width        : Input block width.
 *                  int16_t  *vp9_filter          : Array of 2 bi-linear filter
 *                                                  taps.
 *
 *  OUTPUTS       : uint8_t  *output_ptr          : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
 *                  in either horizontal or vertical direction to produce the
 *                  filtered output block. Used to implement the second pass
 *                  of a 2-D separable filter.
 *
 *  SPECIAL NOTES : Requires 16-bit input as produced by
 *                  var_filter_block2d_bil_first_pass.
 *                  The two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step defines whether the filter is applied
 *                  horizontally (pixel_step = 1) or vertically (pixel_step =
 *                  stride). It defines the offset required to move from one
 *                  input sample to the next.
 *
 ****************************************************************************/
static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
                                               uint8_t *output_ptr,
                                               unsigned int src_pixels_per_line,
                                               unsigned int pixel_step,
                                               unsigned int output_height,
                                               unsigned int output_width,
                                               const int16_t *vp9_filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; i++) {
    for (j = 0; j < output_width; j++) {
      output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
                                         (int)src_ptr[pixel_step] * vp9_filter[1],
                                         FILTER_BITS);
      src_ptr++;
    }

    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}
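
// Example (a sketch mirroring the call sites below): to build a bilinearly
// interpolated W x H block at sub-pel offset (xoffset, yoffset), callers run
// the horizontal pass over H + 1 rows and then the vertical pass over that
// intermediate result:
//   var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W,
//                                     BILINEAR_FILTERS_2TAP(xoffset));
//   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,
//                                      BILINEAR_FILTERS_2TAP(yoffset));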

unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; i++) {
    sum += (src_ptr[i] * src_ptr[i]);
  }

  return sum;
}

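// Each full-pel variance function below computes
//   variance = sse - sum^2 / (w * h),
// where sse and sum come from variance() above; the right shift is
// log2(w * h) for the block size in question (e.g. 11 for 64x32).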
unsigned int vp9_variance64x32_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 11));
}

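// The sub-pixel variance functions first filter the source horizontally into
// a (height + 1)-row intermediate buffer (fdata3), then filter that buffer
// vertically into temp2, and finally take the full-pel variance of temp2
// against the reference block.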
unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 64, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);

  return vp9_variance64x32(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
}

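// The _avg_ variants additionally average the filtered prediction with
// second_pred via vp9_comp_avg_pred() before measuring variance against the
// reference block (compound prediction).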
unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
                                               int src_pixels_per_line,
                                               int xoffset,
                                               int yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 64, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
  vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
  return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_variance32x64_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 11));
}

unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 65, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);

  return vp9_variance32x64(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
                                               int src_pixels_per_line,
                                               int xoffset,
                                               int yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 65, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
  vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
  return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_variance32x16_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 9));
}

unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);

  return vp9_variance32x16(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
                                               int src_pixels_per_line,
                                               int xoffset,
                                               int yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
  vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
  return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_variance16x32_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 9));
}

unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);

  return vp9_variance16x32(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
                                               int src_pixels_per_line,
                                               int xoffset,
                                               int yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
  vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
  return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 12));
}

unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 10));
}

unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 8));
}

unsigned int vp9_variance8x16_c(const uint8_t *src_ptr,
                                int source_stride,
                                const uint8_t *ref_ptr,
                                int recon_stride,
                                unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 7));
}

unsigned int vp9_variance16x8_c(const uint8_t *src_ptr,
                                int source_stride,
                                const uint8_t *ref_ptr,
                                int recon_stride,
                                unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 7));
}

void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int ref_stride,
                           unsigned int *sse, int *sum) {
  variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);
}

unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 6));
}

unsigned int vp9_variance8x4_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 5));
}

unsigned int vp9_variance4x8_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 5));
}

unsigned int vp9_variance4x4_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 4));
}

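// The MSE functions report the raw sum of squared errors; unlike the
// variance functions above, the mean term is not subtracted.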
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr,
                            int source_stride,
                            const uint8_t *ref_ptr,
                            int recon_stride,
                            unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
  *sse = var;
  return var;
}

unsigned int vp9_mse16x8_c(const uint8_t *src_ptr,
                           int source_stride,
                           const uint8_t *ref_ptr,
                           int recon_stride,
                           unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
  *sse = var;
  return var;
}

unsigned int vp9_mse8x16_c(const uint8_t *src_ptr,
                           int source_stride,
                           const uint8_t *ref_ptr,
                           int recon_stride,
                           unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
  *sse = var;
  return var;
}

unsigned int vp9_mse8x8_c(const uint8_t *src_ptr,
                          int source_stride,
                          const uint8_t *ref_ptr,
                          int recon_stride,
                          unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
  *sse = var;
  return var;
}

unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;
  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // First filter 1-D horizontally
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 5, 4, hfilter);

  // Now filter vertically
  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);

  return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
                                             int src_pixels_per_line,
                                             int xoffset,
                                             int yoffset,
                                             const uint8_t *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse,
                                             const uint8_t *second_pred) {
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);  // compound pred buffer
  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  // First filter 1-D horizontally
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 5, 4, hfilter);

  // Now filter vertically
  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);
  vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
  return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);

  return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
                                             int src_pixels_per_line,
                                             int xoffset,
                                             int yoffset,
                                             const uint8_t *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse,
                                             const uint8_t *second_pred) {
  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
  vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
  return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[17 * 16];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);

  return vp9_variance16x16(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
                                               int src_pixels_per_line,
                                               int xoffset,
                                               int yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
  uint16_t fdata3[17 * 16];
  uint8_t temp2[20 * 16];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);

  vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
  return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 65, 64, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);

  return vp9_variance64x64(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
                                               int src_pixels_per_line,
                                               int xoffset,
                                               int yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 65, 64, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
  vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
  return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);

  return vp9_variance32x32(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
                                               int src_pixels_per_line,
                                               int xoffset,
                                               int yoffset,
                                               const uint8_t *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
  vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
  return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
}

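// The half-pel variance helpers reuse the sub-pixel variance functions with
// offset 8, the half-pixel position of the bilinear filter, applied
// horizontally (_h), vertically (_v), or both (_hv).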
unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
                                       ref_ptr, recon_stride, sse);
}

unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8,
                                       ref_ptr, recon_stride, sse);
}

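// The sub-pixel MSE wrappers run the corresponding sub-pixel variance
// function and return the SSE it computed, again without removing the mean.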
unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                xoffset, yoffset, dst_ptr,
                                dst_pixels_per_line, sse);
  return *sse;
}

unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                xoffset, yoffset, dst_ptr,
                                dst_pixels_per_line, sse);
  return *sse;
}

unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                xoffset, yoffset, dst_ptr,
                                dst_pixels_per_line, sse);
  return *sse;
}

unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);

  return vp9_variance16x8(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
                                              int src_pixels_per_line,
                                              int xoffset,
                                              int yoffset,
                                              const uint8_t *dst_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse,
                                              const uint8_t *second_pred) {
  uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
  vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
  return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);

  return vp9_variance8x16(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
                                              int src_pixels_per_line,
                                              int xoffset,
                                              int yoffset,
                                              const uint8_t *dst_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse,
                                              const uint8_t *second_pred) {
  uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
  vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
  return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 5, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);

  return vp9_variance8x4(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
                                             int src_pixels_per_line,
                                             int xoffset,
                                             int yoffset,
                                             const uint8_t *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse,
                                             const uint8_t *second_pred) {
  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 5, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
  vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
  return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
  // FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be
  // this big; the same issue appears in all the other block size settings.
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 4, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);

  return vp9_variance4x8(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
}

unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
                                             int src_pixels_per_line,
                                             int xoffset,
                                             int yoffset,
                                             const uint8_t *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse,
                                             const uint8_t *second_pred) {
  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);  // compound pred buffer
  const int16_t *hfilter, *vfilter;

  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 4, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
  vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
  return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
}

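// Averages the two predictions pred and ref with rounding, (a + b + 1) >> 1,
// writing the result to comp_pred with a stride equal to width.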
void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
                       int height, const uint8_t *ref, int ref_stride) {
  int i, j;

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      int tmp;
      tmp = pred[j] + ref[j];
      comp_pred[j] = (tmp + 1) >> 1;
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}