1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "./vp9_rtcd.h"
12
13 #include "vpx_ports/mem.h"
14 #include "vpx/vpx_integer.h"
15
16 #include "vp9/common/vp9_common.h"
17 #include "vp9/common/vp9_filter.h"
18
19 #include "vp9/encoder/vp9_variance.h"
20
// Accumulates, over a w x h block, the sum of per-pixel differences
// (src - ref) into *sum and the sum of their squares into *sse.
// Both outputs are written even when the block is empty (they become 0).
void variance(const uint8_t *src_ptr, int source_stride,
              const uint8_t *ref_ptr, int recon_stride,
              int w, int h,
              unsigned int *sse, int *sum) {
  const uint8_t *src_row = src_ptr;
  const uint8_t *ref_row = ref_ptr;
  unsigned int sq_total = 0;
  int diff_total = 0;
  int row, col;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      const int delta = src_row[col] - ref_row[col];

      diff_total += delta;
      sq_total += delta * delta;
    }

    // Step each plane by its own stride to reach the next row.
    src_row += source_stride;
    ref_row += recon_stride;
  }

  *sum = diff_total;
  *sse = sq_total;
}
46
/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_first_pass
 *
 *  INPUTS        : const uint8_t *src_ptr           : Pointer to source block.
 *                  unsigned int src_pixels_per_line : Stride of input block.
 *                  int pixel_step                   : Offset between filter
 *                                                     input samples (see
 *                                                     notes).
 *                  unsigned int output_height       : Number of rows to
 *                                                     produce.
 *                  unsigned int output_width        : Number of columns to
 *                                                     produce.
 *                  const int16_t *vp9_filter        : Array of 2 bi-linear
 *                                                     filter taps.
 *
 *  OUTPUTS       : uint16_t *output_ptr             : Pointer to filtered
 *                                                     block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
 *                  either horizontal or vertical direction to produce the
 *                  filtered output block. Used to implement first-pass
 *                  of 2-D separable filter.
 *
 *  SPECIAL NOTES : Produces uint16_t output to retain precision for next pass.
 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step defines whether the filter is applied
 *                  horizontally (pixel_step=1) or vertically (pixel_step=
 *                  stride).
 *                  It defines the offset required to move from one input
 *                  to the next.
 *
 ****************************************************************************/
var_filter_block2d_bil_first_pass(const uint8_t * src_ptr,uint16_t * output_ptr,unsigned int src_pixels_per_line,int pixel_step,unsigned int output_height,unsigned int output_width,const int16_t * vp9_filter)78 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
79 uint16_t *output_ptr,
80 unsigned int src_pixels_per_line,
81 int pixel_step,
82 unsigned int output_height,
83 unsigned int output_width,
84 const int16_t *vp9_filter) {
85 unsigned int i, j;
86
87 for (i = 0; i < output_height; i++) {
88 for (j = 0; j < output_width; j++) {
89 output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
90 (int)src_ptr[pixel_step] * vp9_filter[1],
91 FILTER_BITS);
92
93 src_ptr++;
94 }
95
96 // Next row...
97 src_ptr += src_pixels_per_line - output_width;
98 output_ptr += output_width;
99 }
100 }
101
/****************************************************************************
 *
 *  ROUTINE       : var_filter_block2d_bil_second_pass
 *
 *  INPUTS        : const uint16_t *src_ptr          : Pointer to source block.
 *                  unsigned int src_pixels_per_line : Stride of input block.
 *                  unsigned int pixel_step          : Offset between filter
 *                                                     input samples (see
 *                                                     notes).
 *                  unsigned int output_height       : Number of rows to
 *                                                     produce.
 *                  unsigned int output_width        : Number of columns to
 *                                                     produce.
 *                  const int16_t *vp9_filter        : Array of 2 bi-linear
 *                                                     filter taps.
 *
 *  OUTPUTS       : uint8_t *output_ptr              : Pointer to filtered
 *                                                     block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
 *                  either horizontal or vertical direction to produce the
 *                  filtered output block. Used to implement second-pass
 *                  of 2-D separable filter.
 *
 *  SPECIAL NOTES : Requires 16-bit (uint16_t) input as produced by
 *                  var_filter_block2d_bil_first_pass.
 *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
 *                  pixel_step defines whether the filter is applied
 *                  horizontally (pixel_step=1) or vertically (pixel_step=
 *                  stride).
 *                  It defines the offset required to move from one input
 *                  to the next.
 *
 ****************************************************************************/
var_filter_block2d_bil_second_pass(const uint16_t * src_ptr,uint8_t * output_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const int16_t * vp9_filter)134 static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
135 uint8_t *output_ptr,
136 unsigned int src_pixels_per_line,
137 unsigned int pixel_step,
138 unsigned int output_height,
139 unsigned int output_width,
140 const int16_t *vp9_filter) {
141 unsigned int i, j;
142
143 for (i = 0; i < output_height; i++) {
144 for (j = 0; j < output_width; j++) {
145 output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
146 (int)src_ptr[pixel_step] * vp9_filter[1],
147 FILTER_BITS);
148 src_ptr++;
149 }
150
151 src_ptr += src_pixels_per_line - output_width;
152 output_ptr += output_width;
153 }
154 }
155
// Returns the sum of the squares of the 256 int16_t values at src_ptr.
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  const int16_t *const end = src_ptr + 256;
  const int16_t *cur;
  unsigned int total = 0;

  for (cur = src_ptr; cur != end; ++cur) {
    total += (*cur) * (*cur);
  }

  return total;
}
165
// 64x32 block: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / (64 * 32).
unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 64 * 32 == 1 << 11, so the shift divides by the pixel count.
  mean_term = ((int64_t)diff_total * diff_total) >> 11;
  return (unsigned int)(sq_diff_total - mean_term);
}
178
// Bilinearly filters a 64x32 block of src_ptr (taps selected by xoffset and
// yoffset) and returns vp9_variance64x32 of the result against dst_ptr.
unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz_out[65 * 64];  // output of the horizontal pass
  uint8_t filtered[68 * 64];    // output of the vertical pass
  const int16_t *const taps_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const taps_y = BILINEAR_FILTERS_2TAP(yoffset);

  // 33 rows: the vertical pass reads one row below each of its 32 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 33, 64, taps_x);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 64, 64, 32, 64,
                                     taps_y);

  return vp9_variance64x32(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
199
vp9_sub_pixel_avg_variance64x32_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)200 unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
201 int src_pixels_per_line,
202 int xoffset,
203 int yoffset,
204 const uint8_t *dst_ptr,
205 int dst_pixels_per_line,
206 unsigned int *sse,
207 const uint8_t *second_pred) {
208 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
209 uint8_t temp2[68 * 64];
210 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer
211 const int16_t *hfilter, *vfilter;
212
213 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
214 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
215
216 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
217 1, 33, 64, hfilter);
218 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
219 vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
220 return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
221 }
222
// 32x64 block: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / (32 * 64).
unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 32 * 64 == 1 << 11, so the shift divides by the pixel count.
  mean_term = ((int64_t)diff_total * diff_total) >> 11;
  return (unsigned int)(sq_diff_total - mean_term);
}
235
// Bilinearly filters a 32x64 block of src_ptr (taps selected by xoffset and
// yoffset) and returns vp9_variance32x64 of the result against dst_ptr.
unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz_out[65 * 64];  // output of the horizontal pass
  uint8_t filtered[68 * 64];    // output of the vertical pass
  const int16_t *const taps_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const taps_y = BILINEAR_FILTERS_2TAP(yoffset);

  // 65 rows: the vertical pass reads one row below each of its 64 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 65, 32, taps_x);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 32, 32, 64, 32,
                                     taps_y);

  return vp9_variance32x64(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
256
vp9_sub_pixel_avg_variance32x64_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)257 unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
258 int src_pixels_per_line,
259 int xoffset,
260 int yoffset,
261 const uint8_t *dst_ptr,
262 int dst_pixels_per_line,
263 unsigned int *sse,
264 const uint8_t *second_pred) {
265 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
266 uint8_t temp2[68 * 64];
267 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer
268 const int16_t *hfilter, *vfilter;
269
270 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
271 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
272
273 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
274 1, 65, 32, hfilter);
275 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
276 vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
277 return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
278 }
279
// 32x16 block: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / (32 * 16).
unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 32 * 16 == 1 << 9, so the shift divides by the pixel count.
  mean_term = ((int64_t)diff_total * diff_total) >> 9;
  return (unsigned int)(sq_diff_total - mean_term);
}
292
// Bilinearly filters a 32x16 block of src_ptr (taps selected by xoffset and
// yoffset) and returns vp9_variance32x16 of the result against dst_ptr.
unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz_out[33 * 32];  // output of the horizontal pass
  uint8_t filtered[36 * 32];    // output of the vertical pass
  const int16_t *const taps_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const taps_y = BILINEAR_FILTERS_2TAP(yoffset);

  // 17 rows: the vertical pass reads one row below each of its 16 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 17, 32, taps_x);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 32, 32, 16, 32,
                                     taps_y);

  return vp9_variance32x16(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
313
vp9_sub_pixel_avg_variance32x16_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)314 unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
315 int src_pixels_per_line,
316 int xoffset,
317 int yoffset,
318 const uint8_t *dst_ptr,
319 int dst_pixels_per_line,
320 unsigned int *sse,
321 const uint8_t *second_pred) {
322 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
323 uint8_t temp2[36 * 32];
324 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer
325 const int16_t *hfilter, *vfilter;
326
327 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
328 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
329
330 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
331 1, 17, 32, hfilter);
332 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
333 vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
334 return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
335 }
336
// 16x32 block: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / (16 * 32).
unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 16 * 32 == 1 << 9, so the shift divides by the pixel count.
  mean_term = ((int64_t)diff_total * diff_total) >> 9;
  return (unsigned int)(sq_diff_total - mean_term);
}
349
// Bilinearly filters a 16x32 block of src_ptr (taps selected by xoffset and
// yoffset) and returns vp9_variance16x32 of the result against dst_ptr.
unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz_out[33 * 32];  // output of the horizontal pass
  uint8_t filtered[36 * 32];    // output of the vertical pass
  const int16_t *const taps_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const taps_y = BILINEAR_FILTERS_2TAP(yoffset);

  // 33 rows: the vertical pass reads one row below each of its 32 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 33, 16, taps_x);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 16, 16, 32, 16,
                                     taps_y);

  return vp9_variance16x32(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
370
vp9_sub_pixel_avg_variance16x32_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)371 unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
372 int src_pixels_per_line,
373 int xoffset,
374 int yoffset,
375 const uint8_t *dst_ptr,
376 int dst_pixels_per_line,
377 unsigned int *sse,
378 const uint8_t *second_pred) {
379 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
380 uint8_t temp2[36 * 32];
381 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer
382 const int16_t *hfilter, *vfilter;
383
384 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
385 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
386
387 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
388 1, 33, 16, hfilter);
389 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
390 vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
391 return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
392 }
393
// 64x64 block: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / (64 * 64).
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 64 * 64 == 1 << 12, so the shift divides by the pixel count.
  mean_term = ((int64_t)diff_total * diff_total) >> 12;
  return (unsigned int)(sq_diff_total - mean_term);
}
406
// 32x32 block: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / (32 * 32).
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  int64_t mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 32 * 32 == 1 << 10, so the shift divides by the pixel count.
  mean_term = ((int64_t)diff_total * diff_total) >> 10;
  return (unsigned int)(sq_diff_total - mean_term);
}
419
// Fills *sse and *sum for a 16x16 block by forwarding to variance().
void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride,
                             const uint8_t *ref_ptr, int ref_stride,
                             unsigned int *sse, int *sum) {
  const int block_w = 16;
  const int block_h = 16;

  variance(src_ptr, source_stride, ref_ptr, ref_stride, block_w, block_h,
           sse, sum);
}
425
// 16x16 block: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / (16 * 16).
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride,
                                 const uint8_t *ref_ptr, int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 16 * 16 == 1 << 8, so the shift divides by the pixel count.
  mean_term = ((unsigned int)diff_total * diff_total) >> 8;
  return sq_diff_total - mean_term;
}
438
// 8x16 block: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / (8 * 16).
unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride,
                                const uint8_t *ref_ptr, int recon_stride,
                                unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 8 * 16 == 1 << 7, so the shift divides by the pixel count.
  mean_term = ((unsigned int)diff_total * diff_total) >> 7;
  return sq_diff_total - mean_term;
}
451
// 16x8 block: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / (16 * 8).
unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride,
                                const uint8_t *ref_ptr, int recon_stride,
                                unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 16 * 8 == 1 << 7, so the shift divides by the pixel count.
  mean_term = ((unsigned int)diff_total * diff_total) >> 7;
  return sq_diff_total - mean_term;
}
464
// Fills *sse and *sum for an 8x8 block by forwarding to variance().
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int ref_stride,
                           unsigned int *sse, int *sum) {
  const int block_w = 8;
  const int block_h = 8;

  variance(src_ptr, source_stride, ref_ptr, ref_stride, block_w, block_h,
           sse, sum);
}
470
// 8x8 block: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / (8 * 8).
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 8 * 8 == 1 << 6, so the shift divides by the pixel count.
  mean_term = ((unsigned int)diff_total * diff_total) >> 6;
  return sq_diff_total - mean_term;
}
483
// 8x4 block: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / (8 * 4).
unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 8 * 4 == 1 << 5, so the shift divides by the pixel count.
  mean_term = ((unsigned int)diff_total * diff_total) >> 5;
  return sq_diff_total - mean_term;
}
496
// 4x8 block: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / (4 * 8).
unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 4 * 8 == 1 << 5, so the shift divides by the pixel count.
  mean_term = ((unsigned int)diff_total * diff_total) >> 5;
  return sq_diff_total - mean_term;
}
509
// 4x4 block: stores the sum of squared differences in *sse and returns
// it minus (sum of differences)^2 / (4 * 4).
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride,
                               const uint8_t *ref_ptr, int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;
  unsigned int mean_term;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
           &sq_diff_total, &diff_total);
  *sse = sq_diff_total;

  // 4 * 4 == 1 << 4, so the shift divides by the pixel count.
  mean_term = ((unsigned int)diff_total * diff_total) >> 4;
  return sq_diff_total - mean_term;
}
522
523
// 16x16 block: stores the sum of squared differences in *sse and returns
// the same value (no mean correction is applied).
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride,
                            const uint8_t *ref_ptr, int recon_stride,
                            unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;  // filled in by variance() but not used here

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_diff_total, &diff_total);

  *sse = sq_diff_total;
  return sq_diff_total;
}
536
// 16x8 block: stores the sum of squared differences in *sse and returns
// the same value (no mean correction is applied).
unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int recon_stride,
                           unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;  // filled in by variance() but not used here

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_diff_total, &diff_total);

  *sse = sq_diff_total;
  return sq_diff_total;
}
549
// 8x16 block: stores the sum of squared differences in *sse and returns
// the same value (no mean correction is applied).
unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int recon_stride,
                           unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;  // filled in by variance() but not used here

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_diff_total, &diff_total);

  *sse = sq_diff_total;
  return sq_diff_total;
}
562
// 8x8 block: stores the sum of squared differences in *sse and returns
// the same value (no mean correction is applied).
unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride,
                          const uint8_t *ref_ptr, int recon_stride,
                          unsigned int *sse) {
  unsigned int sq_diff_total;
  int diff_total;  // filled in by variance() but not used here

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_diff_total, &diff_total);

  *sse = sq_diff_total;
  return sq_diff_total;
}
575
576
// Bilinearly filters a 4x4 block of src_ptr (taps selected by xoffset and
// yoffset) and returns vp9_variance4x4 of the result against dst_ptr.
unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset, int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t horiz_out[5 * 4];  // output of the horizontal pass
  uint8_t filtered[20 * 16];  // output of the vertical pass
  const int16_t *const taps_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const taps_y = BILINEAR_FILTERS_2TAP(yoffset);

  // Horizontal pass, 5 rows: the vertical pass reads one row below each of
  // its 4 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 5, 4, taps_x);

  // Vertical pass over the packed intermediate (row pitch 4).
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 4, 4, 4, 4, taps_y);

  return vp9_variance4x4(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
600
vp9_sub_pixel_avg_variance4x4_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)601 unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
602 int src_pixels_per_line,
603 int xoffset,
604 int yoffset,
605 const uint8_t *dst_ptr,
606 int dst_pixels_per_line,
607 unsigned int *sse,
608 const uint8_t *second_pred) {
609 uint8_t temp2[20 * 16];
610 const int16_t *hfilter, *vfilter;
611 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer
612 uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering
613
614 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
615 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
616
617 // First filter 1d Horizontal
618 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
619 1, 5, 4, hfilter);
620
621 // Now filter Verticaly
622 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);
623 vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
624 return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
625 }
626
// Bilinearly filters an 8x8 block of src_ptr (taps selected by xoffset and
// yoffset) and returns vp9_variance8x8 of the result against dst_ptr.
unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset, int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t horiz_out[9 * 8];  // output of the horizontal pass
  uint8_t filtered[20 * 16];  // output of the vertical pass
  const int16_t *const taps_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const taps_y = BILINEAR_FILTERS_2TAP(yoffset);

  // 9 rows: the vertical pass reads one row below each of its 8 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 9, 8, taps_x);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 8, 8, 8, 8, taps_y);

  return vp9_variance8x8(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
647
vp9_sub_pixel_avg_variance8x8_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)648 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
649 int src_pixels_per_line,
650 int xoffset,
651 int yoffset,
652 const uint8_t *dst_ptr,
653 int dst_pixels_per_line,
654 unsigned int *sse,
655 const uint8_t *second_pred) {
656 uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering
657 uint8_t temp2[20 * 16];
658 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer
659 const int16_t *hfilter, *vfilter;
660
661 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
662 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
663
664 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
665 1, 9, 8, hfilter);
666 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
667 vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
668 return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
669 }
670
// Bilinearly filters a 16x16 block of src_ptr (taps selected by xoffset and
// yoffset) and returns vp9_variance16x16 of the result against dst_ptr.
unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz_out[17 * 16];  // output of the horizontal pass
  uint8_t filtered[20 * 16];    // output of the vertical pass
  const int16_t *const taps_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const taps_y = BILINEAR_FILTERS_2TAP(yoffset);

  // 17 rows: the vertical pass reads one row below each of its 16 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 17, 16, taps_x);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 16, 16, 16, 16,
                                     taps_y);

  return vp9_variance16x16(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
691
vp9_sub_pixel_avg_variance16x16_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)692 unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
693 int src_pixels_per_line,
694 int xoffset,
695 int yoffset,
696 const uint8_t *dst_ptr,
697 int dst_pixels_per_line,
698 unsigned int *sse,
699 const uint8_t *second_pred) {
700 uint16_t fdata3[17 * 16];
701 uint8_t temp2[20 * 16];
702 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer
703 const int16_t *hfilter, *vfilter;
704
705 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
706 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
707
708 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
709 1, 17, 16, hfilter);
710 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
711
712 vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
713 return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
714 }
715
// Bilinearly filters a 64x64 block of src_ptr (taps selected by xoffset and
// yoffset) and returns vp9_variance64x64 of the result against dst_ptr.
unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz_out[65 * 64];  // output of the horizontal pass
  uint8_t filtered[68 * 64];    // output of the vertical pass
  const int16_t *const taps_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const taps_y = BILINEAR_FILTERS_2TAP(yoffset);

  // 65 rows: the vertical pass reads one row below each of its 64 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 65, 64, taps_x);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 64, 64, 64, 64,
                                     taps_y);

  return vp9_variance64x64(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
736
vp9_sub_pixel_avg_variance64x64_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)737 unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
738 int src_pixels_per_line,
739 int xoffset,
740 int yoffset,
741 const uint8_t *dst_ptr,
742 int dst_pixels_per_line,
743 unsigned int *sse,
744 const uint8_t *second_pred) {
745 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
746 uint8_t temp2[68 * 64];
747 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer
748 const int16_t *hfilter, *vfilter;
749
750 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
751 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
752
753 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
754 1, 65, 64, hfilter);
755 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
756 vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
757 return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
758 }
759
// Bilinearly filters a 32x32 block of src_ptr (taps selected by xoffset and
// yoffset) and returns vp9_variance32x32 of the result against dst_ptr.
unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset, int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t horiz_out[33 * 32];  // output of the horizontal pass
  uint8_t filtered[36 * 32];    // output of the vertical pass
  const int16_t *const taps_x = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const taps_y = BILINEAR_FILTERS_2TAP(yoffset);

  // 33 rows: the vertical pass reads one row below each of its 32 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, horiz_out, src_pixels_per_line,
                                    1, 33, 32, taps_x);
  var_filter_block2d_bil_second_pass(horiz_out, filtered, 32, 32, 32, 32,
                                     taps_y);

  return vp9_variance32x32(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
780
vp9_sub_pixel_avg_variance32x32_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)781 unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
782 int src_pixels_per_line,
783 int xoffset,
784 int yoffset,
785 const uint8_t *dst_ptr,
786 int dst_pixels_per_line,
787 unsigned int *sse,
788 const uint8_t *second_pred) {
789 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
790 uint8_t temp2[36 * 32];
791 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer
792 const int16_t *hfilter, *vfilter;
793
794 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
795 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
796
797 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
798 1, 33, 32, hfilter);
799 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
800 vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
801 return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
802 }
803
// 16x16 "halfpix h" variance: a thin wrapper that calls
// vp9_sub_pixel_variance16x16_c() with xoffset pinned to 8 and yoffset to 0,
// returning that call's result unchanged.
unsigned int vp9_variance_halfpixvar16x16_h_c(
    const uint8_t *src_ptr, int source_stride,
    const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
812
// 32x32 "halfpix h" variance: a thin wrapper that calls
// vp9_sub_pixel_variance32x32_c() with xoffset pinned to 8 and yoffset to 0,
// returning that call's result unchanged.
unsigned int vp9_variance_halfpixvar32x32_h_c(
    const uint8_t *src_ptr, int source_stride,
    const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
821
// 64x64 "halfpix h" variance: a thin wrapper that calls
// vp9_sub_pixel_variance64x64_c() with xoffset pinned to 8 and yoffset to 0,
// returning that call's result unchanged.
unsigned int vp9_variance_halfpixvar64x64_h_c(
    const uint8_t *src_ptr, int source_stride,
    const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 0;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
830
// 16x16 "halfpix v" variance: a thin wrapper that calls
// vp9_sub_pixel_variance16x16_c() with xoffset pinned to 0 and yoffset to 8,
// returning that call's result unchanged.
unsigned int vp9_variance_halfpixvar16x16_v_c(
    const uint8_t *src_ptr, int source_stride,
    const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
839
// 32x32 "halfpix v" variance: a thin wrapper that calls
// vp9_sub_pixel_variance32x32_c() with xoffset pinned to 0 and yoffset to 8,
// returning that call's result unchanged.
unsigned int vp9_variance_halfpixvar32x32_v_c(
    const uint8_t *src_ptr, int source_stride,
    const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
848
// 64x64 "halfpix v" variance: a thin wrapper that calls
// vp9_sub_pixel_variance64x64_c() with xoffset pinned to 0 and yoffset to 8,
// returning that call's result unchanged.
unsigned int vp9_variance_halfpixvar64x64_v_c(
    const uint8_t *src_ptr, int source_stride,
    const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) {
  const int xoffset = 0;
  const int yoffset = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
857
// 16x16 "halfpix hv" variance: a thin wrapper that calls
// vp9_sub_pixel_variance16x16_c() with both xoffset and yoffset pinned to 8,
// returning that call's result unchanged.
unsigned int vp9_variance_halfpixvar16x16_hv_c(
    const uint8_t *src_ptr, int source_stride,
    const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
866
// 32x32 "halfpix hv" variance: a thin wrapper that calls
// vp9_sub_pixel_variance32x32_c() with both xoffset and yoffset pinned to 8,
// returning that call's result unchanged.
unsigned int vp9_variance_halfpixvar32x32_hv_c(
    const uint8_t *src_ptr, int source_stride,
    const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
875
// 64x64 "halfpix hv" variance: a thin wrapper that calls
// vp9_sub_pixel_variance64x64_c() with both xoffset and yoffset pinned to 8,
// returning that call's result unchanged.
unsigned int vp9_variance_halfpixvar64x64_hv_c(
    const uint8_t *src_ptr, int source_stride,
    const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) {
  const int xoffset = 8;
  const int yoffset = 8;

  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                       xoffset, yoffset,
                                       ref_ptr, recon_stride, sse);
}
884
// 16x16 sub-pixel "mse" entry point.
//
// Runs vp9_sub_pixel_variance16x16_c() with the caller's arguments, ignores
// the value that call returns, and instead returns whatever is stored in
// *sse once the call has completed.
unsigned int vp9_sub_pixel_mse16x16_c(
    const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset,
    const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) {
  // Only the value left in *sse is of interest; the return value is dropped.
  (void)vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);
  return *sse;
}
897
// 32x32 sub-pixel "mse" entry point.
//
// Runs vp9_sub_pixel_variance32x32_c() with the caller's arguments, ignores
// the value that call returns, and instead returns whatever is stored in
// *sse once the call has completed.
unsigned int vp9_sub_pixel_mse32x32_c(
    const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset,
    const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) {
  // Only the value left in *sse is of interest; the return value is dropped.
  (void)vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);
  return *sse;
}
910
// 64x64 sub-pixel "mse" entry point.
//
// Runs vp9_sub_pixel_variance64x64_c() with the caller's arguments, ignores
// the value that call returns, and instead returns whatever is stored in
// *sse once the call has completed.
unsigned int vp9_sub_pixel_mse64x64_c(
    const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset,
    const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) {
  // Only the value left in *sse is of interest; the return value is dropped.
  (void)vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);
  return *sse;
}
923
// Sub-pixel variance for a 16-wide, 8-high block.
//
// The source block is pushed through the two bilinear filter passes, using
// the 2-tap kernels looked up from xoffset and yoffset, and the filtered
// block is then compared against dst_ptr by vp9_variance16x8().  The sse
// pointer is forwarded to that call and its return value is returned here.
unsigned int vp9_sub_pixel_variance16x8_c(
    const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset,
    const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) {
  uint16_t first_pass[(8 + 1) * 16];  // destination of the first filter pass
  uint8_t filtered[20 * 16];          // destination of the second filter pass
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // First pass: pixel step 1, 9 rows by 16 columns requested.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 9, 16, x_taps);
  // Second pass: stride 16, pixel step 16, 8 rows by 16 columns.
  var_filter_block2d_bil_second_pass(first_pass, filtered,
                                     16, 16, 8, 16, y_taps);

  return vp9_variance16x8(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
944
vp9_sub_pixel_avg_variance16x8_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)945 unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
946 int src_pixels_per_line,
947 int xoffset,
948 int yoffset,
949 const uint8_t *dst_ptr,
950 int dst_pixels_per_line,
951 unsigned int *sse,
952 const uint8_t *second_pred) {
953 uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering
954 uint8_t temp2[20 * 16];
955 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer
956 const int16_t *hfilter, *vfilter;
957
958 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
959 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
960
961 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
962 1, 9, 16, hfilter);
963 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
964 vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
965 return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
966 }
967
// Sub-pixel variance for an 8-wide, 16-high block.
//
// The source block is pushed through the two bilinear filter passes, using
// the 2-tap kernels looked up from xoffset and yoffset, and the filtered
// block is then compared against dst_ptr by vp9_variance8x16().  The sse
// pointer is forwarded to that call and its return value is returned here.
unsigned int vp9_sub_pixel_variance8x16_c(
    const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset,
    const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) {
  // NOTE(review): declared as 9 * 16 (144 entries) although the first pass
  // below is asked for 17 rows of 8.  Assuming one entry per output sample
  // that is 136 entries, so it fits, but the dimensions look copied from the
  // 16x8 variant -- confirm against the filter helper.
  uint16_t first_pass[9 * 16];  // destination of the first filter pass
  uint8_t filtered[20 * 16];    // destination of the second filter pass
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // First pass: pixel step 1, 17 rows by 8 columns requested.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 17, 8, x_taps);
  // Second pass: stride 8, pixel step 8, 16 rows by 8 columns.
  var_filter_block2d_bil_second_pass(first_pass, filtered,
                                     8, 8, 16, 8, y_taps);

  return vp9_variance8x16(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
988
vp9_sub_pixel_avg_variance8x16_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)989 unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
990 int src_pixels_per_line,
991 int xoffset,
992 int yoffset,
993 const uint8_t *dst_ptr,
994 int dst_pixels_per_line,
995 unsigned int *sse,
996 const uint8_t *second_pred) {
997 uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering
998 uint8_t temp2[20 * 16];
999 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer
1000 const int16_t *hfilter, *vfilter;
1001
1002 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1003 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1004
1005 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1006 1, 17, 8, hfilter);
1007 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
1008 vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
1009 return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1010 }
1011
// Sub-pixel variance for an 8-wide, 4-high block.
//
// The source block is pushed through the two bilinear filter passes, using
// the 2-tap kernels looked up from xoffset and yoffset, and the filtered
// block is then compared against dst_ptr by vp9_variance8x4().  The sse
// pointer is forwarded to that call and its return value is returned here.
unsigned int vp9_sub_pixel_variance8x4_c(
    const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset,
    const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) {
  uint16_t first_pass[(4 + 1) * 8];  // destination of the first filter pass
  uint8_t filtered[20 * 16];         // destination of the second filter pass
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // First pass: pixel step 1, 5 rows by 8 columns requested.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 5, 8, x_taps);
  // Second pass: stride 8, pixel step 8, 4 rows by 8 columns.
  var_filter_block2d_bil_second_pass(first_pass, filtered,
                                     8, 8, 4, 8, y_taps);

  return vp9_variance8x4(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
1032
vp9_sub_pixel_avg_variance8x4_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)1033 unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
1034 int src_pixels_per_line,
1035 int xoffset,
1036 int yoffset,
1037 const uint8_t *dst_ptr,
1038 int dst_pixels_per_line,
1039 unsigned int *sse,
1040 const uint8_t *second_pred) {
1041 uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering
1042 uint8_t temp2[20 * 16];
1043 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer
1044 const int16_t *hfilter, *vfilter;
1045
1046 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1047 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1048
1049 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1050 1, 5, 8, hfilter);
1051 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
1052 vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
1053 return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1054 }
1055
// Sub-pixel variance for a 4-wide, 8-high block.
//
// The source block is pushed through the two bilinear filter passes, using
// the 2-tap kernels looked up from xoffset and yoffset, and the filtered
// block is then compared against dst_ptr by vp9_variance4x8().  The sse
// pointer is forwarded to that call and its return value is returned here.
unsigned int vp9_sub_pixel_variance4x8_c(
    const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset,
    const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) {
  // NOTE(review): declared as 5 * 8 (40 entries) although the first pass
  // below is asked for 9 rows of 4.  Assuming one entry per output sample
  // that is 36 entries, so it fits, but the dimensions look copied from the
  // 8x4 variant -- confirm against the filter helper.
  uint16_t first_pass[5 * 8];  // destination of the first filter pass
  // FIXME(jingning,rbultje): this second-pass buffer probably does not need
  // to be this big; the same issue appears in all other block size settings.
  uint8_t filtered[20 * 16];
  const int16_t *const x_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const y_taps = BILINEAR_FILTERS_2TAP(yoffset);

  // First pass: pixel step 1, 9 rows by 4 columns requested.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 9, 4, x_taps);
  // Second pass: stride 4, pixel step 4, 8 rows by 4 columns.
  var_filter_block2d_bil_second_pass(first_pass, filtered,
                                     4, 4, 8, 4, y_taps);

  return vp9_variance4x8(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
1078
vp9_sub_pixel_avg_variance4x8_c(const uint8_t * src_ptr,int src_pixels_per_line,int xoffset,int yoffset,const uint8_t * dst_ptr,int dst_pixels_per_line,unsigned int * sse,const uint8_t * second_pred)1079 unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
1080 int src_pixels_per_line,
1081 int xoffset,
1082 int yoffset,
1083 const uint8_t *dst_ptr,
1084 int dst_pixels_per_line,
1085 unsigned int *sse,
1086 const uint8_t *second_pred) {
1087 uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering
1088 uint8_t temp2[20 * 16];
1089 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer
1090 const int16_t *hfilter, *vfilter;
1091
1092 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1093 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1094
1095 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1096 1, 9, 4, hfilter);
1097 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
1098 vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
1099 return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
1100 }
1101
1102
// Builds a compound predictor as the rounded average of two predictors.
//
//   comp_pred  : output, written as `height` rows packed `width` bytes apart.
//   pred       : first input, read with the same packed layout (stride
//                equal to `width`).
//   width      : number of samples per row.
//   height     : number of rows.
//   ref        : second input, rows `ref_stride` bytes apart.
//   ref_stride : row stride of `ref`.
//
// Each output sample is (pred + ref + 1) >> 1, i.e. the mean rounded half up.
void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
                       int height, const uint8_t *ref, int ref_stride) {
  int row;

  for (row = 0; row < height; ++row) {
    int col;

    for (col = 0; col < width; ++col) {
      // uint8_t operands promote to int, so the sum cannot wrap; the result
      // is at most (255 + 255 + 1) >> 1 == 255 and fits back into a byte.
      const int sum = pred[col] + ref[col];
      comp_pred[col] = (uint8_t)((sum + 1) >> 1);
    }

    // Output and first predictor are packed; only ref has its own stride.
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
1118