1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "./vpx_config.h"
12 #include "./vpx_dsp_rtcd.h"
13
14 #include "vpx_ports/mem.h"
15 #include "vpx/vpx_integer.h"
16
17 #include "vpx_dsp/variance.h"
18
/* Two-tap bilinear kernels, indexed by sub-pixel offset (0..7).  Entry k is
 * { 128 - 16 * k, 16 * k }, so every pair of taps sums to 128. */
static const uint8_t bilinear_filters[8][2] = {
  { 128, 0 },
  { 112, 16 },
  { 96, 32 },
  { 80, 48 },
  { 64, 64 },
  { 48, 80 },
  { 32, 96 },
  { 16, 112 },
};
23
/* Returns the sum of squared differences between two 4x4 blocks of 8-bit
 * samples.  src_stride and ref_stride are the distances, in samples, between
 * the starts of consecutive rows of the respective block. */
uint32_t vpx_get4x4sse_cs_c(const uint8_t *src_ptr, int src_stride,
                            const uint8_t *ref_ptr, int ref_stride) {
  int total = 0;
  int row;

  for (row = 0; row < 4; ++row, src_ptr += src_stride, ref_ptr += ref_stride) {
    /* The four columns of the current row, unrolled. */
    const int d0 = src_ptr[0] - ref_ptr[0];
    const int d1 = src_ptr[1] - ref_ptr[1];
    const int d2 = src_ptr[2] - ref_ptr[2];
    const int d3 = src_ptr[3] - ref_ptr[3];

    total += d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3;
  }

  return (uint32_t)total;
}
41
/* Returns the sum of the squares of the 256 consecutive int16_t values
 * starting at src_ptr.  The total is accumulated in an unsigned int. */
uint32_t vpx_get_mb_ss_c(const int16_t *src_ptr) {
  const int16_t *const end = src_ptr + 256;
  unsigned int total = 0;

  while (src_ptr != end) {
    const int value = *src_ptr++;
    total += value * value;
  }

  return total;
}
51
/* Accumulates difference statistics over a w x h block of 8-bit samples.
 *
 * On return *sum holds the sum of (src - ref) over the block and *sse holds
 * the sum of the squared differences.  Both are 0 when w or h is not
 * positive.  src_stride and ref_stride are the row pitches in samples. */
static void variance(const uint8_t *src_ptr, int src_stride,
                     const uint8_t *ref_ptr, int ref_stride, int w, int h,
                     uint32_t *sse, int *sum) {
  uint32_t sq_total = 0;
  int diff_total = 0;
  int row, col;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      const int delta = src_ptr[col] - ref_ptr[col];
      diff_total += delta;
      sq_total += delta * delta;
    }

    src_ptr += src_stride;
    ref_ptr += ref_stride;
  }

  *sum = diff_total;
  *sse = sq_total;
}
71
72 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
73 // or vertical direction to produce the filtered output block. Used to implement
74 // the first-pass of 2-D separable filter.
75 //
76 // Produces int16_t output to retain precision for the next pass. Two filter
77 // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
78 // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
79 // It defines the offset required to move from one input to the next.
var_filter_block2d_bil_first_pass(const uint8_t * src_ptr,uint16_t * ref_ptr,unsigned int src_pixels_per_line,int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)80 static void var_filter_block2d_bil_first_pass(
81 const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line,
82 int pixel_step, unsigned int output_height, unsigned int output_width,
83 const uint8_t *filter) {
84 unsigned int i, j;
85
86 for (i = 0; i < output_height; ++i) {
87 for (j = 0; j < output_width; ++j) {
88 ref_ptr[j] = ROUND_POWER_OF_TWO(
89 (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
90 FILTER_BITS);
91
92 ++src_ptr;
93 }
94
95 src_ptr += src_pixels_per_line - output_width;
96 ref_ptr += output_width;
97 }
98 }
99
100 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
101 // or vertical direction to produce the filtered output block. Used to implement
102 // the second-pass of 2-D separable filter.
103 //
104 // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
105 // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
106 // filter is applied horizontally (pixel_step = 1) or vertically
107 // (pixel_step = stride). It defines the offset required to move from one input
108 // to the next. Output is 8-bit.
var_filter_block2d_bil_second_pass(const uint16_t * src_ptr,uint8_t * ref_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)109 static void var_filter_block2d_bil_second_pass(
110 const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line,
111 unsigned int pixel_step, unsigned int output_height,
112 unsigned int output_width, const uint8_t *filter) {
113 unsigned int i, j;
114
115 for (i = 0; i < output_height; ++i) {
116 for (j = 0; j < output_width; ++j) {
117 ref_ptr[j] = ROUND_POWER_OF_TWO(
118 (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
119 FILTER_BITS);
120 ++src_ptr;
121 }
122
123 src_ptr += src_pixels_per_line - output_width;
124 ref_ptr += output_width;
125 }
126 }
127
/* Defines vpx_variance<W>x<H>_c(): stores the block's sum of squared
 * differences in *sse and returns *sse - sum^2 / (W * H), where sum is the
 * sum of differences reported by variance(). */
#define VAR(W, H)                                                            \
  uint32_t vpx_variance##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \
                                     const uint8_t *ref_ptr, int ref_stride, \
                                     uint32_t *sse) {                        \
    int sum;                                                                 \
    int64_t sum_sq_over_n;                                                   \
                                                                             \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum);     \
    /* Square in 64 bits; the division result is then narrowed. */           \
    sum_sq_over_n = ((int64_t)sum * sum) / (W * H);                          \
    return *sse - (uint32_t)sum_sq_over_n;                                   \
  }
136
/* Defines vpx_sub_pixel_variance<W>x<H>_c(): bilinearly filters the W x H
 * block at src_ptr (horizontally with bilinear_filters[x_offset], then
 * vertically with bilinear_filters[y_offset]) and returns the variance of the
 * filtered block against ref_ptr.  *sse receives the sum of squared
 * differences. */
#define SUBPIX_VAR(W, H)                                                     \
  uint32_t vpx_sub_pixel_variance##W##x##H##_c(                              \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    /* H + 1 rows: the vertical pass also reads the row below each output. */ \
    uint16_t horiz_rows[(H + 1) * W];                                        \
    uint8_t filtered[H * W];                                                 \
    const uint8_t *const x_taps = bilinear_filters[x_offset];                \
    const uint8_t *const y_taps = bilinear_filters[y_offset];                \
                                                                             \
    var_filter_block2d_bil_first_pass(src_ptr, horiz_rows, src_stride, 1,    \
                                      H + 1, W, x_taps);                     \
    var_filter_block2d_bil_second_pass(horiz_rows, filtered, W, W, H, W,     \
                                       y_taps);                              \
                                                                             \
    return vpx_variance##W##x##H##_c(filtered, W, ref_ptr, ref_stride, sse); \
  }
151
/* Defines vpx_sub_pixel_avg_variance<W>x<H>_c(): as the sub-pixel variance,
 * except that the bilinearly filtered block is first averaged with
 * second_pred (via vpx_comp_avg_pred_c) before the variance against ref_ptr
 * is taken.  *sse receives the sum of squared differences. */
#define SUBPIX_AVG_VAR(W, H)                                                 \
  uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c(                          \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    /* H + 1 rows: the vertical pass also reads the row below each output. */ \
    uint16_t horiz_rows[(H + 1) * W];                                        \
    uint8_t filtered[H * W];                                                 \
    DECLARE_ALIGNED(16, uint8_t, averaged[H * W]);                           \
    const uint8_t *const x_taps = bilinear_filters[x_offset];                \
    const uint8_t *const y_taps = bilinear_filters[y_offset];                \
                                                                             \
    var_filter_block2d_bil_first_pass(src_ptr, horiz_rows, src_stride, 1,    \
                                      H + 1, W, x_taps);                     \
    var_filter_block2d_bil_second_pass(horiz_rows, filtered, W, W, H, W,     \
                                       y_taps);                              \
                                                                             \
    vpx_comp_avg_pred_c(averaged, second_pred, W, H, filtered, W);           \
                                                                             \
    return vpx_variance##W##x##H##_c(averaged, W, ref_ptr, ref_stride, sse); \
  }
170
/* Defines vpx_get<W>x<H>var_c(): the same accumulation as the variance
 * functions, but the caller also passes sum.  The sum of squared differences
 * and the sum of differences are handed back through *sse and *sum; nothing
 * is returned and sse - sum^2 / (W * H) is not computed.
 */
#define GET_VAR(W, H)                                                   \
  void vpx_get##W##x##H##var_c(                                         \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,   \
      int ref_stride, uint32_t *sse, int *sum) {                        \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \
  }
181
/* Defines vpx_mse<W>x<H>_c(): the same accumulation as the variance
 * functions, but the sse - sum^2 / (W * H) step is skipped.  The sum of
 * squared differences is returned in addition to being stored in *sse.
 */
#define MSE(W, H)                                                        \
  uint32_t vpx_mse##W##x##H##_c(const uint8_t *src_ptr, int src_stride,  \
                                const uint8_t *ref_ptr, int ref_stride,  \
                                uint32_t *sse) {                         \
    int discarded_sum; /* variance() needs somewhere to write the sum */ \
                                                                         \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,        \
             &discarded_sum);                                            \
    return *sse;                                                         \
  }
194
195 /* All three forms of the variance are available in the same sizes. */
196 #define VARIANCES(W, H) \
197 VAR(W, H) \
198 SUBPIX_VAR(W, H) \
199 SUBPIX_AVG_VAR(W, H)
200
201 VARIANCES(64, 64)
202 VARIANCES(64, 32)
203 VARIANCES(32, 64)
204 VARIANCES(32, 32)
205 VARIANCES(32, 16)
206 VARIANCES(16, 32)
207 VARIANCES(16, 16)
208 VARIANCES(16, 8)
209 VARIANCES(8, 16)
210 VARIANCES(8, 8)
211 VARIANCES(8, 4)
212 VARIANCES(4, 8)
213 VARIANCES(4, 4)
214
215 GET_VAR(16, 16)
216 GET_VAR(8, 8)
217
218 MSE(16, 16)
219 MSE(16, 8)
220 MSE(8, 16)
221 MSE(8, 8)
222
/* Writes the rounded average of pred and ref into comp_pred for a
 * width x height block of 8-bit samples.  comp_pred and pred are packed
 * (rows are width apart); ref rows are ref_stride apart. */
void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int row;

  for (row = 0; row < height; ++row) {
    int col = 0;

    while (col < width) {
      const int pair_sum = pred[col] + ref[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
      ++col;
    }

    ref += ref_stride;
    pred += width;
    comp_pred += width;
  }
}
237
238 #if CONFIG_VP9_HIGHBITDEPTH
/* Accumulates difference statistics over a w x h block of 16-bit samples.
 *
 * src8_ptr and ref8_ptr are converted with CONVERT_TO_SHORTPTR and then read
 * as uint16_t; the strides advance those uint16_t pointers.  On return *sum
 * holds the sum of (src - ref) and *sse the sum of squared differences, both
 * kept in 64 bits. */
static void highbd_variance64(const uint8_t *src8_ptr, int src_stride,
                              const uint8_t *ref8_ptr, int ref_stride, int w,
                              int h, uint64_t *sse, int64_t *sum) {
  const uint16_t *src_row = CONVERT_TO_SHORTPTR(src8_ptr);
  const uint16_t *ref_row = CONVERT_TO_SHORTPTR(ref8_ptr);
  uint64_t sq_total = 0;
  int64_t diff_total = 0;
  int row, col;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      const int delta = src_row[col] - ref_row[col];
      diff_total += delta;
      sq_total += delta * delta;
    }
    src_row += src_stride;
    ref_row += ref_stride;
  }

  *sum = diff_total;
  *sse = sq_total;
}
259
/* Runs highbd_variance64() and narrows its 64-bit totals, unscaled, to the
 * 32-bit *sse and int *sum outputs. */
static void highbd_8_variance(const uint8_t *src8_ptr, int src_stride,
                              const uint8_t *ref8_ptr, int ref_stride, int w,
                              int h, uint32_t *sse, int *sum) {
  uint64_t wide_sse = 0;
  int64_t wide_sum = 0;

  highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h,
                    &wide_sse, &wide_sum);

  *sum = (int)wide_sum;
  *sse = (uint32_t)wide_sse;
}
270
/* Runs highbd_variance64() and scales its totals down with rounding before
 * narrowing: the sum of squares by 4 bits, the sum by 2 bits.
 * NOTE(review): presumably this brings 10-bit statistics onto the 8-bit
 * scale (2 extra bits per sample, doubled for squares) -- confirm. */
static void highbd_10_variance(const uint8_t *src8_ptr, int src_stride,
                               const uint8_t *ref8_ptr, int ref_stride, int w,
                               int h, uint32_t *sse, int *sum) {
  uint64_t wide_sse = 0;
  int64_t wide_sum = 0;

  highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h,
                    &wide_sse, &wide_sum);

  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 2);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(wide_sse, 4);
}
281
/* Runs highbd_variance64() and scales its totals down with rounding before
 * narrowing: the sum of squares by 8 bits, the sum by 4 bits.
 * NOTE(review): presumably this brings 12-bit statistics onto the 8-bit
 * scale (4 extra bits per sample, doubled for squares) -- confirm. */
static void highbd_12_variance(const uint8_t *src8_ptr, int src_stride,
                               const uint8_t *ref8_ptr, int ref_stride, int w,
                               int h, uint32_t *sse, int *sum) {
  uint64_t wide_sse = 0;
  int64_t wide_sum = 0;

  highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h,
                    &wide_sse, &wide_sum);

  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 4);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(wide_sse, 8);
}
292
/* Defines vpx_highbd_{8,10,12}_variance<W>x<H>_c().  Each stores the sum of
 * squared differences in *sse and returns sse - sum^2 / (W * H).  The 10- and
 * 12-bit versions do the subtraction in 64 bits and return 0 when the result
 * would be negative; the 8-bit version subtracts in uint32_t. */
#define HIGHBD_VAR(W, H)                                                    \
  uint32_t vpx_highbd_8_variance##W##x##H##_c(                              \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int sum;                                                                \
    int64_t sum_sq_over_n;                                                  \
                                                                            \
    highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,  \
                      &sum);                                                \
    sum_sq_over_n = ((int64_t)sum * sum) / (W * H);                         \
    return *sse - (uint32_t)sum_sq_over_n;                                  \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_10_variance##W##x##H##_c(                             \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int sum;                                                                \
    int64_t result;                                                         \
                                                                            \
    highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &sum);                                               \
    result = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));            \
    if (result < 0) return 0;                                               \
    return (uint32_t)result;                                                \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_12_variance##W##x##H##_c(                             \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int sum;                                                                \
    int64_t result;                                                         \
                                                                            \
    highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &sum);                                               \
    result = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));            \
    if (result < 0) return 0;                                               \
    return (uint32_t)result;                                                \
  }
324
/* Defines vpx_highbd_{8,10,12}_get<S>x<S>var_c() for an S x S block.  Each
 * forwards to the matching highbd_<bd>_variance() helper, which writes the
 * sum of squared differences to *sse and the sum of differences to *sum. */
#define HIGHBD_GET_VAR(S)                                                   \
  void vpx_highbd_8_get##S##x##S##var_c(                                    \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse, int *sum) {                            \
    highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse,  \
                      sum);                                                 \
  }                                                                         \
                                                                            \
  void vpx_highbd_10_get##S##x##S##var_c(                                   \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse, int *sum) {                            \
    highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \
                       sum);                                                \
  }                                                                         \
                                                                            \
  void vpx_highbd_12_get##S##x##S##var_c(                                   \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse, int *sum) {                            \
    highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \
                       sum);                                                \
  }
346
/* Defines vpx_highbd_{8,10,12}_mse<W>x<H>_c().  Each calls the matching
 * highbd_<bd>_variance() helper, ignores the sum it reports, and returns the
 * sum of squared differences that was stored in *sse. */
#define HIGHBD_MSE(W, H)                                                    \
  uint32_t vpx_highbd_8_mse##W##x##H##_c(                                   \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int discarded_sum;                                                      \
                                                                            \
    highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,  \
                      &discarded_sum);                                      \
    return *sse;                                                            \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_10_mse##W##x##H##_c(                                  \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int discarded_sum;                                                      \
                                                                            \
    highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &discarded_sum);                                     \
    return *sse;                                                            \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_12_mse##W##x##H##_c(                                  \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int discarded_sum;                                                      \
                                                                            \
    highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &discarded_sum);                                     \
    return *sse;                                                            \
  }
374
highbd_var_filter_block2d_bil_first_pass(const uint8_t * src_ptr8,uint16_t * output_ptr,unsigned int src_pixels_per_line,int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)375 static void highbd_var_filter_block2d_bil_first_pass(
376 const uint8_t *src_ptr8, uint16_t *output_ptr,
377 unsigned int src_pixels_per_line, int pixel_step,
378 unsigned int output_height, unsigned int output_width,
379 const uint8_t *filter) {
380 unsigned int i, j;
381 uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
382 for (i = 0; i < output_height; ++i) {
383 for (j = 0; j < output_width; ++j) {
384 output_ptr[j] = ROUND_POWER_OF_TWO(
385 (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
386 FILTER_BITS);
387
388 ++src_ptr;
389 }
390
391 // Next row...
392 src_ptr += src_pixels_per_line - output_width;
393 output_ptr += output_width;
394 }
395 }
396
highbd_var_filter_block2d_bil_second_pass(const uint16_t * src_ptr,uint16_t * output_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)397 static void highbd_var_filter_block2d_bil_second_pass(
398 const uint16_t *src_ptr, uint16_t *output_ptr,
399 unsigned int src_pixels_per_line, unsigned int pixel_step,
400 unsigned int output_height, unsigned int output_width,
401 const uint8_t *filter) {
402 unsigned int i, j;
403
404 for (i = 0; i < output_height; ++i) {
405 for (j = 0; j < output_width; ++j) {
406 output_ptr[j] = ROUND_POWER_OF_TWO(
407 (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
408 FILTER_BITS);
409 ++src_ptr;
410 }
411
412 src_ptr += src_pixels_per_line - output_width;
413 output_ptr += output_width;
414 }
415 }
416
/* Defines vpx_highbd_{8,10,12}_sub_pixel_variance<W>x<H>_c().  Each filters
 * the W x H block at src_ptr horizontally with bilinear_filters[x_offset] and
 * vertically with bilinear_filters[y_offset], then returns the matching
 * vpx_highbd_<bd>_variance of the filtered block against ref_ptr.  The
 * filtered uint16_t buffer is passed on through CONVERT_TO_BYTEPTR. */
#define HIGHBD_SUBPIX_VAR(W, H)                                              \
  uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c(                     \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    /* H + 1 rows: the vertical pass also reads the row below each output. */ \
    uint16_t horiz_rows[(H + 1) * W];                                        \
    uint16_t filtered[H * W];                                                \
    const uint8_t *const x_taps = bilinear_filters[x_offset];                \
    const uint8_t *const y_taps = bilinear_filters[y_offset];                \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(src_ptr, horiz_rows,            \
                                             src_stride, 1, H + 1, W,        \
                                             x_taps);                        \
    highbd_var_filter_block2d_bil_second_pass(horiz_rows, filtered, W, W, H, \
                                              W, y_taps);                    \
                                                                             \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(filtered),  \
                                              W, ref_ptr, ref_stride, sse);  \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    uint16_t horiz_rows[(H + 1) * W];                                        \
    uint16_t filtered[H * W];                                                \
    const uint8_t *const x_taps = bilinear_filters[x_offset];                \
    const uint8_t *const y_taps = bilinear_filters[y_offset];                \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(src_ptr, horiz_rows,            \
                                             src_stride, 1, H + 1, W,        \
                                             x_taps);                        \
    highbd_var_filter_block2d_bil_second_pass(horiz_rows, filtered, W, W, H, \
                                              W, y_taps);                    \
                                                                             \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(filtered), \
                                               W, ref_ptr, ref_stride, sse); \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    uint16_t horiz_rows[(H + 1) * W];                                        \
    uint16_t filtered[H * W];                                                \
    const uint8_t *const x_taps = bilinear_filters[x_offset];                \
    const uint8_t *const y_taps = bilinear_filters[y_offset];                \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(src_ptr, horiz_rows,            \
                                             src_stride, 1, H + 1, W,        \
                                             x_taps);                        \
    highbd_var_filter_block2d_bil_second_pass(horiz_rows, filtered, W, W, H, \
                                              W, y_taps);                    \
                                                                             \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(filtered), \
                                               W, ref_ptr, ref_stride, sse); \
  }
462
/* Defines vpx_highbd_{8,10,12}_sub_pixel_avg_variance<W>x<H>_c().  Each
 * bilinearly filters the W x H block at src_ptr (x_offset horizontally,
 * y_offset vertically), averages the result with second_pred through
 * vpx_highbd_comp_avg_pred_c, and returns the matching
 * vpx_highbd_<bd>_variance of that average against ref_ptr. */
#define HIGHBD_SUBPIX_AVG_VAR(W, H)                                          \
  uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                 \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    /* H + 1 rows: the vertical pass also reads the row below each output. */ \
    uint16_t horiz_rows[(H + 1) * W];                                        \
    uint16_t filtered[H * W];                                                \
    DECLARE_ALIGNED(16, uint16_t, averaged[H * W]);                          \
    const uint8_t *const x_taps = bilinear_filters[x_offset];                \
    const uint8_t *const y_taps = bilinear_filters[y_offset];                \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(src_ptr, horiz_rows,            \
                                             src_stride, 1, H + 1, W,        \
                                             x_taps);                        \
    highbd_var_filter_block2d_bil_second_pass(horiz_rows, filtered, W, W, H, \
                                              W, y_taps);                    \
                                                                             \
    vpx_highbd_comp_avg_pred_c(averaged, CONVERT_TO_SHORTPTR(second_pred),   \
                               W, H, filtered, W);                           \
                                                                             \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(averaged),  \
                                              W, ref_ptr, ref_stride, sse);  \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    uint16_t horiz_rows[(H + 1) * W];                                        \
    uint16_t filtered[H * W];                                                \
    DECLARE_ALIGNED(16, uint16_t, averaged[H * W]);                          \
    const uint8_t *const x_taps = bilinear_filters[x_offset];                \
    const uint8_t *const y_taps = bilinear_filters[y_offset];                \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(src_ptr, horiz_rows,            \
                                             src_stride, 1, H + 1, W,        \
                                             x_taps);                        \
    highbd_var_filter_block2d_bil_second_pass(horiz_rows, filtered, W, W, H, \
                                              W, y_taps);                    \
                                                                             \
    vpx_highbd_comp_avg_pred_c(averaged, CONVERT_TO_SHORTPTR(second_pred),   \
                               W, H, filtered, W);                           \
                                                                             \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(averaged), \
                                               W, ref_ptr, ref_stride, sse); \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    uint16_t horiz_rows[(H + 1) * W];                                        \
    uint16_t filtered[H * W];                                                \
    DECLARE_ALIGNED(16, uint16_t, averaged[H * W]);                          \
    const uint8_t *const x_taps = bilinear_filters[x_offset];                \
    const uint8_t *const y_taps = bilinear_filters[y_offset];                \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(src_ptr, horiz_rows,            \
                                             src_stride, 1, H + 1, W,        \
                                             x_taps);                        \
    highbd_var_filter_block2d_bil_second_pass(horiz_rows, filtered, W, W, H, \
                                              W, y_taps);                    \
                                                                             \
    vpx_highbd_comp_avg_pred_c(averaged, CONVERT_TO_SHORTPTR(second_pred),   \
                               W, H, filtered, W);                           \
                                                                             \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(averaged), \
                                               W, ref_ptr, ref_stride, sse); \
  }
523
524 /* All three forms of the variance are available in the same sizes. */
525 #define HIGHBD_VARIANCES(W, H) \
526 HIGHBD_VAR(W, H) \
527 HIGHBD_SUBPIX_VAR(W, H) \
528 HIGHBD_SUBPIX_AVG_VAR(W, H)
529
530 HIGHBD_VARIANCES(64, 64)
531 HIGHBD_VARIANCES(64, 32)
532 HIGHBD_VARIANCES(32, 64)
533 HIGHBD_VARIANCES(32, 32)
534 HIGHBD_VARIANCES(32, 16)
535 HIGHBD_VARIANCES(16, 32)
536 HIGHBD_VARIANCES(16, 16)
537 HIGHBD_VARIANCES(16, 8)
538 HIGHBD_VARIANCES(8, 16)
539 HIGHBD_VARIANCES(8, 8)
540 HIGHBD_VARIANCES(8, 4)
541 HIGHBD_VARIANCES(4, 8)
542 HIGHBD_VARIANCES(4, 4)
543
544 HIGHBD_GET_VAR(8)
545 HIGHBD_GET_VAR(16)
546
547 HIGHBD_MSE(16, 16)
548 HIGHBD_MSE(16, 8)
549 HIGHBD_MSE(8, 16)
550 HIGHBD_MSE(8, 8)
551
/* Writes the rounded average of pred and ref into comp_pred for a
 * width x height block of 16-bit samples.  comp_pred and pred are packed
 * (rows are width apart); ref rows are ref_stride apart.
 *
 * NOTE(review): HIGHBD_SUBPIX_AVG_VAR in this file calls
 * vpx_highbd_comp_avg_pred_c; presumably an included header maps this name
 * onto that one -- confirm before renaming either side. */
void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint16_t *pred,
                              int width, int height, const uint16_t *ref,
                              int ref_stride) {
  int row;

  for (row = 0; row < height; ++row) {
    int col = 0;

    while (col < width) {
      const int pair_sum = pred[col] + ref[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
      ++col;
    }

    ref += ref_stride;
    pred += width;
    comp_pred += width;
  }
}
566 #endif // CONFIG_VP9_HIGHBITDEPTH
567