/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom/aom_integer.h"
#include "aom_ports/mem.h"

#include "aom_dsp/aom_filter.h"
#include "aom_dsp/blend.h"
#include "aom_dsp/variance.h"

#include "av1/common/av1_common_int.h"
#include "av1/common/filter.h"
#include "av1/common/reconinter.h"
#include "av1/encoder/reconinter_enc.h"

// Returns the sum of squared differences between two 4x4 blocks of 8-bit
// samples.
//
// a and b point at the top-left sample of each block; a_stride and b_stride
// are the offsets, in samples, from one row to the next.
uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                            int b_stride) {
  int distortion = 0;

  for (int r = 0; r < 4; ++r) {
    for (int c = 0; c < 4; ++c) {
      const int diff = a[c] - b[c];
      distortion += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }

  return distortion;
}

// Returns the sum of the squares of the 256 consecutive int16_t values
// starting at a.
uint32_t aom_get_mb_ss_c(const int16_t *a) {
  unsigned int sum = 0;

  for (unsigned int i = 0; i < 256; ++i) {
    sum += a[i] * a[i];
  }

  return sum;
}

// Accumulates difference statistics over a w x h block of 8-bit samples.
//
// Both outputs are reset to zero first; afterwards *sum holds the sum of
// (a - b) over the block and *sse holds the sum of (a - b)^2. a_stride and
// b_stride are the offsets, in samples, from one row to the next.
static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
  *sum = 0;
  *sse = 0;

  for (int i = 0; i < h; ++i) {
    for (int j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }
}

// Returns the sum of squared differences between two w x h blocks of 8-bit
// samples. The block dimensions are run-time arguments; the sum of
// differences that variance() also produces is discarded.
uint32_t aom_sse_odd_size(const uint8_t *a, int a_stride, const uint8_t *b,
                          int b_stride, int w, int h) {
  uint32_t sse;
  int sum;
  variance(a, a_stride, b, b_stride, w, h, &sse, &sum);
  return sse;
}

86 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
87 // or vertical direction to produce the filtered output block. Used to implement
88 // the first-pass of 2-D separable filter.
89 //
90 // Produces int16_t output to retain precision for the next pass. Two filter
91 // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
92 // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
93 // It defines the offset required to move from one input to the next.
aom_var_filter_block2d_bil_first_pass_c(const uint8_t * a,uint16_t * b,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)94 void aom_var_filter_block2d_bil_first_pass_c(const uint8_t *a, uint16_t *b,
95                                              unsigned int src_pixels_per_line,
96                                              unsigned int pixel_step,
97                                              unsigned int output_height,
98                                              unsigned int output_width,
99                                              const uint8_t *filter) {
100   unsigned int i, j;
101 
102   for (i = 0; i < output_height; ++i) {
103     for (j = 0; j < output_width; ++j) {
104       b[j] = ROUND_POWER_OF_TWO(
105           (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
106 
107       ++a;
108     }
109 
110     a += src_pixels_per_line - output_width;
111     b += output_width;
112   }
113 }
114 
115 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
116 // or vertical direction to produce the filtered output block. Used to implement
117 // the second-pass of 2-D separable filter.
118 //
119 // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
120 // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
121 // filter is applied horizontally (pixel_step = 1) or vertically
122 // (pixel_step = stride). It defines the offset required to move from one input
123 // to the next. Output is 8-bit.
aom_var_filter_block2d_bil_second_pass_c(const uint16_t * a,uint8_t * b,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)124 void aom_var_filter_block2d_bil_second_pass_c(const uint16_t *a, uint8_t *b,
125                                               unsigned int src_pixels_per_line,
126                                               unsigned int pixel_step,
127                                               unsigned int output_height,
128                                               unsigned int output_width,
129                                               const uint8_t *filter) {
130   unsigned int i, j;
131 
132   for (i = 0; i < output_height; ++i) {
133     for (j = 0; j < output_width; ++j) {
134       b[j] = ROUND_POWER_OF_TWO(
135           (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
136       ++a;
137     }
138 
139     a += src_pixels_per_line - output_width;
140     b += output_width;
141   }
142 }
143 
// Defines aom_variance<W>x<H>_c(): stores the sum of squared differences of
// the W x H block in *sse and returns *sse - sum^2 / (W * H), where sum is the
// sum of the sample differences a - b. The sum^2 term is computed in 64 bits.
#define VAR(W, H)                                                    \
  uint32_t aom_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                     const uint8_t *b, int b_stride, \
                                     uint32_t *sse) {                \
    int sum;                                                         \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));        \
  }

// Defines aom_sub_pixel_variance<W>x<H>_c(): bilinearly filters the block at
// a and returns its variance against b.
//
// The first pass filters horizontally (pixel_step 1) with
// bilinear_filters_2t[xoffset] and produces H + 1 rows, because the second
// pass reads one row below each output row. The second pass filters
// vertically (pixel_step W) with bilinear_filters_2t[yoffset] into the packed
// 8-bit block temp2, which is then compared with b.
#define SUBPIX_VAR(W, H)                                                      \
  uint32_t aom_sub_pixel_variance##W##x##H##_c(                               \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,               \
      const uint8_t *b, int b_stride, uint32_t *sse) {                        \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint8_t temp2[H * W];                                                     \
                                                                              \
    aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W, \
                                            bilinear_filters_2t[xoffset]);    \
    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,       \
                                             bilinear_filters_2t[yoffset]);   \
                                                                              \
    return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse);             \
  }

// Defines two compound-prediction variants of the sub-pixel variance:
//
//   aom_sub_pixel_avg_variance<W>x<H>_c()
//     Bilinearly filters the block at a (same two passes as SUBPIX_VAR),
//     combines the result with second_pred through aom_comp_avg_pred(), and
//     returns the variance of the combined block against b.
//
//   aom_dist_wtd_sub_pixel_avg_variance<W>x<H>_c()
//     Same, but combines through aom_dist_wtd_comp_avg_pred() using the
//     weights in jcp_param.
//
// second_pred is read as a packed W x H block (row pitch W).
//
// NOTE(review): the dist_wtd variant ends by calling aom_variance<W>x<H>
// (no _c suffix) while the plain variant calls aom_variance<W>x<H>_c. The
// un-suffixed name is not defined in this file; presumably it is the
// dispatched entry point declared by the rtcd header. Confirm this is
// intended before unifying the two.
#define SUBPIX_AVG_VAR(W, H)                                                   \
  uint32_t aom_sub_pixel_avg_variance##W##x##H##_c(                            \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,                \
      const uint8_t *b, int b_stride, uint32_t *sse,                           \
      const uint8_t *second_pred) {                                            \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint8_t temp2[H * W];                                                      \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                                \
                                                                               \
    aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W,  \
                                            bilinear_filters_2t[xoffset]);     \
    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,        \
                                             bilinear_filters_2t[yoffset]);    \
                                                                               \
    aom_comp_avg_pred(temp3, second_pred, W, H, temp2, W);                     \
                                                                               \
    return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse);              \
  }                                                                            \
  uint32_t aom_dist_wtd_sub_pixel_avg_variance##W##x##H##_c(                   \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,                \
      const uint8_t *b, int b_stride, uint32_t *sse,                           \
      const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param) {     \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint8_t temp2[H * W];                                                      \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                                \
                                                                               \
    aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W,  \
                                            bilinear_filters_2t[xoffset]);     \
    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,        \
                                             bilinear_filters_2t[yoffset]);    \
                                                                               \
    aom_dist_wtd_comp_avg_pred(temp3, second_pred, W, H, temp2, W, jcp_param); \
                                                                               \
    return aom_variance##W##x##H(temp3, W, b, b_stride, sse);                  \
  }

/* Defines aom_get<W>x<H>var_c(). Identical to the variance call except that
 * it takes an additional parameter, sum, through which the sum of differences
 * is returned by reference, and it does not compute sse - sum^2 / (w * h).
 */
#define GET_VAR(W, H)                                                         \
  void aom_get##W##x##H##var_c(const uint8_t *a, int a_stride,                \
                               const uint8_t *b, int b_stride, uint32_t *sse, \
                               int *sum) {                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, sum);                       \
  }

/* Defines aom_mse<W>x<H>_c(). Identical to the variance call except that it
 * does not calculate sse - sum^2 / (w * h); it returns sse in addition to
 * storing it in the passed-in variable.
 */
#define MSE(W, H)                                               \
  uint32_t aom_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                const uint8_t *b, int b_stride, \
                                uint32_t *sse) {                \
    int sum;                                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);        \
    return *sse;                                                \
  }

/* All three forms of the variance are available in the same sizes:
 * VARIANCES(W, H) expands VAR, SUBPIX_VAR and SUBPIX_AVG_VAR for one block
 * size. VAR comes first because the two sub-pixel forms call the
 * aom_variance<W>x<H>_c() it defines.
 */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)

234 VARIANCES(128, 128)
235 VARIANCES(128, 64)
236 VARIANCES(64, 128)
237 VARIANCES(64, 64)
238 VARIANCES(64, 32)
239 VARIANCES(32, 64)
240 VARIANCES(32, 32)
241 VARIANCES(32, 16)
242 VARIANCES(16, 32)
243 VARIANCES(16, 16)
244 VARIANCES(16, 8)
245 VARIANCES(8, 16)
246 VARIANCES(8, 8)
247 VARIANCES(8, 4)
248 VARIANCES(4, 8)
249 VARIANCES(4, 4)
250 VARIANCES(4, 2)
251 VARIANCES(2, 4)
252 VARIANCES(2, 2)
253 VARIANCES(4, 16)
254 VARIANCES(16, 4)
255 VARIANCES(8, 32)
256 VARIANCES(32, 8)
257 VARIANCES(16, 64)
258 VARIANCES(64, 16)
259 
260 GET_VAR(16, 16)
261 GET_VAR(8, 8)
262 
263 MSE(16, 16)
264 MSE(16, 8)
265 MSE(8, 16)
266 MSE(8, 8)
267 
// Writes the rounded average of pred and ref, sample by sample, to comp_pred.
//
// comp_pred and pred are both walked with a row pitch of width (packed
// blocks); ref is walked with a row pitch of ref_stride.
void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  for (int i = 0; i < height; ++i) {
    for (int j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}

283 // Get pred block from up-sampled reference.
aom_upsampled_pred_c(MACROBLOCKD * xd,const AV1_COMMON * const cm,int mi_row,int mi_col,const MV * const mv,uint8_t * comp_pred,int width,int height,int subpel_x_q3,int subpel_y_q3,const uint8_t * ref,int ref_stride,int subpel_search)284 void aom_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm,
285                           int mi_row, int mi_col, const MV *const mv,
286                           uint8_t *comp_pred, int width, int height,
287                           int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
288                           int ref_stride, int subpel_search) {
289   // expect xd == NULL only in tests
290   if (xd != NULL) {
291     const MB_MODE_INFO *mi = xd->mi[0];
292     const int ref_num = 0;
293     const int is_intrabc = is_intrabc_block(mi);
294     const struct scale_factors *const sf =
295         is_intrabc ? &cm->sf_identity : xd->block_ref_scale_factors[ref_num];
296     const int is_scaled = av1_is_scaled(sf);
297 
298     if (is_scaled) {
299       int plane = 0;
300       const int mi_x = mi_col * MI_SIZE;
301       const int mi_y = mi_row * MI_SIZE;
302       const struct macroblockd_plane *const pd = &xd->plane[plane];
303       const struct buf_2d *const dst_buf = &pd->dst;
304       const struct buf_2d *const pre_buf =
305           is_intrabc ? dst_buf : &pd->pre[ref_num];
306 
307       InterPredParams inter_pred_params;
308       inter_pred_params.conv_params = get_conv_params(0, plane, xd->bd);
309       const int_interpfilters filters =
310           av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
311       av1_init_inter_params(
312           &inter_pred_params, width, height, mi_y >> pd->subsampling_y,
313           mi_x >> pd->subsampling_x, pd->subsampling_x, pd->subsampling_y,
314           xd->bd, is_cur_buf_hbd(xd), is_intrabc, sf, pre_buf, filters);
315       av1_enc_build_one_inter_predictor(comp_pred, width, mv,
316                                         &inter_pred_params);
317       return;
318     }
319   }
320 
321   const InterpFilterParams *filter = av1_get_filter(subpel_search);
322 
323   if (!subpel_x_q3 && !subpel_y_q3) {
324     for (int i = 0; i < height; i++) {
325       memcpy(comp_pred, ref, width * sizeof(*comp_pred));
326       comp_pred += width;
327       ref += ref_stride;
328     }
329   } else if (!subpel_y_q3) {
330     const int16_t *const kernel =
331         av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
332     aom_convolve8_horiz_c(ref, ref_stride, comp_pred, width, kernel, 16, NULL,
333                           -1, width, height);
334   } else if (!subpel_x_q3) {
335     const int16_t *const kernel =
336         av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
337     aom_convolve8_vert_c(ref, ref_stride, comp_pred, width, NULL, -1, kernel,
338                          16, width, height);
339   } else {
340     DECLARE_ALIGNED(16, uint8_t,
341                     temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
342     const int16_t *const kernel_x =
343         av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
344     const int16_t *const kernel_y =
345         av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
346     const int intermediate_height =
347         (((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
348     assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
349     aom_convolve8_horiz_c(ref - ref_stride * ((filter->taps >> 1) - 1),
350                           ref_stride, temp, MAX_SB_SIZE, kernel_x, 16, NULL, -1,
351                           width, intermediate_height);
352     aom_convolve8_vert_c(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1),
353                          MAX_SB_SIZE, comp_pred, width, NULL, -1, kernel_y, 16,
354                          width, height);
355   }
356 }
357 
// Builds the up-sampled prediction into comp_pred with aom_upsampled_pred(),
// then replaces each sample with the rounded average of that sample and the
// corresponding sample of pred. comp_pred and pred are both walked with a row
// pitch of width.
void aom_comp_avg_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm,
                                   int mi_row, int mi_col, const MV *const mv,
                                   uint8_t *comp_pred, const uint8_t *pred,
                                   int width, int height, int subpel_x_q3,
                                   int subpel_y_q3, const uint8_t *ref,
                                   int ref_stride, int subpel_search) {
  aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
                     subpel_x_q3, subpel_y_q3, ref, ref_stride, subpel_search);
  for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
      comp_pred[j] = ROUND_POWER_OF_TWO(comp_pred[j] + pred[j], 1);
    }
    comp_pred += width;
    pred += width;
  }
}

aom_dist_wtd_comp_avg_pred_c(uint8_t * comp_pred,const uint8_t * pred,int width,int height,const uint8_t * ref,int ref_stride,const DIST_WTD_COMP_PARAMS * jcp_param)377 void aom_dist_wtd_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred,
378                                   int width, int height, const uint8_t *ref,
379                                   int ref_stride,
380                                   const DIST_WTD_COMP_PARAMS *jcp_param) {
381   int i, j;
382   const int fwd_offset = jcp_param->fwd_offset;
383   const int bck_offset = jcp_param->bck_offset;
384 
385   for (i = 0; i < height; ++i) {
386     for (j = 0; j < width; ++j) {
387       int tmp = pred[j] * bck_offset + ref[j] * fwd_offset;
388       tmp = ROUND_POWER_OF_TWO(tmp, DIST_PRECISION_BITS);
389       comp_pred[j] = (uint8_t)tmp;
390     }
391     comp_pred += width;
392     pred += width;
393     ref += ref_stride;
394   }
395 }
396 
// Builds the up-sampled prediction into comp_pred with aom_upsampled_pred_c(),
// then replaces each sample with
//   (pred * bck_offset + comp_pred * fwd_offset), rounded and shifted right by
//   DIST_PRECISION_BITS,
// with both weights taken from jcp_param. comp_pred and pred are both walked
// with a row pitch of width.
void aom_dist_wtd_comp_avg_upsampled_pred_c(
    MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
    const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
    int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search) {
  const int fwd_offset = jcp_param->fwd_offset;
  const int bck_offset = jcp_param->bck_offset;

  aom_upsampled_pred_c(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
                       subpel_x_q3, subpel_y_q3, ref, ref_stride,
                       subpel_search);

  for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
      int tmp = pred[j] * bck_offset + comp_pred[j] * fwd_offset;
      tmp = ROUND_POWER_OF_TWO(tmp, DIST_PRECISION_BITS);
      comp_pred[j] = (uint8_t)tmp;
    }
    comp_pred += width;
    pred += width;
  }
}

421 #if CONFIG_AV1_HIGHBITDEPTH
// Accumulates difference statistics over a w x h block of 16-bit samples.
//
// a8 and b8 are converted with CONVERT_TO_SHORTPTR and read as uint16_t.
// On return *sum holds the sum of (a - b) and *sse the sum of (a - b)^2,
// both accumulated in 64 bits. a_stride and b_stride are in samples.
static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint64_t *sse, int64_t *sum) {
  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  int64_t tsum = 0;
  uint64_t tsse = 0;
  for (int i = 0; i < h; ++i) {
    // Per-row sum kept in 32 bits, folded into the 64-bit total once per row.
    int32_t lsum = 0;
    for (int j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      lsum += diff;
      tsse += (uint32_t)(diff * diff);
    }
    tsum += lsum;
    a += a_stride;
    b += b_stride;
  }
  *sum = tsum;
  *sse = tsse;
}

// Returns the 64-bit sum of squared differences between two w x h blocks of
// 16-bit samples. The block dimensions are run-time arguments; the sum of
// differences that highbd_variance64() also produces is discarded.
uint64_t aom_highbd_sse_odd_size(const uint8_t *a, int a_stride,
                                 const uint8_t *b, int b_stride, int w, int h) {
  uint64_t sse;
  int64_t sum;
  highbd_variance64(a, a_stride, b, b_stride, w, h, &sse, &sum);
  return sse;
}

// Computes sse and sum with highbd_variance64() and narrows them, without
// scaling, to uint32_t and int.
static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)sse_long;
  *sum = (int)sum_long;
}

// Computes sse and sum with highbd_variance64(), then scales them down with
// rounding: sse by 4 bits and sum by 2 bits.
// NOTE(review): presumably this brings 10-bit statistics to the 8-bit scale
// (2 extra bits per sample, squared for sse) - confirm.
static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
}

// Computes sse and sum with highbd_variance64(), then scales them down with
// rounding: sse by 8 bits and sum by 4 bits.
// NOTE(review): presumably this brings 12-bit statistics to the 8-bit scale
// (4 extra bits per sample, squared for sse) - confirm.
static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
}

// Defines aom_highbd_{8,10,12}_variance<W>x<H>_c(): each stores the block's
// sse (as produced by the matching highbd_*_variance helper) in *sse and
// returns *sse - sum^2 / (W * H).
//
// The 10- and 12-bit forms compute the result in 64 bits and return 0 when it
// is negative, which can happen because sse and sum are rounded separately by
// their helpers. The 8-bit form subtracts directly in uint32_t.
#define HIGHBD_VAR(W, H)                                                       \
  uint32_t aom_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride,  \
                                              const uint8_t *b, int b_stride,  \
                                              uint32_t *sse) {                 \
    int sum;                                                                   \
    highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum);              \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));                  \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }

// Defines aom_highbd_{8,10,12}_get<S>x<S>var_c(): each forwards to the
// matching highbd_*_variance helper for an S x S block and returns sse and
// sum through the caller's pointers.
#define HIGHBD_GET_VAR(S)                                                    \
  void aom_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
                                        const uint8_t *ref, int ref_stride,  \
                                        uint32_t *sse, int *sum) {           \
    highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
  }                                                                          \
                                                                             \
  void aom_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }                                                                          \
                                                                             \
  void aom_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }

// Defines aom_highbd_{8,10,12}_mse<W>x<H>_c(): each stores the sse produced
// by the matching highbd_*_variance helper in *sse and returns it. The sum of
// differences is computed but not used.
#define HIGHBD_MSE(W, H)                                                      \
  uint32_t aom_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse) {                     \
    int sum;                                                                  \
    highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);     \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }

aom_highbd_var_filter_block2d_bil_first_pass(const uint8_t * src_ptr8,uint16_t * output_ptr,unsigned int src_pixels_per_line,int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)555 void aom_highbd_var_filter_block2d_bil_first_pass(
556     const uint8_t *src_ptr8, uint16_t *output_ptr,
557     unsigned int src_pixels_per_line, int pixel_step,
558     unsigned int output_height, unsigned int output_width,
559     const uint8_t *filter) {
560   unsigned int i, j;
561   uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
562   for (i = 0; i < output_height; ++i) {
563     for (j = 0; j < output_width; ++j) {
564       output_ptr[j] = ROUND_POWER_OF_TWO(
565           (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
566           FILTER_BITS);
567 
568       ++src_ptr;
569     }
570 
571     // Next row...
572     src_ptr += src_pixels_per_line - output_width;
573     output_ptr += output_width;
574   }
575 }
576 
aom_highbd_var_filter_block2d_bil_second_pass(const uint16_t * src_ptr,uint16_t * output_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)577 void aom_highbd_var_filter_block2d_bil_second_pass(
578     const uint16_t *src_ptr, uint16_t *output_ptr,
579     unsigned int src_pixels_per_line, unsigned int pixel_step,
580     unsigned int output_height, unsigned int output_width,
581     const uint8_t *filter) {
582   unsigned int i, j;
583 
584   for (i = 0; i < output_height; ++i) {
585     for (j = 0; j < output_width; ++j) {
586       output_ptr[j] = ROUND_POWER_OF_TWO(
587           (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
588           FILTER_BITS);
589       ++src_ptr;
590     }
591 
592     src_ptr += src_pixels_per_line - output_width;
593     output_ptr += output_width;
594   }
595 }
596 
// Generates the C reference high-bitdepth sub-pixel variance function for one
// bit depth BD and one W x H block size:
//   aom_highbd_<BD>_sub_pixel_variance<W>x<H>_c()
// The source block goes through the two-pass bilinear filter: the first pass
// uses a tap distance of 1 and the weights selected by xoffset, the second
// pass a tap distance of W and the weights selected by yoffset. The first
// pass produces H + 1 rows because the second pass reads the row below each
// output row. The filtered block and dst are then passed to
// aom_highbd_<BD>_variance<W>x<H>_c(), whose result is returned.
#define HIGHBD_SUBPIX_VAR_BD(BD, W, H)                                       \
  uint32_t aom_highbd_##BD##_sub_pixel_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    const uint8_t *const x_filter = bilinear_filters_2t[xoffset];            \
    const uint8_t *const y_filter = bilinear_filters_2t[yoffset];            \
    uint16_t horiz_out[(H + 1) * W];                                         \
    uint16_t filtered[H * W];                                                \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(src, horiz_out, src_stride, \
                                                 1, H + 1, W, x_filter);     \
    aom_highbd_var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, \
                                                  H, W, y_filter);           \
                                                                             \
    return aom_highbd_##BD##_variance##W##x##H##_c(                          \
        CONVERT_TO_BYTEPTR(filtered), W, dst, dst_stride, sse);              \
  }

// Emits the 8-, 10- and 12-bit sub-pixel variance functions for a W x H block.
#define HIGHBD_SUBPIX_VAR(W, H)  \
  HIGHBD_SUBPIX_VAR_BD(8, W, H)  \
  HIGHBD_SUBPIX_VAR_BD(10, W, H) \
  HIGHBD_SUBPIX_VAR_BD(12, W, H)
642 
// Generates aom_highbd_<BD>_sub_pixel_avg_variance<W>x<H>_c() for one bit
// depth BD and one W x H block size. The source block is bilinearly filtered
// (first pass: tap distance 1, weights from xoffset; second pass: tap distance
// W, weights from yoffset), combined with second_pred by
// aom_highbd_comp_avg_pred_c(), and the combined block is compared against dst
// by aom_highbd_<BD>_variance<W>x<H>_c().
#define HIGHBD_SUBPIX_AVG_VAR_BD(BD, W, H)                                   \
  uint32_t aom_highbd_##BD##_sub_pixel_avg_variance##W##x##H##_c(            \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    const uint8_t *const x_filter = bilinear_filters_2t[xoffset];            \
    const uint8_t *const y_filter = bilinear_filters_2t[yoffset];            \
    uint16_t horiz_out[(H + 1) * W];                                         \
    uint16_t filtered[H * W];                                                \
    DECLARE_ALIGNED(16, uint16_t, averaged[H * W]);                          \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(src, horiz_out, src_stride, \
                                                 1, H + 1, W, x_filter);     \
    aom_highbd_var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, \
                                                  H, W, y_filter);           \
                                                                             \
    aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(averaged), second_pred, W, \
                               H, CONVERT_TO_BYTEPTR(filtered), W);          \
                                                                             \
    return aom_highbd_##BD##_variance##W##x##H##_c(                          \
        CONVERT_TO_BYTEPTR(averaged), W, dst, dst_stride, sse);              \
  }

// Generates aom_highbd_<BD>_dist_wtd_sub_pixel_avg_variance<W>x<H>_c(). Same
// filtering as above, but the filtered block is combined with second_pred by
// aom_highbd_dist_wtd_comp_avg_pred() using jcp_param.
// NOTE(review): unlike the plain average variant, this one calls
// aom_highbd_dist_wtd_comp_avg_pred and aom_highbd_<BD>_variance<W>x<H>
// without the _c suffix. Presumably those names resolve through the run-time
// dispatch layer; confirm that this is intended for a _c function.
#define HIGHBD_DIST_WTD_SUBPIX_AVG_VAR_BD(BD, W, H)                          \
  uint32_t aom_highbd_##BD##_dist_wtd_sub_pixel_avg_variance##W##x##H##_c(   \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param) {   \
    const uint8_t *const x_filter = bilinear_filters_2t[xoffset];            \
    const uint8_t *const y_filter = bilinear_filters_2t[yoffset];            \
    uint16_t horiz_out[(H + 1) * W];                                         \
    uint16_t filtered[H * W];                                                \
    DECLARE_ALIGNED(16, uint16_t, averaged[H * W]);                          \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(src, horiz_out, src_stride, \
                                                 1, H + 1, W, x_filter);     \
    aom_highbd_var_filter_block2d_bil_second_pass(horiz_out, filtered, W, W, \
                                                  H, W, y_filter);           \
                                                                             \
    aom_highbd_dist_wtd_comp_avg_pred(CONVERT_TO_BYTEPTR(averaged),          \
                                      second_pred, W, H,                     \
                                      CONVERT_TO_BYTEPTR(filtered), W,       \
                                      jcp_param);                            \
                                                                             \
    return aom_highbd_##BD##_variance##W##x##H(CONVERT_TO_BYTEPTR(averaged), \
                                               W, dst, dst_stride, sse);     \
  }

// Emits, for a W x H block, the 8/10/12-bit averaging variants followed by the
// 8/10/12-bit distance-weighted variants.
#define HIGHBD_SUBPIX_AVG_VAR(W, H)           \
  HIGHBD_SUBPIX_AVG_VAR_BD(8, W, H)           \
  HIGHBD_SUBPIX_AVG_VAR_BD(10, W, H)          \
  HIGHBD_SUBPIX_AVG_VAR_BD(12, W, H)          \
  HIGHBD_DIST_WTD_SUBPIX_AVG_VAR_BD(8, W, H)  \
  HIGHBD_DIST_WTD_SUBPIX_AVG_VAR_BD(10, W, H) \
  HIGHBD_DIST_WTD_SUBPIX_AVG_VAR_BD(12, W, H)
766 
767 /* All three forms of the variance are available in the same sizes. */
/* HIGHBD_VARIANCES(W, H) expands, for one W x H block size, to HIGHBD_VAR,
 * HIGHBD_SUBPIX_VAR and HIGHBD_SUBPIX_AVG_VAR, in that order. HIGHBD_VAR is
 * defined elsewhere. */
768 #define HIGHBD_VARIANCES(W, H) \
769   HIGHBD_VAR(W, H)             \
770   HIGHBD_SUBPIX_VAR(W, H)      \
771   HIGHBD_SUBPIX_AVG_VAR(W, H)
772 
/* Instantiate all three variance families for every listed block size. */
773 HIGHBD_VARIANCES(128, 128)
774 HIGHBD_VARIANCES(128, 64)
775 HIGHBD_VARIANCES(64, 128)
776 HIGHBD_VARIANCES(64, 64)
777 HIGHBD_VARIANCES(64, 32)
778 HIGHBD_VARIANCES(32, 64)
779 HIGHBD_VARIANCES(32, 32)
780 HIGHBD_VARIANCES(32, 16)
781 HIGHBD_VARIANCES(16, 32)
782 HIGHBD_VARIANCES(16, 16)
783 HIGHBD_VARIANCES(16, 8)
784 HIGHBD_VARIANCES(8, 16)
785 HIGHBD_VARIANCES(8, 8)
786 HIGHBD_VARIANCES(8, 4)
787 HIGHBD_VARIANCES(4, 8)
788 HIGHBD_VARIANCES(4, 4)
789 HIGHBD_VARIANCES(4, 2)
790 HIGHBD_VARIANCES(2, 4)
791 HIGHBD_VARIANCES(2, 2)
792 HIGHBD_VARIANCES(4, 16)
793 HIGHBD_VARIANCES(16, 4)
794 HIGHBD_VARIANCES(8, 32)
795 HIGHBD_VARIANCES(32, 8)
796 HIGHBD_VARIANCES(16, 64)
797 HIGHBD_VARIANCES(64, 16)
798 
/* HIGHBD_GET_VAR and HIGHBD_MSE are defined elsewhere; they are instantiated
 * only for the sizes listed below. */
799 HIGHBD_GET_VAR(8)
800 HIGHBD_GET_VAR(16)
801 
802 HIGHBD_MSE(16, 16)
803 HIGHBD_MSE(16, 8)
804 HIGHBD_MSE(8, 16)
805 HIGHBD_MSE(8, 8)
806 
// Writes the rounded average of two high-bitdepth blocks to comp_pred8.
//
// All three 8-bit pointers are converted with CONVERT_TO_SHORTPTR and accessed
// as 16-bit samples. pred8 and comp_pred8 are walked with a row stride of
// `width`; ref8 is walked with a row stride of `ref_stride`.
void aom_highbd_comp_avg_pred_c(uint8_t *comp_pred8, const uint8_t *pred8,
                                int width, int height, const uint8_t *ref8,
                                int ref_stride) {
  const uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  const uint16_t *ref_row = CONVERT_TO_SHORTPTR(ref8);
  uint16_t *out_row = CONVERT_TO_SHORTPTR(comp_pred8);

  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      // (pred + ref + 1) >> 1
      const int sum = pred_row[col] + ref_row[col];
      out_row[col] = ROUND_POWER_OF_TWO(sum, 1);
    }
    out_row += width;
    pred_row += width;
    ref_row += ref_stride;
  }
}
824 
// Builds a width x height high-bitdepth prediction block in comp_pred8 from
// the reference ref8 at the sub-pixel position (subpel_x_q3, subpel_y_q3).
// In the non-scaled paths, output rows are written `width` samples apart.
//
// If xd is non-NULL and the scale factors selected for the block report
// scaling, the block is produced by av1_enc_build_one_inter_predictor() from
// mv with EIGHTTAP_REGULAR filters, and the subpel arguments are not used.
// Otherwise the filter returned by av1_get_filter(subpel_search) is applied:
//   - no fractional offset: rows are copied from ref8;
//   - x offset only:        one horizontal convolution;
//   - y offset only:        one vertical convolution;
//   - both offsets:         horizontal convolution into a temporary buffer,
//                           then vertical convolution into comp_pred8.
void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd,
                                 const struct AV1Common *const cm, int mi_row,
                                 int mi_col, const MV *const mv,
                                 uint8_t *comp_pred8, int width, int height,
                                 int subpel_x_q3, int subpel_y_q3,
                                 const uint8_t *ref8, int ref_stride, int bd,
                                 int subpel_search) {
  // expect xd == NULL only in tests
  if (xd != NULL) {
    const MB_MODE_INFO *mi = xd->mi[0];
    const int ref_num = 0;
    const int is_intrabc = is_intrabc_block(mi);
    const struct scale_factors *const sf =
        is_intrabc ? &cm->sf_identity : xd->block_ref_scale_factors[ref_num];
    const int is_scaled = av1_is_scaled(sf);

    if (is_scaled) {
      // Scaled reference: let the generic inter predictor build the block.
      int plane = 0;
      const int mi_x = mi_col * MI_SIZE;
      const int mi_y = mi_row * MI_SIZE;
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const struct buf_2d *const dst_buf = &pd->dst;
      const struct buf_2d *const pre_buf =
          is_intrabc ? dst_buf : &pd->pre[ref_num];

      InterPredParams inter_pred_params;
      inter_pred_params.conv_params = get_conv_params(0, plane, xd->bd);
      const int_interpfilters filters =
          av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
      av1_init_inter_params(
          &inter_pred_params, width, height, mi_y >> pd->subsampling_y,
          mi_x >> pd->subsampling_x, pd->subsampling_x, pd->subsampling_y,
          xd->bd, is_cur_buf_hbd(xd), is_intrabc, sf, pre_buf, filters);
      av1_enc_build_one_inter_predictor(comp_pred8, width, mv,
                                        &inter_pred_params);
      return;
    }
  }

  const InterpFilterParams *filter = av1_get_filter(subpel_search);

  if (!subpel_x_q3 && !subpel_y_q3) {
    // Full-pel position: plain row-by-row copy.
    const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
    uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
    for (int i = 0; i < height; i++) {
      memcpy(comp_pred, ref, width * sizeof(*comp_pred));
      comp_pred += width;
      ref += ref_stride;
    }
  } else if (!subpel_y_q3) {
    const int16_t *const kernel =
        av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
    aom_highbd_convolve8_horiz_c(ref8, ref_stride, comp_pred8, width, kernel,
                                 16, NULL, -1, width, height, bd);
  } else if (!subpel_x_q3) {
    const int16_t *const kernel =
        av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
    aom_highbd_convolve8_vert_c(ref8, ref_stride, comp_pred8, width, NULL, -1,
                                kernel, 16, width, height, bd);
  } else {
    DECLARE_ALIGNED(16, uint16_t,
                    temp[((MAX_SB_SIZE + 16) + 16) * MAX_SB_SIZE]);
    const int16_t *const kernel_x =
        av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
    const int16_t *const kernel_y =
        av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
    const int intermediate_height =
        (((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
    // The horizontal pass writes intermediate_height rows of MAX_SB_SIZE
    // samples into temp. Derive the bound from temp's real capacity so the
    // check cannot drift from the declaration above (the previous bound,
    // (MAX_SB_SIZE * 2 + 16) + 16 rows, was larger than the buffer).
    assert((size_t)intermediate_height * MAX_SB_SIZE <=
           sizeof(temp) / sizeof(temp[0]));
    // Start (taps / 2 - 1) rows above the block so the vertical filter has
    // the rows it needs above the first output row.
    aom_highbd_convolve8_horiz_c(ref8 - ref_stride * ((filter->taps >> 1) - 1),
                                 ref_stride, CONVERT_TO_BYTEPTR(temp),
                                 MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
                                 intermediate_height, bd);
    // Skip the same number of leading rows in temp for the vertical pass.
    aom_highbd_convolve8_vert_c(
        CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1)),
        MAX_SB_SIZE, comp_pred8, width, NULL, -1, kernel_y, 16, width, height,
        bd);
  }
}
904 
// Builds the sub-pixel reference block with aom_highbd_upsampled_pred() into
// comp_pred8, then replaces every sample with the rounded average of that
// sample and the matching sample of pred8. Both blocks are walked with a row
// stride of `width`.
void aom_highbd_comp_avg_upsampled_pred_c(
    MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
    const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
    int ref_stride, int bd, int subpel_search) {
  aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred8, width,
                            height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
                            bd, subpel_search);

  const uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *out_row = CONVERT_TO_SHORTPTR(comp_pred8);
  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      // Averaged in place: out_row currently holds the upsampled reference.
      const int sum = pred_row[col] + out_row[col];
      out_row[col] = ROUND_POWER_OF_TWO(sum, 1);
    }
    out_row += width;
    pred_row += width;
  }
}
925 
aom_highbd_dist_wtd_comp_avg_pred_c(uint8_t * comp_pred8,const uint8_t * pred8,int width,int height,const uint8_t * ref8,int ref_stride,const DIST_WTD_COMP_PARAMS * jcp_param)926 void aom_highbd_dist_wtd_comp_avg_pred_c(
927     uint8_t *comp_pred8, const uint8_t *pred8, int width, int height,
928     const uint8_t *ref8, int ref_stride,
929     const DIST_WTD_COMP_PARAMS *jcp_param) {
930   int i, j;
931   const int fwd_offset = jcp_param->fwd_offset;
932   const int bck_offset = jcp_param->bck_offset;
933   uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
934   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
935   uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
936 
937   for (i = 0; i < height; ++i) {
938     for (j = 0; j < width; ++j) {
939       int tmp = pred[j] * bck_offset + ref[j] * fwd_offset;
940       tmp = ROUND_POWER_OF_TWO(tmp, DIST_PRECISION_BITS);
941       comp_pred[j] = (uint16_t)tmp;
942     }
943     comp_pred += width;
944     pred += width;
945     ref += ref_stride;
946   }
947 }
948 
// Builds the sub-pixel reference block with aom_highbd_upsampled_pred_c()
// into comp_pred8, then overwrites each sample with
//   round((pred * bck_offset + upsampled * fwd_offset) >> DIST_PRECISION_BITS)
// using the weights from jcp_param. Both blocks are walked with a row stride
// of `width`.
void aom_highbd_dist_wtd_comp_avg_upsampled_pred_c(
    MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
    const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
    int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param,
    int subpel_search) {
  const int upsampled_weight = jcp_param->fwd_offset;
  const int pred_weight = jcp_param->bck_offset;

  aom_highbd_upsampled_pred_c(xd, cm, mi_row, mi_col, mv, comp_pred8, width,
                              height, subpel_x_q3, subpel_y_q3, ref8,
                              ref_stride, bd, subpel_search);

  const uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *out_row = CONVERT_TO_SHORTPTR(comp_pred8);
  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      // Combined in place: out_row currently holds the upsampled reference.
      const int weighted =
          pred_row[col] * pred_weight + out_row[col] * upsampled_weight;
      out_row[col] =
          (uint16_t)ROUND_POWER_OF_TWO(weighted, DIST_PRECISION_BITS);
    }
    out_row += width;
    pred_row += width;
  }
}
974 #endif  // CONFIG_AV1_HIGHBITDEPTH
975 
// Blends `ref` and `pred` into comp_pred with a per-pixel mask.
//
// Each output pixel is AOM_BLEND_A64(mask, first, second). `first` is ref and
// `second` is pred unless invert_mask is non-zero, in which case the two are
// swapped. pred and comp_pred use a row stride of `width`; ref uses
// ref_stride and mask uses mask_stride.
// NOTE(review): AOM_BLEND_A64 is defined elsewhere; presumably the mask
// value weights its first operand. Confirm against aom_dsp/blend.h.
void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                          int height, const uint8_t *ref, int ref_stride,
                          const uint8_t *mask, int mask_stride,
                          int invert_mask) {
  const uint8_t *first_src;
  const uint8_t *second_src;
  int first_stride;
  int second_stride;

  if (invert_mask) {
    first_src = pred;
    first_stride = width;
    second_src = ref;
    second_stride = ref_stride;
  } else {
    first_src = ref;
    first_stride = ref_stride;
    second_src = pred;
    second_stride = width;
  }

  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      comp_pred[col] =
          AOM_BLEND_A64(mask[col], first_src[col], second_src[col]);
    }
    comp_pred += width;
    first_src += first_stride;
    second_src += second_stride;
    mask += mask_stride;
  }
}
995 
// Masked blend of `pred` with a (possibly sub-pixel) reference block.
//
// When either sub-pixel offset is non-zero, the reference is first built into
// comp_pred by aom_upsampled_pred_c(), and that buffer (row stride `width`)
// is then used as the reference for the blend. At a full-pel position `ref`
// is blended directly with its own stride.
void aom_comp_mask_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm,
                                    int mi_row, int mi_col, const MV *const mv,
                                    uint8_t *comp_pred, const uint8_t *pred,
                                    int width, int height, int subpel_x_q3,
                                    int subpel_y_q3, const uint8_t *ref,
                                    int ref_stride, const uint8_t *mask,
                                    int mask_stride, int invert_mask,
                                    int subpel_search) {
  const uint8_t *blend_ref = ref;
  int blend_ref_stride = ref_stride;

  if (subpel_x_q3 != 0 || subpel_y_q3 != 0) {
    aom_upsampled_pred_c(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
                         subpel_x_q3, subpel_y_q3, ref, ref_stride,
                         subpel_search);
    // Blend in place against the freshly interpolated block.
    blend_ref = comp_pred;
    blend_ref_stride = width;
  }

  aom_comp_mask_pred_c(comp_pred, pred, width, height, blend_ref,
                       blend_ref_stride, mask, mask_stride, invert_mask);
}
1014 
// Generates aom_masked_sub_pixel_variance<W>x<H>_c() for one W x H block
// size. The source block goes through the two-pass bilinear filter (first
// pass: tap distance 1, weights from xoffset; second pass: tap distance W,
// weights from yoffset). The filtered block is then blended with second_pred
// by aom_comp_mask_pred_c() using msk / invert_mask, and the blended block is
// compared against ref by aom_variance<W>x<H>_c(). The first pass produces
// H + 1 rows because the second pass reads the row below each output row.
#define MASK_SUBPIX_VAR(W, H)                                                \
  unsigned int aom_masked_sub_pixel_variance##W##x##H##_c(                   \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,        \
      const uint8_t *msk, int msk_stride, int invert_mask,                   \
      unsigned int *sse) {                                                   \
    const uint8_t *const x_filter = bilinear_filters_2t[xoffset];            \
    const uint8_t *const y_filter = bilinear_filters_2t[yoffset];            \
    uint16_t horiz_out[(H + 1) * W];                                         \
    uint8_t filtered[H * W];                                                 \
    DECLARE_ALIGNED(16, uint8_t, blended[H * W]);                            \
                                                                             \
    aom_var_filter_block2d_bil_first_pass_c(src, horiz_out, src_stride, 1,   \
                                            H + 1, W, x_filter);             \
    aom_var_filter_block2d_bil_second_pass_c(horiz_out, filtered, W, W, H,   \
                                             W, y_filter);                   \
                                                                             \
    aom_comp_mask_pred_c(blended, second_pred, W, H, filtered, W, msk,       \
                         msk_stride, invert_mask);                           \
    return aom_variance##W##x##H##_c(blended, W, ref, ref_stride, sse);      \
  }
1034 
/* Instantiate aom_masked_sub_pixel_variance<W>x<H>_c for every listed block
 * size. */
1035 MASK_SUBPIX_VAR(4, 4)
1036 MASK_SUBPIX_VAR(4, 8)
1037 MASK_SUBPIX_VAR(8, 4)
1038 MASK_SUBPIX_VAR(8, 8)
1039 MASK_SUBPIX_VAR(8, 16)
1040 MASK_SUBPIX_VAR(16, 8)
1041 MASK_SUBPIX_VAR(16, 16)
1042 MASK_SUBPIX_VAR(16, 32)
1043 MASK_SUBPIX_VAR(32, 16)
1044 MASK_SUBPIX_VAR(32, 32)
1045 MASK_SUBPIX_VAR(32, 64)
1046 MASK_SUBPIX_VAR(64, 32)
1047 MASK_SUBPIX_VAR(64, 64)
1048 MASK_SUBPIX_VAR(64, 128)
1049 MASK_SUBPIX_VAR(128, 64)
1050 MASK_SUBPIX_VAR(128, 128)
1051 MASK_SUBPIX_VAR(4, 16)
1052 MASK_SUBPIX_VAR(16, 4)
1053 MASK_SUBPIX_VAR(8, 32)
1054 MASK_SUBPIX_VAR(32, 8)
1055 MASK_SUBPIX_VAR(16, 64)
1056 MASK_SUBPIX_VAR(64, 16)
1057 
1058 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth masked blend of `ref8` and `pred8` into comp_pred8.
//
// The three 8-bit pointers are converted with CONVERT_TO_SHORTPTR and accessed
// as 16-bit samples. Each output sample is AOM_BLEND_A64(mask, first, second),
// where `first` is ref and `second` is pred unless invert_mask is non-zero, in
// which case the two are swapped. pred and comp_pred use a row stride of
// `width`; ref uses ref_stride and mask uses mask_stride.
void aom_highbd_comp_mask_pred_c(uint8_t *comp_pred8, const uint8_t *pred8,
                                 int width, int height, const uint8_t *ref8,
                                 int ref_stride, const uint8_t *mask,
                                 int mask_stride, int invert_mask) {
  const uint16_t *const pred = CONVERT_TO_SHORTPTR(pred8);
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
  uint16_t *out_row = CONVERT_TO_SHORTPTR(comp_pred8);

  // Pick the operand order once instead of testing invert_mask per sample.
  const uint16_t *first_src = invert_mask ? pred : ref;
  const uint16_t *second_src = invert_mask ? ref : pred;
  const int first_stride = invert_mask ? width : ref_stride;
  const int second_stride = invert_mask ? ref_stride : width;

  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      out_row[col] = AOM_BLEND_A64(mask[col], first_src[col], second_src[col]);
    }
    out_row += width;
    first_src += first_stride;
    second_src += second_stride;
    mask += mask_stride;
  }
}
1080 
// High-bitdepth masked blend of `pred8` with a sub-pixel reference block.
//
// The reference is always built into comp_pred8 by aom_highbd_upsampled_pred()
// first; there is no full-pel shortcut here. comp_pred8 is then blended in
// place, being passed to aom_highbd_comp_mask_pred() both as the output and as
// the reference with a row stride of `width`.
void aom_highbd_comp_mask_upsampled_pred(
    MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
    const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
    int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
    int bd, int subpel_search) {
  aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred8, width,
                            height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
                            bd, subpel_search);

  const uint8_t *const upsampled_ref8 = comp_pred8;
  const int upsampled_stride = width;
  aom_highbd_comp_mask_pred(comp_pred8, pred8, width, height, upsampled_ref8,
                            upsampled_stride, mask, mask_stride, invert_mask);
}
1093 
1094 #define HIGHBD_MASK_SUBPIX_VAR(W, H)                                           \
1095   unsigned int aom_highbd_8_masked_sub_pixel_variance##W##x##H##_c(            \
1096       const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
1097       const uint8_t *ref, int ref_stride, const uint8_t *second_pred,          \
1098       const uint8_t *msk, int msk_stride, int invert_mask,                     \
1099       unsigned int *sse) {                                                     \
1100     uint16_t fdata3[(H + 1) * W];                                              \
1101     uint16_t temp2[H * W];                                                     \
1102     DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
1103                                                                                \
1104     aom_highbd_var_filter_block2d_bil_first_pass(                              \
1105         src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
1106     aom_highbd_var_filter_block2d_bil_second_pass(                             \
1107         fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
1108                                                                                \
1109     aom_highbd_comp_mask_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H,  \
1110                                 CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
1111                                 invert_mask);                                  \
1112                                                                                \
1113     return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,    \
1114                                               ref, ref_stride, sse);           \
1115   }                                                                            \
1116                                                                                \
1117   unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c(           \
1118       const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
1119       const uint8_t *ref, int ref_stride, const uint8_t *second_pred,          \
1120       const uint8_t *msk, int msk_stride, int invert_mask,                     \
1121       unsigned int *sse) {                                                     \
1122     uint16_t fdata3[(H + 1) * W];                                              \
1123     uint16_t temp2[H * W];                                                     \
1124     DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
1125                                                                                \
1126     aom_highbd_var_filter_block2d_bil_first_pass(                              \
1127         src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
1128     aom_highbd_var_filter_block2d_bil_second_pass(                             \
1129         fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
1130                                                                                \
1131     aom_highbd_comp_mask_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H,  \
1132                                 CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
1133                                 invert_mask);                                  \
1134                                                                                \
1135     return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,   \
1136                                                ref, ref_stride, sse);          \
1137   }                                                                            \
1138                                                                                \
1139   unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c(           \
1140       const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
1141       const uint8_t *ref, int ref_stride, const uint8_t *second_pred,          \
1142       const uint8_t *msk, int msk_stride, int invert_mask,                     \
1143       unsigned int *sse) {                                                     \
1144     uint16_t fdata3[(H + 1) * W];                                              \
1145     uint16_t temp2[H * W];                                                     \
1146     DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
1147                                                                                \
1148     aom_highbd_var_filter_block2d_bil_first_pass(                              \
1149         src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
1150     aom_highbd_var_filter_block2d_bil_second_pass(                             \
1151         fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
1152                                                                                \
1153     aom_highbd_comp_mask_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H,  \
1154                                 CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
1155                                 invert_mask);                                  \
1156                                                                                \
1157     return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,   \
1158                                                ref, ref_stride, sse);          \
1159   }
1160 
1161 HIGHBD_MASK_SUBPIX_VAR(4, 4)
1162 HIGHBD_MASK_SUBPIX_VAR(4, 8)
1163 HIGHBD_MASK_SUBPIX_VAR(8, 4)
1164 HIGHBD_MASK_SUBPIX_VAR(8, 8)
1165 HIGHBD_MASK_SUBPIX_VAR(8, 16)
1166 HIGHBD_MASK_SUBPIX_VAR(16, 8)
1167 HIGHBD_MASK_SUBPIX_VAR(16, 16)
1168 HIGHBD_MASK_SUBPIX_VAR(16, 32)
1169 HIGHBD_MASK_SUBPIX_VAR(32, 16)
1170 HIGHBD_MASK_SUBPIX_VAR(32, 32)
1171 HIGHBD_MASK_SUBPIX_VAR(32, 64)
1172 HIGHBD_MASK_SUBPIX_VAR(64, 32)
1173 HIGHBD_MASK_SUBPIX_VAR(64, 64)
1174 HIGHBD_MASK_SUBPIX_VAR(64, 128)
1175 HIGHBD_MASK_SUBPIX_VAR(128, 64)
1176 HIGHBD_MASK_SUBPIX_VAR(128, 128)
1177 HIGHBD_MASK_SUBPIX_VAR(4, 16)
1178 HIGHBD_MASK_SUBPIX_VAR(16, 4)
1179 HIGHBD_MASK_SUBPIX_VAR(8, 32)
1180 HIGHBD_MASK_SUBPIX_VAR(32, 8)
1181 HIGHBD_MASK_SUBPIX_VAR(16, 64)
1182 HIGHBD_MASK_SUBPIX_VAR(64, 16)
1183 #endif  // CONFIG_AV1_HIGHBITDEPTH
1184 
obmc_variance(const uint8_t * pre,int pre_stride,const int32_t * wsrc,const int32_t * mask,int w,int h,unsigned int * sse,int * sum)1185 static INLINE void obmc_variance(const uint8_t *pre, int pre_stride,
1186                                  const int32_t *wsrc, const int32_t *mask,
1187                                  int w, int h, unsigned int *sse, int *sum) {
1188   int i, j;
1189 
1190   *sse = 0;
1191   *sum = 0;
1192 
1193   for (i = 0; i < h; i++) {
1194     for (j = 0; j < w; j++) {
1195       int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
1196       *sum += diff;
1197       *sse += diff * diff;
1198     }
1199 
1200     pre += pre_stride;
1201     wsrc += w;
1202     mask += w;
1203   }
1204 }
1205 
// Defines aom_obmc_variance<W>x<H>_c(). The generated function lets
// obmc_variance() fill *sse and a local error sum for a W x H block, then
// returns *sse - (sum * sum) / (W * H). The product sum * sum is formed in
// 64-bit arithmetic before the division.
#define OBMC_VAR(W, H)                                               \
  unsigned int aom_obmc_variance##W##x##H##_c(                       \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,       \
      const int32_t *mask, unsigned int *sse) {                      \
    int err_sum;                                                     \
    int64_t sq_sum_avg;                                              \
                                                                     \
    obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &err_sum); \
    sq_sum_avg = ((int64_t)err_sum * err_sum) / (W * H);             \
    return *sse - (unsigned int)sq_sum_avg;                          \
  }
1214 
// Defines aom_obmc_sub_pixel_variance<W>x<H>_c(). The generated function
//   1. runs aom_var_filter_block2d_bil_first_pass_c() on `pre` with the
//      bilinear_filters_2t[xoffset] taps, writing to a (H + 1) * W 16-bit
//      scratch buffer,
//   2. runs aom_var_filter_block2d_bil_second_pass_c() on that buffer with the
//      bilinear_filters_2t[yoffset] taps, writing to a H * W 8-bit buffer,
//   3. returns aom_obmc_variance<W>x<H>_c() of the filtered buffer (stride W).
#define OBMC_SUBPIX_VAR(W, H)                                                  \
  unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c(                       \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t stage1[(H + 1) * W];                                              \
    uint8_t stage2[H * W];                                                     \
                                                                               \
    aom_var_filter_block2d_bil_first_pass_c(                                   \
        pre, stage1, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_var_filter_block2d_bil_second_pass_c(                                  \
        stage1, stage2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
                                                                               \
    return aom_obmc_variance##W##x##H##_c(stage2, W, wsrc, mask, sse);         \
  }
1229 
1230 OBMC_VAR(4, 4)
1231 OBMC_SUBPIX_VAR(4, 4)
1232 
1233 OBMC_VAR(4, 8)
1234 OBMC_SUBPIX_VAR(4, 8)
1235 
1236 OBMC_VAR(8, 4)
1237 OBMC_SUBPIX_VAR(8, 4)
1238 
1239 OBMC_VAR(8, 8)
1240 OBMC_SUBPIX_VAR(8, 8)
1241 
1242 OBMC_VAR(8, 16)
1243 OBMC_SUBPIX_VAR(8, 16)
1244 
1245 OBMC_VAR(16, 8)
1246 OBMC_SUBPIX_VAR(16, 8)
1247 
1248 OBMC_VAR(16, 16)
1249 OBMC_SUBPIX_VAR(16, 16)
1250 
1251 OBMC_VAR(16, 32)
1252 OBMC_SUBPIX_VAR(16, 32)
1253 
1254 OBMC_VAR(32, 16)
1255 OBMC_SUBPIX_VAR(32, 16)
1256 
1257 OBMC_VAR(32, 32)
1258 OBMC_SUBPIX_VAR(32, 32)
1259 
1260 OBMC_VAR(32, 64)
1261 OBMC_SUBPIX_VAR(32, 64)
1262 
1263 OBMC_VAR(64, 32)
1264 OBMC_SUBPIX_VAR(64, 32)
1265 
1266 OBMC_VAR(64, 64)
1267 OBMC_SUBPIX_VAR(64, 64)
1268 
1269 OBMC_VAR(64, 128)
1270 OBMC_SUBPIX_VAR(64, 128)
1271 
1272 OBMC_VAR(128, 64)
1273 OBMC_SUBPIX_VAR(128, 64)
1274 
1275 OBMC_VAR(128, 128)
1276 OBMC_SUBPIX_VAR(128, 128)
1277 
1278 OBMC_VAR(4, 16)
1279 OBMC_SUBPIX_VAR(4, 16)
1280 OBMC_VAR(16, 4)
1281 OBMC_SUBPIX_VAR(16, 4)
1282 OBMC_VAR(8, 32)
1283 OBMC_SUBPIX_VAR(8, 32)
1284 OBMC_VAR(32, 8)
1285 OBMC_SUBPIX_VAR(32, 8)
1286 OBMC_VAR(16, 64)
1287 OBMC_SUBPIX_VAR(16, 64)
1288 OBMC_VAR(64, 16)
1289 OBMC_SUBPIX_VAR(64, 16)
1290 
1291 #if CONFIG_AV1_HIGHBITDEPTH
highbd_obmc_variance64(const uint8_t * pre8,int pre_stride,const int32_t * wsrc,const int32_t * mask,int w,int h,uint64_t * sse,int64_t * sum)1292 static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride,
1293                                           const int32_t *wsrc,
1294                                           const int32_t *mask, int w, int h,
1295                                           uint64_t *sse, int64_t *sum) {
1296   int i, j;
1297   uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
1298 
1299   *sse = 0;
1300   *sum = 0;
1301 
1302   for (i = 0; i < h; i++) {
1303     for (j = 0; j < w; j++) {
1304       int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
1305       *sum += diff;
1306       *sse += diff * diff;
1307     }
1308 
1309     pre += pre_stride;
1310     wsrc += w;
1311     mask += w;
1312   }
1313 }
1314 
highbd_obmc_variance(const uint8_t * pre8,int pre_stride,const int32_t * wsrc,const int32_t * mask,int w,int h,unsigned int * sse,int * sum)1315 static INLINE void highbd_obmc_variance(const uint8_t *pre8, int pre_stride,
1316                                         const int32_t *wsrc,
1317                                         const int32_t *mask, int w, int h,
1318                                         unsigned int *sse, int *sum) {
1319   int64_t sum64;
1320   uint64_t sse64;
1321   highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
1322   *sum = (int)sum64;
1323   *sse = (unsigned int)sse64;
1324 }
1325 
highbd_10_obmc_variance(const uint8_t * pre8,int pre_stride,const int32_t * wsrc,const int32_t * mask,int w,int h,unsigned int * sse,int * sum)1326 static INLINE void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride,
1327                                            const int32_t *wsrc,
1328                                            const int32_t *mask, int w, int h,
1329                                            unsigned int *sse, int *sum) {
1330   int64_t sum64;
1331   uint64_t sse64;
1332   highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
1333   *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
1334   *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
1335 }
1336 
highbd_12_obmc_variance(const uint8_t * pre8,int pre_stride,const int32_t * wsrc,const int32_t * mask,int w,int h,unsigned int * sse,int * sum)1337 static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
1338                                            const int32_t *wsrc,
1339                                            const int32_t *mask, int w, int h,
1340                                            unsigned int *sse, int *sum) {
1341   int64_t sum64;
1342   uint64_t sse64;
1343   highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
1344   *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
1345   *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
1346 }
1347 
// Defines three OBMC variance functions for a W x H block:
//   aom_highbd_obmc_variance<W>x<H>_c()    (uses highbd_obmc_variance)
//   aom_highbd_10_obmc_variance<W>x<H>_c() (uses highbd_10_obmc_variance)
//   aom_highbd_12_obmc_variance<W>x<H>_c() (uses highbd_12_obmc_variance)
// Each one stores the sum of squared errors in *sse and evaluates
// *sse - (sum * sum) / (W * H), with sum * sum formed in 64-bit arithmetic.
// The first function returns that difference through unsigned arithmetic; the
// 10- and 12-bit functions compute it as a signed 64-bit value and return 0
// when it is negative.
#define HIGHBD_OBMC_VAR(W, H)                                                  \
  unsigned int aom_highbd_obmc_variance##W##x##H##_c(                          \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,                 \
      const int32_t *mask, unsigned int *sse) {                                \
    int err_sum;                                                               \
    int64_t sq_sum_avg;                                                        \
                                                                               \
    highbd_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &err_sum);    \
    sq_sum_avg = ((int64_t)err_sum * err_sum) / (W * H);                       \
    return *sse - (unsigned int)sq_sum_avg;                                    \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_10_obmc_variance##W##x##H##_c(                       \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,                 \
      const int32_t *mask, unsigned int *sse) {                                \
    int err_sum;                                                               \
    int64_t variance;                                                          \
                                                                               \
    highbd_10_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &err_sum); \
    variance = (int64_t)(*sse) - (((int64_t)err_sum * err_sum) / (W * H));     \
    if (variance < 0) return 0;                                                \
    return (uint32_t)variance;                                                 \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_12_obmc_variance##W##x##H##_c(                       \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,                 \
      const int32_t *mask, unsigned int *sse) {                                \
    int err_sum;                                                               \
    int64_t variance;                                                          \
                                                                               \
    highbd_12_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &err_sum); \
    variance = (int64_t)(*sse) - (((int64_t)err_sum * err_sum) / (W * H));     \
    if (variance < 0) return 0;                                                \
    return (uint32_t)variance;                                                 \
  }
1376 
// Defines three sub-pixel OBMC variance functions for a W x H block:
//   aom_highbd_obmc_sub_pixel_variance<W>x<H>_c()
//   aom_highbd_10_obmc_sub_pixel_variance<W>x<H>_c()
//   aom_highbd_12_obmc_sub_pixel_variance<W>x<H>_c()
// All three share the same filtering steps:
//   1. aom_highbd_var_filter_block2d_bil_first_pass() on `pre` with the
//      bilinear_filters_2t[xoffset] taps, into a (H + 1) * W 16-bit buffer,
//   2. aom_highbd_var_filter_block2d_bil_second_pass() on that buffer with the
//      bilinear_filters_2t[yoffset] taps, into a H * W 16-bit buffer.
// They differ only in which aom_highbd[_10|_12]_obmc_variance<W>x<H>_c()
// function is applied to the filtered buffer, which is passed through
// CONVERT_TO_BYTEPTR() with a stride of W.
#define HIGHBD_OBMC_SUBPIX_VAR(W, H)                                           \
  unsigned int aom_highbd_obmc_sub_pixel_variance##W##x##H##_c(                \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t stage1[(H + 1) * W];                                              \
    uint16_t stage2[H * W];                                                    \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, stage1, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        stage1, stage2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
                                                                               \
    return aom_highbd_obmc_variance##W##x##H##_c(                              \
        CONVERT_TO_BYTEPTR(stage2), W, wsrc, mask, sse);                       \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_10_obmc_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t stage1[(H + 1) * W];                                              \
    uint16_t stage2[H * W];                                                    \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, stage1, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        stage1, stage2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
                                                                               \
    return aom_highbd_10_obmc_variance##W##x##H##_c(                           \
        CONVERT_TO_BYTEPTR(stage2), W, wsrc, mask, sse);                       \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_12_obmc_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t stage1[(H + 1) * W];                                              \
    uint16_t stage2[H * W];                                                    \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, stage1, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        stage1, stage2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
                                                                               \
    return aom_highbd_12_obmc_variance##W##x##H##_c(                           \
        CONVERT_TO_BYTEPTR(stage2), W, wsrc, mask, sse);                       \
  }
1422 
1423 HIGHBD_OBMC_VAR(4, 4)
1424 HIGHBD_OBMC_SUBPIX_VAR(4, 4)
1425 
1426 HIGHBD_OBMC_VAR(4, 8)
1427 HIGHBD_OBMC_SUBPIX_VAR(4, 8)
1428 
1429 HIGHBD_OBMC_VAR(8, 4)
1430 HIGHBD_OBMC_SUBPIX_VAR(8, 4)
1431 
1432 HIGHBD_OBMC_VAR(8, 8)
1433 HIGHBD_OBMC_SUBPIX_VAR(8, 8)
1434 
1435 HIGHBD_OBMC_VAR(8, 16)
1436 HIGHBD_OBMC_SUBPIX_VAR(8, 16)
1437 
1438 HIGHBD_OBMC_VAR(16, 8)
1439 HIGHBD_OBMC_SUBPIX_VAR(16, 8)
1440 
1441 HIGHBD_OBMC_VAR(16, 16)
1442 HIGHBD_OBMC_SUBPIX_VAR(16, 16)
1443 
1444 HIGHBD_OBMC_VAR(16, 32)
1445 HIGHBD_OBMC_SUBPIX_VAR(16, 32)
1446 
1447 HIGHBD_OBMC_VAR(32, 16)
1448 HIGHBD_OBMC_SUBPIX_VAR(32, 16)
1449 
1450 HIGHBD_OBMC_VAR(32, 32)
1451 HIGHBD_OBMC_SUBPIX_VAR(32, 32)
1452 
1453 HIGHBD_OBMC_VAR(32, 64)
1454 HIGHBD_OBMC_SUBPIX_VAR(32, 64)
1455 
1456 HIGHBD_OBMC_VAR(64, 32)
1457 HIGHBD_OBMC_SUBPIX_VAR(64, 32)
1458 
1459 HIGHBD_OBMC_VAR(64, 64)
1460 HIGHBD_OBMC_SUBPIX_VAR(64, 64)
1461 
1462 HIGHBD_OBMC_VAR(64, 128)
1463 HIGHBD_OBMC_SUBPIX_VAR(64, 128)
1464 
1465 HIGHBD_OBMC_VAR(128, 64)
1466 HIGHBD_OBMC_SUBPIX_VAR(128, 64)
1467 
1468 HIGHBD_OBMC_VAR(128, 128)
1469 HIGHBD_OBMC_SUBPIX_VAR(128, 128)
1470 
1471 HIGHBD_OBMC_VAR(4, 16)
1472 HIGHBD_OBMC_SUBPIX_VAR(4, 16)
1473 HIGHBD_OBMC_VAR(16, 4)
1474 HIGHBD_OBMC_SUBPIX_VAR(16, 4)
1475 HIGHBD_OBMC_VAR(8, 32)
1476 HIGHBD_OBMC_SUBPIX_VAR(8, 32)
1477 HIGHBD_OBMC_VAR(32, 8)
1478 HIGHBD_OBMC_SUBPIX_VAR(32, 8)
1479 HIGHBD_OBMC_VAR(16, 64)
1480 HIGHBD_OBMC_SUBPIX_VAR(16, 64)
1481 HIGHBD_OBMC_VAR(64, 16)
1482 HIGHBD_OBMC_SUBPIX_VAR(64, 16)
1483 #endif  // CONFIG_AV1_HIGHBITDEPTH
1484