• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <stdio.h>
14 #include <limits.h>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/aom_scale_rtcd.h"
19 
20 #include "aom/aom_integer.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_ports/aom_once.h"
23 
24 #include "av1/common/av1_common_int.h"
25 #include "av1/common/blockd.h"
26 #include "av1/common/mvref_common.h"
27 #include "av1/common/obmc.h"
28 #include "av1/common/reconinter.h"
29 #include "av1/common/reconintra.h"
30 
31 // This function will determine whether or not to create a warped
32 // prediction.
// Decides whether a warped (local or global) motion prediction should be
// built for this block. Returns 1 and fills |final_warp_params| (when
// non-NULL) with the model to use; returns 0 otherwise.
static int allow_warp(const MB_MODE_INFO *const mbmi,
                      const WarpTypesAllowed *const warp_types,
                      const WarpedMotionParams *const gm_params,
                      int build_for_obmc, const struct scale_factors *const sf,
                      WarpedMotionParams *final_warp_params) {
  // Note: As per the spec, we must test the fixed point scales here, which are
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
  // have 1 << 10 precision).
  if (av1_is_scaled(sf)) return 0;

  if (final_warp_params != NULL) *final_warp_params = default_warp_params;

  // OBMC blending never uses warped prediction.
  if (build_for_obmc) return 0;

  // Prefer the block-level (local) warp model; fall back to the frame-level
  // global model. Either must carry a valid parameter set.
  const WarpedMotionParams *chosen = NULL;
  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
    chosen = &mbmi->wm_params;
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
    chosen = gm_params;
  }

  if (chosen == NULL) return 0;
  if (final_warp_params != NULL) *final_warp_params = *chosen;
  return 1;
}
59 
// Initializes the warp parameters in |inter_pred_params| and switches the
// prediction mode to WARP_PRED when warping applies to this block.
void av1_init_warp_params(InterPredParams *inter_pred_params,
                          const WarpTypesAllowed *warp_types, int ref,
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
  // Warped prediction requires at least an 8x8 block and is disabled when
  // the frame forces integer motion vectors.
  const int too_small = inter_pred_params->block_height < 8 ||
                        inter_pred_params->block_width < 8;
  if (too_small || xd->cur_frame_force_integer_mv) return;

  const WarpedMotionParams *gm = &xd->global_motion[mi->ref_frame[ref]];
  if (allow_warp(mi, warp_types, gm, /*build_for_obmc=*/0,
                 inter_pred_params->scale_factors,
                 &inter_pred_params->warp_params)) {
    inter_pred_params->mode = WARP_PRED;
  }
}
74 
// Generates an inter prediction from |src| into |dst| using either the
// translational (subpel interpolation) or warped path, as selected by
// inter_pred_params->mode.
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride,
                              InterPredParams *inter_pred_params,
                              const SubpelParams *subpel_params) {
  // A compound prediction must have an intermediate conv buffer to blend into.
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
                 inter_pred_params->conv_params.dst != NULL));

  switch (inter_pred_params->mode) {
    case TRANSLATION_PRED:
#if CONFIG_AV1_HIGHBITDEPTH
      if (inter_pred_params->use_hbd_buf) {
        highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                               inter_pred_params->block_width,
                               inter_pred_params->block_height,
                               &inter_pred_params->conv_params,
                               inter_pred_params->interp_filter_params,
                               inter_pred_params->bit_depth);
        break;
      }
#endif
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                      inter_pred_params->block_width,
                      inter_pred_params->block_height,
                      &inter_pred_params->conv_params,
                      inter_pred_params->interp_filter_params);
      break;
    // TODO(jingning): av1_warp_plane() can be further cleaned up.
    case WARP_PRED:
      av1_warp_plane(
          &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
          inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
          inter_pred_params->ref_frame_buf.width,
          inter_pred_params->ref_frame_buf.height,
          inter_pred_params->ref_frame_buf.stride, dst,
          inter_pred_params->pix_col, inter_pred_params->pix_row,
          inter_pred_params->block_width, inter_pred_params->block_height,
          dst_stride, inter_pred_params->subsampling_x,
          inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
      break;
    default: assert(0 && "Unsupported inter_pred_params->mode");
  }
}
122 
// 1-D horizontal profiles (MASK_MASTER_SIZE taps, values in [0, 64]) used by
// init_wedge_master_masks() to build the 2-D master wedge masks: the odd and
// even oblique profiles are interleaved row by row (with a per-row shift) to
// form the 63-degree oblique master, and the vertical profile fills every row
// of the vertical master.
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
141 
shift_copy(const uint8_t * src,uint8_t * dst,int shift,int width)142 static AOM_INLINE void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
143                                   int width) {
144   if (shift >= 0) {
145     memcpy(dst + shift, src, width - shift);
146     memset(dst, src[0], shift);
147   } else {
148     shift = -shift;
149     memcpy(dst, src + shift, width - shift);
150     memset(dst + width - shift, src[width - 1], shift);
151   }
152 }
153 
/* clang-format off */
// Per-block-size sign-flip flag for each of the MAX_WEDGE_TYPES wedge
// partitions: a 1 means the master mask's sign is inverted for that wedge
// index (consumed via neg ^ signflip in get_wedge_mask_inplace()). Rows are
// indexed by BLOCK_SIZE; rows marked "not used" correspond to block sizes
// with no wedge support (wedge_types == 0 in av1_wedge_params_lookup).
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },  // BLOCK_8X8
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },  // BLOCK_8X16
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },  // BLOCK_16X8
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },  // BLOCK_16X16
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },  // BLOCK_16X32
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },  // BLOCK_32X16
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },  // BLOCK_32X32
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },  // BLOCK_8X32
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },  // BLOCK_32X8
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
};
/* clang-format on */
181 
// Master wedge masks, indexed as [negative][direction]; filled once by
// init_wedge_master_masks().
DECLARE_ALIGNED(
    16, static uint8_t,
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// Backing storage for the per-block-size wedge masks produced by
// init_wedge_masks().
// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
// on the sum of all mask sizes up to an including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);

// Smooth interintra blending masks, one per (mode, block size); filled by
// init_smooth_interintra_masks().
DECLARE_ALIGNED(16, static uint8_t,
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
                                          [MAX_WEDGE_SQUARE]);

// Pointer tables into wedge_mask_buf, referenced by av1_wedge_params_lookup.
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
197 
// Wedge codebooks: 16 partitions per block-size class, each entry being
// { direction, x_offset, y_offset } with offsets coded in eighths of the
// block width/height (see get_wedge_mask_inplace()). "hgtw" = height greater
// than width, "hltw" = height less than width, "heqw" = height equals width.
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
230 
// Per-block-size wedge configuration: { wedge_types, codebook, signflip,
// masks }. Entries with wedge_types == 0 mean wedge compound is unavailable
// for that block size.
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
  { 0, NULL, NULL, NULL },  // BLOCK_4X4
  { 0, NULL, NULL, NULL },  // BLOCK_4X8
  { 0, NULL, NULL, NULL },  // BLOCK_8X4
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
    wedge_masks[BLOCK_8X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
    wedge_masks[BLOCK_8X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
    wedge_masks[BLOCK_16X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
    wedge_masks[BLOCK_16X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
    wedge_masks[BLOCK_16X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
    wedge_masks[BLOCK_32X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
    wedge_masks[BLOCK_32X32] },
  { 0, NULL, NULL, NULL },  // BLOCK_32X64
  { 0, NULL, NULL, NULL },  // BLOCK_64X32
  { 0, NULL, NULL, NULL },  // BLOCK_64X64
  { 0, NULL, NULL, NULL },  // BLOCK_64X128
  { 0, NULL, NULL, NULL },  // BLOCK_128X64
  { 0, NULL, NULL, NULL },  // BLOCK_128X128
  { 0, NULL, NULL, NULL },  // BLOCK_4X16
  { 0, NULL, NULL, NULL },  // BLOCK_16X4
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
    wedge_masks[BLOCK_8X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
    wedge_masks[BLOCK_32X8] },
  { 0, NULL, NULL, NULL },  // BLOCK_16X64
  { 0, NULL, NULL, NULL },  // BLOCK_64X16
};
264 
get_wedge_mask_inplace(int wedge_index,int neg,BLOCK_SIZE sb_type)265 static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
266                                              BLOCK_SIZE sb_type) {
267   const uint8_t *master;
268   const int bh = block_size_high[sb_type];
269   const int bw = block_size_wide[sb_type];
270   const wedge_code_type *a =
271       av1_wedge_params_lookup[sb_type].codebook + wedge_index;
272   int woff, hoff;
273   const uint8_t wsignflip =
274       av1_wedge_params_lookup[sb_type].signflip[wedge_index];
275 
276   assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
277   woff = (a->x_offset * bw) >> 3;
278   hoff = (a->y_offset * bh) >> 3;
279   master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
280            MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
281            MASK_MASTER_SIZE / 2 - woff;
282   return master;
283 }
284 
av1_get_compound_type_mask(const INTERINTER_COMPOUND_DATA * const comp_data,BLOCK_SIZE sb_type)285 const uint8_t *av1_get_compound_type_mask(
286     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
287   (void)sb_type;
288   switch (comp_data->type) {
289     case COMPOUND_WEDGE:
290       return av1_get_contiguous_soft_mask(comp_data->wedge_index,
291                                           comp_data->wedge_sign, sb_type);
292     default: return comp_data->seg_mask;
293   }
294 }
295 
diffwtd_mask_d16(uint8_t * mask,int which_inverse,int mask_base,const CONV_BUF_TYPE * src0,int src0_stride,const CONV_BUF_TYPE * src1,int src1_stride,int h,int w,ConvolveParams * conv_params,int bd)296 static AOM_INLINE void diffwtd_mask_d16(
297     uint8_t *mask, int which_inverse, int mask_base, const CONV_BUF_TYPE *src0,
298     int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
299     ConvolveParams *conv_params, int bd) {
300   int round =
301       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
302   int i, j, m, diff;
303   for (i = 0; i < h; ++i) {
304     for (j = 0; j < w; ++j) {
305       diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
306       diff = ROUND_POWER_OF_TWO(diff, round);
307       m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
308       mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
309     }
310   }
311 }
312 
// C reference implementation: builds a DIFFWTD compound mask from two d16
// intermediate predictions. Only DIFFWTD_38 and its inverse are defined.
void av1_build_compound_diffwtd_mask_d16_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
                     conv_params, bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
                     conv_params, bd);
  } else {
    assert(0);
  }
}
329 
diffwtd_mask(uint8_t * mask,int which_inverse,int mask_base,const uint8_t * src0,int src0_stride,const uint8_t * src1,int src1_stride,int h,int w)330 static AOM_INLINE void diffwtd_mask(uint8_t *mask, int which_inverse,
331                                     int mask_base, const uint8_t *src0,
332                                     int src0_stride, const uint8_t *src1,
333                                     int src1_stride, int h, int w) {
334   int i, j, m, diff;
335   for (i = 0; i < h; ++i) {
336     for (j = 0; j < w; ++j) {
337       diff =
338           abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
339       m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
340       mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
341     }
342   }
343 }
344 
// C reference implementation: builds a DIFFWTD compound mask from two 8-bit
// predictions. Only DIFFWTD_38 and its inverse are defined.
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
                                       DIFFWTD_MASK_TYPE mask_type,
                                       const uint8_t *src0, int src0_stride,
                                       const uint8_t *src1, int src1_stride,
                                       int h, int w) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
  } else {
    assert(0);
  }
}
360 
// Derives a difference-weighted blending mask from two high-bitdepth
// predictions. The four near-identical loop bodies hoist the |bd| and
// |which_inverse| tests out of the inner loops — presumably for speed in
// this force-inlined helper — so the loops themselves stay branch-free.
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
    const unsigned int bd) {
  assert(bd >= 8);
  if (bd == 8) {
    // 8-bit: no normalization shift is needed before weighting.
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;  // complementary mask
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  } else {
    // Higher bit depths: scale the pixel difference down to 8-bit range
    // before applying DIFF_FACTOR.
    const unsigned int bd_shift = bd - 8;
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;  // complementary mask
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  }
}
423 
// C reference implementation: builds a DIFFWTD compound mask from two
// high-bitdepth predictions (passed as CONVERT_TO_SHORTPTR-compatible
// byte pointers). Only DIFFWTD_38 and its inverse are defined.
void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else {
    assert(0);
  }
}
440 
// Builds the 2-D master wedge masks in wedge_mask_obl. The oblique-63 and
// vertical masters are generated first from the 1-D profile tables; the
// other directions are then derived by transposition and mirroring, and
// index [1] is filled with the complement of index [0].
static AOM_INLINE void init_wedge_master_masks(void) {
  int i, j;
  const int w = MASK_MASTER_SIZE;
  const int h = MASK_MASTER_SIZE;
  const int stride = MASK_MASTER_STRIDE;
  // Note: index [0] stores the masters, and [1] its complement.
  // Generate prototype by shifting the masters
  int shift = h / 4;
  for (i = 0; i < h; i += 2) {
    // Even rows use the "even" profile, odd rows the "odd" profile; the
    // shift decreases by one every two rows, producing the oblique slope.
    shift_copy(wedge_master_oblique_even,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
               MASK_MASTER_SIZE);
    shift--;
    shift_copy(wedge_master_oblique_odd,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
               MASK_MASTER_SIZE);
    // The vertical master repeats the same 1-D profile on every row.
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
  }

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
      // OBLIQUE27 is the transpose of OBLIQUE63; OBLIQUE117/153 are the
      // horizontally mirrored complements. Weights sum to 1 << 6 per pixel.
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
      // HORIZONTAL is the transpose of VERTICAL.
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - mskx;
    }
  }
}
485 
// Crops the master wedge masks into contiguous per-block-size buffers inside
// wedge_mask_buf and records the resulting pointers in wedge_masks (reached
// through the masks[] tables in av1_wedge_params_lookup). Buffers are packed
// sequentially, so the order of the loops determines the layout.
static AOM_INLINE void init_wedge_masks(void) {
  uint8_t *dst = wedge_mask_buf;
  BLOCK_SIZE bsize;
  memset(wedge_masks, 0, sizeof(wedge_masks));
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
    const int wtypes = wedge_params->wedge_types;
    if (wtypes == 0) continue;  // No wedge support at this block size.
    const uint8_t *mask;
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
    int w;
    for (w = 0; w < wtypes; ++w) {
      // Positive-sign mask: copy a bw x bh window out of the master mask.
      mask = get_wedge_mask_inplace(w, 0, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
                        bh);
      wedge_params->masks[0][w] = dst;
      dst += bw * bh;

      // Negative-sign (complement) mask.
      mask = get_wedge_mask_inplace(w, 1, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
                        bh);
      wedge_params->masks[1][w] = dst;
      dst += bw * bh;
    }
    // All masks must fit in the statically sized backing buffer.
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
  }
}
514 
/* clang-format off */
// 1-D decay curve used by build_smooth_interintra_mask(): weight of the
// intra prediction as a function of distance from the block edge.
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
};
// Per-block-size step applied to ii_weights1d indices, so that larger blocks
// traverse the decay curve more slowly. Indexed by BLOCK_SIZE.
static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
    32, 16, 16, 16, 8, 8, 8, 4,
    4,  4,  2,  2,  2, 1, 1, 1,
    8,  8,  4,  4,  2, 2
};
/* clang-format on */
531 
build_smooth_interintra_mask(uint8_t * mask,int stride,BLOCK_SIZE plane_bsize,INTERINTRA_MODE mode)532 static AOM_INLINE void build_smooth_interintra_mask(uint8_t *mask, int stride,
533                                                     BLOCK_SIZE plane_bsize,
534                                                     INTERINTRA_MODE mode) {
535   int i, j;
536   const int bw = block_size_wide[plane_bsize];
537   const int bh = block_size_high[plane_bsize];
538   const int size_scale = ii_size_scales[plane_bsize];
539 
540   switch (mode) {
541     case II_V_PRED:
542       for (i = 0; i < bh; ++i) {
543         memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
544         mask += stride;
545       }
546       break;
547 
548     case II_H_PRED:
549       for (i = 0; i < bh; ++i) {
550         for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
551         mask += stride;
552       }
553       break;
554 
555     case II_SMOOTH_PRED:
556       for (i = 0; i < bh; ++i) {
557         for (j = 0; j < bw; ++j)
558           mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
559         mask += stride;
560       }
561       break;
562 
563     case II_DC_PRED:
564     default:
565       for (i = 0; i < bh; ++i) {
566         memset(mask, 32, bw * sizeof(mask[0]));
567         mask += stride;
568       }
569       break;
570   }
571 }
572 
init_smooth_interintra_masks(void)573 static AOM_INLINE void init_smooth_interintra_masks(void) {
574   for (int m = 0; m < INTERINTRA_MODES; ++m) {
575     for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
576       const int bw = block_size_wide[bs];
577       const int bh = block_size_high[bs];
578       if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
579       build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
580                                    m);
581     }
582   }
583 }
584 
585 // Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
// One-time initializer (run via aom_once) that builds every mask table this
// file depends on: master wedge masks, per-block-size wedge masks, and the
// smooth interintra masks.
static void init_all_wedge_masks(void) {
  init_wedge_master_masks();
  init_wedge_masks();
  init_smooth_interintra_masks();
}
591 
// Public entry point: initializes the wedge/interintra mask tables exactly
// once, even when called from multiple threads (guarded by aom_once).
void av1_init_wedge_masks(void) { aom_once(init_all_wedge_masks); }
593 
build_masked_compound_no_round(uint8_t * dst,int dst_stride,const CONV_BUF_TYPE * src0,int src0_stride,const CONV_BUF_TYPE * src1,int src1_stride,const INTERINTER_COMPOUND_DATA * const comp_data,BLOCK_SIZE sb_type,int h,int w,InterPredParams * inter_pred_params)594 static AOM_INLINE void build_masked_compound_no_round(
595     uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
596     const CONV_BUF_TYPE *src1, int src1_stride,
597     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
598     int w, InterPredParams *inter_pred_params) {
599   const int ssy = inter_pred_params->subsampling_y;
600   const int ssx = inter_pred_params->subsampling_x;
601   const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
602   const int mask_stride = block_size_wide[sb_type];
603 #if CONFIG_AV1_HIGHBITDEPTH
604   if (inter_pred_params->use_hbd_buf) {
605     aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
606                                   src1_stride, mask, mask_stride, w, h, ssx,
607                                   ssy, &inter_pred_params->conv_params,
608                                   inter_pred_params->bit_depth);
609   } else {
610     aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
611                                  src1_stride, mask, mask_stride, w, h, ssx, ssy,
612                                  &inter_pred_params->conv_params);
613   }
614 #else
615   aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
616                                src1_stride, mask, mask_stride, w, h, ssx, ssy,
617                                &inter_pred_params->conv_params);
618 #endif
619 }
620 
// Builds the second reference's prediction of a masked compound (wedge or
// diffwtd) block into a temporary buffer, then blends it with the first
// reference's prediction (already stored in conv_params.dst) into |dst|.
void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
                                     uint8_t *dst, int dst_stride,
                                     InterPredParams *inter_pred_params,
                                     const SubpelParams *subpel_params) {
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;

  // We're going to call av1_make_inter_predictor to generate a prediction into
  // a temporary buffer, then will blend that temporary buffer with that from
  // the other reference.
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
  uint8_t *tmp_dst =
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;

  const int tmp_buf_stride = MAX_SB_SIZE;
  // Save the original conv destination (holding the first reference's
  // prediction) and redirect the convolution output to the temp buffer.
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
  inter_pred_params->conv_params.dst = tmp_buf16;
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
  assert(inter_pred_params->conv_params.do_average == 0);

  // This will generate a prediction in tmp_buf for the second reference
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
                           inter_pred_params, subpel_params);

  // For DIFFWTD the mask is derived from the two predictions themselves; it
  // is computed once on plane 0 and reused for the other planes.
  if (!inter_pred_params->conv_params.plane &&
      comp_data->type == COMPOUND_DIFFWTD) {
    av1_build_compound_diffwtd_mask_d16(
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
        inter_pred_params->block_width, &inter_pred_params->conv_params,
        inter_pred_params->bit_depth);
  }
  // Blend saved prediction (org_dst) with the one just built (tmp_buf16).
  build_masked_compound_no_round(
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
      comp_data, sb_type, inter_pred_params->block_height,
      inter_pred_params->block_width, inter_pred_params);
}
660 
av1_dist_wtd_comp_weight_assign(const AV1_COMMON * cm,const MB_MODE_INFO * mbmi,int * fwd_offset,int * bck_offset,int * use_dist_wtd_comp_avg,int is_compound)661 void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
662                                      const MB_MODE_INFO *mbmi, int *fwd_offset,
663                                      int *bck_offset,
664                                      int *use_dist_wtd_comp_avg,
665                                      int is_compound) {
666   assert(fwd_offset != NULL && bck_offset != NULL);
667   if (!is_compound || mbmi->compound_idx) {
668     *fwd_offset = 8;
669     *bck_offset = 8;
670     *use_dist_wtd_comp_avg = 0;
671     return;
672   }
673 
674   *use_dist_wtd_comp_avg = 1;
675   const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
676   const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
677   const int cur_frame_index = cm->cur_frame->order_hint;
678   int bck_frame_index = 0, fwd_frame_index = 0;
679 
680   if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
681   if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
682 
683   int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
684                                        fwd_frame_index, cur_frame_index)),
685                  0, MAX_FRAME_DISTANCE);
686   int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
687                                        cur_frame_index, bck_frame_index)),
688                  0, MAX_FRAME_DISTANCE);
689 
690   const int order = d0 <= d1;
691 
692   if (d0 == 0 || d1 == 0) {
693     *fwd_offset = quant_dist_lookup_table[3][order];
694     *bck_offset = quant_dist_lookup_table[3][1 - order];
695     return;
696   }
697 
698   int i;
699   for (i = 0; i < 3; ++i) {
700     int c0 = quant_dist_weight[i][order];
701     int c1 = quant_dist_weight[i][!order];
702     int d0_c0 = d0 * c0;
703     int d1_c1 = d1 * c1;
704     if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
705   }
706 
707   *fwd_offset = quant_dist_lookup_table[i][order];
708   *bck_offset = quant_dist_lookup_table[i][1 - order];
709 }
710 
// Point each macroblockd plane's dst buffer at the (mi_row, mi_col) position
// inside the given frame buffer, for planes in [plane_start, plane_end).
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const int plane_start, const int plane_end) {
  // Clamp the upper bound to MAX_MB_PLANE to quiet static-analysis warnings
  // about the planes[] indexing below.
  const int last_plane = AOMMIN(plane_end, MAX_MB_PLANE);
  for (int plane = plane_start; plane < last_plane; ++plane) {
    struct macroblockd_plane *const pd = &planes[plane];
    const int is_uv = plane > 0;
    setup_pred_plane(&pd->dst, bsize, src->buffers[plane],
                     src->crop_widths[is_uv], src->crop_heights[is_uv],
                     src->strides[is_uv], mi_row, mi_col, NULL,
                     pd->subsampling_x, pd->subsampling_y);
  }
}
724 
// Point each plane's pre[idx] buffer at the (mi_row, mi_col) position inside
// the given reference frame buffer, applying scale factors sf.
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *sf,
                          const int num_planes) {
  // Nothing to set up when no reference buffer is attached.
  if (src == NULL) return;

  // Clamp to MAX_MB_PLANE to quiet static-analysis warnings about the
  // xd->plane[] indexing below.
  const int last_plane = AOMMIN(num_planes, MAX_MB_PLANE);
  for (int plane = 0; plane < last_plane; ++plane) {
    struct macroblockd_plane *const pd = &xd->plane[plane];
    const int is_uv = plane > 0;
    setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[plane],
                     src->crop_widths[is_uv], src->crop_heights[is_uv],
                     src->strides[is_uv], mi_row, mi_col, sf,
                     pd->subsampling_x, pd->subsampling_y);
  }
}
742 
// obmc_mask_N[overlap_position]
//
// Per-pixel OBMC blending weights (out of 64) for an overlap region N pixels
// deep, indexed by distance from the shared block edge. As used by the
// aom_blend_a64_{v,h}mask calls below, entry m weights the current block's
// prediction; the neighbor's prediction implicitly receives 64 - m. Weights
// rise monotonically to 64, so pixels furthest from the edge keep more of
// the current block's own prediction.
static const uint8_t obmc_mask_1[1] = { 64 };
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };

DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64,
};
765 
av1_get_obmc_mask(int length)766 const uint8_t *av1_get_obmc_mask(int length) {
767   switch (length) {
768     case 1: return obmc_mask_1;
769     case 2: return obmc_mask_2;
770     case 4: return obmc_mask_4;
771     case 8: return obmc_mask_8;
772     case 16: return obmc_mask_16;
773     case 32: return obmc_mask_32;
774     case 64: return obmc_mask_64;
775     default: assert(0); return NULL;
776   }
777 }
778 
increment_int_ptr(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * mi,void * fun_ctxt,const int num_planes)779 static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
780                                      int rel_mi_col, uint8_t op_mi_size,
781                                      int dir, MB_MODE_INFO *mi, void *fun_ctxt,
782                                      const int num_planes) {
783   (void)xd;
784   (void)rel_mi_row;
785   (void)rel_mi_col;
786   (void)op_mi_size;
787   (void)dir;
788   (void)mi;
789   ++*(uint8_t *)fun_ctxt;
790   (void)num_planes;
791 }
792 
// Populate mbmi->overlappable_neighbors with a count of overlappable
// neighbors for the current block (0 when motion variation is not allowed
// for its block size).
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
  MB_MODE_INFO *const mbmi = xd->mi[0];

  mbmi->overlappable_neighbors = 0;

  if (!is_motion_variation_allowed_bsize(mbmi->bsize)) return;

  // Scan the above row first; the left column is only scanned when the above
  // row contributed nothing.
  foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
                                &mbmi->overlappable_neighbors);
  if (mbmi->overlappable_neighbors != 0) return;
  foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
                               &mbmi->overlappable_neighbors);
}
806 
807 // HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
808 // block-size of current plane is smaller than 8x8, always only blend with the
809 // left neighbor(s) (skip blending with the above side).
810 #define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
811 
av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,const struct macroblockd_plane * pd,int dir)812 int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
813                                const struct macroblockd_plane *pd, int dir) {
814   assert(is_motion_variation_allowed_bsize(bsize));
815 
816   const BLOCK_SIZE bsize_plane =
817       get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
818   switch (bsize_plane) {
819 #if DISABLE_CHROMA_U8X8_OBMC
820     case BLOCK_4X4:
821     case BLOCK_8X4:
822     case BLOCK_4X8: return 1;
823 #else
824     case BLOCK_4X4:
825     case BLOCK_8X4:
826     case BLOCK_4X8: return dir == 0;
827 #endif
828     default: return 0;
829   }
830 }
831 
// OBMC borrows a neighbor's motion but always predicts single-reference:
// drop any second reference and reset the compound type to plain averaging.
void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->ref_frame[1] = NONE_FRAME;
}
836 
// Context handed to the per-neighbor OBMC blending callbacks below:
// per-plane pointers to the neighbors' prediction buffers and their strides.
struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;    // adjacent[plane]: neighbor prediction samples
  int *adjacent_stride;  // adjacent_stride[plane]: row stride of adjacent[plane]
};
841 
build_obmc_inter_pred_above(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * above_mi,void * fun_ctxt,const int num_planes)842 static INLINE void build_obmc_inter_pred_above(
843     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
844     int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
845   (void)above_mi;
846   (void)rel_mi_row;
847   (void)dir;
848   struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
849   const BLOCK_SIZE bsize = xd->mi[0]->bsize;
850   const int overlap =
851       AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
852 
853   for (int plane = 0; plane < num_planes; ++plane) {
854     const struct macroblockd_plane *pd = &xd->plane[plane];
855     const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
856     const int bh = overlap >> pd->subsampling_y;
857     const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
858 
859     if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
860 
861     const int dst_stride = pd->dst.stride;
862     uint8_t *const dst = &pd->dst.buf[plane_col];
863     const int tmp_stride = ctxt->adjacent_stride[plane];
864     const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
865     const uint8_t *const mask = av1_get_obmc_mask(bh);
866 #if CONFIG_AV1_HIGHBITDEPTH
867     const int is_hbd = is_cur_buf_hbd(xd);
868     if (is_hbd)
869       aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
870                                  tmp_stride, mask, bw, bh, xd->bd);
871     else
872       aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
873                           mask, bw, bh);
874 #else
875     aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
876                         bw, bh);
877 #endif
878   }
879 }
880 
build_obmc_inter_pred_left(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * left_mi,void * fun_ctxt,const int num_planes)881 static INLINE void build_obmc_inter_pred_left(
882     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
883     int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
884   (void)left_mi;
885   (void)rel_mi_col;
886   (void)dir;
887   struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
888   const BLOCK_SIZE bsize = xd->mi[0]->bsize;
889   const int overlap =
890       AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
891 
892   for (int plane = 0; plane < num_planes; ++plane) {
893     const struct macroblockd_plane *pd = &xd->plane[plane];
894     const int bw = overlap >> pd->subsampling_x;
895     const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
896     const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
897 
898     if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
899 
900     const int dst_stride = pd->dst.stride;
901     uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
902     const int tmp_stride = ctxt->adjacent_stride[plane];
903     const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
904     const uint8_t *const mask = av1_get_obmc_mask(bw);
905 
906 #if CONFIG_AV1_HIGHBITDEPTH
907     const int is_hbd = is_cur_buf_hbd(xd);
908     if (is_hbd)
909       aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
910                                  tmp_stride, mask, bw, bh, xd->bd);
911     else
912       aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
913                           mask, bw, bh);
914 #else
915     aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
916                         bw, bh);
917 #endif
918   }
919 }
920 
921 // This function combines motion compensated predictions that are generated by
922 // top/left neighboring blocks' inter predictors with the regular inter
923 // prediction. We assume the original prediction (bmc) is stored in
924 // xd->plane[].dst.buf
av1_build_obmc_inter_prediction(const AV1_COMMON * cm,MACROBLOCKD * xd,uint8_t * above[MAX_MB_PLANE],int above_stride[MAX_MB_PLANE],uint8_t * left[MAX_MB_PLANE],int left_stride[MAX_MB_PLANE])925 void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
926                                      uint8_t *above[MAX_MB_PLANE],
927                                      int above_stride[MAX_MB_PLANE],
928                                      uint8_t *left[MAX_MB_PLANE],
929                                      int left_stride[MAX_MB_PLANE]) {
930   const BLOCK_SIZE bsize = xd->mi[0]->bsize;
931 
932   // handle above row
933   struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
934   foreach_overlappable_nb_above(cm, xd,
935                                 max_neighbor_obmc[mi_size_wide_log2[bsize]],
936                                 build_obmc_inter_pred_above, &ctxt_above);
937 
938   // handle left column
939   struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
940   foreach_overlappable_nb_left(cm, xd,
941                                max_neighbor_obmc[mi_size_high_log2[bsize]],
942                                build_obmc_inter_pred_left, &ctxt_left);
943 }
944 
// Carve the two OBMC scratch buffers in xd->tmp_obmc_bufs into three
// per-plane destinations each: plane p starts p * MAX_SB_SQUARE samples in.
void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
                             uint8_t **dst_buf2) {
  if (is_cur_buf_hbd(xd)) {
    // High bit depth: samples are uint16_t, so byte offsets scale by
    // sizeof(uint16_t) and pointers are tagged via CONVERT_TO_BYTEPTR.
    const int len = sizeof(uint16_t);
    for (int plane = 0; plane < 3; ++plane) {
      dst_buf1[plane] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] +
                                           plane * MAX_SB_SQUARE * len);
      dst_buf2[plane] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] +
                                           plane * MAX_SB_SQUARE * len);
    }
  } else {
    for (int plane = 0; plane < 3; ++plane) {
      dst_buf1[plane] = xd->tmp_obmc_bufs[0] + plane * MAX_SB_SQUARE;
      dst_buf2[plane] = xd->tmp_obmc_bufs[1] + plane * MAX_SB_SQUARE;
    }
  }
}
968 
// Redirect xd so that a subsequent inter prediction using the above
// neighbor's motion lands in the context's temporary buffers. Also mutates
// above_mbmi (forced to single reference) and xd's clip-edge fields.
void av1_setup_build_prediction_by_above_pred(
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
    const int num_planes) {
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
  const int above_mi_col = xd->mi_col + rel_mi_col;

  av1_modify_neighbor_predictor_for_obmc(above_mbmi);

  // Destination: the temporary buffers, offset to this neighbor's column.
  for (int plane = 0; plane < num_planes; ++plane) {
    struct macroblockd_plane *const pd = &xd->plane[plane];
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[plane],
                     ctxt->tmp_width[plane], ctxt->tmp_height[plane],
                     ctxt->tmp_stride[plane], 0, rel_mi_col, NULL,
                     pd->subsampling_x, pd->subsampling_y);
  }

  // Source: the neighbor's reference frame(s), validated and scaled.
  const int num_refs = 1 + has_second_ref(above_mbmi);
  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const sf =
        get_ref_scale_factors_const(ctxt->cm, frame);
    xd->block_ref_scale_factors[ref] = sf;
    if (!av1_is_valid_scale(sf)) {
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    }
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
                         num_planes);
  }

  // Clip-edge bookkeeping (1/8-pel units) for the neighbor's column/width.
  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
  xd->mb_to_right_edge =
      ctxt->mb_to_far_edge +
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
}
1006 
// Redirect xd so that a subsequent inter prediction using the left
// neighbor's motion lands in the context's temporary buffers. Also mutates
// left_mbmi (forced to single reference) and xd's clip-edge fields.
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
                                             uint8_t left_mi_height,
                                             MB_MODE_INFO *left_mbmi,
                                             struct build_prediction_ctxt *ctxt,
                                             const int num_planes) {
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
  const int left_mi_row = xd->mi_row + rel_mi_row;

  av1_modify_neighbor_predictor_for_obmc(left_mbmi);

  // Destination: the temporary buffers, offset to this neighbor's row.
  for (int plane = 0; plane < num_planes; ++plane) {
    struct macroblockd_plane *const pd = &xd->plane[plane];
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[plane],
                     ctxt->tmp_width[plane], ctxt->tmp_height[plane],
                     ctxt->tmp_stride[plane], rel_mi_row, 0, NULL,
                     pd->subsampling_x, pd->subsampling_y);
  }

  // Source: the neighbor's reference frame(s), validated and scaled.
  const int num_refs = 1 + has_second_ref(left_mbmi);
  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const ref_scale_factors =
        get_ref_scale_factors_const(ctxt->cm, frame);
    xd->block_ref_scale_factors[ref] = ref_scale_factors;
    if (!av1_is_valid_scale(ref_scale_factors)) {
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    }
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
                         ref_scale_factors, num_planes);
  }

  // Clip-edge bookkeeping (1/8-pel units) for the neighbor's row/height.
  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
  xd->mb_to_bottom_edge =
      ctxt->mb_to_far_edge +
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
}
1046 
combine_interintra(INTERINTRA_MODE mode,int8_t use_wedge_interintra,int8_t wedge_index,int8_t wedge_sign,BLOCK_SIZE bsize,BLOCK_SIZE plane_bsize,uint8_t * comppred,int compstride,const uint8_t * interpred,int interstride,const uint8_t * intrapred,int intrastride)1047 static AOM_INLINE void combine_interintra(
1048     INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1049     int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1050     uint8_t *comppred, int compstride, const uint8_t *interpred,
1051     int interstride, const uint8_t *intrapred, int intrastride) {
1052   const int bw = block_size_wide[plane_bsize];
1053   const int bh = block_size_high[plane_bsize];
1054 
1055   if (use_wedge_interintra) {
1056     if (av1_is_wedge_used(bsize)) {
1057       const uint8_t *mask =
1058           av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1059       const int subw = 2 * mi_size_wide[bsize] == bw;
1060       const int subh = 2 * mi_size_high[bsize] == bh;
1061       aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
1062                          interpred, interstride, mask, block_size_wide[bsize],
1063                          bw, bh, subw, subh);
1064     }
1065     return;
1066   }
1067 
1068   const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
1069   aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
1070                      interstride, mask, bw, bw, bh, 0, 0);
1071 }
1072 
#if CONFIG_AV1_HIGHBITDEPTH
// High-bit-depth counterpart of combine_interintra(): blend the intra and
// inter predictions into comppred8 with either a wedge mask or a smooth
// interintra mask built for `mode`.
static AOM_INLINE void combine_interintra_highbd(
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (!use_wedge_interintra) {
    // Build the smooth mask on the stack (cf. the precomputed table used in
    // the 8-bit path).
    uint8_t mask[MAX_SB_SQUARE];
    build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
    aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                              interpred8, interstride, mask, bw, bw, bh, 0, 0,
                              bd);
    return;
  }

  // Wedge path: the mask is defined at the luma block size, so subsampled
  // planes pass subw/subh to the blender. Block sizes without wedge support
  // produce no output here.
  if (!av1_is_wedge_used(bsize)) return;
  const uint8_t *const wedge_mask =
      av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
  const int subw = 2 * mi_size_wide[bsize] == bw;
  const int subh = 2 * mi_size_high[bsize] == bh;
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                            interpred8, interstride, wedge_mask,
                            block_size_wide[bsize], bw, bh, subw, subh, bd);
}
#endif
1102 
// Generate the intra part of an interintra prediction for one plane into
// dst/dst_stride, reading neighboring reconstructed samples from ctx.
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
                                               MACROBLOCKD *xd,
                                               BLOCK_SIZE bsize, int plane,
                                               const BUFFER_SET *ctx,
                                               uint8_t *dst, int dst_stride) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  const PREDICTION_MODE mode =
      interintra_to_intra_mode[xd->mi[0]->interintra_mode];
  // Interintra never combines with these intra refinements.
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
  assert(xd->mi[0]->use_intrabc == 0);

  const SequenceHeader *const seq_params = cm->seq_params;
  av1_predict_intra_block(xd, seq_params->sb_size,
                          seq_params->enable_intra_edge_filter, pd->width,
                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
}
1125 
// Blend previously generated inter and intra predictions for one plane into
// the plane's destination buffer, dispatching on the buffer's bit depth.
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(mbmi->interintra_mode, mbmi->use_wedge_interintra,
                              mbmi->interintra_wedge_index,
                              INTERINTRA_WEDGE_SIGN, bsize, plane_bsize,
                              pd->dst.buf, pd->dst.stride, inter_pred,
                              inter_stride, intra_pred, intra_stride, xd->bd);
    return;
  }
#endif
  combine_interintra(mbmi->interintra_mode, mbmi->use_wedge_interintra,
                     mbmi->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
                     plane_bsize, pd->dst.buf, pd->dst.stride, inter_pred,
                     inter_stride, intra_pred, intra_stride);
}
1148 
1149 // build interintra_predictors for one plane
// Build the interintra prediction for one plane: generate the intra
// prediction into a local scratch buffer, then blend it with the inter
// prediction already present in pred/stride.
void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    uint8_t *pred, int stride,
                                    const BUFFER_SET *ctx, int plane,
                                    BLOCK_SIZE bsize) {
  assert(bsize < BLOCK_SIZES_ALL);
  if (!is_cur_buf_hbd(xd)) {
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
                                              intrapredictor, MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
                           MAX_SB_SIZE);
  } else {
    // High bit depth: the scratch holds uint16_t samples behind a tagged
    // byte pointer.
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
    uint8_t *const intrapred8 = CONVERT_TO_BYTEPTR(intrapredictor);
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
                                              intrapred8, MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapred8,
                           MAX_SB_SIZE);
  }
}
1170