• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <stdio.h>
14 #include <limits.h>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/aom_scale_rtcd.h"
19 
20 #include "aom/aom_integer.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_ports/aom_once.h"
23 
24 #include "av1/common/av1_common_int.h"
25 #include "av1/common/blockd.h"
26 #include "av1/common/mvref_common.h"
27 #include "av1/common/obmc.h"
28 #include "av1/common/reconinter.h"
29 #include "av1/common/reconintra.h"
30 
31 // This function will determine whether or not to create a warped
32 // prediction.
av1_allow_warp(const MB_MODE_INFO * const mbmi,const WarpTypesAllowed * const warp_types,const WarpedMotionParams * const gm_params,int build_for_obmc,const struct scale_factors * const sf,WarpedMotionParams * final_warp_params)33 int av1_allow_warp(const MB_MODE_INFO *const mbmi,
34                    const WarpTypesAllowed *const warp_types,
35                    const WarpedMotionParams *const gm_params,
36                    int build_for_obmc, const struct scale_factors *const sf,
37                    WarpedMotionParams *final_warp_params) {
38   // Note: As per the spec, we must test the fixed point scales here, which are
39   // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
40   // have 1 << 10 precision).
41   if (av1_is_scaled(sf)) return 0;
42 
43   if (final_warp_params != NULL) *final_warp_params = default_warp_params;
44 
45   if (build_for_obmc) return 0;
46 
47   if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
48     if (final_warp_params != NULL)
49       memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
50     return 1;
51   } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
52     if (final_warp_params != NULL)
53       memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
54     return 1;
55   }
56 
57   return 0;
58 }
59 
// Fills inter_pred_params->warp_params and switches the prediction mode to
// WARP_PRED when warped prediction is legal for this block and reference.
void av1_init_warp_params(InterPredParams *inter_pred_params,
                          const WarpTypesAllowed *warp_types, int ref,
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
  // Warping requires at least an 8x8 block.
  const int too_small = inter_pred_params->block_height < 8 ||
                        inter_pred_params->block_width < 8;
  if (too_small || xd->cur_frame_force_integer_mv) return;

  const WarpedMotionParams *const gm = &xd->global_motion[mi->ref_frame[ref]];
  const int warp_ok =
      av1_allow_warp(mi, warp_types, gm, /*build_for_obmc=*/0,
                     inter_pred_params->scale_factors,
                     &inter_pred_params->warp_params);
  if (warp_ok) inter_pred_params->mode = WARP_PRED;
}
74 
// Generates an inter prediction into dst/dst_stride, dispatching on
// inter_pred_params->mode:
//  - TRANSLATION_PRED: subpel interpolation via (highbd_)inter_predictor.
//  - WARP_PRED: warped motion compensation via av1_warp_plane().
// Any other mode is a caller bug (asserted below).
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride,
                              InterPredParams *inter_pred_params,
                              const SubpelParams *subpel_params) {
  // A compound prediction must have an intermediate conv_params.dst buffer
  // to accumulate into.
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
                 inter_pred_params->conv_params.dst != NULL));

  if (inter_pred_params->mode == TRANSLATION_PRED) {
#if CONFIG_AV1_HIGHBITDEPTH
    // High-bit-depth path selected per-buffer at runtime.
    if (inter_pred_params->use_hbd_buf) {
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                             inter_pred_params->block_width,
                             inter_pred_params->block_height,
                             &inter_pred_params->conv_params,
                             inter_pred_params->interp_filter_params,
                             inter_pred_params->bit_depth);
    } else {
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                      inter_pred_params->block_width,
                      inter_pred_params->block_height,
                      &inter_pred_params->conv_params,
                      inter_pred_params->interp_filter_params);
    }
#else
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                    inter_pred_params->block_width,
                    inter_pred_params->block_height,
                    &inter_pred_params->conv_params,
                    inter_pred_params->interp_filter_params);
#endif
  }
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
  else if (inter_pred_params->mode == WARP_PRED) {
    av1_warp_plane(
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
        inter_pred_params->ref_frame_buf.width,
        inter_pred_params->ref_frame_buf.height,
        inter_pred_params->ref_frame_buf.stride, dst,
        inter_pred_params->pix_col, inter_pred_params->pix_row,
        inter_pred_params->block_width, inter_pred_params->block_height,
        dst_stride, inter_pred_params->subsampling_x,
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
  } else {
    assert(0 && "Unsupported inter_pred_params->mode");
  }
}
122 
// Master wedge-mask prototype rows, MASK_MASTER_SIZE samples each, ramping
// from 0 to 64 (1 << WEDGE_WEIGHT_BITS).  init_wedge_master_masks()
// interleaves the even/odd oblique rows (applying a per-row shift via
// shift_copy) to build the 63-degree oblique master, and replicates
// wedge_master_vertical for every row of the vertical master.
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
141 
shift_copy(const uint8_t * src,uint8_t * dst,int shift,int width)142 static AOM_INLINE void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
143                                   int width) {
144   if (shift >= 0) {
145     memcpy(dst + shift, src, width - shift);
146     memset(dst, src[0], shift);
147   } else {
148     shift = -shift;
149     memcpy(dst, src + shift, width - shift);
150     memset(dst + width - shift, src[width - 1], shift);
151   }
152 }
153 
/* clang-format off */
// Per-block-size, per-wedge-index sign flip.  get_wedge_mask_inplace() XORs
// this flag with the requested sign when choosing between a master mask and
// its complement in wedge_mask_obl.  Rows follow the BLOCK_SIZE enum; rows
// marked "not used" correspond to block sizes with no wedge codebook.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
};
/* clang-format on */
181 
// [negative][direction]: master wedge masks and their complements for every
// wedge direction, at full MASK_MASTER_SIZE x MASK_MASTER_SIZE resolution.
// Index [0] stores the masters, [1] the complements; filled once by
// init_wedge_master_masks().
DECLARE_ALIGNED(
    16, static uint8_t,
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);

// Smooth inter-intra blending masks, one per (mode, block size); filled by
// init_smooth_interintra_masks().
DECLARE_ALIGNED(16, static uint8_t,
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
                                          [MAX_WEDGE_SQUARE]);

// Per-block-size pointers into wedge_mask_buf; filled by init_wedge_masks().
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
197 
// Wedge codebooks: each entry is { direction, x_offset, y_offset } with the
// offsets in units of 1/8th of the block width/height (scaled by the block
// dimensions in get_wedge_mask_inplace()).  Per av1_wedge_params_lookup:
// _hgtw is used for blocks taller than wide, _hltw for wider than tall, and
// _heqw for square blocks.
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
230 
// Per-block-size wedge parameters: { wedge_types, codebook, signflip,
// masks }.  Entries of { 0, NULL, NULL, NULL } mark block sizes for which
// wedge prediction is unavailable.  Rows follow the BLOCK_SIZE enum order.
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
  { 0, NULL, NULL, NULL },  // BLOCK_4X4
  { 0, NULL, NULL, NULL },  // BLOCK_4X8
  { 0, NULL, NULL, NULL },  // BLOCK_8X4
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
    wedge_masks[BLOCK_8X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
    wedge_masks[BLOCK_8X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
    wedge_masks[BLOCK_16X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
    wedge_masks[BLOCK_16X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
    wedge_masks[BLOCK_16X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
    wedge_masks[BLOCK_32X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
    wedge_masks[BLOCK_32X32] },
  { 0, NULL, NULL, NULL },  // BLOCK_32X64
  { 0, NULL, NULL, NULL },  // BLOCK_64X32
  { 0, NULL, NULL, NULL },  // BLOCK_64X64
  { 0, NULL, NULL, NULL },  // BLOCK_64X128
  { 0, NULL, NULL, NULL },  // BLOCK_128X64
  { 0, NULL, NULL, NULL },  // BLOCK_128X128
  { 0, NULL, NULL, NULL },  // BLOCK_4X16
  { 0, NULL, NULL, NULL },  // BLOCK_16X4
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
    wedge_masks[BLOCK_8X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
    wedge_masks[BLOCK_32X8] },
  { 0, NULL, NULL, NULL },  // BLOCK_16X64
  { 0, NULL, NULL, NULL },  // BLOCK_64X16
};
264 
get_wedge_mask_inplace(int wedge_index,int neg,BLOCK_SIZE sb_type)265 static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
266                                              BLOCK_SIZE sb_type) {
267   const uint8_t *master;
268   const int bh = block_size_high[sb_type];
269   const int bw = block_size_wide[sb_type];
270   const wedge_code_type *a =
271       av1_wedge_params_lookup[sb_type].codebook + wedge_index;
272   int woff, hoff;
273   const uint8_t wsignflip =
274       av1_wedge_params_lookup[sb_type].signflip[wedge_index];
275 
276   assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
277   woff = (a->x_offset * bw) >> 3;
278   hoff = (a->y_offset * bh) >> 3;
279   master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
280            MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
281            MASK_MASTER_SIZE / 2 - woff;
282   return master;
283 }
284 
av1_get_compound_type_mask(const INTERINTER_COMPOUND_DATA * const comp_data,BLOCK_SIZE sb_type)285 const uint8_t *av1_get_compound_type_mask(
286     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
287   (void)sb_type;
288   switch (comp_data->type) {
289     case COMPOUND_WEDGE:
290       return av1_get_contiguous_soft_mask(comp_data->wedge_index,
291                                           comp_data->wedge_sign, sb_type);
292     default: return comp_data->seg_mask;
293   }
294 }
295 
diffwtd_mask_d16(uint8_t * mask,int which_inverse,int mask_base,const CONV_BUF_TYPE * src0,int src0_stride,const CONV_BUF_TYPE * src1,int src1_stride,int h,int w,ConvolveParams * conv_params,int bd)296 static AOM_INLINE void diffwtd_mask_d16(
297     uint8_t *mask, int which_inverse, int mask_base, const CONV_BUF_TYPE *src0,
298     int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
299     ConvolveParams *conv_params, int bd) {
300   int round =
301       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
302   int i, j, m, diff;
303   for (i = 0; i < h; ++i) {
304     for (j = 0; j < w; ++j) {
305       diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
306       diff = ROUND_POWER_OF_TWO(diff, round);
307       m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
308       mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
309     }
310   }
311 }
312 
// C reference for building a DIFFWTD compound mask from d16 buffers.
// DIFFWTD_38 and DIFFWTD_38_INV share a mask base of 38 and differ only in
// whether the mask is inverted.
void av1_build_compound_diffwtd_mask_d16_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_d16(mask, /*which_inverse=*/0, 38, src0, src0_stride, src1,
                     src1_stride, h, w, conv_params, bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_d16(mask, /*which_inverse=*/1, 38, src0, src0_stride, src1,
                     src1_stride, h, w, conv_params, bd);
  } else {
    assert(0);
  }
}
329 
diffwtd_mask(uint8_t * mask,int which_inverse,int mask_base,const uint8_t * src0,int src0_stride,const uint8_t * src1,int src1_stride,int h,int w)330 static AOM_INLINE void diffwtd_mask(uint8_t *mask, int which_inverse,
331                                     int mask_base, const uint8_t *src0,
332                                     int src0_stride, const uint8_t *src1,
333                                     int src1_stride, int h, int w) {
334   int i, j, m, diff;
335   for (i = 0; i < h; ++i) {
336     for (j = 0; j < w; ++j) {
337       diff =
338           abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
339       m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
340       mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
341     }
342   }
343 }
344 
// C reference for building a DIFFWTD compound mask from 8-bit pixels.
// Both supported mask types use a base of 38; _INV inverts the mask.
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
                                       DIFFWTD_MASK_TYPE mask_type,
                                       const uint8_t *src0, int src0_stride,
                                       const uint8_t *src1, int src1_stride,
                                       int h, int w) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask(mask, /*which_inverse=*/0, 38, src0, src0_stride, src1,
                 src1_stride, h, w);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask(mask, /*which_inverse=*/1, 38, src0, src0_stride, src1,
                 src1_stride, h, w);
  } else {
    assert(0);
  }
}
360 
// Builds a DIFFWTD mask from two high-bit-depth buffers: each value is
// mask_base plus |src0 - src1| / DIFF_FACTOR (the difference is first shifted
// down by bd - 8 for bd > 8), clamped to [0, AOM_BLEND_A64_MAX_ALPHA];
// which_inverse selects the complement.  The bd==8/bd>8 and inverse/direct
// cases are written as four separate loops under AOM_FORCE_INLINE —
// NOTE(review): presumably so each call site gets loops specialized on
// constant bd/which_inverse; confirm before collapsing them.
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
    const unsigned int bd) {
  assert(bd >= 8);
  if (bd == 8) {
    // 8-bit content: no down-shift of the difference is needed.
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          // negative_to_zero + AOMMIN implement the clamp to
          // [0, AOM_BLEND_A64_MAX_ALPHA].
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  } else {
    // bd > 8: scale the difference back to an 8-bit range before weighting.
    const unsigned int bd_shift = bd - 8;
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  }
}
423 
// C reference for building a DIFFWTD compound mask from high-bit-depth
// pixels (src pointers are CONVERT_TO_SHORTPTR'd uint16_t buffers).
void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_highbd(mask, /*which_inverse=*/0, 38,
                        CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_highbd(mask, /*which_inverse=*/1, 38,
                        CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else {
    assert(0);
  }
}
440 
// Builds the full-resolution master wedge masks in wedge_mask_obl from the
// 1-D prototype rows, then derives the remaining directions and the
// complements by transposition and reflection.
static AOM_INLINE void init_wedge_master_masks(void) {
  int i, j;
  const int w = MASK_MASTER_SIZE;
  const int h = MASK_MASTER_SIZE;
  const int stride = MASK_MASTER_STRIDE;
  // Note: index [0] stores the masters, and [1] its complement.
  // Generate prototype by shifting the masters
  int shift = h / 4;
  // Build WEDGE_OBLIQUE63 by writing the even/odd prototype rows with a
  // shift that decreases by one every two rows, and WEDGE_VERTICAL by
  // replicating its prototype row unshifted.
  for (i = 0; i < h; i += 2) {
    shift_copy(wedge_master_oblique_even,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
               MASK_MASTER_SIZE);
    shift--;
    shift_copy(wedge_master_oblique_odd,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
               MASK_MASTER_SIZE);
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
  }

  // Derive the other directions and complements from the two masters:
  //   OBLIQUE27   = transpose of OBLIQUE63
  //   OBLIQUE117  = horizontal reflection, complemented
  //   OBLIQUE153  = transpose of OBLIQUE117
  //   HORIZONTAL  = transpose of VERTICAL
  // and [1][dir] = (1 << WEDGE_WEIGHT_BITS) - [0][dir] (the complement).
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - mskx;
    }
  }
}
485 
init_wedge_masks(void)486 static AOM_INLINE void init_wedge_masks(void) {
487   uint8_t *dst = wedge_mask_buf;
488   BLOCK_SIZE bsize;
489   memset(wedge_masks, 0, sizeof(wedge_masks));
490   for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
491     const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
492     const int wtypes = wedge_params->wedge_types;
493     if (wtypes == 0) continue;
494     const uint8_t *mask;
495     const int bw = block_size_wide[bsize];
496     const int bh = block_size_high[bsize];
497     int w;
498     for (w = 0; w < wtypes; ++w) {
499       mask = get_wedge_mask_inplace(w, 0, bsize);
500       aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
501                         bh);
502       wedge_params->masks[0][w] = dst;
503       dst += bw * bh;
504 
505       mask = get_wedge_mask_inplace(w, 1, bsize);
506       aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
507                         bh);
508       wedge_params->masks[1][w] = dst;
509       dst += bw * bh;
510     }
511     assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
512   }
513 }
514 
515 /* clang-format off */
516 static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
517   60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
518   31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
519   16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
520   8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
521   4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
522   2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
523   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
524 };
525 static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
526     32, 16, 16, 16, 8, 8, 8, 4,
527     4,  4,  2,  2,  2, 1, 1, 1,
528     8,  8,  4,  4,  2, 2
529 };
530 /* clang-format on */
531 
build_smooth_interintra_mask(uint8_t * mask,int stride,BLOCK_SIZE plane_bsize,INTERINTRA_MODE mode)532 static AOM_INLINE void build_smooth_interintra_mask(uint8_t *mask, int stride,
533                                                     BLOCK_SIZE plane_bsize,
534                                                     INTERINTRA_MODE mode) {
535   int i, j;
536   const int bw = block_size_wide[plane_bsize];
537   const int bh = block_size_high[plane_bsize];
538   const int size_scale = ii_size_scales[plane_bsize];
539 
540   switch (mode) {
541     case II_V_PRED:
542       for (i = 0; i < bh; ++i) {
543         memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
544         mask += stride;
545       }
546       break;
547 
548     case II_H_PRED:
549       for (i = 0; i < bh; ++i) {
550         for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
551         mask += stride;
552       }
553       break;
554 
555     case II_SMOOTH_PRED:
556       for (i = 0; i < bh; ++i) {
557         for (j = 0; j < bw; ++j)
558           mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
559         mask += stride;
560       }
561       break;
562 
563     case II_DC_PRED:
564     default:
565       for (i = 0; i < bh; ++i) {
566         memset(mask, 32, bw * sizeof(mask[0]));
567         mask += stride;
568       }
569       break;
570   }
571 }
572 
init_smooth_interintra_masks(void)573 static AOM_INLINE void init_smooth_interintra_masks(void) {
574   for (int m = 0; m < INTERINTRA_MODES; ++m) {
575     for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
576       const int bw = block_size_wide[bs];
577       const int bh = block_size_high[bs];
578       if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
579       build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
580                                    m);
581     }
582   }
583 }
584 
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
// One-time initializer (run under aom_once): builds the master wedge masks,
// the per-block-size wedge masks, and the smooth inter-intra masks.
static void init_all_wedge_masks(void) {
  init_wedge_master_masks();
  init_wedge_masks();
  init_smooth_interintra_masks();
}
591 
// Public entry point: runs init_all_wedge_masks() exactly once via aom_once.
void av1_init_wedge_masks(void) { aom_once(init_all_wedge_masks); }
593 
build_masked_compound_no_round(uint8_t * dst,int dst_stride,const CONV_BUF_TYPE * src0,int src0_stride,const CONV_BUF_TYPE * src1,int src1_stride,const INTERINTER_COMPOUND_DATA * const comp_data,BLOCK_SIZE sb_type,int h,int w,InterPredParams * inter_pred_params)594 static AOM_INLINE void build_masked_compound_no_round(
595     uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
596     const CONV_BUF_TYPE *src1, int src1_stride,
597     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
598     int w, InterPredParams *inter_pred_params) {
599   const int ssy = inter_pred_params->subsampling_y;
600   const int ssx = inter_pred_params->subsampling_x;
601   const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
602   const int mask_stride = block_size_wide[sb_type];
603 #if CONFIG_AV1_HIGHBITDEPTH
604   if (inter_pred_params->use_hbd_buf) {
605     aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
606                                   src1_stride, mask, mask_stride, w, h, ssx,
607                                   ssy, &inter_pred_params->conv_params,
608                                   inter_pred_params->bit_depth);
609   } else {
610     aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
611                                  src1_stride, mask, mask_stride, w, h, ssx, ssy,
612                                  &inter_pred_params->conv_params);
613   }
614 #else
615   aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
616                                src1_stride, mask, mask_stride, w, h, ssx, ssy,
617                                &inter_pred_params->conv_params);
618 #endif
619 }
620 
// Produces the masked (wedge or diffwtd) compound prediction: generates the
// second reference's prediction into a temporary d16 buffer, optionally
// derives the diffwtd mask from the two d16 predictions, then blends both
// into dst.
void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
                                     uint8_t *dst, int dst_stride,
                                     InterPredParams *inter_pred_params,
                                     const SubpelParams *subpel_params) {
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;

  // We're going to call av1_make_inter_predictor to generate a prediction into
  // a temporary buffer, then will blend that temporary buffer with that from
  // the other reference.
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
  uint8_t *tmp_dst =
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;

  const int tmp_buf_stride = MAX_SB_SIZE;
  // Save the caller's conv_params destination (holds the first reference's
  // d16 prediction) and redirect conv_params into tmp_buf for this call.
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
  inter_pred_params->conv_params.dst = tmp_buf16;
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
  assert(inter_pred_params->conv_params.do_average == 0);

  // This will generate a prediction in tmp_buf for the second reference
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
                           inter_pred_params, subpel_params);

  // For COMPOUND_DIFFWTD the mask is derived from the two d16 predictions;
  // it is computed on plane 0 only and reused for the chroma planes.
  if (!inter_pred_params->conv_params.plane &&
      comp_data->type == COMPOUND_DIFFWTD) {
    av1_build_compound_diffwtd_mask_d16(
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
        inter_pred_params->block_width, &inter_pred_params->conv_params,
        inter_pred_params->bit_depth);
  }
  // Blend the saved first-reference prediction with the one in tmp_buf.
  build_masked_compound_no_round(
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
      comp_data, sb_type, inter_pred_params->block_height,
      inter_pred_params->block_width, inter_pred_params);
}
660 
av1_dist_wtd_comp_weight_assign(const AV1_COMMON * cm,const MB_MODE_INFO * mbmi,int * fwd_offset,int * bck_offset,int * use_dist_wtd_comp_avg,int is_compound)661 void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
662                                      const MB_MODE_INFO *mbmi, int *fwd_offset,
663                                      int *bck_offset,
664                                      int *use_dist_wtd_comp_avg,
665                                      int is_compound) {
666   assert(fwd_offset != NULL && bck_offset != NULL);
667   if (!is_compound || mbmi->compound_idx) {
668     *fwd_offset = 8;
669     *bck_offset = 8;
670     *use_dist_wtd_comp_avg = 0;
671     return;
672   }
673 
674   *use_dist_wtd_comp_avg = 1;
675   const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
676   const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
677   const int cur_frame_index = cm->cur_frame->order_hint;
678   int bck_frame_index = 0, fwd_frame_index = 0;
679 
680   if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
681   if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
682 
683   int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
684                                        fwd_frame_index, cur_frame_index)),
685                  0, MAX_FRAME_DISTANCE);
686   int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
687                                        cur_frame_index, bck_frame_index)),
688                  0, MAX_FRAME_DISTANCE);
689 
690   const int order = d0 <= d1;
691 
692   if (d0 == 0 || d1 == 0) {
693     *fwd_offset = quant_dist_lookup_table[3][order];
694     *bck_offset = quant_dist_lookup_table[3][1 - order];
695     return;
696   }
697 
698   int i;
699   for (i = 0; i < 3; ++i) {
700     int c0 = quant_dist_weight[i][order];
701     int c1 = quant_dist_weight[i][!order];
702     int d0_c0 = d0 * c0;
703     int d1_c1 = d1 * c1;
704     if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
705   }
706 
707   *fwd_offset = quant_dist_lookup_table[i][order];
708   *bck_offset = quant_dist_lookup_table[i][1 - order];
709 }
710 
// Points each plane's destination buffer descriptor at the corresponding
// plane of `src`, offset to the block at (mi_row, mi_col).
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const int plane_start, const int plane_end) {
  // Clamp against MAX_MB_PLANE rather than trusting plane_end, which quiets
  // static-analysis warnings about out-of-range plane indices.
  const int last_plane = AOMMIN(plane_end, MAX_MB_PLANE);
  for (int plane = plane_start; plane < last_plane; ++plane) {
    struct macroblockd_plane *const pd = &planes[plane];
    const int is_uv = plane > 0;  // planes 1 and 2 share the chroma geometry
    setup_pred_plane(&pd->dst, bsize, src->buffers[plane],
                     src->crop_widths[is_uv], src->crop_heights[is_uv],
                     src->strides[is_uv], mi_row, mi_col, NULL,
                     pd->subsampling_x, pd->subsampling_y);
  }
}
724 
// Points each plane's reference-prediction descriptor (pre[idx]) at the
// corresponding plane of `src`, offset to the block at (mi_row, mi_col) and
// carrying the given scale factors. A NULL src is a no-op.
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *sf,
                          const int num_planes) {
  if (src == NULL) return;
  // Clamp against MAX_MB_PLANE rather than trusting num_planes, which
  // quiets static-analysis warnings about out-of-range plane indices.
  const int last_plane = AOMMIN(num_planes, MAX_MB_PLANE);
  for (int plane = 0; plane < last_plane; ++plane) {
    struct macroblockd_plane *const pd = &xd->plane[plane];
    const int is_uv = plane > 0;  // planes 1 and 2 share the chroma geometry
    setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[plane],
                     src->crop_widths[is_uv], src->crop_heights[is_uv],
                     src->strides[is_uv], mi_row, mi_col, sf,
                     pd->subsampling_x, pd->subsampling_y);
  }
}
742 
// obmc_mask_N[overlap_position]
// Per-sample OBMC blending weights (in units of 1/64) for an overlap region
// N samples deep. Entry k is the weight applied to the current block's
// prediction at distance k from the overlapped edge; under the blend_a64
// convention the neighbor's prediction receives the complement, 64 - w.
// Weights ramp up to 64 (current block only) away from the edge.
static const uint8_t obmc_mask_1[1] = { 64 };
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };

DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64,
};
765 
av1_get_obmc_mask(int length)766 const uint8_t *av1_get_obmc_mask(int length) {
767   switch (length) {
768     case 1: return obmc_mask_1;
769     case 2: return obmc_mask_2;
770     case 4: return obmc_mask_4;
771     case 8: return obmc_mask_8;
772     case 16: return obmc_mask_16;
773     case 32: return obmc_mask_32;
774     case 64: return obmc_mask_64;
775     default: assert(0); return NULL;
776   }
777 }
778 
increment_int_ptr(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * mi,void * fun_ctxt,const int num_planes)779 static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
780                                      int rel_mi_col, uint8_t op_mi_size,
781                                      int dir, MB_MODE_INFO *mi, void *fun_ctxt,
782                                      const int num_planes) {
783   (void)xd;
784   (void)rel_mi_row;
785   (void)rel_mi_col;
786   (void)op_mi_size;
787   (void)dir;
788   (void)mi;
789   ++*(uint8_t *)fun_ctxt;
790   (void)num_planes;
791 }
792 
// Counts overlappable above/left neighbors of the current block into
// mbmi->overlappable_neighbors. The left scan is skipped once any above
// neighbor is found, so the stored value is not necessarily a full count —
// only its zero/nonzero state is meaningful.
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
  MB_MODE_INFO *const mbmi = xd->mi[0];
  mbmi->overlappable_neighbors = 0;
  // Blocks too small for motion variation never have overlappable neighbors.
  if (!is_motion_variation_allowed_bsize(mbmi->bsize)) return;

  foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
                                &mbmi->overlappable_neighbors);
  if (mbmi->overlappable_neighbors) return;
  foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
                               &mbmi->overlappable_neighbors);
}
806 
807 // HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
808 // block-size of current plane is smaller than 8x8, always only blend with the
809 // left neighbor(s) (skip blending with the above side).
810 #define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
811 
// Returns whether OBMC blending should be skipped for this plane in the
// given direction (dir 0 = above, 1 = left). When the plane's block size is
// below 8x8 in either dimension, blending is restricted per
// DISABLE_CHROMA_U8X8_OBMC: either skipped entirely, or limited to the left
// neighbors only (the above side, dir == 0, is skipped).
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  if (plane_bsize == BLOCK_4X4 || plane_bsize == BLOCK_8X4 ||
      plane_bsize == BLOCK_4X8) {
#if DISABLE_CHROMA_U8X8_OBMC
    return 1;
#else
    return dir == 0;
#endif
  }
  return 0;
}
831 
// Rewrites a neighboring block's mode info so its inter predictor can be
// regenerated for OBMC: the second reference is dropped and any compound
// type is reduced to a simple average.
void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
}
838 
// Context threaded through the OBMC blending callbacks: per-plane pointers
// to a neighbor's prediction buffers and their strides.
struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;   // per-plane neighbor prediction buffers
  int *adjacent_stride; // per-plane strides for `adjacent`
};
843 
build_obmc_inter_pred_above(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * above_mi,void * fun_ctxt,const int num_planes)844 static INLINE void build_obmc_inter_pred_above(
845     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
846     int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
847   (void)above_mi;
848   (void)rel_mi_row;
849   (void)dir;
850   struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
851   const BLOCK_SIZE bsize = xd->mi[0]->bsize;
852   const int overlap =
853       AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
854 
855   for (int plane = 0; plane < num_planes; ++plane) {
856     const struct macroblockd_plane *pd = &xd->plane[plane];
857     const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
858     const int bh = overlap >> pd->subsampling_y;
859     const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
860 
861     if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
862 
863     const int dst_stride = pd->dst.stride;
864     uint8_t *const dst = &pd->dst.buf[plane_col];
865     const int tmp_stride = ctxt->adjacent_stride[plane];
866     const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
867     const uint8_t *const mask = av1_get_obmc_mask(bh);
868 #if CONFIG_AV1_HIGHBITDEPTH
869     const int is_hbd = is_cur_buf_hbd(xd);
870     if (is_hbd)
871       aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
872                                  tmp_stride, mask, bw, bh, xd->bd);
873     else
874       aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
875                           mask, bw, bh);
876 #else
877     aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
878                         bw, bh);
879 #endif
880   }
881 }
882 
build_obmc_inter_pred_left(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * left_mi,void * fun_ctxt,const int num_planes)883 static INLINE void build_obmc_inter_pred_left(
884     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
885     int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
886   (void)left_mi;
887   (void)rel_mi_col;
888   (void)dir;
889   struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
890   const BLOCK_SIZE bsize = xd->mi[0]->bsize;
891   const int overlap =
892       AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
893 
894   for (int plane = 0; plane < num_planes; ++plane) {
895     const struct macroblockd_plane *pd = &xd->plane[plane];
896     const int bw = overlap >> pd->subsampling_x;
897     const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
898     const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
899 
900     if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
901 
902     const int dst_stride = pd->dst.stride;
903     uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
904     const int tmp_stride = ctxt->adjacent_stride[plane];
905     const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
906     const uint8_t *const mask = av1_get_obmc_mask(bw);
907 
908 #if CONFIG_AV1_HIGHBITDEPTH
909     const int is_hbd = is_cur_buf_hbd(xd);
910     if (is_hbd)
911       aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
912                                  tmp_stride, mask, bw, bh, xd->bd);
913     else
914       aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
915                           mask, bw, bh);
916 #else
917     aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
918                         bw, bh);
919 #endif
920   }
921 }
922 
923 // This function combines motion compensated predictions that are generated by
924 // top/left neighboring blocks' inter predictors with the regular inter
925 // prediction. We assume the original prediction (bmc) is stored in
926 // xd->plane[].dst.buf
av1_build_obmc_inter_prediction(const AV1_COMMON * cm,MACROBLOCKD * xd,uint8_t * above[MAX_MB_PLANE],int above_stride[MAX_MB_PLANE],uint8_t * left[MAX_MB_PLANE],int left_stride[MAX_MB_PLANE])927 void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
928                                      uint8_t *above[MAX_MB_PLANE],
929                                      int above_stride[MAX_MB_PLANE],
930                                      uint8_t *left[MAX_MB_PLANE],
931                                      int left_stride[MAX_MB_PLANE]) {
932   const BLOCK_SIZE bsize = xd->mi[0]->bsize;
933 
934   // handle above row
935   struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
936   foreach_overlappable_nb_above(cm, xd,
937                                 max_neighbor_obmc[mi_size_wide_log2[bsize]],
938                                 build_obmc_inter_pred_above, &ctxt_above);
939 
940   // handle left column
941   struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
942   foreach_overlappable_nb_left(cm, xd,
943                                max_neighbor_obmc[mi_size_high_log2[bsize]],
944                                build_obmc_inter_pred_left, &ctxt_left);
945 }
946 
// Points dst_buf1/dst_buf2 at the three per-plane regions of the two
// temporary OBMC buffers (xd->tmp_obmc_bufs), converting to the
// high-bitdepth pointer convention when the current buffer is HBD.
void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
                             uint8_t **dst_buf2) {
  uint8_t **const out[2] = { dst_buf1, dst_buf2 };
  if (is_cur_buf_hbd(xd)) {
    // HBD planes are MAX_SB_SQUARE uint16_t samples apart.
    const size_t plane_bytes = MAX_SB_SQUARE * sizeof(uint16_t);
    for (int buf = 0; buf < 2; ++buf) {
      for (int plane = 0; plane < 3; ++plane) {
        out[buf][plane] =
            CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[buf] + plane * plane_bytes);
      }
    }
  } else {
    for (int buf = 0; buf < 2; ++buf) {
      for (int plane = 0; plane < 3; ++plane) {
        out[buf][plane] = xd->tmp_obmc_bufs[buf] + plane * MAX_SB_SQUARE;
      }
    }
  }
}
970 
// Reconfigures xd so that the above neighbor's inter prediction can be
// rebuilt into the temporary buffers in ctxt: redirects the per-plane dst
// descriptors, installs the neighbor's reference planes and scale factors,
// and recomputes the horizontal MV clamping edges for its position.
void av1_setup_build_prediction_by_above_pred(
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
    const int num_planes) {
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
  const int above_mi_col = xd->mi_col + rel_mi_col;

  // Strip the neighbor down to a single-reference, average-compound block.
  av1_modify_neighbor_predictor_for_obmc(above_mbmi);

  // Redirect every plane's destination to the temporary prediction buffers.
  for (int plane = 0; plane < num_planes; ++plane) {
    struct macroblockd_plane *const pd = &xd->plane[plane];
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[plane],
                     ctxt->tmp_width[plane], ctxt->tmp_height[plane],
                     ctxt->tmp_stride[plane], 0, rel_mi_col, NULL,
                     pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(above_mbmi);
  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const sf =
        get_ref_scale_factors_const(ctxt->cm, frame);
    xd->block_ref_scale_factors[ref] = sf;
    if (!av1_is_valid_scale(sf))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
                         num_planes);
  }

  // Recompute the horizontal MV clamping edges for the neighbor's position.
  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
  xd->mb_to_right_edge =
      ctxt->mb_to_far_edge +
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
}
1008 
// Reconfigures xd so that the left neighbor's inter prediction can be
// rebuilt into the temporary buffers in ctxt: redirects the per-plane dst
// descriptors, installs the neighbor's reference planes and scale factors,
// and recomputes the vertical MV clamping edges for its position.
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
                                             uint8_t left_mi_height,
                                             MB_MODE_INFO *left_mbmi,
                                             struct build_prediction_ctxt *ctxt,
                                             const int num_planes) {
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
  const int left_mi_row = xd->mi_row + rel_mi_row;

  // Strip the neighbor down to a single-reference, average-compound block.
  av1_modify_neighbor_predictor_for_obmc(left_mbmi);

  // Redirect every plane's destination to the temporary prediction buffers.
  for (int plane = 0; plane < num_planes; ++plane) {
    struct macroblockd_plane *const pd = &xd->plane[plane];
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[plane],
                     ctxt->tmp_width[plane], ctxt->tmp_height[plane],
                     ctxt->tmp_stride[plane], rel_mi_row, 0, NULL,
                     pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(left_mbmi);
  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const ref_scale_factors =
        get_ref_scale_factors_const(ctxt->cm, frame);
    xd->block_ref_scale_factors[ref] = ref_scale_factors;
    if (!av1_is_valid_scale(ref_scale_factors))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
                         ref_scale_factors, num_planes);
  }

  // Recompute the vertical MV clamping edges for the neighbor's position.
  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
  xd->mb_to_bottom_edge =
      ctxt->mb_to_far_edge +
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
}
1048 
combine_interintra(INTERINTRA_MODE mode,int8_t use_wedge_interintra,int8_t wedge_index,int8_t wedge_sign,BLOCK_SIZE bsize,BLOCK_SIZE plane_bsize,uint8_t * comppred,int compstride,const uint8_t * interpred,int interstride,const uint8_t * intrapred,int intrastride)1049 static AOM_INLINE void combine_interintra(
1050     INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1051     int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1052     uint8_t *comppred, int compstride, const uint8_t *interpred,
1053     int interstride, const uint8_t *intrapred, int intrastride) {
1054   const int bw = block_size_wide[plane_bsize];
1055   const int bh = block_size_high[plane_bsize];
1056 
1057   if (use_wedge_interintra) {
1058     if (av1_is_wedge_used(bsize)) {
1059       const uint8_t *mask =
1060           av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1061       const int subw = 2 * mi_size_wide[bsize] == bw;
1062       const int subh = 2 * mi_size_high[bsize] == bh;
1063       aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
1064                          interpred, interstride, mask, block_size_wide[bsize],
1065                          bw, bh, subw, subh);
1066     }
1067     return;
1068   }
1069 
1070   const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
1071   aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
1072                      interstride, mask, bw, bw, bh, 0, 0);
1073 }
1074 
1075 #if CONFIG_AV1_HIGHBITDEPTH
combine_interintra_highbd(INTERINTRA_MODE mode,int8_t use_wedge_interintra,int8_t wedge_index,int8_t wedge_sign,BLOCK_SIZE bsize,BLOCK_SIZE plane_bsize,uint8_t * comppred8,int compstride,const uint8_t * interpred8,int interstride,const uint8_t * intrapred8,int intrastride,int bd)1076 static AOM_INLINE void combine_interintra_highbd(
1077     INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1078     int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1079     uint8_t *comppred8, int compstride, const uint8_t *interpred8,
1080     int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
1081   const int bw = block_size_wide[plane_bsize];
1082   const int bh = block_size_high[plane_bsize];
1083 
1084   if (use_wedge_interintra) {
1085     if (av1_is_wedge_used(bsize)) {
1086       const uint8_t *mask =
1087           av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1088       const int subh = 2 * mi_size_high[bsize] == bh;
1089       const int subw = 2 * mi_size_wide[bsize] == bw;
1090       aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1091                                 interpred8, interstride, mask,
1092                                 block_size_wide[bsize], bw, bh, subw, subh, bd);
1093     }
1094     return;
1095   }
1096 
1097   uint8_t mask[MAX_SB_SQUARE];
1098   build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
1099   aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1100                             interpred8, interstride, mask, bw, bw, bh, 0, 0,
1101                             bd);
1102 }
1103 #endif
1104 
// Produces the intra half of an interintra prediction for one plane into
// dst, using the reconstructed context in ctx as the prediction edge.
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
                                               MACROBLOCKD *xd,
                                               BLOCK_SIZE bsize, int plane,
                                               const BUFFER_SET *ctx,
                                               uint8_t *dst, int dst_stride) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  const PREDICTION_MODE mode =
      interintra_to_intra_mode[xd->mi[0]->interintra_mode];
  // Interintra blocks never carry angle deltas, filter-intra, or intrabc.
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
  assert(xd->mi[0]->use_intrabc == 0);
  const SequenceHeader *const seq_params = cm->seq_params;

  av1_predict_intra_block(xd, seq_params->sb_size,
                          seq_params->enable_intra_edge_filter, pd->width,
                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
}
1127 
// Blends a plane's inter and intra predictions into the plane's destination
// buffer, dispatching to the high-bitdepth path when needed.
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(
        mbmi->interintra_mode, mbmi->use_wedge_interintra,
        mbmi->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize, plane_bsize,
        pd->dst.buf, pd->dst.stride, inter_pred, inter_stride, intra_pred,
        intra_stride, xd->bd);
    return;
  }
#endif
  combine_interintra(mbmi->interintra_mode, mbmi->use_wedge_interintra,
                     mbmi->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
                     plane_bsize, pd->dst.buf, pd->dst.stride, inter_pred,
                     inter_stride, intra_pred, intra_stride);
}
1150 
1151 // build interintra_predictors for one plane
av1_build_interintra_predictor(const AV1_COMMON * cm,MACROBLOCKD * xd,uint8_t * pred,int stride,const BUFFER_SET * ctx,int plane,BLOCK_SIZE bsize)1152 void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
1153                                     uint8_t *pred, int stride,
1154                                     const BUFFER_SET *ctx, int plane,
1155                                     BLOCK_SIZE bsize) {
1156   assert(bsize < BLOCK_SIZES_ALL);
1157   if (is_cur_buf_hbd(xd)) {
1158     DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
1159     av1_build_intra_predictors_for_interintra(
1160         cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
1161         MAX_SB_SIZE);
1162     av1_combine_interintra(xd, bsize, plane, pred, stride,
1163                            CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
1164   } else {
1165     DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
1166     av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
1167                                               intrapredictor, MAX_SB_SIZE);
1168     av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
1169                            MAX_SB_SIZE);
1170   }
1171 }
1172