/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <stdio.h>
#include <limits.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/aom_scale_rtcd.h"

#include "aom/aom_integer.h"
#include "aom_dsp/blend.h"

#include "av1/common/blockd.h"
#include "av1/common/mvref_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/obmc.h"

#define USE_PRECOMPUTED_WEDGE_MASK 1
#define USE_PRECOMPUTED_WEDGE_SIGN 1

// This function determines whether to create a warped prediction and, if so,
// which warp parameters to use (local warp takes precedence over global).
int av1_allow_warp(const MB_MODE_INFO *const mbmi,
                   const WarpTypesAllowed *const warp_types,
                   const WarpedMotionParams *const gm_params,
                   int build_for_obmc, const struct scale_factors *const sf,
                   WarpedMotionParams *final_warp_params) {
  // Note: As per the spec, we must test the fixed point scales here, which are
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
  // have 1 << 10 precision).
  if (av1_is_scaled(sf)) return 0;

  if (final_warp_params != NULL) *final_warp_params = default_warp_params;

  if (build_for_obmc) return 0;

  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
    if (final_warp_params != NULL)
      memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
    return 1;
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
    if (final_warp_params != NULL)
      memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
    return 1;
  }

  return 0;
}

void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride, const SubpelParams *subpel_params,
                              const struct scale_factors *sf, int w, int h,
                              ConvolveParams *conv_params,
                              InterpFilters interp_filters,
                              const WarpTypesAllowed *warp_types, int p_col,
                              int p_row, int plane, int ref,
                              const MB_MODE_INFO *mi, int build_for_obmc,
                              const MACROBLOCKD *xd, int can_use_previous) {
  // Make sure the selected motion mode is valid for this configuration
  assert_motion_mode_valid(mi->motion_mode, xd->global_motion, xd, mi,
                           can_use_previous);
  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));

  WarpedMotionParams final_warp_params;
  const int do_warp =
      (w >= 8 && h >= 8 &&
       av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]],
                      build_for_obmc, sf, &final_warp_params));
  const int is_intrabc = mi->use_intrabc;
  assert(IMPLIES(is_intrabc, !do_warp));

  if (do_warp && xd->cur_frame_force_integer_mv == 0) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const struct buf_2d *const pre_buf = &pd->pre[ref];
    av1_warp_plane(&final_warp_params, is_cur_buf_hbd(xd), xd->bd,
                   pre_buf->buf0, pre_buf->width, pre_buf->height,
                   pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
                   pd->subsampling_x, pd->subsampling_y, conv_params);
  } else if (is_cur_buf_hbd(xd)) {
    highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf,
                           w, h, conv_params, interp_filters, is_intrabc,
                           xd->bd);
  } else {
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf, w, h,
                    conv_params, interp_filters, is_intrabc);
  }
}

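// Precomputed rows of the oblique and vertical master wedge masks: each is a
// smoothed ramp from 0 to 64 (the non-precomputed path in
// init_wedge_master_masks() builds the same shape from a tanh). The even and
// odd oblique rows are two slightly offset ramps; shifted copies of them are
// interleaved to form the oblique edge.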
#if USE_PRECOMPUTED_WEDGE_MASK
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};

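// Copy `width` bytes from src to dst, shifted by `shift` positions (to the
// right if positive, to the left if negative), replicating the edge value of
// src into the bytes that the shift leaves uncovered.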
static void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
                       int width) {
  if (shift >= 0) {
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
  } else {
    shift = -shift;
    memcpy(dst, src + shift, width - shift);
    memset(dst + width - shift, src[width - 1], shift);
  }
}
#endif  // USE_PRECOMPUTED_WEDGE_MASK

#if USE_PRECOMPUTED_WEDGE_SIGN
/* clang-format off */
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
};
/* clang-format on */
#else
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]);
#endif  // USE_PRECOMPUTED_WEDGE_SIGN

// [negative][direction]
DECLARE_ALIGNED(
    16, static uint8_t,
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// 4 * MAX_WEDGE_SQUARE is an easy-to-compute and fairly tight upper bound
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);

static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
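// wedge_masks[bsize][neg][wedge_index] points at a contiguous bw * bh copy
// of the corresponding wedge mask inside wedge_mask_buf; both are filled in
// by init_wedge_masks().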

static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

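// Wedge parameters per block size: the number of wedge bits (0 disables
// wedge prediction for that size), the codebook selected by aspect ratio
// (hgtw: height > width, hltw: height < width, heqw: height == width), the
// per-type sign flips, and the per-type contiguous masks.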
const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL] = {
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
    wedge_masks[BLOCK_8X8] },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
    wedge_masks[BLOCK_8X16] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
    wedge_masks[BLOCK_16X8] },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
    wedge_masks[BLOCK_16X16] },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
    wedge_masks[BLOCK_16X32] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
    wedge_masks[BLOCK_32X16] },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
    wedge_masks[BLOCK_32X32] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
    wedge_masks[BLOCK_8X32] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
    wedge_masks[BLOCK_32X8] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
};

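// Return a pointer into the master mask, offset so that the wedge boundary
// passes through the point (bw * x_offset / 8, bh * y_offset / 8) of the
// block; the caller reads a bw x bh window with stride MASK_MASTER_STRIDE.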
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
                                             BLOCK_SIZE sb_type) {
  const uint8_t *master;
  const int bh = block_size_high[sb_type];
  const int bw = block_size_wide[sb_type];
  const wedge_code_type *a =
      wedge_params_lookup[sb_type].codebook + wedge_index;
  int woff, hoff;
  const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index];

  assert(wedge_index >= 0 &&
         wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
  woff = (a->x_offset * bw) >> 3;
  hoff = (a->y_offset * bh) >> 3;
  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
           MASK_MASTER_SIZE / 2 - woff;
  return master;
}

const uint8_t *av1_get_compound_type_mask(
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
  assert(is_masked_compound_type(comp_data->type));
  (void)sb_type;
  switch (comp_data->type) {
    case COMPOUND_WEDGE:
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                          comp_data->wedge_sign, sb_type);
    case COMPOUND_DIFFWTD: return comp_data->seg_mask;
    default: assert(0); return NULL;
  }
}

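// Compute a difference-weighted mask from two predictions in compound (d16)
// precision: m = clamp(mask_base + diff / DIFF_FACTOR, 0,
// AOM_BLEND_A64_MAX_ALPHA), where diff is |src0 - src1| rounded back down to
// pixel scale. which_inverse selects the complementary mask
// (AOM_BLEND_A64_MAX_ALPHA - m).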
static void diffwtd_mask_d16(uint8_t *mask, int which_inverse, int mask_base,
                             const CONV_BUF_TYPE *src0, int src0_stride,
                             const CONV_BUF_TYPE *src1, int src1_stride, int h,
                             int w, ConvolveParams *conv_params, int bd) {
  int round =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
  int i, j, m, diff;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
      diff = ROUND_POWER_OF_TWO(diff, round);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

void av1_build_compound_diffwtd_mask_d16_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
                       conv_params, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
                       conv_params, bd);
      break;
    default: assert(0);
  }
}

static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
                         const uint8_t *src0, int src0_stride,
                         const uint8_t *src1, int src1_stride, int h, int w) {
  int i, j, m, diff;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff =
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
                                       DIFFWTD_MASK_TYPE mask_type,
                                       const uint8_t *src0, int src0_stride,
                                       const uint8_t *src1, int src1_stride,
                                       int h, int w) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
      break;
    default: assert(0);
  }
}

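// Same as diffwtd_mask(), but for 16-bit pixels: the absolute difference is
// scaled down by (bd - 8) before the DIFF_FACTOR quantization, so the mask
// values are independent of bit depth. The bd == 8 and which_inverse cases
// are split into separate loops to keep the inner loops branch-free.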
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
    const unsigned int bd) {
  assert(bd >= 8);
  if (bd == 8) {
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  } else {
    const unsigned int bd_shift = bd - 8;
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  }
}

void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
      break;
    default: assert(0);
  }
}

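// Build the oblique and vertical master masks at MASK_MASTER_SIZE
// resolution, either from the precomputed rows above or directly from a tanh
// ramp; the remaining wedge directions and the complement set (index [1])
// are then derived from the masters by transposition and reflection.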
static void init_wedge_master_masks() {
  int i, j;
  const int w = MASK_MASTER_SIZE;
  const int h = MASK_MASTER_SIZE;
  const int stride = MASK_MASTER_STRIDE;
  // Note: index [0] stores the master masks, and index [1] their complements.
#if USE_PRECOMPUTED_WEDGE_MASK
  // Generate prototype by shifting the masters
  int shift = h / 4;
  for (i = 0; i < h; i += 2) {
    shift_copy(wedge_master_oblique_even,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
               MASK_MASTER_SIZE);
    shift--;
    shift_copy(wedge_master_oblique_odd,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
               MASK_MASTER_SIZE);
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
  }
#else
  static const double smoother_param = 2.85;
  const int a[2] = { 2, 1 };
  const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
  for (i = 0; i < h; i++) {
    for (j = 0; j < w; ++j) {
      int x = (2 * j + 1 - w);
      int y = (2 * i + 1 - h);
      double d = (a[0] * x + a[1] * y) / asqrt;
      const int msk = (int)rint((1.0 + tanh(d / smoother_param)) * 32);
      wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j] = msk;
      const int mskx = (int)rint((1.0 + tanh(x / smoother_param)) * 32);
      wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j] = mskx;
    }
  }
#endif  // USE_PRECOMPUTED_WEDGE_MASK
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - mskx;
    }
  }
}

#if !USE_PRECOMPUTED_WEDGE_SIGN
// If the signs of the wedges for the various block sizes are inconsistent,
// flip the sign flag. Do this only once for every wedge codebook.
static void init_wedge_signs() {
  BLOCK_SIZE sb_type;
  memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
  for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES_ALL; ++sb_type) {
    const int bw = block_size_wide[sb_type];
    const int bh = block_size_high[sb_type];
    const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
    const int wbits = wedge_params.bits;
    const int wtypes = 1 << wbits;
    int i, w;
    if (wbits) {
      for (w = 0; w < wtypes; ++w) {
        // Get the mask master, i.e. index [0]
        const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
        int avg = 0;
        for (i = 0; i < bw; ++i) avg += mask[i];
        for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE];
        avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1);
        // Default sign of this wedge is 1 if the average < 32, 0 otherwise.
        // If default sign is 1:
        //   If sign requested is 0, we need to flip the sign and return
        //   the complement i.e. index [1] instead. If sign requested is 1
        //   we need to flip the sign and return index [0] instead.
        // If default sign is 0:
        //   If sign requested is 0, we need to return index [0] the master
        //   if sign requested is 1, we need to return the complement index [1]
        //   instead.
        wedge_params.signflip[w] = (avg < 32);
      }
    }
  }
}
#endif  // !USE_PRECOMPUTED_WEDGE_SIGN

static void init_wedge_masks() {
  uint8_t *dst = wedge_mask_buf;
  BLOCK_SIZE bsize;
  memset(wedge_masks, 0, sizeof(wedge_masks));
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const uint8_t *mask;
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
    const wedge_params_type *wedge_params = &wedge_params_lookup[bsize];
    const int wbits = wedge_params->bits;
    const int wtypes = 1 << wbits;
    int w;
    if (wbits == 0) continue;
    for (w = 0; w < wtypes; ++w) {
      mask = get_wedge_mask_inplace(w, 0, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
                        bh);
      wedge_params->masks[0][w] = dst;
      dst += bw * bh;

      mask = get_wedge_mask_inplace(w, 1, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
                        bh);
      wedge_params->masks[1][w] = dst;
      dst += bw * bh;
    }
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
  }
}

// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
void av1_init_wedge_masks() {
  init_wedge_master_masks();
#if !USE_PRECOMPUTED_WEDGE_SIGN
  init_wedge_signs();
#endif  // !USE_PRECOMPUTED_WEDGE_SIGN
  init_wedge_masks();
}

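// Blend two predictions held in compound (d16) precision using the
// compound-type mask; the blend rounds the result back to pixel precision.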
static void build_masked_compound_no_round(
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
    const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, ConvolveParams *conv_params, MACROBLOCKD *xd) {
  // Derive subsampling from h and w passed in. May be refactored to
  // pass in subsampling factors directly.
  const int subh = (2 << mi_size_high_log2[sb_type]) == h;
  const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  if (is_cur_buf_hbd(xd)) {
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                  src1_stride, mask, block_size_wide[sb_type],
                                  w, h, subw, subh, conv_params, xd->bd);
  } else {
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                 src1_stride, mask, block_size_wide[sb_type], w,
                                 h, subw, subh, conv_params);
  }
}

void av1_make_masked_inter_predictor(
    const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride,
    const SubpelParams *subpel_params, const struct scale_factors *sf, int w,
    int h, ConvolveParams *conv_params, InterpFilters interp_filters, int plane,
    const WarpTypesAllowed *warp_types, int p_col, int p_row, int ref,
    MACROBLOCKD *xd, int can_use_previous) {
  MB_MODE_INFO *mi = xd->mi[0];
  (void)dst;
  (void)dst_stride;
  mi->interinter_comp.seg_mask = xd->seg_mask;
  const INTERINTER_COMPOUND_DATA *comp_data = &mi->interinter_comp;

  // We're going to call av1_make_inter_predictor to generate a prediction into
  // a temporary buffer, then will blend that temporary buffer with that from
  // the other reference.
  //
#define INTER_PRED_BYTES_PER_PIXEL 2

  DECLARE_ALIGNED(32, uint8_t,
                  tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]);
#undef INTER_PRED_BYTES_PER_PIXEL
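  // tmp_buf is interpreted as CONV_BUF_TYPE (uint16_t): the prediction for
  // this reference is kept at compound intermediate precision so that it can
  // be blended against org_dst, the other reference's d16 prediction.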

  uint8_t *tmp_dst = get_buf_by_bd(xd, tmp_buf);

  const int tmp_buf_stride = MAX_SB_SIZE;
  CONV_BUF_TYPE *org_dst = conv_params->dst;
  int org_dst_stride = conv_params->dst_stride;
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
  conv_params->dst = tmp_buf16;
  conv_params->dst_stride = tmp_buf_stride;
  assert(conv_params->do_average == 0);

  // This will generate a prediction in tmp_buf for the second reference
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_params,
                           sf, w, h, conv_params, interp_filters, warp_types,
                           p_col, p_row, plane, ref, mi, 0, xd,
                           can_use_previous);

  if (!plane && comp_data->type == COMPOUND_DIFFWTD) {
    av1_build_compound_diffwtd_mask_d16(
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
        tmp_buf16, tmp_buf_stride, h, w, conv_params, xd->bd);
  }
  build_masked_compound_no_round(dst, dst_stride, org_dst, org_dst_stride,
                                 tmp_buf16, tmp_buf_stride, comp_data,
                                 mi->sb_type, h, w, conv_params, xd);
}

void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
                                     const MB_MODE_INFO *mbmi, int order_idx,
                                     int *fwd_offset, int *bck_offset,
                                     int *use_dist_wtd_comp_avg,
                                     int is_compound) {
  assert(fwd_offset != NULL && bck_offset != NULL);
  if (!is_compound || mbmi->compound_idx) {
    *use_dist_wtd_comp_avg = 0;
    return;
  }

  *use_dist_wtd_comp_avg = 1;
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
  const int cur_frame_index = cm->cur_frame->order_hint;
  int bck_frame_index = 0, fwd_frame_index = 0;

  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;

  int d0 = clamp(abs(get_relative_dist(&cm->seq_params.order_hint_info,
                                       fwd_frame_index, cur_frame_index)),
                 0, MAX_FRAME_DISTANCE);
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params.order_hint_info,
                                       cur_frame_index, bck_frame_index)),
                 0, MAX_FRAME_DISTANCE);

  const int order = d0 <= d1;

  if (d0 == 0 || d1 == 0) {
    *fwd_offset = quant_dist_lookup_table[order_idx][3][order];
    *bck_offset = quant_dist_lookup_table[order_idx][3][1 - order];
    return;
  }

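  // Scan the quantized weight pairs in order and stop at the first pair
  // whose weight ratio exceeds the frame-distance ratio; if none qualifies,
  // the final table entry (index 3, also used above when either distance is
  // zero) is selected.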
  int i;
  for (i = 0; i < 3; ++i) {
    int c0 = quant_dist_weight[i][order];
    int c1 = quant_dist_weight[i][!order];
    int d0_c0 = d0 * c0;
    int d1_c1 = d1 * c1;
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
  }

  *fwd_offset = quant_dist_lookup_table[order_idx][i][order];
  *bck_offset = quant_dist_lookup_table[order_idx][i][1 - order];
}

void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const int plane_start, const int plane_end) {
  // We use AOMMIN(plane_end, MAX_MB_PLANE) instead of plane_end to quiet
  // the static analysis warnings.
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
    struct macroblockd_plane *const pd = &planes[i];
    const int is_uv = i > 0;
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
  }
}

void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *sf,
                          const int num_planes) {
  if (src != NULL) {
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
    // the static analysis warnings.
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
      struct macroblockd_plane *const pd = &xd->plane[i];
      const int is_uv = i > 0;
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->sb_type, src->buffers[i],
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
                       src->strides[is_uv], mi_row, mi_col, sf,
                       pd->subsampling_x, pd->subsampling_y);
    }
  }
}

// obmc_mask_N[overlap_position]
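// Each entry is the blending weight (out of AOM_BLEND_A64_MAX_ALPHA == 64)
// given to the current block's own prediction at that distance from the
// shared edge; the neighbor's prediction receives the complement. The
// weights rise to 64, so the neighbor's influence fades out across the
// overlap region.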
static const uint8_t obmc_mask_1[1] = { 64 };
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };

DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};

const uint8_t *av1_get_obmc_mask(int length) {
  switch (length) {
    case 1: return obmc_mask_1;
    case 2: return obmc_mask_2;
    case 4: return obmc_mask_4;
    case 8: return obmc_mask_8;
    case 16: return obmc_mask_16;
    case 32: return obmc_mask_32;
    case 64: return obmc_mask_64;
    default: assert(0); return NULL;
  }
}

static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_rc,
                                     uint8_t mi_hw, MB_MODE_INFO *mi,
                                     void *fun_ctxt, const int num_planes) {
  (void)xd;
  (void)rel_mi_rc;
  (void)mi_hw;
  (void)mi;
  ++*(int *)fun_ctxt;
  (void)num_planes;
}

void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col) {
  MB_MODE_INFO *mbmi = xd->mi[0];

  mbmi->overlappable_neighbors[0] = 0;
  mbmi->overlappable_neighbors[1] = 0;

  if (!is_motion_variation_allowed_bsize(mbmi->sb_type)) return;

  foreach_overlappable_nb_above(cm, xd, mi_col, INT_MAX, increment_int_ptr,
                                &mbmi->overlappable_neighbors[0]);
  foreach_overlappable_nb_left(cm, xd, mi_row, INT_MAX, increment_int_ptr,
                               &mbmi->overlappable_neighbors[1]);
}

// HW does not support < 4x4 prediction. To limit the bandwidth requirement,
// if the block size of the current plane is smaller than 8x8, only blend
// with the left neighbor(s) (skip blending with the above side).
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable

int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE bsize_plane =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  switch (bsize_plane) {
#if DISABLE_CHROMA_U8X8_OBMC
    case BLOCK_4X4:
    case BLOCK_8X4:
    case BLOCK_4X8: return 1;
#else
    case BLOCK_4X4:
    case BLOCK_8X4:
    case BLOCK_4X8: return dir == 0;
#endif
    default: return 0;
  }
}

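// When a neighbor's parameters are borrowed for OBMC, reduce it to a plain
// single-reference (or simple average) prediction: drop its second reference
// and any masked compound type.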
void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
}

struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;
  int *adjacent_stride;
};

static INLINE void build_obmc_inter_pred_above(MACROBLOCKD *xd, int rel_mi_col,
                                               uint8_t above_mi_width,
                                               MB_MODE_INFO *above_mi,
                                               void *fun_ctxt,
                                               const int num_planes) {
  (void)above_mi;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  const int is_hbd = is_cur_buf_hbd(xd);
  const int overlap =
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x;
    const int bh = overlap >> pd->subsampling_y;
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_col];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
    const uint8_t *const mask = av1_get_obmc_mask(bh);

    if (is_hbd)
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
  }
}

static INLINE void build_obmc_inter_pred_left(MACROBLOCKD *xd, int rel_mi_row,
                                              uint8_t left_mi_height,
                                              MB_MODE_INFO *left_mi,
                                              void *fun_ctxt,
                                              const int num_planes) {
  (void)left_mi;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  const int overlap =
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
  const int is_hbd = is_cur_buf_hbd(xd);

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = overlap >> pd->subsampling_x;
    const int bh = (left_mi_height * MI_SIZE) >> pd->subsampling_y;
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
    const uint8_t *const mask = av1_get_obmc_mask(bw);

    if (is_hbd)
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
  }
}

// This function combines motion compensated predictions that are generated by
// top/left neighboring blocks' inter predictors with the regular inter
// prediction. We assume the original prediction (bmc) is stored in
// xd->plane[].dst.buf
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                     int mi_row, int mi_col,
                                     uint8_t *above[MAX_MB_PLANE],
                                     int above_stride[MAX_MB_PLANE],
                                     uint8_t *left[MAX_MB_PLANE],
                                     int left_stride[MAX_MB_PLANE]) {
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;

  // handle above row
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
  foreach_overlappable_nb_above(cm, xd, mi_col,
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                build_obmc_inter_pred_above, &ctxt_above);

  // handle left column
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
  foreach_overlappable_nb_left(cm, xd, mi_row,
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
                               build_obmc_inter_pred_left, &ctxt_left);
}

void av1_setup_build_prediction_by_above_pred(
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
    const int num_planes) {
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
  const int above_mi_col = ctxt->mi_col + rel_mi_col;

  av1_modify_neighbor_predictor_for_obmc(above_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(above_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const sf =
        get_ref_scale_factors_const(ctxt->cm, frame);
    xd->block_ref_scale_factors[ref] = sf;
    if (!av1_is_valid_scale(sf))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, ctxt->mi_row, above_mi_col, sf,
                         num_planes);
  }

  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
  xd->mb_to_right_edge = ctxt->mb_to_far_edge +
                         (xd->n4_w - rel_mi_col - above_mi_width) * MI_SIZE * 8;
}

void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
                                             uint8_t left_mi_height,
                                             MB_MODE_INFO *left_mbmi,
                                             struct build_prediction_ctxt *ctxt,
                                             const int num_planes) {
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type);
  const int left_mi_row = ctxt->mi_row + rel_mi_row;

  av1_modify_neighbor_predictor_for_obmc(left_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(left_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const ref_scale_factors =
        get_ref_scale_factors_const(ctxt->cm, frame);

    xd->block_ref_scale_factors[ref] = ref_scale_factors;
    if (!av1_is_valid_scale(ref_scale_factors))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, ctxt->mi_col,
                         ref_scale_factors, num_planes);
  }

  xd->mb_to_top_edge = 8 * MI_SIZE * (-left_mi_row);
  xd->mb_to_bottom_edge =
      ctxt->mb_to_far_edge +
      (xd->n4_h - rel_mi_row - left_mi_height) * MI_SIZE * 8;
}

/* clang-format off */
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
   8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
   4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
   2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
};
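// ii_size_scales[] stretches ii_weights1d (sized for MAX_SB_SIZE) to the
// block: the scale equals MAX_SB_SIZE / max(block width, block height), so
// indexing by i * size_scale spans the full ramp for every block size.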
static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
    32, 16, 16, 16, 8, 8, 8, 4,
    4,  4,  2,  2,  2, 1, 1, 1,
    8,  8,  4,  4,  2, 2
};
/* clang-format on */

static void build_smooth_interintra_mask(uint8_t *mask, int stride,
                                         BLOCK_SIZE plane_bsize,
                                         INTERINTRA_MODE mode) {
  int i, j;
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];
  const int size_scale = ii_size_scales[plane_bsize];

  switch (mode) {
    case II_V_PRED:
      for (i = 0; i < bh; ++i) {
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
        mask += stride;
      }
      break;

    case II_H_PRED:
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
        mask += stride;
      }
      break;

    case II_SMOOTH_PRED:
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j)
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
        mask += stride;
      }
      break;

    case II_DC_PRED:
    default:
      for (i = 0; i < bh; ++i) {
        memset(mask, 32, bw * sizeof(mask[0]));
        mask += stride;
      }
      break;
  }
}

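// Blend the inter and intra predictions: with the signaled wedge mask when
// wedge interintra is used, otherwise with a smooth mask whose weights favor
// the intra prediction near the edge(s) it was predicted from.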
static void combine_interintra(INTERINTRA_MODE mode,
                               int8_t use_wedge_interintra, int wedge_index,
                               int wedge_sign, BLOCK_SIZE bsize,
                               BLOCK_SIZE plane_bsize, uint8_t *comppred,
                               int compstride, const uint8_t *interpred,
                               int interstride, const uint8_t *intrapred,
                               int intrastride) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (use_wedge_interintra) {
    if (is_interintra_wedge_used(bsize)) {
      const uint8_t *mask =
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
      const int subw = 2 * mi_size_wide[bsize] == bw;
      const int subh = 2 * mi_size_high[bsize] == bh;
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
                         interpred, interstride, mask, block_size_wide[bsize],
                         bw, bh, subw, subh);
    }
    return;
  }

  uint8_t mask[MAX_SB_SQUARE];
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
                     interstride, mask, bw, bw, bh, 0, 0);
}

static void combine_interintra_highbd(
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int wedge_index,
    int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (use_wedge_interintra) {
    if (is_interintra_wedge_used(bsize)) {
      const uint8_t *mask =
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
      const int subh = 2 * mi_size_high[bsize] == bh;
      const int subw = 2 * mi_size_wide[bsize] == bw;
      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                                interpred8, interstride, mask,
                                block_size_wide[bsize], bw, bh, subw, subh, bd);
    }
    return;
  }

  uint8_t mask[MAX_SB_SQUARE];
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
                            bd);
}

void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
                                               MACROBLOCKD *xd,
                                               BLOCK_SIZE bsize, int plane,
                                               const BUFFER_SET *ctx,
                                               uint8_t *dst, int dst_stride) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
  assert(xd->mi[0]->use_intrabc == 0);

  av1_predict_intra_block(cm, xd, pd->width, pd->height,
                          max_txsize_rect_lookup[plane_bsize], mode, 0, 0,
                          FILTER_INTRA_MODES, ctx->plane[plane],
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
}

void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(
        xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
        xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
        bsize, plane_bsize, xd->plane[plane].dst.buf,
        xd->plane[plane].dst.stride, inter_pred, inter_stride, intra_pred,
        intra_stride, xd->bd);
    return;
  }
  combine_interintra(
      xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
      xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
      bsize, plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
      inter_pred, inter_stride, intra_pred, intra_stride);
}

// build interintra_predictors for one plane
void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                         uint8_t *pred, int stride,
                                         const BUFFER_SET *ctx, int plane,
                                         BLOCK_SIZE bsize) {
  if (is_cur_buf_hbd(xd)) {
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
    av1_build_intra_predictors_for_interintra(
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
        MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride,
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
  } else {
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
                                              intrapredictor, MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
                           MAX_SB_SIZE);
  }
}

void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                          uint8_t *upred, uint8_t *vpred,
                                          int ustride, int vstride,
                                          const BUFFER_SET *ctx,
                                          BLOCK_SIZE bsize) {
  av1_build_interintra_predictors_sbp(cm, xd, upred, ustride, ctx, 1, bsize);
  av1_build_interintra_predictors_sbp(cm, xd, vpred, vstride, ctx, 2, bsize);
}
