/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>  // abs()
#include <string.h>  // memcpy(), memset()

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/aom_scale_rtcd.h"

#include "aom/aom_integer.h"
#include "aom_dsp/blend.h"
#include "aom_ports/aom_once.h"

#include "av1/common/av1_common_int.h"
#include "av1/common/blockd.h"
#include "av1/common/mvref_common.h"
#include "av1/common/obmc.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
// Determines whether or not to create a warped prediction. Returns 1 (and
// fills in final_warp_params, if non-NULL) when a warped prediction will be
// used, and 0 otherwise.
int av1_allow_warp(const MB_MODE_INFO *const mbmi,
                   const WarpTypesAllowed *const warp_types,
                   const WarpedMotionParams *const gm_params,
                   int build_for_obmc, const struct scale_factors *const sf,
                   WarpedMotionParams *final_warp_params) {
  // Note: As per the spec, we must test the fixed point scales here, which are
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
  // have 1 << 10 precision).
  if (av1_is_scaled(sf)) return 0;

  if (final_warp_params != NULL) *final_warp_params = default_warp_params;

  if (build_for_obmc) return 0;

  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
    if (final_warp_params != NULL)
      memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
    return 1;
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
    if (final_warp_params != NULL)
      memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
    return 1;
  }

  return 0;
}
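
// Illustrative usage sketch (not part of the library; the caller variables
// are hypothetical): a caller that wants to know whether a block will use a
// warped prediction, and with which parameters, could do the following. Note
// the precedence encoded above: a valid local warp wins over a valid global
// warp.
//
//   WarpedMotionParams params;
//   if (av1_allow_warp(mbmi, &warp_types, &gm_params, /*build_for_obmc=*/0,
//                      sf, &params)) {
//     // params now holds either mbmi->wm_params or gm_params.
//   }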

void av1_init_warp_params(InterPredParams *inter_pred_params,
                          const WarpTypesAllowed *warp_types, int ref,
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
  if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8)
    return;

  if (xd->cur_frame_force_integer_mv) return;

  if (av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
                     inter_pred_params->scale_factors,
                     &inter_pred_params->warp_params)) {
    inter_pred_params->mode = WARP_PRED;
  }
}

void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride,
                              InterPredParams *inter_pred_params,
                              const SubpelParams *subpel_params) {
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
                 inter_pred_params->conv_params.dst != NULL));

  if (inter_pred_params->mode == TRANSLATION_PRED) {
#if CONFIG_AV1_HIGHBITDEPTH
    if (inter_pred_params->use_hbd_buf) {
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                             inter_pred_params->block_width,
                             inter_pred_params->block_height,
                             &inter_pred_params->conv_params,
                             inter_pred_params->interp_filter_params,
                             inter_pred_params->bit_depth);
    } else {
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                      inter_pred_params->block_width,
                      inter_pred_params->block_height,
                      &inter_pred_params->conv_params,
                      inter_pred_params->interp_filter_params);
    }
#else
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                    inter_pred_params->block_width,
                    inter_pred_params->block_height,
                    &inter_pred_params->conv_params,
                    inter_pred_params->interp_filter_params);
#endif
  }
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
  else if (inter_pred_params->mode == WARP_PRED) {
    av1_warp_plane(
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
        inter_pred_params->ref_frame_buf.width,
        inter_pred_params->ref_frame_buf.height,
        inter_pred_params->ref_frame_buf.stride, dst,
        inter_pred_params->pix_col, inter_pred_params->pix_row,
        inter_pred_params->block_width, inter_pred_params->block_height,
        dst_stride, inter_pred_params->subsampling_x,
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
  } else {
    assert(0 && "Unsupported inter_pred_params->mode");
  }
}

static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};

static AOM_INLINE void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
                                  int width) {
  if (shift >= 0) {
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
  } else {
    shift = -shift;
    memcpy(dst, src + shift, width - shift);
    memset(dst + width - shift, src[width - 1], shift);
  }
}
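
// Worked example (informal): with src = { a, b, c, d } and width = 4:
//   shift = +1 copies src into dst shifted right by one and back-fills with
//   the first sample: dst = { a, a, b, c };
//   shift = -1 copies src shifted left by one and pads with the last sample:
//   dst = { b, c, d, d }.
// Replicating the edge samples keeps the shifted wedge rows saturated at the
// mask boundaries.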

/* clang-format off */
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
};
/* clang-format on */

// [negative][direction]
DECLARE_ALIGNED(
    16, static uint8_t,
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// 4 * MAX_WEDGE_SQUARE is an easy-to-compute and fairly tight upper bound
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
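
// Sanity check of that bound (informal, assuming MAX_WEDGE_SIZE == 32, so
// MAX_WEDGE_SQUARE == 1024): the wedge-enabled sizes in
// av1_wedge_params_lookup below are 8x8, 8x16, 16x8, 16x16, 16x32, 32x16,
// 32x32, 8x32 and 32x8, whose pixel counts sum to
//   64 + 128 + 128 + 256 + 512 + 512 + 1024 + 256 + 256 = 3136 <= 4 * 1024,
// and each size stores 2 * MAX_WEDGE_TYPES masks, matching the factor of
// 2 * MAX_WEDGE_TYPES in the declaration above.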

DECLARE_ALIGNED(16, static uint8_t,
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
                                          [MAX_WEDGE_SQUARE]);

static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];

static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
    wedge_masks[BLOCK_8X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
    wedge_masks[BLOCK_8X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
    wedge_masks[BLOCK_16X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
    wedge_masks[BLOCK_16X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
    wedge_masks[BLOCK_16X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
    wedge_masks[BLOCK_32X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
    wedge_masks[BLOCK_32X32] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
    wedge_masks[BLOCK_8X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
    wedge_masks[BLOCK_32X8] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
};

static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
                                             BLOCK_SIZE sb_type) {
  const uint8_t *master;
  const int bh = block_size_high[sb_type];
  const int bw = block_size_wide[sb_type];
  const wedge_code_type *a =
      av1_wedge_params_lookup[sb_type].codebook + wedge_index;
  int woff, hoff;
  const uint8_t wsignflip =
      av1_wedge_params_lookup[sb_type].signflip[wedge_index];

  assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
  woff = (a->x_offset * bw) >> 3;
  hoff = (a->y_offset * bh) >> 3;
  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
           MASK_MASTER_SIZE / 2 - woff;
  return master;
}
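
// Worked example (informal, assuming MASK_MASTER_SIZE == 64): for BLOCK_32X32
// and the codebook entry { WEDGE_OBLIQUE27, 4, 2 } (x_offset = 4,
// y_offset = 2),
//   woff = (4 * 32) >> 3 = 16,  hoff = (2 * 32) >> 3 = 8,
// so the returned pointer selects the 32x32 window of the 64x64 master mask
// whose top-left corner is at row 64 / 2 - 8 = 24, column 64 / 2 - 16 = 16,
// i.e. the wedge boundary is shifted away from the block center by the
// requested offsets.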

const uint8_t *av1_get_compound_type_mask(
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
  (void)sb_type;
  switch (comp_data->type) {
    case COMPOUND_WEDGE:
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                          comp_data->wedge_sign, sb_type);
    default: return comp_data->seg_mask;
  }
}

static AOM_INLINE void diffwtd_mask_d16(
    uint8_t *mask, int which_inverse, int mask_base, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  int round =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
  int i, j, m, diff;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
      diff = ROUND_POWER_OF_TWO(diff, round);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}
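
// Worked example (informal, assuming DIFF_FACTOR == 16 and
// AOM_BLEND_A64_MAX_ALPHA == 64): a rounded per-pixel difference of 48 yields
//   m = clamp(38 + 48 / 16, 0, 64) = 41
// for DIFFWTD_38, i.e. roughly a 41/64 vs. 23/64 blend at that sample;
// DIFFWTD_38_INV simply stores 64 - m. The `round` term undoes the compound
// convolve scaling (2 * FILTER_BITS minus the two convolve rounding shifts,
// plus bd - 8) so that `diff` is in pixel units regardless of bit depth.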

void av1_build_compound_diffwtd_mask_d16_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
                       conv_params, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
                       conv_params, bd);
      break;
    default: assert(0);
  }
}

static AOM_INLINE void diffwtd_mask(uint8_t *mask, int which_inverse,
                                    int mask_base, const uint8_t *src0,
                                    int src0_stride, const uint8_t *src1,
                                    int src1_stride, int h, int w) {
  int i, j, m, diff;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff =
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
                                       DIFFWTD_MASK_TYPE mask_type,
                                       const uint8_t *src0, int src0_stride,
                                       const uint8_t *src1, int src1_stride,
                                       int h, int w) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
      break;
    default: assert(0);
  }
}

static AOM_FORCE_INLINE void diffwtd_mask_highbd(
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
    const unsigned int bd) {
  assert(bd >= 8);
  if (bd == 8) {
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  } else {
    const unsigned int bd_shift = bd - 8;
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  }
}

void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
      break;
    default: assert(0);
  }
}
static AOM_INLINE void init_wedge_master_masks(void) {
  int i, j;
  const int w = MASK_MASTER_SIZE;
  const int h = MASK_MASTER_SIZE;
  const int stride = MASK_MASTER_STRIDE;
  // Note: index [0] stores the master masks, and index [1] their complements.
  // Generate the prototype by shifting the master rows.
  int shift = h / 4;
  for (i = 0; i < h; i += 2) {
    shift_copy(wedge_master_oblique_even,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
               MASK_MASTER_SIZE);
    shift--;
    shift_copy(wedge_master_oblique_odd,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
               MASK_MASTER_SIZE);
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
  }

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - mskx;
    }
  }
}
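
// In short (informal summary): only the WEDGE_OBLIQUE63 and WEDGE_VERTICAL
// masters are built directly; the other directions are their transposes and
// reflections. With WEDGE_WEIGHT_BITS == 6, a weight and its complement
// always sum to 64, e.g. a master value of 53 yields 64 - 53 = 11 in the
// complement, so the pair of wedge masks always distributes full weight
// (53 + 11 = 64) between the two predictions.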

static AOM_INLINE void init_wedge_masks(void) {
  uint8_t *dst = wedge_mask_buf;
  BLOCK_SIZE bsize;
  memset(wedge_masks, 0, sizeof(wedge_masks));
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
    const int wtypes = wedge_params->wedge_types;
    if (wtypes == 0) continue;
    const uint8_t *mask;
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
    int w;
    for (w = 0; w < wtypes; ++w) {
      mask = get_wedge_mask_inplace(w, 0, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
                        bh);
      wedge_params->masks[0][w] = dst;
      dst += bw * bh;

      mask = get_wedge_mask_inplace(w, 1, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
                        bh);
      wedge_params->masks[1][w] = dst;
      dst += bw * bh;
    }
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
  }
}

/* clang-format off */
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
   8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
   4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
   2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
};
static const uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
  32, 16, 16, 16, 8, 8, 8, 4,
   4,  4,  2,  2, 2, 1, 1, 1,
   8,  8,  4,  4, 2, 2
};
/* clang-format on */
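
// Informal note on the tables above: ii_weights1d holds one decay curve
// sampled at MAX_SB_SIZE (128) positions, and ii_size_scales stretches it to
// the block's own size. E.g. BLOCK_8X8 uses size_scale 16, so row i of an
// II_V_PRED mask reads ii_weights1d[i * 16], sampling every 16th entry of the
// same 128-tap curve, while a 128-wide block would use scale 1 and read it
// densely.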

static AOM_INLINE void build_smooth_interintra_mask(uint8_t *mask, int stride,
                                                    BLOCK_SIZE plane_bsize,
                                                    INTERINTRA_MODE mode) {
  int i, j;
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];
  const int size_scale = ii_size_scales[plane_bsize];

  switch (mode) {
    case II_V_PRED:
      for (i = 0; i < bh; ++i) {
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
        mask += stride;
      }
      break;

    case II_H_PRED:
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
        mask += stride;
      }
      break;

    case II_SMOOTH_PRED:
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j)
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
        mask += stride;
      }
      break;

    case II_DC_PRED:
    default:
      for (i = 0; i < bh; ++i) {
        memset(mask, 32, bw * sizeof(mask[0]));
        mask += stride;
      }
      break;
  }
}

static AOM_INLINE void init_smooth_interintra_masks(void) {
  for (int m = 0; m < INTERINTRA_MODES; ++m) {
    for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
      const int bw = block_size_wide[bs];
      const int bh = block_size_high[bs];
      if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
      build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
                                   m);
    }
  }
}

// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
static void init_all_wedge_masks(void) {
  init_wedge_master_masks();
  init_wedge_masks();
  init_smooth_interintra_masks();
}

void av1_init_wedge_masks(void) { aom_once(init_all_wedge_masks); }

static AOM_INLINE void build_masked_compound_no_round(
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
    const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, InterPredParams *inter_pred_params) {
  const int ssy = inter_pred_params->subsampling_y;
  const int ssx = inter_pred_params->subsampling_x;
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  const int mask_stride = block_size_wide[sb_type];
#if CONFIG_AV1_HIGHBITDEPTH
  if (inter_pred_params->use_hbd_buf) {
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                  src1_stride, mask, mask_stride, w, h, ssx,
                                  ssy, &inter_pred_params->conv_params,
                                  inter_pred_params->bit_depth);
  } else {
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
                                 &inter_pred_params->conv_params);
  }
#else
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
                               &inter_pred_params->conv_params);
#endif
}

void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
                                     uint8_t *dst, int dst_stride,
                                     InterPredParams *inter_pred_params,
                                     const SubpelParams *subpel_params) {
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;

  // We call av1_make_inter_predictor() to generate the prediction for the
  // second reference into a temporary buffer, and then blend that prediction
  // with the one from the other reference.
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
  uint8_t *tmp_dst =
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;

  const int tmp_buf_stride = MAX_SB_SIZE;
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
  inter_pred_params->conv_params.dst = tmp_buf16;
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
  assert(inter_pred_params->conv_params.do_average == 0);

  // This will generate a prediction in tmp_buf for the second reference.
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
                           inter_pred_params, subpel_params);

  if (!inter_pred_params->conv_params.plane &&
      comp_data->type == COMPOUND_DIFFWTD) {
    av1_build_compound_diffwtd_mask_d16(
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
        inter_pred_params->block_width, &inter_pred_params->conv_params,
        inter_pred_params->bit_depth);
  }
  build_masked_compound_no_round(
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
      comp_data, sb_type, inter_pred_params->block_height,
      inter_pred_params->block_width, inter_pred_params);
}

void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
                                     const MB_MODE_INFO *mbmi, int *fwd_offset,
                                     int *bck_offset,
                                     int *use_dist_wtd_comp_avg,
                                     int is_compound) {
  assert(fwd_offset != NULL && bck_offset != NULL);
  if (!is_compound || mbmi->compound_idx) {
    *fwd_offset = 8;
    *bck_offset = 8;
    *use_dist_wtd_comp_avg = 0;
    return;
  }

  *use_dist_wtd_comp_avg = 1;
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
  const int cur_frame_index = cm->cur_frame->order_hint;
  int bck_frame_index = 0, fwd_frame_index = 0;

  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;

  int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
                                       fwd_frame_index, cur_frame_index)),
                 0, MAX_FRAME_DISTANCE);
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
                                       cur_frame_index, bck_frame_index)),
                 0, MAX_FRAME_DISTANCE);

  const int order = d0 <= d1;

  if (d0 == 0 || d1 == 0) {
    *fwd_offset = quant_dist_lookup_table[3][order];
    *bck_offset = quant_dist_lookup_table[3][1 - order];
    return;
  }

  int i;
  for (i = 0; i < 3; ++i) {
    int c0 = quant_dist_weight[i][order];
    int c1 = quant_dist_weight[i][!order];
    int d0_c0 = d0 * c0;
    int d1_c1 = d1 * c1;
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
  }

  *fwd_offset = quant_dist_lookup_table[i][order];
  *bck_offset = quant_dist_lookup_table[i][1 - order];
}
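
// Worked trace (informal, assuming the usual AV1 tables
// quant_dist_weight = {{2, 3}, {2, 5}, {2, 7}} and
// quant_dist_lookup_table = {{9, 7}, {11, 5}, {12, 4}, {13, 3}}): for
// d0 = 1 and d1 = 2, order = 1; the loop breaks at i = 1 because
// d0 * quant_dist_weight[1][1] = 5 > d1 * quant_dist_weight[1][0] = 4,
// giving *fwd_offset = 5 and *bck_offset = 11. Each pair of offsets sums to
// 16 = 1 << DIST_PRECISION_BITS, so the weighted compound average stays
// normalized.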

void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const int plane_start, const int plane_end) {
  // We use AOMMIN(plane_end, MAX_MB_PLANE) instead of plane_end to quiet
  // the static analysis warnings.
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
    struct macroblockd_plane *const pd = &planes[i];
    const int is_uv = i > 0;
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
  }
}

void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *sf,
                          const int num_planes) {
  if (src != NULL) {
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
    // the static analysis warnings.
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
      struct macroblockd_plane *const pd = &xd->plane[i];
      const int is_uv = i > 0;
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[i],
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
                       src->strides[is_uv], mi_row, mi_col, sf,
                       pd->subsampling_x, pd->subsampling_y);
    }
  }
}

// obmc_mask_N[overlap_position]
static const uint8_t obmc_mask_1[1] = { 64 };
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };

DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};

const uint8_t *av1_get_obmc_mask(int length) {
  switch (length) {
    case 1: return obmc_mask_1;
    case 2: return obmc_mask_2;
    case 4: return obmc_mask_4;
    case 8: return obmc_mask_8;
    case 16: return obmc_mask_16;
    case 32: return obmc_mask_32;
    case 64: return obmc_mask_64;
    default: assert(0); return NULL;
  }
}
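
// Informal note on how these weights are applied: the a64 blend kernels below
// compute dst = (m * dst + (64 - m) * neighbor + 32) >> 6, where m comes from
// the tables above and is indexed by distance from the shared block edge.
// E.g. for an overlap of 4 rows, the row touching the edge uses m = 39
// (keeping 39/64 of the current prediction) and the farthest row uses m = 64,
// where the neighbor's prediction no longer contributes.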

static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
                                     int rel_mi_col, uint8_t op_mi_size,
                                     int dir, MB_MODE_INFO *mi, void *fun_ctxt,
                                     const int num_planes) {
  (void)xd;
  (void)rel_mi_row;
  (void)rel_mi_col;
  (void)op_mi_size;
  (void)dir;
  (void)mi;
  ++*(uint8_t *)fun_ctxt;
  (void)num_planes;
}

void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
  MB_MODE_INFO *mbmi = xd->mi[0];

  mbmi->overlappable_neighbors = 0;

  if (!is_motion_variation_allowed_bsize(mbmi->bsize)) return;

  foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
                                &mbmi->overlappable_neighbors);
  if (mbmi->overlappable_neighbors) return;
  foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
                               &mbmi->overlappable_neighbors);
}
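
// Note (editorial): because of the early return above, overlappable_neighbors
// is not an exact above-plus-left total; once any above neighbor is found,
// the left column is not scanned. The value is effectively used as a boolean
// ("is there at least one overlappable neighbor?"), which is why the scan can
// stop early.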

// Hardware does not support < 4x4 prediction. To limit the bandwidth
// requirement, if the block size of the current plane is smaller than 8x8,
// always only blend with the left neighbor(s) (skip blending with the above
// side).
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable

int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE bsize_plane =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  switch (bsize_plane) {
#if DISABLE_CHROMA_U8X8_OBMC
    case BLOCK_4X4:
    case BLOCK_8X4:
    case BLOCK_4X8: return 1;
#else
    case BLOCK_4X4:
    case BLOCK_8X4:
    case BLOCK_4X8: return dir == 0;
#endif
    default: return 0;
  }
}
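
// Worked example (informal): for a BLOCK_8X8 luma block with 4:2:0 chroma
// (subsampling_x = subsampling_y = 1), the chroma plane block is BLOCK_4X4,
// so with the default DISABLE_CHROMA_U8X8_OBMC == 0 this returns 1 for
// dir == 0 (skip the above-neighbor blend on chroma) and 0 for dir == 1
// (still blend with the left neighbor).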

void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
}

struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;
  int *adjacent_stride;
};

static INLINE void build_obmc_inter_pred_above(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
  (void)above_mi;
  (void)rel_mi_row;
  (void)dir;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  const int overlap =
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
    const int bh = overlap >> pd->subsampling_y;
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_col];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
    const uint8_t *const mask = av1_get_obmc_mask(bh);
#if CONFIG_AV1_HIGHBITDEPTH
    const int is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd)
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
#else
    aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
                        bw, bh);
#endif
  }
}

static INLINE void build_obmc_inter_pred_left(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
  (void)left_mi;
  (void)rel_mi_col;
  (void)dir;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  const int overlap =
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = overlap >> pd->subsampling_x;
    const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
    const uint8_t *const mask = av1_get_obmc_mask(bw);

#if CONFIG_AV1_HIGHBITDEPTH
    const int is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd)
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
#else
    aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
                        bw, bh);
#endif
  }
}

// This function combines the motion-compensated predictions generated by the
// top/left neighboring blocks' inter predictors with the regular inter
// prediction. We assume the original prediction, i.e. the block-based motion
// compensation (bmc), is stored in xd->plane[].dst.buf.
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                     uint8_t *above[MAX_MB_PLANE],
                                     int above_stride[MAX_MB_PLANE],
                                     uint8_t *left[MAX_MB_PLANE],
                                     int left_stride[MAX_MB_PLANE]) {
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;

  // handle above row
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
  foreach_overlappable_nb_above(cm, xd,
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                build_obmc_inter_pred_above, &ctxt_above);

  // handle left column
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
  foreach_overlappable_nb_left(cm, xd,
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
                               build_obmc_inter_pred_left, &ctxt_left);
}

void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
                             uint8_t **dst_buf2) {
  if (is_cur_buf_hbd(xd)) {
    int len = sizeof(uint16_t);
    dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
    dst_buf1[1] =
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * len);
    dst_buf1[2] =
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2 * len);
    dst_buf2[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1]);
    dst_buf2[1] =
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * len);
    dst_buf2[2] =
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len);
  } else {
    dst_buf1[0] = xd->tmp_obmc_bufs[0];
    dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE;
    dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
    dst_buf2[0] = xd->tmp_obmc_bufs[1];
    dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE;
    dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
  }
}

void av1_setup_build_prediction_by_above_pred(
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
    const int num_planes) {
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
  const int above_mi_col = xd->mi_col + rel_mi_col;

  av1_modify_neighbor_predictor_for_obmc(above_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(above_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const sf =
        get_ref_scale_factors_const(ctxt->cm, frame);
    xd->block_ref_scale_factors[ref] = sf;
    if (!av1_is_valid_scale(sf))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
                         num_planes);
  }

  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
  xd->mb_to_right_edge =
      ctxt->mb_to_far_edge +
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
}

void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
                                             uint8_t left_mi_height,
                                             MB_MODE_INFO *left_mbmi,
                                             struct build_prediction_ctxt *ctxt,
                                             const int num_planes) {
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
  const int left_mi_row = xd->mi_row + rel_mi_row;

  av1_modify_neighbor_predictor_for_obmc(left_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(left_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const ref_scale_factors =
        get_ref_scale_factors_const(ctxt->cm, frame);

    xd->block_ref_scale_factors[ref] = ref_scale_factors;
    if (!av1_is_valid_scale(ref_scale_factors))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
                         ref_scale_factors, num_planes);
  }

  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
  xd->mb_to_bottom_edge =
      ctxt->mb_to_far_edge +
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
}

static AOM_INLINE void combine_interintra(
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred, int compstride, const uint8_t *interpred,
    int interstride, const uint8_t *intrapred, int intrastride) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (use_wedge_interintra) {
    if (av1_is_wedge_used(bsize)) {
      const uint8_t *mask =
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
      const int subw = 2 * mi_size_wide[bsize] == bw;
      const int subh = 2 * mi_size_high[bsize] == bh;
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
                         interpred, interstride, mask, block_size_wide[bsize],
                         bw, bh, subw, subh);
    }
    return;
  }

  const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
                     interstride, mask, bw, bw, bh, 0, 0);
}

#if CONFIG_AV1_HIGHBITDEPTH
static AOM_INLINE void combine_interintra_highbd(
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (use_wedge_interintra) {
    if (av1_is_wedge_used(bsize)) {
      const uint8_t *mask =
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
      const int subh = 2 * mi_size_high[bsize] == bh;
      const int subw = 2 * mi_size_wide[bsize] == bw;
      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                                interpred8, interstride, mask,
                                block_size_wide[bsize], bw, bh, subw, subh, bd);
    }
    return;
  }

  uint8_t mask[MAX_SB_SQUARE];
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
                            bd);
}
#endif

void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
                                               MACROBLOCKD *xd,
                                               BLOCK_SIZE bsize, int plane,
                                               const BUFFER_SET *ctx,
                                               uint8_t *dst, int dst_stride) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
  assert(xd->mi[0]->use_intrabc == 0);
  const SequenceHeader *seq_params = cm->seq_params;

  av1_predict_intra_block(xd, seq_params->sb_size,
                          seq_params->enable_intra_edge_filter, pd->width,
                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
}

void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(
        xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
        xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
        plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
        inter_pred, inter_stride, intra_pred, intra_stride, xd->bd);
    return;
  }
#endif
  combine_interintra(
      xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
      xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
      plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
      inter_pred, inter_stride, intra_pred, intra_stride);
}

// Build the inter-intra predictor for one plane.
void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    uint8_t *pred, int stride,
                                    const BUFFER_SET *ctx, int plane,
                                    BLOCK_SIZE bsize) {
  assert(bsize < BLOCK_SIZES_ALL);
  if (is_cur_buf_hbd(xd)) {
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
    av1_build_intra_predictors_for_interintra(
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
        MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride,
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
  } else {
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
                                              intrapredictor, MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
                           MAX_SB_SIZE);
  }
}