/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <stdio.h>
#include <limits.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/aom_scale_rtcd.h"

#include "aom/aom_integer.h"
#include "aom_dsp/blend.h"

#include "av1/common/av1_common_int.h"
#include "av1/common/blockd.h"
#include "av1/common/mvref_common.h"
#include "av1/common/obmc.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"

// Determines whether a warped prediction should be built for this block.
// Returns 1 if so, and sets *final_warp_params (when non-NULL) to the warp
// parameters to use; otherwise returns 0.
int av1_allow_warp(const MB_MODE_INFO *const mbmi,
                   const WarpTypesAllowed *const warp_types,
                   const WarpedMotionParams *const gm_params,
                   int build_for_obmc, const struct scale_factors *const sf,
                   WarpedMotionParams *final_warp_params) {
  // Note: As per the spec, we must test the fixed point scales here, which are
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
  // have 1 << 10 precision).
  if (av1_is_scaled(sf)) return 0;

  if (final_warp_params != NULL) *final_warp_params = default_warp_params;

  if (build_for_obmc) return 0;

  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
    if (final_warp_params != NULL)
      memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
    return 1;
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
    if (final_warp_params != NULL)
      memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
    return 1;
  }

  return 0;
}

void av1_init_inter_params(InterPredParams *inter_pred_params, int block_width,
                           int block_height, int pix_row, int pix_col,
                           int subsampling_x, int subsampling_y, int bit_depth,
                           int use_hbd_buf, int is_intrabc,
                           const struct scale_factors *sf,
                           const struct buf_2d *ref_buf,
                           int_interpfilters interp_filters) {
  inter_pred_params->block_width = block_width;
  inter_pred_params->block_height = block_height;
  inter_pred_params->pix_row = pix_row;
  inter_pred_params->pix_col = pix_col;
  inter_pred_params->subsampling_x = subsampling_x;
  inter_pred_params->subsampling_y = subsampling_y;
  inter_pred_params->bit_depth = bit_depth;
  inter_pred_params->use_hbd_buf = use_hbd_buf;
  inter_pred_params->is_intrabc = is_intrabc;
  inter_pred_params->scale_factors = sf;
  inter_pred_params->ref_frame_buf = *ref_buf;
  inter_pred_params->mode = TRANSLATION_PRED;
  inter_pred_params->comp_mode = UNIFORM_SINGLE;

  if (is_intrabc) {
    inter_pred_params->interp_filter_params[0] = &av1_intrabc_filter_params;
    inter_pred_params->interp_filter_params[1] = &av1_intrabc_filter_params;
  } else {
    inter_pred_params->interp_filter_params[0] =
        av1_get_interp_filter_params_with_block_size(
            interp_filters.as_filters.x_filter, block_width);
    inter_pred_params->interp_filter_params[1] =
        av1_get_interp_filter_params_with_block_size(
            interp_filters.as_filters.y_filter, block_height);
  }
}

void av1_init_comp_mode(InterPredParams *inter_pred_params) {
  inter_pred_params->comp_mode = UNIFORM_COMP;
}

void av1_init_warp_params(InterPredParams *inter_pred_params,
                          const WarpTypesAllowed *warp_types, int ref,
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
  if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8)
    return;

  if (xd->cur_frame_force_integer_mv) return;

  if (av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
                     inter_pred_params->scale_factors,
                     &inter_pred_params->warp_params)) {
#if CONFIG_REALTIME_ONLY
    aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE,
                       "Warped motion is disabled in realtime only build.");
#endif
    inter_pred_params->mode = WARP_PRED;
  }
}

void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride,
                              InterPredParams *inter_pred_params,
                              const SubpelParams *subpel_params) {
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
                 inter_pred_params->conv_params.dst != NULL));

  if (inter_pred_params->mode == TRANSLATION_PRED) {
#if CONFIG_AV1_HIGHBITDEPTH
    if (inter_pred_params->use_hbd_buf) {
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                             inter_pred_params->block_width,
                             inter_pred_params->block_height,
                             &inter_pred_params->conv_params,
                             inter_pred_params->interp_filter_params,
                             inter_pred_params->bit_depth);
    } else {
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                      inter_pred_params->block_width,
                      inter_pred_params->block_height,
                      &inter_pred_params->conv_params,
                      inter_pred_params->interp_filter_params);
    }
#else
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                    inter_pred_params->block_width,
                    inter_pred_params->block_height,
                    &inter_pred_params->conv_params,
                    inter_pred_params->interp_filter_params);
#endif
  }
#if !CONFIG_REALTIME_ONLY
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
  else if (inter_pred_params->mode == WARP_PRED) {
    av1_warp_plane(
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
        inter_pred_params->ref_frame_buf.width,
        inter_pred_params->ref_frame_buf.height,
        inter_pred_params->ref_frame_buf.stride, dst,
        inter_pred_params->pix_col, inter_pred_params->pix_row,
        inter_pred_params->block_width, inter_pred_params->block_height,
        dst_stride, inter_pred_params->subsampling_x,
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
  }
#endif
  else {
    assert(0 && "Unsupported inter_pred_params->mode");
  }
}

static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};

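// Copies a width-sample row, shifted by `shift` samples: a positive shift
// moves the row right (padding the left with copies of src[0]); a negative
// shift moves it left (padding the right with copies of src[width - 1]).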
static AOM_INLINE void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
                                  int width) {
  if (shift >= 0) {
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
  } else {
    shift = -shift;
    memcpy(dst, src + shift, width - shift);
    memset(dst + width - shift, src[width - 1], shift);
  }
}

/* clang-format off */
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
};
/* clang-format on */

// [negative][direction]
DECLARE_ALIGNED(
    16, static uint8_t,
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
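// (For the wedge-enabled block sizes listed in av1_wedge_params_lookup below
// -- 8x8 through 32x32 plus 8x32 and 32x8 -- the mask areas sum to 3136
// samples per wedge type and sign, which is indeed <= 4 * MAX_WEDGE_SQUARE =
// 4096.)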
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);

DECLARE_ALIGNED(16, static uint8_t,
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
                                          [MAX_WEDGE_SQUARE]);

static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];

static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
    wedge_masks[BLOCK_8X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
    wedge_masks[BLOCK_8X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
    wedge_masks[BLOCK_16X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
    wedge_masks[BLOCK_16X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
    wedge_masks[BLOCK_16X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
    wedge_masks[BLOCK_32X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
    wedge_masks[BLOCK_32X32] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
    wedge_masks[BLOCK_8X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
    wedge_masks[BLOCK_32X8] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
};

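// Returns a pointer into the 64x64 master mask for the requested direction
// and sign, offset so that reading a bw x bh block from it places the wedge
// boundary through the block point (x_offset * bw / 8, y_offset * bh / 8).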
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
                                             BLOCK_SIZE sb_type) {
  const uint8_t *master;
  const int bh = block_size_high[sb_type];
  const int bw = block_size_wide[sb_type];
  const wedge_code_type *a =
      av1_wedge_params_lookup[sb_type].codebook + wedge_index;
  int woff, hoff;
  const uint8_t wsignflip =
      av1_wedge_params_lookup[sb_type].signflip[wedge_index];

  assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
  woff = (a->x_offset * bw) >> 3;
  hoff = (a->y_offset * bh) >> 3;
  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
           MASK_MASTER_SIZE / 2 - woff;
  return master;
}

const uint8_t *av1_get_compound_type_mask(
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
  (void)sb_type;
  switch (comp_data->type) {
    case COMPOUND_WEDGE:
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                          comp_data->wedge_sign, sb_type);
    default: return comp_data->seg_mask;
  }
}

static AOM_INLINE void diffwtd_mask_d16(
    uint8_t *mask, int which_inverse, int mask_base, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  int round =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
  int i, j, m, diff;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
      diff = ROUND_POWER_OF_TWO(diff, round);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

void av1_build_compound_diffwtd_mask_d16_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
                       conv_params, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
                       conv_params, bd);
      break;
    default: assert(0);
  }
}

static AOM_INLINE void diffwtd_mask(uint8_t *mask, int which_inverse,
                                    int mask_base, const uint8_t *src0,
                                    int src0_stride, const uint8_t *src1,
                                    int src1_stride, int h, int w) {
  int i, j, m, diff;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff =
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
                                       DIFFWTD_MASK_TYPE mask_type,
                                       const uint8_t *src0, int src0_stride,
                                       const uint8_t *src1, int src1_stride,
                                       int h, int w) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
      break;
    default: assert(0);
  }
}

static AOM_FORCE_INLINE void diffwtd_mask_highbd(
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
    const unsigned int bd) {
  assert(bd >= 8);
  if (bd == 8) {
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  } else {
    const unsigned int bd_shift = bd - 8;
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  }
}

void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
      break;
    default: assert(0);
  }
}

static AOM_INLINE void init_wedge_master_masks() {
  int i, j;
  const int w = MASK_MASTER_SIZE;
  const int h = MASK_MASTER_SIZE;
  const int stride = MASK_MASTER_STRIDE;
  // Note: index [0] stores the masters, and [1] its complement.
  // Generate prototype by shifting the masters
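  // The WEDGE_OBLIQUE63 prototype is assembled by interleaving shifted copies
  // of the even/odd oblique rows; the shift drops by one sample every two
  // rows, so the mask boundary has a 2:1 slope (arctan(2), roughly 63
  // degrees). The other directions are derived from it by transposition and
  // reflection in the second loop below.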
  int shift = h / 4;
  for (i = 0; i < h; i += 2) {
    shift_copy(wedge_master_oblique_even,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
               MASK_MASTER_SIZE);
    shift--;
    shift_copy(wedge_master_oblique_odd,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
               MASK_MASTER_SIZE);
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
  }

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - mskx;
    }
  }
}

static AOM_INLINE void init_wedge_masks() {
  uint8_t *dst = wedge_mask_buf;
  BLOCK_SIZE bsize;
  memset(wedge_masks, 0, sizeof(wedge_masks));
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
    const int wtypes = wedge_params->wedge_types;
    if (wtypes == 0) continue;
    const uint8_t *mask;
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
    int w;
    for (w = 0; w < wtypes; ++w) {
      mask = get_wedge_mask_inplace(w, 0, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
                        bh);
      wedge_params->masks[0][w] = dst;
      dst += bw * bh;

      mask = get_wedge_mask_inplace(w, 1, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
                        bh);
      wedge_params->masks[1][w] = dst;
      dst += bw * bh;
    }
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
  }
}

/* clang-format off */
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
   8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
   4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
   2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
};
static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
    32, 16, 16, 16, 8, 8, 8, 4,
    4,  4,  2,  2,  2, 1, 1, 1,
    8,  8,  4,  4,  2, 2
};
/* clang-format on */
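// Note: each ii_size_scales entry satisfies size_scale * max(block width,
// block height) == MAX_SB_SIZE, so the 1D weight ramp above is stretched to
// span the longer side of the block.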

static AOM_INLINE void build_smooth_interintra_mask(uint8_t *mask, int stride,
                                                    BLOCK_SIZE plane_bsize,
                                                    INTERINTRA_MODE mode) {
  int i, j;
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];
  const int size_scale = ii_size_scales[plane_bsize];

  switch (mode) {
    case II_V_PRED:
      for (i = 0; i < bh; ++i) {
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
        mask += stride;
      }
      break;

    case II_H_PRED:
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
        mask += stride;
      }
      break;

    case II_SMOOTH_PRED:
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j)
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
        mask += stride;
      }
      break;

    case II_DC_PRED:
    default:
      for (i = 0; i < bh; ++i) {
        memset(mask, 32, bw * sizeof(mask[0]));
        mask += stride;
      }
      break;
  }
}

static AOM_INLINE void init_smooth_interintra_masks() {
  for (int m = 0; m < INTERINTRA_MODES; ++m) {
    for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
      const int bw = block_size_wide[bs];
      const int bh = block_size_high[bs];
      if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
      build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
                                   m);
    }
  }
}

// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
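// For example, the codebook entry { WEDGE_OBLIQUE27, 4, 2 } describes a line
// at 27 degrees through the block point (4*w/8, 2*h/8) = (w/2, h/4).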
void av1_init_wedge_masks() {
  init_wedge_master_masks();
  init_wedge_masks();
  init_smooth_interintra_masks();
}

static AOM_INLINE void build_masked_compound_no_round(
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
    const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, InterPredParams *inter_pred_params) {
  const int ssy = inter_pred_params->subsampling_y;
  const int ssx = inter_pred_params->subsampling_x;
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  const int mask_stride = block_size_wide[sb_type];
#if CONFIG_AV1_HIGHBITDEPTH
  if (inter_pred_params->use_hbd_buf) {
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                  src1_stride, mask, mask_stride, w, h, ssx,
                                  ssy, &inter_pred_params->conv_params,
                                  inter_pred_params->bit_depth);
  } else {
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
                                 &inter_pred_params->conv_params);
  }
#else
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
                               &inter_pred_params->conv_params);
#endif
}

static void make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
                                        uint8_t *dst, int dst_stride,
                                        InterPredParams *inter_pred_params,
                                        const SubpelParams *subpel_params) {
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;

  // We're going to call av1_make_inter_predictor to generate a prediction into
  // a temporary buffer, then blend that temporary buffer with the prediction
  // from the other reference.
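  // Note that tmp_buf is viewed in two ways below: as the pixel buffer
  // tmp_dst that av1_make_inter_predictor writes into, and as the 16-bit
  // convolve buffer tmp_buf16 installed as conv_params.dst; both alias the
  // same storage.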
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
  uint8_t *tmp_dst =
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;

  const int tmp_buf_stride = MAX_SB_SIZE;
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
  inter_pred_params->conv_params.dst = tmp_buf16;
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
  assert(inter_pred_params->conv_params.do_average == 0);

  // This will generate a prediction in tmp_buf for the second reference
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
                           inter_pred_params, subpel_params);

  if (!inter_pred_params->conv_params.plane &&
      comp_data->type == COMPOUND_DIFFWTD) {
    av1_build_compound_diffwtd_mask_d16(
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
        inter_pred_params->block_width, &inter_pred_params->conv_params,
        inter_pred_params->bit_depth);
  }
  build_masked_compound_no_round(
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
      comp_data, sb_type, inter_pred_params->block_height,
      inter_pred_params->block_width, inter_pred_params);
}

void av1_build_one_inter_predictor(
    uint8_t *dst, int dst_stride, const MV *const src_mv,
    InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y,
    int ref, uint8_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func) {
  SubpelParams subpel_params;
  uint8_t *src;
  int src_stride;
  calc_subpel_params_func(src_mv, inter_pred_params, xd, mi_x, mi_y, ref,
                          mc_buf, &src, &subpel_params, &src_stride);

  if (inter_pred_params->comp_mode == UNIFORM_SINGLE ||
      inter_pred_params->comp_mode == UNIFORM_COMP) {
    av1_make_inter_predictor(src, src_stride, dst, dst_stride,
                             inter_pred_params, &subpel_params);
  } else {
    make_masked_inter_predictor(src, src_stride, dst, dst_stride,
                                inter_pred_params, &subpel_params);
  }
}

void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
                                     const MB_MODE_INFO *mbmi, int *fwd_offset,
                                     int *bck_offset,
                                     int *use_dist_wtd_comp_avg,
                                     int is_compound) {
  assert(fwd_offset != NULL && bck_offset != NULL);
  if (!is_compound || mbmi->compound_idx) {
    *fwd_offset = 8;
    *bck_offset = 8;
    *use_dist_wtd_comp_avg = 0;
    return;
  }

  *use_dist_wtd_comp_avg = 1;
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
  const int cur_frame_index = cm->cur_frame->order_hint;
  int bck_frame_index = 0, fwd_frame_index = 0;

  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;

  int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
                                       fwd_frame_index, cur_frame_index)),
                 0, MAX_FRAME_DISTANCE);
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
                                       cur_frame_index, bck_frame_index)),
                 0, MAX_FRAME_DISTANCE);

  const int order = d0 <= d1;

  if (d0 == 0 || d1 == 0) {
    *fwd_offset = quant_dist_lookup_table[3][order];
    *bck_offset = quant_dist_lookup_table[3][1 - order];
    return;
  }

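  // Stop at the first weight pair (c0, c1) whose weighted products d0 * c0
  // and d1 * c1 invert the ordering of d0 and d1; the index at which the loop
  // breaks selects the offsets read from quant_dist_lookup_table below.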
  int i;
  for (i = 0; i < 3; ++i) {
    int c0 = quant_dist_weight[i][order];
    int c1 = quant_dist_weight[i][!order];
    int d0_c0 = d0 * c0;
    int d1_c1 = d1 * c1;
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
  }

  *fwd_offset = quant_dist_lookup_table[i][order];
  *bck_offset = quant_dist_lookup_table[i][1 - order];
}

// True if the following hold:
//  1. Not intrabc and not build_for_obmc
//  2. At least one dimension is size 4 with subsampling
//  3. If sub-sampled, all of the neighboring blocks covered by the
//     sub-sampled chroma block are inter blocks and none of them is intrabc
static bool is_sub8x8_inter(const MACROBLOCKD *xd, int plane, BLOCK_SIZE bsize,
                            int is_intrabc, int build_for_obmc) {
  if (is_intrabc || build_for_obmc) {
    return false;
  }

  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  const int is_sub4_x = (block_size_wide[bsize] == 4) && ss_x;
  const int is_sub4_y = (block_size_high[bsize] == 4) && ss_y;
  if (!is_sub4_x && !is_sub4_y) {
    return false;
  }

  // For sub8x8 chroma blocks, we may be covering more than one luma block's
  // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
  // the top-left corner of the prediction source - the correct top-left corner
  // is at (pre_x, pre_y).
  const int row_start = is_sub4_y ? -1 : 0;
  const int col_start = is_sub4_x ? -1 : 0;

  for (int row = row_start; row <= 0; ++row) {
    for (int col = col_start; col <= 0; ++col) {
      const MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
      if (!is_inter_block(this_mbmi)) return false;
      if (is_intrabc_block(this_mbmi)) return false;
    }
  }
  return true;
}

static void build_inter_predictors_sub8x8(
    const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
    int mi_x, int mi_y, uint8_t **mc_buf,
    CalcSubpelParamsFunc calc_subpel_params_func) {
  const BLOCK_SIZE bsize = mi->bsize;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const bool ss_x = pd->subsampling_x;
  const bool ss_y = pd->subsampling_y;
  const int b4_w = block_size_wide[bsize] >> ss_x;
  const int b4_h = block_size_high[bsize] >> ss_y;
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y);
  const int b8_w = block_size_wide[plane_bsize];
  const int b8_h = block_size_high[plane_bsize];
  const int is_compound = has_second_ref(mi);
  assert(!is_compound);
  assert(!is_intrabc_block(mi));

  // For sub8x8 chroma blocks, we may be covering more than one luma block's
  // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
  // the top-left corner of the prediction source - the correct top-left corner
  // is at (pre_x, pre_y).
  const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0;
  const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0;
  const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
  const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;

  int row = row_start;
  for (int y = 0; y < b8_h; y += b4_h) {
    int col = col_start;
    for (int x = 0; x < b8_w; x += b4_w) {
      MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
      struct buf_2d *const dst_buf = &pd->dst;
      uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x;
      int ref = 0;
      const RefCntBuffer *ref_buf =
          get_ref_frame_buf(cm, this_mbmi->ref_frame[ref]);
      const struct scale_factors *ref_scale_factors =
          get_ref_scale_factors_const(cm, this_mbmi->ref_frame[ref]);
      const struct scale_factors *const sf = ref_scale_factors;
      const struct buf_2d pre_buf = {
        NULL,
        (plane == 1) ? ref_buf->buf.u_buffer : ref_buf->buf.v_buffer,
        ref_buf->buf.uv_crop_width,
        ref_buf->buf.uv_crop_height,
        ref_buf->buf.uv_stride,
      };

      const MV mv = this_mbmi->mv[ref].as_mv;

      InterPredParams inter_pred_params;
      av1_init_inter_params(&inter_pred_params, b4_w, b4_h, pre_y + y,
                            pre_x + x, pd->subsampling_x, pd->subsampling_y,
                            xd->bd, is_cur_buf_hbd(xd), mi->use_intrabc, sf,
                            &pre_buf, this_mbmi->interp_filters);
      inter_pred_params.conv_params =
          get_conv_params_no_round(ref, plane, NULL, 0, is_compound, xd->bd);

      av1_build_one_inter_predictor(dst, dst_buf->stride, &mv,
                                    &inter_pred_params, xd, mi_x + x, mi_y + y,
                                    ref, mc_buf, calc_subpel_params_func);

      ++col;
    }
    ++row;
  }
}

static void build_inter_predictors_8x8_and_bigger(
    const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
    int build_for_obmc, int bw, int bh, int mi_x, int mi_y, uint8_t **mc_buf,
    CalcSubpelParamsFunc calc_subpel_params_func) {
  const int is_compound = has_second_ref(mi);
  const int is_intrabc = is_intrabc_block(mi);
  assert(IMPLIES(is_intrabc, !is_compound));
  struct macroblockd_plane *const pd = &xd->plane[plane];
  struct buf_2d *const dst_buf = &pd->dst;
  uint8_t *const dst = dst_buf->buf;

  int is_global[2] = { 0, 0 };
  for (int ref = 0; ref < 1 + is_compound; ++ref) {
    const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
    is_global[ref] = is_global_mv_block(mi, wm->wmtype);
  }

  const BLOCK_SIZE bsize = mi->bsize;
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  const int row_start =
      (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0;
  const int col_start =
      (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0;
  const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
  const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;

  for (int ref = 0; ref < 1 + is_compound; ++ref) {
    const struct scale_factors *const sf =
        is_intrabc ? &cm->sf_identity : xd->block_ref_scale_factors[ref];
    struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
    const MV mv = mi->mv[ref].as_mv;
    const WarpTypesAllowed warp_types = { is_global[ref],
                                          mi->motion_mode == WARPED_CAUSAL };

    InterPredParams inter_pred_params;
    av1_init_inter_params(&inter_pred_params, bw, bh, pre_y, pre_x,
                          pd->subsampling_x, pd->subsampling_y, xd->bd,
                          is_cur_buf_hbd(xd), mi->use_intrabc, sf, pre_buf,
                          mi->interp_filters);
    if (is_compound) av1_init_comp_mode(&inter_pred_params);
    inter_pred_params.conv_params = get_conv_params_no_round(
        ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);

    av1_dist_wtd_comp_weight_assign(
        cm, mi, &inter_pred_params.conv_params.fwd_offset,
        &inter_pred_params.conv_params.bck_offset,
        &inter_pred_params.conv_params.use_dist_wtd_comp_avg, is_compound);

    if (!build_for_obmc)
      av1_init_warp_params(&inter_pred_params, &warp_types, ref, xd, mi);

    if (is_masked_compound_type(mi->interinter_comp.type)) {
      inter_pred_params.sb_type = mi->bsize;
      inter_pred_params.mask_comp = mi->interinter_comp;
      if (ref == 1) {
        inter_pred_params.conv_params.do_average = 0;
        inter_pred_params.comp_mode = MASK_COMP;
      }
      // Assign physical buffer.
      inter_pred_params.mask_comp.seg_mask = xd->seg_mask;
    }

    av1_build_one_inter_predictor(dst, dst_buf->stride, &mv, &inter_pred_params,
                                  xd, mi_x, mi_y, ref, mc_buf,
                                  calc_subpel_params_func);
  }
}

void av1_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                int plane, const MB_MODE_INFO *mi,
                                int build_for_obmc, int bw, int bh, int mi_x,
                                int mi_y, uint8_t **mc_buf,
                                CalcSubpelParamsFunc calc_subpel_params_func) {
  if (is_sub8x8_inter(xd, plane, mi->bsize, is_intrabc_block(mi),
                      build_for_obmc)) {
    assert(bw < 8 || bh < 8);
    build_inter_predictors_sub8x8(cm, xd, plane, mi, mi_x, mi_y, mc_buf,
                                  calc_subpel_params_func);
  } else {
    build_inter_predictors_8x8_and_bigger(cm, xd, plane, mi, build_for_obmc, bw,
                                          bh, mi_x, mi_y, mc_buf,
                                          calc_subpel_params_func);
  }
}

void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const int plane_start, const int plane_end) {
  // We use AOMMIN(plane_end, MAX_MB_PLANE) instead of plane_end to quiet
  // the static analysis warnings.
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
    struct macroblockd_plane *const pd = &planes[i];
    const int is_uv = i > 0;
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
  }
}

void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *sf,
                          const int num_planes) {
  if (src != NULL) {
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
    // the static analysis warnings.
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
      struct macroblockd_plane *const pd = &xd->plane[i];
      const int is_uv = i > 0;
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[i],
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
                       src->strides[is_uv], mi_row, mi_col, sf,
                       pd->subsampling_x, pd->subsampling_y);
    }
  }
}

// obmc_mask_N[overlap_position]
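// Each entry is the weight (out of 64) applied to the current block's own
// prediction at that distance from the shared edge; the neighbor's prediction
// is weighted by the complement 64 - m, so its contribution fades out as the
// mask ramps up to 64.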
static const uint8_t obmc_mask_1[1] = { 64 };
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };

DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};

const uint8_t *av1_get_obmc_mask(int length) {
  switch (length) {
    case 1: return obmc_mask_1;
    case 2: return obmc_mask_2;
    case 4: return obmc_mask_4;
    case 8: return obmc_mask_8;
    case 16: return obmc_mask_16;
    case 32: return obmc_mask_32;
    case 64: return obmc_mask_64;
    default: assert(0); return NULL;
  }
}

static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
                                     int rel_mi_col, uint8_t op_mi_size,
                                     int dir, MB_MODE_INFO *mi, void *fun_ctxt,
                                     const int num_planes) {
  (void)xd;
  (void)rel_mi_row;
  (void)rel_mi_col;
  (void)op_mi_size;
  (void)dir;
  (void)mi;
  ++*(int *)fun_ctxt;
  (void)num_planes;
}

void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
  MB_MODE_INFO *mbmi = xd->mi[0];

  mbmi->overlappable_neighbors = 0;

  if (!is_motion_variation_allowed_bsize(mbmi->bsize)) return;

  foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
                                &mbmi->overlappable_neighbors);
  if (mbmi->overlappable_neighbors) return;
  foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
                               &mbmi->overlappable_neighbors);
}

// HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
// block-size of current plane is smaller than 8x8, always only blend with the
// left neighbor(s) (skip blending with the above side).
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
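// For example, with 4:2:0 subsampling the chroma planes of a BLOCK_8X8 block
// are 4x4, so av1_skip_u4x4_pred_in_obmc() returns 1 for dir == 0 (above) and
// the chroma planes are blended only with the left neighbor(s).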

int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE bsize_plane =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  switch (bsize_plane) {
#if DISABLE_CHROMA_U8X8_OBMC
    case BLOCK_4X4:
    case BLOCK_8X4:
    case BLOCK_4X8: return 1;
#else
    case BLOCK_4X4:
    case BLOCK_8X4:
    case BLOCK_4X8: return dir == 0;
#endif
    default: return 0;
  }
}

void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
}

struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;
  int *adjacent_stride;
};

static INLINE void build_obmc_inter_pred_above(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
  (void)above_mi;
  (void)rel_mi_row;
  (void)dir;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  const int overlap =
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
    const int bh = overlap >> pd->subsampling_y;
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_col];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
    const uint8_t *const mask = av1_get_obmc_mask(bh);
#if CONFIG_AV1_HIGHBITDEPTH
    const int is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd)
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
#else
    aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
                        bw, bh);
#endif
  }
}

static INLINE void build_obmc_inter_pred_left(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
  (void)left_mi;
  (void)rel_mi_col;
  (void)dir;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  const int overlap =
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = overlap >> pd->subsampling_x;
    const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
    const uint8_t *const mask = av1_get_obmc_mask(bw);

#if CONFIG_AV1_HIGHBITDEPTH
    const int is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd)
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
#else
    aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
                        bw, bh);
#endif
  }
}

// This function combines motion compensated predictions that are generated by
// top/left neighboring blocks' inter predictors with the regular inter
// prediction. We assume the original prediction (bmc) is stored in
// xd->plane[].dst.buf
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                     uint8_t *above[MAX_MB_PLANE],
                                     int above_stride[MAX_MB_PLANE],
                                     uint8_t *left[MAX_MB_PLANE],
                                     int left_stride[MAX_MB_PLANE]) {
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;

  // handle above row
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
  foreach_overlappable_nb_above(cm, xd,
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                build_obmc_inter_pred_above, &ctxt_above);

  // handle left column
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
  foreach_overlappable_nb_left(cm, xd,
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
                               build_obmc_inter_pred_left, &ctxt_left);
}

void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
                             uint8_t **dst_buf2) {
  if (is_cur_buf_hbd(xd)) {
    int len = sizeof(uint16_t);
    dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
    dst_buf1[1] =
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * len);
    dst_buf1[2] =
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2 * len);
    dst_buf2[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1]);
    dst_buf2[1] =
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * len);
    dst_buf2[2] =
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len);
  } else {
    dst_buf1[0] = xd->tmp_obmc_bufs[0];
    dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE;
    dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
    dst_buf2[0] = xd->tmp_obmc_bufs[1];
    dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE;
    dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
  }
}

void av1_setup_build_prediction_by_above_pred(
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
    const int num_planes) {
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
  const int above_mi_col = xd->mi_col + rel_mi_col;

  av1_modify_neighbor_predictor_for_obmc(above_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(above_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const sf =
        get_ref_scale_factors_const(ctxt->cm, frame);
    xd->block_ref_scale_factors[ref] = sf;
    if (!av1_is_valid_scale(sf))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
                         num_planes);
  }

  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
  xd->mb_to_right_edge =
      ctxt->mb_to_far_edge +
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
}

void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
                                             uint8_t left_mi_height,
                                             MB_MODE_INFO *left_mbmi,
                                             struct build_prediction_ctxt *ctxt,
                                             const int num_planes) {
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
  const int left_mi_row = xd->mi_row + rel_mi_row;

  av1_modify_neighbor_predictor_for_obmc(left_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(left_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const ref_scale_factors =
        get_ref_scale_factors_const(ctxt->cm, frame);

    xd->block_ref_scale_factors[ref] = ref_scale_factors;
    if (!av1_is_valid_scale(ref_scale_factors))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
                         ref_scale_factors, num_planes);
  }

  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
  xd->mb_to_bottom_edge =
      ctxt->mb_to_far_edge +
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
}

static AOM_INLINE void combine_interintra(
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred, int compstride, const uint8_t *interpred,
    int interstride, const uint8_t *intrapred, int intrastride) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (use_wedge_interintra) {
    if (av1_is_wedge_used(bsize)) {
      const uint8_t *mask =
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
      const int subw = 2 * mi_size_wide[bsize] == bw;
      const int subh = 2 * mi_size_high[bsize] == bh;
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
                         interpred, interstride, mask, block_size_wide[bsize],
                         bw, bh, subw, subh);
    }
    return;
  }

  const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
                     interstride, mask, bw, bw, bh, 0, 0);
}

#if CONFIG_AV1_HIGHBITDEPTH
static AOM_INLINE void combine_interintra_highbd(
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (use_wedge_interintra) {
    if (av1_is_wedge_used(bsize)) {
      const uint8_t *mask =
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
      const int subh = 2 * mi_size_high[bsize] == bh;
      const int subw = 2 * mi_size_wide[bsize] == bw;
      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                                interpred8, interstride, mask,
                                block_size_wide[bsize], bw, bh, subw, subh, bd);
    }
    return;
  }

  uint8_t mask[MAX_SB_SQUARE];
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
                            bd);
}
#endif

void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
                                               MACROBLOCKD *xd,
                                               BLOCK_SIZE bsize, int plane,
                                               const BUFFER_SET *ctx,
                                               uint8_t *dst, int dst_stride) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
  assert(xd->mi[0]->use_intrabc == 0);
  const SequenceHeader *seq_params = cm->seq_params;

  av1_predict_intra_block(xd, seq_params->sb_size,
                          seq_params->enable_intra_edge_filter, pd->width,
                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
}

void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(
        xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
        xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
        plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
        inter_pred, inter_stride, intra_pred, intra_stride, xd->bd);
    return;
  }
#endif
  combine_interintra(
      xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
      xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
      plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
      inter_pred, inter_stride, intra_pred, intra_stride);
}

// build interintra_predictors for one plane
void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    uint8_t *pred, int stride,
                                    const BUFFER_SET *ctx, int plane,
                                    BLOCK_SIZE bsize) {
  assert(bsize < BLOCK_SIZES_ALL);
  if (is_cur_buf_hbd(xd)) {
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
    av1_build_intra_predictors_for_interintra(
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
        MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride,
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
  } else {
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
                                              intrapredictor, MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
                           MAX_SB_SIZE);
  }
}