1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/mem.h"
20 #include "av1/common/av1_common_int.h"
21 #include "av1/common/av1_loopfilter.h"
22 #include "av1/common/reconinter.h"
23 #include "av1/common/seg_common.h"
24
// Selects the filtering granularity used by the edge-filter helpers such as
// filter_vert(), where USE_QUAD issues two "dual" kernel calls per edge.
// NOTE(review): USE_SINGLE / USE_DUAL presumably select one call of the single
// / dual kernel variant -- confirm against the filter_* helpers.
enum {
  USE_SINGLE,
  USE_DUAL,
  USE_QUAD,
} UENUM1BYTE(USE_FILTER_TYPE);
30
// Segmentation feature that carries the loop-filter level adjustment, indexed
// as [plane][dir]. Luma has a distinct feature for each of the two direction
// indices; each chroma plane uses one feature for both directions.
static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = {
  { SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H },
  { SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U },
  { SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V }
};
36
// Index into MB_MODE_INFO::delta_lf[] for a given [plane][dir]; consulted by
// av1_get_filter_level() when delta_lf_multi is set. Luma has one slot per
// direction index, each chroma plane shares one slot across both directions.
static const int delta_lf_id_lut[MAX_MB_PLANE][2] = { { 0, 1 },
                                                      { 2, 2 },
                                                      { 3, 3 } };
40
// Maps a prediction mode to an index into the loop-filter mode deltas
// (lf.mode_deltas[]) and into the last dimension of lfi_n->lvl[].
// av1_loop_filter_init() asserts that this table has MB_MODE_COUNT entries.
static const int mode_lf_lut[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // INTRA_MODES
  1, 1, 0, 1,                             // INTER_MODES (GLOBALMV == 0)
  1, 1, 1, 1, 1, 1, 0, 1  // INTER_COMPOUND_MODES (GLOBAL_GLOBALMV == 0)
};
46
// Fills the `lim` and `mblim` threshold vectors of lfi->lfthr[] for every
// filter level in [0, MAX_LOOP_FILTER], given the frame sharpness level.
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
  // The shift applied to the level depends only on the sharpness:
  // 0 for sharpness 0, 1 for sharpness 1..4, 2 for sharpness above 4.
  const int shift = (sharpness_lvl > 0) + (sharpness_lvl > 4);
  const int sharpen = sharpness_lvl > 0;
  // Upper bound on the inside limit; only meaningful when sharpening.
  const int cap = sharpen ? (9 - sharpness_lvl) : 0;

  for (int level = 0; level <= MAX_LOOP_FILTER; ++level) {
    int inside_limit = level >> shift;

    if (sharpen && inside_limit > cap) inside_limit = cap;
    // The inside limit is never allowed to drop below 1.
    if (inside_limit < 1) inside_limit = 1;

    // Each threshold is replicated across SIMD_WIDTH bytes.
    memset(lfi->lfthr[level].lim, inside_limit, SIMD_WIDTH);
    memset(lfi->lfthr[level].mblim, 2 * (level + 2) + inside_limit,
           SIMD_WIDTH);
  }
}
67
av1_get_filter_level(const AV1_COMMON * cm,const loop_filter_info_n * lfi_n,const int dir_idx,int plane,const MB_MODE_INFO * mbmi)68 uint8_t av1_get_filter_level(const AV1_COMMON *cm,
69 const loop_filter_info_n *lfi_n, const int dir_idx,
70 int plane, const MB_MODE_INFO *mbmi) {
71 const int segment_id = mbmi->segment_id;
72 if (cm->delta_q_info.delta_lf_present_flag) {
73 int8_t delta_lf;
74 if (cm->delta_q_info.delta_lf_multi) {
75 const int delta_lf_idx = delta_lf_id_lut[plane][dir_idx];
76 delta_lf = mbmi->delta_lf[delta_lf_idx];
77 } else {
78 delta_lf = mbmi->delta_lf_from_base;
79 }
80 int base_level;
81 if (plane == 0)
82 base_level = cm->lf.filter_level[dir_idx];
83 else if (plane == 1)
84 base_level = cm->lf.filter_level_u;
85 else
86 base_level = cm->lf.filter_level_v;
87 int lvl_seg = clamp(delta_lf + base_level, 0, MAX_LOOP_FILTER);
88 assert(plane >= 0 && plane <= 2);
89 const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir_idx];
90 if (segfeature_active(&cm->seg, segment_id, seg_lf_feature_id)) {
91 const int data = get_segdata(&cm->seg, segment_id, seg_lf_feature_id);
92 lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
93 }
94
95 if (cm->lf.mode_ref_delta_enabled) {
96 const int scale = 1 << (lvl_seg >> 5);
97 lvl_seg += cm->lf.ref_deltas[mbmi->ref_frame[0]] * scale;
98 if (mbmi->ref_frame[0] > INTRA_FRAME)
99 lvl_seg += cm->lf.mode_deltas[mode_lf_lut[mbmi->mode]] * scale;
100 lvl_seg = clamp(lvl_seg, 0, MAX_LOOP_FILTER);
101 }
102 return lvl_seg;
103 } else {
104 return lfi_n->lvl[plane][segment_id][dir_idx][mbmi->ref_frame[0]]
105 [mode_lf_lut[mbmi->mode]];
106 }
107 }
108
// One-time loop-filter setup: computes the sharpness-dependent limits and
// fills the high-edge-variance threshold vector for every filter level.
void av1_loop_filter_init(AV1_COMMON *cm) {
  assert(MB_MODE_COUNT == NELEMENTS(mode_lf_lut));
  loop_filter_info_n *const info = &cm->lf_info;

  // Limits (lim / mblim) for the current sharpness level.
  update_sharpness(info, cm->lf.sharpness_level);

  // hev threshold is level / 16, replicated across SIMD_WIDTH bytes.
  for (int level = 0; level <= MAX_LOOP_FILTER; ++level) {
    memset(info->lfthr[level].hev_thr, level >> 4, SIMD_WIDTH);
  }
}
122
123 // Update the loop filter for the current frame.
124 // This should be called before loop_filter_rows(),
125 // av1_loop_filter_frame() calls this function directly.
av1_loop_filter_frame_init(AV1_COMMON * cm,int plane_start,int plane_end)126 void av1_loop_filter_frame_init(AV1_COMMON *cm, int plane_start,
127 int plane_end) {
128 int filt_lvl[MAX_MB_PLANE], filt_lvl_r[MAX_MB_PLANE];
129 int plane;
130 int seg_id;
131 // n_shift is the multiplier for lf_deltas
132 // the multiplier is 1 for when filter_lvl is between 0 and 31;
133 // 2 when filter_lvl is between 32 and 63
134 loop_filter_info_n *const lfi = &cm->lf_info;
135 struct loopfilter *const lf = &cm->lf;
136 const struct segmentation *const seg = &cm->seg;
137
138 // update sharpness limits
139 update_sharpness(lfi, lf->sharpness_level);
140
141 filt_lvl[0] = cm->lf.filter_level[0];
142 filt_lvl[1] = cm->lf.filter_level_u;
143 filt_lvl[2] = cm->lf.filter_level_v;
144
145 filt_lvl_r[0] = cm->lf.filter_level[1];
146 filt_lvl_r[1] = cm->lf.filter_level_u;
147 filt_lvl_r[2] = cm->lf.filter_level_v;
148
149 assert(plane_start >= AOM_PLANE_Y);
150 assert(plane_end <= MAX_MB_PLANE);
151
152 for (plane = plane_start; plane < plane_end; plane++) {
153 if (plane == 0 && !filt_lvl[0] && !filt_lvl_r[0])
154 break;
155 else if (plane == 1 && !filt_lvl[1])
156 continue;
157 else if (plane == 2 && !filt_lvl[2])
158 continue;
159
160 for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
161 for (int dir = 0; dir < 2; ++dir) {
162 int lvl_seg = (dir == 0) ? filt_lvl[plane] : filt_lvl_r[plane];
163 const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir];
164 if (segfeature_active(seg, seg_id, seg_lf_feature_id)) {
165 const int data = get_segdata(&cm->seg, seg_id, seg_lf_feature_id);
166 lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
167 }
168
169 if (!lf->mode_ref_delta_enabled) {
170 // we could get rid of this if we assume that deltas are set to
171 // zero when not in use; encoder always uses deltas
172 memset(lfi->lvl[plane][seg_id][dir], lvl_seg,
173 sizeof(lfi->lvl[plane][seg_id][dir]));
174 } else {
175 int ref, mode;
176 const int scale = 1 << (lvl_seg >> 5);
177 const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
178 lfi->lvl[plane][seg_id][dir][INTRA_FRAME][0] =
179 clamp(intra_lvl, 0, MAX_LOOP_FILTER);
180
181 for (ref = LAST_FRAME; ref < REF_FRAMES; ++ref) {
182 for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
183 const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
184 lf->mode_deltas[mode] * scale;
185 lfi->lvl[plane][seg_id][dir][ref][mode] =
186 clamp(inter_lvl, 0, MAX_LOOP_FILTER);
187 }
188 }
189 }
190 }
191 }
192 }
193 }
194
195 static AOM_FORCE_INLINE TX_SIZE
get_transform_size(const MACROBLOCKD * const xd,const MB_MODE_INFO * const mbmi,const int mi_row,const int mi_col,const int plane,const int ss_x,const int ss_y)196 get_transform_size(const MACROBLOCKD *const xd, const MB_MODE_INFO *const mbmi,
197 const int mi_row, const int mi_col, const int plane,
198 const int ss_x, const int ss_y) {
199 assert(mbmi != NULL);
200 if (xd && xd->lossless[mbmi->segment_id]) return TX_4X4;
201
202 TX_SIZE tx_size = (plane == AOM_PLANE_Y)
203 ? mbmi->tx_size
204 : av1_get_max_uv_txsize(mbmi->bsize, ss_x, ss_y);
205 assert(tx_size < TX_SIZES_ALL);
206 if ((plane == AOM_PLANE_Y) && is_inter_block(mbmi) && !mbmi->skip_txfm) {
207 const BLOCK_SIZE sb_type = mbmi->bsize;
208 const int blk_row = mi_row & (mi_size_high[sb_type] - 1);
209 const int blk_col = mi_col & (mi_size_wide[sb_type] - 1);
210 const TX_SIZE mb_tx_size =
211 mbmi->inter_tx_size[av1_get_txb_size_index(sb_type, blk_row, blk_col)];
212 assert(mb_tx_size < TX_SIZES_ALL);
213 tx_size = mb_tx_size;
214 }
215
216 return tx_size;
217 }
218
// Luma filter length indexed by `dim`, the smaller of the two
// tx_size_{wide,high}_unit_log2 values on either side of an edge, as computed
// in set_lpf_parameters().
static const int tx_dim_to_filter_length[TX_SIZES] = { 4, 8, 14, 14, 14 };
220
221 // Return TX_SIZE from get_transform_size(), so it is plane and direction
222 // aware
set_lpf_parameters(AV1_DEBLOCKING_PARAMETERS * const params,const ptrdiff_t mode_step,const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const EDGE_DIR edge_dir,const uint32_t x,const uint32_t y,const int plane,const struct macroblockd_plane * const plane_ptr)223 static TX_SIZE set_lpf_parameters(
224 AV1_DEBLOCKING_PARAMETERS *const params, const ptrdiff_t mode_step,
225 const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
226 const EDGE_DIR edge_dir, const uint32_t x, const uint32_t y,
227 const int plane, const struct macroblockd_plane *const plane_ptr) {
228 // reset to initial values
229 params->filter_length = 0;
230
231 // no deblocking is required
232 const uint32_t width = plane_ptr->dst.width;
233 const uint32_t height = plane_ptr->dst.height;
234 if ((width <= x) || (height <= y)) {
235 // just return the smallest transform unit size
236 return TX_4X4;
237 }
238
239 const uint32_t scale_horz = plane_ptr->subsampling_x;
240 const uint32_t scale_vert = plane_ptr->subsampling_y;
241 // for sub8x8 block, chroma prediction mode is obtained from the bottom/right
242 // mi structure of the co-located 8x8 luma block. so for chroma plane, mi_row
243 // and mi_col should map to the bottom/right mi structure, i.e, both mi_row
244 // and mi_col should be odd number for chroma plane.
245 const int mi_row = scale_vert | ((y << scale_vert) >> MI_SIZE_LOG2);
246 const int mi_col = scale_horz | ((x << scale_horz) >> MI_SIZE_LOG2);
247 MB_MODE_INFO **mi =
248 cm->mi_params.mi_grid_base + mi_row * cm->mi_params.mi_stride + mi_col;
249 const MB_MODE_INFO *mbmi = mi[0];
250 // If current mbmi is not correctly setup, return an invalid value to stop
251 // filtering. One example is that if this tile is not coded, then its mbmi
252 // it not set up.
253 if (mbmi == NULL) return TX_INVALID;
254
255 const TX_SIZE ts = get_transform_size(xd, mi[0], mi_row, mi_col, plane,
256 scale_horz, scale_vert);
257
258 {
259 const uint32_t coord = (VERT_EDGE == edge_dir) ? (x) : (y);
260 const uint32_t transform_masks =
261 edge_dir == VERT_EDGE ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
262 const int32_t tu_edge = (coord & transform_masks) ? (0) : (1);
263
264 if (!tu_edge) return ts;
265
266 // prepare outer edge parameters. deblock the edge if it's an edge of a TU
267 {
268 const uint32_t curr_level =
269 av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
270 const int curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi);
271 uint32_t level = curr_level;
272 if (coord) {
273 {
274 const MB_MODE_INFO *const mi_prev = *(mi - mode_step);
275 if (mi_prev == NULL) return TX_INVALID;
276 const int pv_row =
277 (VERT_EDGE == edge_dir) ? (mi_row) : (mi_row - (1 << scale_vert));
278 const int pv_col =
279 (VERT_EDGE == edge_dir) ? (mi_col - (1 << scale_horz)) : (mi_col);
280 const TX_SIZE pv_ts = get_transform_size(
281 xd, mi_prev, pv_row, pv_col, plane, scale_horz, scale_vert);
282
283 const uint32_t pv_lvl =
284 av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev);
285
286 const int pv_skip_txfm =
287 mi_prev->skip_txfm && is_inter_block(mi_prev);
288 const BLOCK_SIZE bsize = get_plane_block_size(
289 mbmi->bsize, plane_ptr->subsampling_x, plane_ptr->subsampling_y);
290 assert(bsize < BLOCK_SIZES_ALL);
291 const int prediction_masks = edge_dir == VERT_EDGE
292 ? block_size_wide[bsize] - 1
293 : block_size_high[bsize] - 1;
294 const int32_t pu_edge = !(coord & prediction_masks);
295 // if the current and the previous blocks are skipped,
296 // deblock the edge if the edge belongs to a PU's edge only.
297 if ((curr_level || pv_lvl) &&
298 (!pv_skip_txfm || !curr_skipped || pu_edge)) {
299 const int dim = (VERT_EDGE == edge_dir)
300 ? AOMMIN(tx_size_wide_unit_log2[ts],
301 tx_size_wide_unit_log2[pv_ts])
302 : AOMMIN(tx_size_high_unit_log2[ts],
303 tx_size_high_unit_log2[pv_ts]);
304 if (plane) {
305 params->filter_length = (dim == 0) ? 4 : 6;
306 } else {
307 assert(dim < TX_SIZES);
308 assert(dim >= 0);
309 params->filter_length = tx_dim_to_filter_length[dim];
310 }
311
312 // update the level if the current block is skipped,
313 // but the previous one is not
314 level = (curr_level) ? (curr_level) : (pv_lvl);
315 }
316 }
317 }
318 // prepare common parameters
319 if (params->filter_length) {
320 const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
321 params->lfthr = limits;
322 }
323 }
324 }
325
326 return ts;
327 }
328
// Luma filter length for a vertical edge, indexed as [ts][pv_ts]: the
// transform size of the current block and of the block before the edge (see
// set_one_param_for_line_luma()). Rows are listed in the order given by the
// row comments; columns use the same order.
// NOTE(review): the values appear to follow the narrower of the two transform
// widths (4 -> 4, 8 -> 8, 16 and up -> 14) -- confirm against the TX_SIZE
// enum order.
static const uint32_t vert_filter_length_luma[TX_SIZES_ALL][TX_SIZES_ALL] = {
  // TX_4X4
  {
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  },
  // TX_8X8
  {
      4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8,
  },
  // TX_16X16
  {
      4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
  },
  // TX_32X32
  {
      4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
  },
  // TX_64X64
  {
      4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
  },
  // TX_4X8
  {
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  },
  // TX_8X4
  {
      4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8,
  },
  // TX_8X16
  {
      4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8,
  },
  // TX_16X8
  {
      4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
  },
  // TX_16X32
  {
      4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
  },
  // TX_32X16
  {
      4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
  },
  // TX_32X64
  {
      4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
  },
  // TX_64X32
  {
      4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
  },
  // TX_4X16
  {
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  },
  // TX_16X4
  {
      4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
  },
  // TX_8X32
  {
      4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8,
  },
  // TX_32X8
  {
      4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
  },
  // TX_16X64
  {
      4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
  },
  // TX_64X16
  {
      4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
  },
};
407
// Luma filter length for a horizontal edge, indexed as [ts][pv_ts]: the
// transform size of the current block and of the block before the edge (see
// set_one_param_for_line_luma()). Rows are listed in the order given by the
// row comments; columns use the same order.
// NOTE(review): the values appear to follow the shorter of the two transform
// heights (4 -> 4, 8 -> 8, 16 and up -> 14) -- confirm against the TX_SIZE
// enum order.
static const uint32_t horz_filter_length_luma[TX_SIZES_ALL][TX_SIZES_ALL] = {
  // TX_4X4
  {
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  },
  // TX_8X8
  {
      4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8,
  },
  // TX_16X16
  {
      4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
  },
  // TX_32X32
  {
      4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
  },
  // TX_64X64
  {
      4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
  },
  // TX_4X8
  {
      4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8,
  },
  // TX_8X4
  {
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  },
  // TX_8X16
  {
      4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
  },
  // TX_16X8
  {
      4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8,
  },
  // TX_16X32
  {
      4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
  },
  // TX_32X16
  {
      4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
  },
  // TX_32X64
  {
      4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
  },
  // TX_64X32
  {
      4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
  },
  // TX_4X16
  {
      4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
  },
  // TX_16X4
  {
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  },
  // TX_8X32
  {
      4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
  },
  // TX_32X8
  {
      4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8,
  },
  // TX_16X64
  {
      4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
  },
  // TX_64X16
  {
      4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
  },
};
486
// Chroma filter length for a vertical edge, indexed as [ts][pv_ts]: the
// transform size of the current block and of the block before the edge (see
// set_one_param_for_line_chroma()). Rows are listed in the order given by the
// row comments; columns use the same order.
// NOTE(review): the values appear to be 4 when either transform is 4 wide and
// 6 otherwise -- confirm against the TX_SIZE enum order.
static const uint32_t vert_filter_length_chroma[TX_SIZES_ALL][TX_SIZES_ALL] = {
  // TX_4X4
  {
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  },
  // TX_8X8
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_16X16
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_32X32
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_64X64
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_4X8
  {
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  },
  // TX_8X4
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_8X16
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_16X8
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_16X32
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_32X16
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_32X64
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_64X32
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_4X16
  {
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  },
  // TX_16X4
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_8X32
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_32X8
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_16X64
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
  // TX_64X16
  {
      4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
  },
};
565
// Chroma filter length for a horizontal edge, indexed as [ts][pv_ts]: the
// transform size of the current block and of the block before the edge (see
// set_one_param_for_line_chroma()). Rows are listed in the order given by the
// row comments; columns use the same order.
// NOTE(review): the values appear to be 4 when either transform is 4 high and
// 6 otherwise -- confirm against the TX_SIZE enum order.
static const uint32_t horz_filter_length_chroma[TX_SIZES_ALL][TX_SIZES_ALL] = {
  // TX_4X4
  {
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  },
  // TX_8X8
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_16X16
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_32X32
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_64X64
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_4X8
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_8X4
  {
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  },
  // TX_8X16
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_16X8
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_16X32
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_32X16
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_32X64
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_64X32
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_4X16
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_16X4
  {
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  },
  // TX_8X32
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_32X8
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_16X64
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
  // TX_64X16
  {
      4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
  },
};
644
set_one_param_for_line_luma(AV1_DEBLOCKING_PARAMETERS * const params,TX_SIZE * tx_size,const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const EDGE_DIR edge_dir,uint32_t mi_col,uint32_t mi_row,const struct macroblockd_plane * const plane_ptr,int coord,bool is_first_block,TX_SIZE prev_tx_size,const ptrdiff_t mode_step,int * min_dim)645 static AOM_FORCE_INLINE void set_one_param_for_line_luma(
646 AV1_DEBLOCKING_PARAMETERS *const params, TX_SIZE *tx_size,
647 const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
648 const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row,
649 const struct macroblockd_plane *const plane_ptr, int coord,
650 bool is_first_block, TX_SIZE prev_tx_size, const ptrdiff_t mode_step,
651 int *min_dim) {
652 (void)plane_ptr;
653 assert(mi_col << MI_SIZE_LOG2 < (uint32_t)plane_ptr->dst.width &&
654 mi_row << MI_SIZE_LOG2 < (uint32_t)plane_ptr->dst.height);
655 const int is_vert = edge_dir == VERT_EDGE;
656 // reset to initial values
657 params->filter_length = 0;
658
659 MB_MODE_INFO **mi =
660 cm->mi_params.mi_grid_base + mi_row * cm->mi_params.mi_stride + mi_col;
661 const MB_MODE_INFO *mbmi = mi[0];
662 assert(mbmi);
663
664 const TX_SIZE ts =
665 get_transform_size(xd, mi[0], mi_row, mi_col, AOM_PLANE_Y, 0, 0);
666
667 #ifndef NDEBUG
668 const uint32_t transform_masks =
669 is_vert ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
670 const int32_t tu_edge = ((coord * MI_SIZE) & transform_masks) ? (0) : (1);
671 assert(tu_edge);
672 #endif // NDEBUG
673 // If we are not the first block, then coord is always true, so
674 // !is_first_block is technically redundant. But we are keeping it here so the
675 // compiler can compile away this conditional if we pass in is_first_block :=
676 // false
677 bool curr_skipped = false;
678 if (!is_first_block || coord) {
679 const MB_MODE_INFO *const mi_prev = *(mi - mode_step);
680 const int pv_row = is_vert ? mi_row : (mi_row - 1);
681 const int pv_col = is_vert ? (mi_col - 1) : mi_col;
682 const TX_SIZE pv_ts =
683 is_first_block
684 ? get_transform_size(xd, mi_prev, pv_row, pv_col, AOM_PLANE_Y, 0, 0)
685 : prev_tx_size;
686 if (is_first_block) {
687 *min_dim = is_vert ? block_size_high[mi_prev->bsize]
688 : block_size_wide[mi_prev->bsize];
689 }
690 assert(mi_prev);
691 uint8_t level =
692 av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_Y, mbmi);
693 if (!level) {
694 level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_Y,
695 mi_prev);
696 }
697
698 const int32_t pu_edge = mi_prev != mbmi;
699
700 // The quad loop filter assumes that all the transform blocks within a
701 // 8x16/16x8/16x16 prediction block are of the same size.
702 assert(IMPLIES(
703 !pu_edge && (mbmi->bsize >= BLOCK_8X16 && mbmi->bsize <= BLOCK_16X16),
704 pv_ts == ts));
705
706 if (!pu_edge) {
707 curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi);
708 }
709 if ((pu_edge || !curr_skipped) && level) {
710 params->filter_length = is_vert ? vert_filter_length_luma[ts][pv_ts]
711 : horz_filter_length_luma[ts][pv_ts];
712
713 // prepare common parameters
714 const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
715 params->lfthr = limits;
716 }
717 }
718 const int block_dim =
719 is_vert ? block_size_high[mbmi->bsize] : block_size_wide[mbmi->bsize];
720 *min_dim = AOMMIN(*min_dim, block_dim);
721
722 *tx_size = ts;
723 }
724
725 // Similar to set_lpf_parameters, but does so one row/col at a time to reduce
726 // calls to \ref get_transform_size and \ref av1_get_filter_level
set_lpf_parameters_for_line_luma(AV1_DEBLOCKING_PARAMETERS * const params_buf,TX_SIZE * tx_buf,const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const EDGE_DIR edge_dir,uint32_t mi_col,uint32_t mi_row,const struct macroblockd_plane * const plane_ptr,const uint32_t mi_range,const ptrdiff_t mode_step,int * min_dim)727 static AOM_FORCE_INLINE void set_lpf_parameters_for_line_luma(
728 AV1_DEBLOCKING_PARAMETERS *const params_buf, TX_SIZE *tx_buf,
729 const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
730 const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row,
731 const struct macroblockd_plane *const plane_ptr, const uint32_t mi_range,
732 const ptrdiff_t mode_step, int *min_dim) {
733 const int is_vert = edge_dir == VERT_EDGE;
734
735 AV1_DEBLOCKING_PARAMETERS *params = params_buf;
736 TX_SIZE *tx_size = tx_buf;
737 uint32_t *counter_ptr = is_vert ? &mi_col : &mi_row;
738 TX_SIZE prev_tx_size = TX_INVALID;
739
740 // Unroll the first iteration of the loop
741 set_one_param_for_line_luma(params, tx_size, cm, xd, edge_dir, mi_col, mi_row,
742 plane_ptr, *counter_ptr, true, prev_tx_size,
743 mode_step, min_dim);
744
745 // Advance
746 int advance_units =
747 is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size];
748 prev_tx_size = *tx_size;
749 *counter_ptr += advance_units;
750 params += advance_units;
751 tx_size += advance_units;
752
753 while (*counter_ptr < mi_range) {
754 set_one_param_for_line_luma(params, tx_size, cm, xd, edge_dir, mi_col,
755 mi_row, plane_ptr, *counter_ptr, false,
756 prev_tx_size, mode_step, min_dim);
757
758 // Advance
759 advance_units =
760 is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size];
761 prev_tx_size = *tx_size;
762 *counter_ptr += advance_units;
763 params += advance_units;
764 tx_size += advance_units;
765 }
766 }
767
set_one_param_for_line_chroma(AV1_DEBLOCKING_PARAMETERS * const params,TX_SIZE * tx_size,const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const EDGE_DIR edge_dir,uint32_t mi_col,uint32_t mi_row,int coord,bool is_first_block,TX_SIZE prev_tx_size,const struct macroblockd_plane * const plane_ptr,const ptrdiff_t mode_step,const int scale_horz,const int scale_vert,int * min_dim,int plane,int joint_filter_chroma)768 static AOM_FORCE_INLINE void set_one_param_for_line_chroma(
769 AV1_DEBLOCKING_PARAMETERS *const params, TX_SIZE *tx_size,
770 const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
771 const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row, int coord,
772 bool is_first_block, TX_SIZE prev_tx_size,
773 const struct macroblockd_plane *const plane_ptr, const ptrdiff_t mode_step,
774 const int scale_horz, const int scale_vert, int *min_dim, int plane,
775 int joint_filter_chroma) {
776 const int is_vert = edge_dir == VERT_EDGE;
777 (void)plane_ptr;
778 assert((mi_col << MI_SIZE_LOG2) <
779 (uint32_t)(plane_ptr->dst.width << scale_horz) &&
780 (mi_row << MI_SIZE_LOG2) <
781 (uint32_t)(plane_ptr->dst.height << scale_vert));
782 // reset to initial values
783 params->filter_length = 0;
784
785 // for sub8x8 block, chroma prediction mode is obtained from the
786 // bottom/right mi structure of the co-located 8x8 luma block. so for chroma
787 // plane, mi_row and mi_col should map to the bottom/right mi structure,
788 // i.e, both mi_row and mi_col should be odd number for chroma plane.
789 mi_row |= scale_vert;
790 mi_col |= scale_horz;
791 MB_MODE_INFO **mi =
792 cm->mi_params.mi_grid_base + mi_row * cm->mi_params.mi_stride + mi_col;
793 const MB_MODE_INFO *mbmi = mi[0];
794 assert(mbmi);
795
796 const TX_SIZE ts = get_transform_size(xd, mi[0], mi_row, mi_col, plane,
797 scale_horz, scale_vert);
798 *tx_size = ts;
799
800 #ifndef NDEBUG
801 const uint32_t transform_masks =
802 is_vert ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
803 const int32_t tu_edge = ((coord * MI_SIZE) & transform_masks) ? (0) : (1);
804 assert(tu_edge);
805 #endif // NDEBUG
806
807 // If we are not the first block, then coord is always true, so
808 // !is_first_block is technically redundant. But we are keeping it here so the
809 // compiler can compile away this conditional if we pass in is_first_block :=
810 // false
811 bool curr_skipped = false;
812 if (!is_first_block || coord) {
813 const MB_MODE_INFO *const mi_prev = *(mi - mode_step);
814 assert(mi_prev);
815 const int pv_row = is_vert ? (mi_row) : (mi_row - (1 << scale_vert));
816 const int pv_col = is_vert ? (mi_col - (1 << scale_horz)) : (mi_col);
817 const TX_SIZE pv_ts =
818 is_first_block ? get_transform_size(xd, mi_prev, pv_row, pv_col, plane,
819 scale_horz, scale_vert)
820 : prev_tx_size;
821 if (is_first_block) {
822 *min_dim = is_vert ? tx_size_high[pv_ts] : tx_size_wide[pv_ts];
823 }
824
825 uint8_t level =
826 av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
827 if (!level) {
828 level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev);
829 }
830 #ifndef NDEBUG
831 if (joint_filter_chroma) {
832 uint8_t v_level =
833 av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_V, mbmi);
834 if (!v_level) {
835 v_level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_V,
836 mi_prev);
837 }
838 assert(level == v_level);
839 }
840 #else
841 (void)joint_filter_chroma;
842 #endif // NDEBUG
843 const int32_t pu_edge = mi_prev != mbmi;
844
845 if (!pu_edge) {
846 curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi);
847 }
848 // For realtime mode, u and v have the same level
849 if ((!curr_skipped || pu_edge) && level) {
850 params->filter_length = is_vert ? vert_filter_length_chroma[ts][pv_ts]
851 : horz_filter_length_chroma[ts][pv_ts];
852
853 const loop_filter_thresh *const limits = cm->lf_info.lfthr;
854 params->lfthr = limits + level;
855 }
856 }
857 const int tx_dim = is_vert ? tx_size_high[ts] : tx_size_wide[ts];
858 *min_dim = AOMMIN(*min_dim, tx_dim);
859 }
860
set_lpf_parameters_for_line_chroma(AV1_DEBLOCKING_PARAMETERS * const params_buf,TX_SIZE * tx_buf,const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const EDGE_DIR edge_dir,uint32_t mi_col,uint32_t mi_row,const struct macroblockd_plane * const plane_ptr,const uint32_t mi_range,const ptrdiff_t mode_step,const int scale_horz,const int scale_vert,int * min_dim,int plane,int joint_filter_chroma)861 static AOM_FORCE_INLINE void set_lpf_parameters_for_line_chroma(
862 AV1_DEBLOCKING_PARAMETERS *const params_buf, TX_SIZE *tx_buf,
863 const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
864 const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row,
865 const struct macroblockd_plane *const plane_ptr, const uint32_t mi_range,
866 const ptrdiff_t mode_step, const int scale_horz, const int scale_vert,
867 int *min_dim, int plane, int joint_filter_chroma) {
868 const int is_vert = edge_dir == VERT_EDGE;
869
870 AV1_DEBLOCKING_PARAMETERS *params = params_buf;
871 TX_SIZE *tx_size = tx_buf;
872 uint32_t *counter_ptr = is_vert ? &mi_col : &mi_row;
873 const uint32_t scale = is_vert ? scale_horz : scale_vert;
874 TX_SIZE prev_tx_size = TX_INVALID;
875
876 // Unroll the first iteration of the loop
877 set_one_param_for_line_chroma(params, tx_size, cm, xd, edge_dir, mi_col,
878 mi_row, *counter_ptr, true, prev_tx_size,
879 plane_ptr, mode_step, scale_horz, scale_vert,
880 min_dim, plane, joint_filter_chroma);
881
882 // Advance
883 int advance_units =
884 is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size];
885 prev_tx_size = *tx_size;
886 *counter_ptr += advance_units << scale;
887 params += advance_units;
888 tx_size += advance_units;
889
890 while (*counter_ptr < mi_range) {
891 set_one_param_for_line_chroma(params, tx_size, cm, xd, edge_dir, mi_col,
892 mi_row, *counter_ptr, false, prev_tx_size,
893 plane_ptr, mode_step, scale_horz, scale_vert,
894 min_dim, plane, joint_filter_chroma);
895
896 // Advance
897 advance_units =
898 is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size];
899 prev_tx_size = *tx_size;
900 *counter_ptr += advance_units << scale;
901 params += advance_units;
902 tx_size += advance_units;
903 }
904 }
905
filter_vert(uint8_t * dst,int dst_stride,const AV1_DEBLOCKING_PARAMETERS * params,const SequenceHeader * seq_params,USE_FILTER_TYPE use_filter_type)906 static AOM_INLINE void filter_vert(uint8_t *dst, int dst_stride,
907 const AV1_DEBLOCKING_PARAMETERS *params,
908 const SequenceHeader *seq_params,
909 USE_FILTER_TYPE use_filter_type) {
910 const loop_filter_thresh *limits = params->lfthr;
911 #if CONFIG_AV1_HIGHBITDEPTH
912 const int use_highbitdepth = seq_params->use_highbitdepth;
913 const aom_bit_depth_t bit_depth = seq_params->bit_depth;
914 if (use_highbitdepth) {
915 uint16_t *dst_shortptr = CONVERT_TO_SHORTPTR(dst);
916 if (use_filter_type == USE_QUAD) {
917 switch (params->filter_length) {
918 // apply 4-tap filtering
919 case 4:
920 aom_highbd_lpf_vertical_4_dual(
921 dst_shortptr, dst_stride, limits->mblim, limits->lim,
922 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
923 bit_depth);
924 aom_highbd_lpf_vertical_4_dual(
925 dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
926 limits->mblim, limits->lim, limits->hev_thr, limits->mblim,
927 limits->lim, limits->hev_thr, bit_depth);
928 break;
929 case 6: // apply 6-tap filter for chroma plane only
930 aom_highbd_lpf_vertical_6_dual(
931 dst_shortptr, dst_stride, limits->mblim, limits->lim,
932 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
933 bit_depth);
934 aom_highbd_lpf_vertical_6_dual(
935 dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
936 limits->mblim, limits->lim, limits->hev_thr, limits->mblim,
937 limits->lim, limits->hev_thr, bit_depth);
938 break;
939 // apply 8-tap filtering
940 case 8:
941 aom_highbd_lpf_vertical_8_dual(
942 dst_shortptr, dst_stride, limits->mblim, limits->lim,
943 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
944 bit_depth);
945 aom_highbd_lpf_vertical_8_dual(
946 dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
947 limits->mblim, limits->lim, limits->hev_thr, limits->mblim,
948 limits->lim, limits->hev_thr, bit_depth);
949 break;
950 // apply 14-tap filtering
951 case 14:
952 aom_highbd_lpf_vertical_14_dual(
953 dst_shortptr, dst_stride, limits->mblim, limits->lim,
954 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
955 bit_depth);
956 aom_highbd_lpf_vertical_14_dual(
957 dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
958 limits->mblim, limits->lim, limits->hev_thr, limits->mblim,
959 limits->lim, limits->hev_thr, bit_depth);
960 break;
961 // no filtering
962 default: break;
963 }
964 } else if (use_filter_type == USE_DUAL) {
965 switch (params->filter_length) {
966 // apply 4-tap filtering
967 case 4:
968 aom_highbd_lpf_vertical_4_dual(
969 dst_shortptr, dst_stride, limits->mblim, limits->lim,
970 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
971 bit_depth);
972 break;
973 case 6: // apply 6-tap filter for chroma plane only
974 aom_highbd_lpf_vertical_6_dual(
975 dst_shortptr, dst_stride, limits->mblim, limits->lim,
976 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
977 bit_depth);
978 break;
979 // apply 8-tap filtering
980 case 8:
981 aom_highbd_lpf_vertical_8_dual(
982 dst_shortptr, dst_stride, limits->mblim, limits->lim,
983 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
984 bit_depth);
985 break;
986 // apply 14-tap filtering
987 case 14:
988 aom_highbd_lpf_vertical_14_dual(
989 dst_shortptr, dst_stride, limits->mblim, limits->lim,
990 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
991 bit_depth);
992 break;
993 // no filtering
994 default: break;
995 }
996 } else {
997 assert(use_filter_type == USE_SINGLE);
998 switch (params->filter_length) {
999 // apply 4-tap filtering
1000 case 4:
1001 aom_highbd_lpf_vertical_4(dst_shortptr, dst_stride, limits->mblim,
1002 limits->lim, limits->hev_thr, bit_depth);
1003 break;
1004 case 6: // apply 6-tap filter for chroma plane only
1005 aom_highbd_lpf_vertical_6(dst_shortptr, dst_stride, limits->mblim,
1006 limits->lim, limits->hev_thr, bit_depth);
1007 break;
1008 // apply 8-tap filtering
1009 case 8:
1010 aom_highbd_lpf_vertical_8(dst_shortptr, dst_stride, limits->mblim,
1011 limits->lim, limits->hev_thr, bit_depth);
1012 break;
1013 // apply 14-tap filtering
1014 case 14:
1015 aom_highbd_lpf_vertical_14(dst_shortptr, dst_stride, limits->mblim,
1016 limits->lim, limits->hev_thr, bit_depth);
1017 break;
1018 // no filtering
1019 default: break;
1020 }
1021 }
1022 return;
1023 }
1024 #endif // CONFIG_AV1_HIGHBITDEPTH
1025 if (use_filter_type == USE_QUAD) {
1026 // Only one set of loop filter parameters (mblim, lim and hev_thr) is
1027 // passed as argument to quad loop filter because quad loop filter is
1028 // called for those cases where all the 4 set of loop filter parameters
1029 // are equal.
1030 switch (params->filter_length) {
1031 // apply 4-tap filtering
1032 case 4:
1033 aom_lpf_vertical_4_quad(dst, dst_stride, limits->mblim, limits->lim,
1034 limits->hev_thr);
1035 break;
1036 case 6: // apply 6-tap filter for chroma plane only
1037 aom_lpf_vertical_6_quad(dst, dst_stride, limits->mblim, limits->lim,
1038 limits->hev_thr);
1039 break;
1040 // apply 8-tap filtering
1041 case 8:
1042 aom_lpf_vertical_8_quad(dst, dst_stride, limits->mblim, limits->lim,
1043 limits->hev_thr);
1044 break;
1045 // apply 14-tap filtering
1046 case 14:
1047 aom_lpf_vertical_14_quad(dst, dst_stride, limits->mblim, limits->lim,
1048 limits->hev_thr);
1049 break;
1050 // no filtering
1051 default: break;
1052 }
1053 } else if (use_filter_type == USE_DUAL) {
1054 switch (params->filter_length) {
1055 // apply 4-tap filtering
1056 case 4:
1057 aom_lpf_vertical_4_dual(dst, dst_stride, limits->mblim, limits->lim,
1058 limits->hev_thr, limits->mblim, limits->lim,
1059 limits->hev_thr);
1060 break;
1061 case 6: // apply 6-tap filter for chroma plane only
1062 aom_lpf_vertical_6_dual(dst, dst_stride, limits->mblim, limits->lim,
1063 limits->hev_thr, limits->mblim, limits->lim,
1064 limits->hev_thr);
1065 break;
1066 // apply 8-tap filtering
1067 case 8:
1068 aom_lpf_vertical_8_dual(dst, dst_stride, limits->mblim, limits->lim,
1069 limits->hev_thr, limits->mblim, limits->lim,
1070 limits->hev_thr);
1071 break;
1072 // apply 14-tap filtering
1073 case 14:
1074 aom_lpf_vertical_14_dual(dst, dst_stride, limits->mblim, limits->lim,
1075 limits->hev_thr, limits->mblim, limits->lim,
1076 limits->hev_thr);
1077 break;
1078 // no filtering
1079 default: break;
1080 }
1081 } else {
1082 assert(use_filter_type == USE_SINGLE);
1083 switch (params->filter_length) {
1084 // apply 4-tap filtering
1085 case 4:
1086 aom_lpf_vertical_4(dst, dst_stride, limits->mblim, limits->lim,
1087 limits->hev_thr);
1088 break;
1089 case 6: // apply 6-tap filter for chroma plane only
1090 aom_lpf_vertical_6(dst, dst_stride, limits->mblim, limits->lim,
1091 limits->hev_thr);
1092 break;
1093 // apply 8-tap filtering
1094 case 8:
1095 aom_lpf_vertical_8(dst, dst_stride, limits->mblim, limits->lim,
1096 limits->hev_thr);
1097 break;
1098 // apply 14-tap filtering
1099 case 14:
1100 aom_lpf_vertical_14(dst, dst_stride, limits->mblim, limits->lim,
1101 limits->hev_thr);
1102 break;
1103 // no filtering
1104 default: break;
1105 }
1106 }
1107 #if !CONFIG_AV1_HIGHBITDEPTH
1108 (void)seq_params;
1109 #endif // !CONFIG_AV1_HIGHBITDEPTH
1110 }
1111
filter_vert_chroma(uint8_t * u_dst,uint8_t * v_dst,int dst_stride,const AV1_DEBLOCKING_PARAMETERS * params,const SequenceHeader * seq_params,USE_FILTER_TYPE use_filter_type)1112 static AOM_INLINE void filter_vert_chroma(
1113 uint8_t *u_dst, uint8_t *v_dst, int dst_stride,
1114 const AV1_DEBLOCKING_PARAMETERS *params, const SequenceHeader *seq_params,
1115 USE_FILTER_TYPE use_filter_type) {
1116 const loop_filter_thresh *u_limits = params->lfthr;
1117 const loop_filter_thresh *v_limits = params->lfthr;
1118 #if CONFIG_AV1_HIGHBITDEPTH
1119 const int use_highbitdepth = seq_params->use_highbitdepth;
1120 const aom_bit_depth_t bit_depth = seq_params->bit_depth;
1121 if (use_highbitdepth) {
1122 uint16_t *u_dst_shortptr = CONVERT_TO_SHORTPTR(u_dst);
1123 uint16_t *v_dst_shortptr = CONVERT_TO_SHORTPTR(v_dst);
1124 if (use_filter_type == USE_QUAD) {
1125 switch (params->filter_length) {
1126 // apply 4-tap filtering
1127 case 4:
1128 aom_highbd_lpf_vertical_4_dual(
1129 u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1130 u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1131 u_limits->hev_thr, bit_depth);
1132 aom_highbd_lpf_vertical_4_dual(
1133 u_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
1134 u_limits->mblim, u_limits->lim, u_limits->hev_thr,
1135 u_limits->mblim, u_limits->lim, u_limits->hev_thr, bit_depth);
1136 aom_highbd_lpf_vertical_4_dual(
1137 v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1138 v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1139 v_limits->hev_thr, bit_depth);
1140 aom_highbd_lpf_vertical_4_dual(
1141 v_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
1142 v_limits->mblim, v_limits->lim, v_limits->hev_thr,
1143 v_limits->mblim, v_limits->lim, v_limits->hev_thr, bit_depth);
1144 break;
1145 case 6: // apply 6-tap filter for chroma plane only
1146 aom_highbd_lpf_vertical_6_dual(
1147 u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1148 u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1149 u_limits->hev_thr, bit_depth);
1150 aom_highbd_lpf_vertical_6_dual(
1151 u_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
1152 u_limits->mblim, u_limits->lim, u_limits->hev_thr,
1153 u_limits->mblim, u_limits->lim, u_limits->hev_thr, bit_depth);
1154 aom_highbd_lpf_vertical_6_dual(
1155 v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1156 v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1157 v_limits->hev_thr, bit_depth);
1158 aom_highbd_lpf_vertical_6_dual(
1159 v_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
1160 v_limits->mblim, v_limits->lim, v_limits->hev_thr,
1161 v_limits->mblim, v_limits->lim, v_limits->hev_thr, bit_depth);
1162 break;
1163 case 8:
1164 case 14: assert(0);
1165 // no filtering
1166 default: break;
1167 }
1168 } else if (use_filter_type == USE_DUAL) {
1169 switch (params->filter_length) {
1170 // apply 4-tap filtering
1171 case 4:
1172 aom_highbd_lpf_vertical_4_dual(
1173 u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1174 u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1175 u_limits->hev_thr, bit_depth);
1176 aom_highbd_lpf_vertical_4_dual(
1177 v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1178 v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1179 v_limits->hev_thr, bit_depth);
1180 break;
1181 case 6: // apply 6-tap filter for chroma plane only
1182 aom_highbd_lpf_vertical_6_dual(
1183 u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1184 u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1185 u_limits->hev_thr, bit_depth);
1186 aom_highbd_lpf_vertical_6_dual(
1187 v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1188 v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1189 v_limits->hev_thr, bit_depth);
1190 break;
1191 case 8:
1192 case 14: assert(0);
1193 // no filtering
1194 default: break;
1195 }
1196 } else {
1197 assert(use_filter_type == USE_SINGLE);
1198 switch (params->filter_length) {
1199 // apply 4-tap filtering
1200 case 4:
1201 aom_highbd_lpf_vertical_4(u_dst_shortptr, dst_stride, u_limits->mblim,
1202 u_limits->lim, u_limits->hev_thr,
1203 bit_depth);
1204 aom_highbd_lpf_vertical_4(v_dst_shortptr, dst_stride, v_limits->mblim,
1205 v_limits->lim, v_limits->hev_thr,
1206 bit_depth);
1207 break;
1208 case 6: // apply 6-tap filter for chroma plane only
1209 aom_highbd_lpf_vertical_6(u_dst_shortptr, dst_stride, u_limits->mblim,
1210 u_limits->lim, u_limits->hev_thr,
1211 bit_depth);
1212 aom_highbd_lpf_vertical_6(v_dst_shortptr, dst_stride, v_limits->mblim,
1213 v_limits->lim, v_limits->hev_thr,
1214 bit_depth);
1215 break;
1216 case 8:
1217 case 14: assert(0); break;
1218 // no filtering
1219 default: break;
1220 }
1221 }
1222 return;
1223 }
1224 #endif // CONFIG_AV1_HIGHBITDEPTH
1225 if (use_filter_type == USE_QUAD) {
1226 // Only one set of loop filter parameters (mblim, lim and hev_thr) is
1227 // passed as argument to quad loop filter because quad loop filter is
1228 // called for those cases where all the 4 set of loop filter parameters
1229 // are equal.
1230 switch (params->filter_length) {
1231 // apply 4-tap filtering
1232 case 4:
1233 aom_lpf_vertical_4_quad(u_dst, dst_stride, u_limits->mblim,
1234 u_limits->lim, u_limits->hev_thr);
1235 aom_lpf_vertical_4_quad(v_dst, dst_stride, v_limits->mblim,
1236 v_limits->lim, v_limits->hev_thr);
1237 break;
1238 case 6: // apply 6-tap filter for chroma plane only
1239 aom_lpf_vertical_6_quad(u_dst, dst_stride, u_limits->mblim,
1240 u_limits->lim, u_limits->hev_thr);
1241 aom_lpf_vertical_6_quad(v_dst, dst_stride, v_limits->mblim,
1242 v_limits->lim, v_limits->hev_thr);
1243 break;
1244 case 8:
1245 case 14: assert(0);
1246 // no filtering
1247 default: break;
1248 }
1249 } else if (use_filter_type == USE_DUAL) {
1250 switch (params->filter_length) {
1251 // apply 4-tap filtering
1252 case 4:
1253 aom_lpf_vertical_4_dual(u_dst, dst_stride, u_limits->mblim,
1254 u_limits->lim, u_limits->hev_thr,
1255 u_limits->mblim, u_limits->lim,
1256 u_limits->hev_thr);
1257 aom_lpf_vertical_4_dual(v_dst, dst_stride, v_limits->mblim,
1258 v_limits->lim, v_limits->hev_thr,
1259 v_limits->mblim, v_limits->lim,
1260 v_limits->hev_thr);
1261 break;
1262 case 6: // apply 6-tap filter for chroma plane only
1263 aom_lpf_vertical_6_dual(u_dst, dst_stride, u_limits->mblim,
1264 u_limits->lim, u_limits->hev_thr,
1265 u_limits->mblim, u_limits->lim,
1266 u_limits->hev_thr);
1267 aom_lpf_vertical_6_dual(v_dst, dst_stride, v_limits->mblim,
1268 v_limits->lim, v_limits->hev_thr,
1269 v_limits->mblim, v_limits->lim,
1270 v_limits->hev_thr);
1271 break;
1272 case 8:
1273 case 14: assert(0);
1274 // no filtering
1275 default: break;
1276 }
1277 } else {
1278 assert(use_filter_type == USE_SINGLE);
1279 switch (params->filter_length) {
1280 // apply 4-tap filtering
1281 case 4:
1282 aom_lpf_vertical_4(u_dst, dst_stride, u_limits->mblim, u_limits->lim,
1283 u_limits->hev_thr);
1284 aom_lpf_vertical_4(v_dst, dst_stride, v_limits->mblim, v_limits->lim,
1285 u_limits->hev_thr);
1286 break;
1287 case 6: // apply 6-tap filter for chroma plane only
1288 aom_lpf_vertical_6(u_dst, dst_stride, u_limits->mblim, u_limits->lim,
1289 u_limits->hev_thr);
1290 aom_lpf_vertical_6(v_dst, dst_stride, v_limits->mblim, v_limits->lim,
1291 v_limits->hev_thr);
1292 break;
1293 case 8:
1294 case 14: assert(0); break;
1295 // no filtering
1296 default: break;
1297 }
1298 }
1299 #if !CONFIG_AV1_HIGHBITDEPTH
1300 (void)seq_params;
1301 #endif // !CONFIG_AV1_HIGHBITDEPTH
1302 }
1303
av1_filter_block_plane_vert(const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const int plane,const MACROBLOCKD_PLANE * const plane_ptr,const uint32_t mi_row,const uint32_t mi_col)1304 void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
1305 const MACROBLOCKD *const xd, const int plane,
1306 const MACROBLOCKD_PLANE *const plane_ptr,
1307 const uint32_t mi_row, const uint32_t mi_col) {
1308 const uint32_t scale_horz = plane_ptr->subsampling_x;
1309 const uint32_t scale_vert = plane_ptr->subsampling_y;
1310 uint8_t *const dst_ptr = plane_ptr->dst.buf;
1311 const int dst_stride = plane_ptr->dst.stride;
1312 const int plane_mi_rows =
1313 ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
1314 const int plane_mi_cols =
1315 ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
1316 const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
1317 (MAX_MIB_SIZE >> scale_vert));
1318 const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
1319 (MAX_MIB_SIZE >> scale_horz));
1320
1321 for (int y = 0; y < y_range; y++) {
1322 uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
1323 for (int x = 0; x < x_range;) {
1324 // inner loop always filter vertical edges in a MI block. If MI size
1325 // is 8x8, it will filter the vertical edge aligned with a 8x8 block.
1326 // If 4x4 transform is used, it will then filter the internal edge
1327 // aligned with a 4x4 block
1328 const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
1329 const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
1330 uint32_t advance_units;
1331 TX_SIZE tx_size;
1332 AV1_DEBLOCKING_PARAMETERS params;
1333 memset(¶ms, 0, sizeof(params));
1334
1335 tx_size =
1336 set_lpf_parameters(¶ms, ((ptrdiff_t)1 << scale_horz), cm, xd,
1337 VERT_EDGE, curr_x, curr_y, plane, plane_ptr);
1338 if (tx_size == TX_INVALID) {
1339 params.filter_length = 0;
1340 tx_size = TX_4X4;
1341 }
1342
1343 filter_vert(p, dst_stride, ¶ms, cm->seq_params, USE_SINGLE);
1344
1345 // advance the destination pointer
1346 advance_units = tx_size_wide_unit[tx_size];
1347 x += advance_units;
1348 p += advance_units * MI_SIZE;
1349 }
1350 }
1351 }
1352
// Filters the vertical edges of the luma plane for the unit that starts at
// (mi_row, mi_col). For every row the per-block parameters are first written
// to params_buf / tx_buf by set_lpf_parameters_for_line_luma(); the row is
// then filtered, batching 4 or 2 consecutive rows into one quad or dual
// filter call when the minimum block height on the line allows it.
//
// params_buf and tx_buf are caller-provided scratch buffers indexed in MI
// units along the row. num_mis_in_lpf_unit_height_log2 is log2 of the unit
// height in MI units.
void av1_filter_block_plane_vert_opt(
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
    const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
    const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
    TX_SIZE *tx_buf, int num_mis_in_lpf_unit_height_log2) {
  uint8_t *const dst_ptr = plane_ptr->dst.buf;
  const int dst_stride = plane_ptr->dst.stride;
  // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned
  // to MI_SIZE.
  const int plane_mi_cols =
      CEIL_POWER_OF_TWO(plane_ptr->dst.width, MI_SIZE_LOG2);
  const int plane_mi_rows =
      CEIL_POWER_OF_TWO(plane_ptr->dst.height, MI_SIZE_LOG2);
  // Whenever 'pipeline_lpf_mt_with_enc' is enabled, height of the unit to
  // filter (i.e., y_range) is calculated based on the size of the superblock
  // used.
  const int y_range = AOMMIN((int)(plane_mi_rows - mi_row),
                             (1 << num_mis_in_lpf_unit_height_log2));
  // Width of the unit to filter (i.e., x_range) should always be calculated
  // based on maximum superblock size as this function is called for mi_col = 0,
  // MAX_MIB_SIZE, 2 * MAX_MIB_SIZE etc.
  const int x_range = AOMMIN((int)(plane_mi_cols - mi_col), MAX_MIB_SIZE);
  const ptrdiff_t mode_step = 1;
  for (int y = 0; y < y_range; y++) {
    const uint32_t curr_y = mi_row + y;
    const uint32_t x_start = mi_col;
    const uint32_t x_end = mi_col + x_range;
    // Starts at the largest block height; presumably lowered by the line
    // setter to the smallest block height found on this row.
    int min_block_height = block_size_high[BLOCK_128X128];
    set_lpf_parameters_for_line_luma(params_buf, tx_buf, cm, xd, VERT_EDGE,
                                     x_start, curr_y, plane_ptr, x_end,
                                     mode_step, &min_block_height);

    AV1_DEBLOCKING_PARAMETERS *params = params_buf;
    TX_SIZE *tx_size = tx_buf;
    USE_FILTER_TYPE use_filter_type = USE_SINGLE;

    uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;

    if ((y & 3) == 0 && (y + 3) < y_range && min_block_height >= 16) {
      // If we are on a row which is a multiple of 4, and the minimum height is
      // 16 pixels, then the current and below 3 rows must contain the same
      // prediction block. This is because dim 16 can only happen every unit of
      // 4 mi's.
      use_filter_type = USE_QUAD;
      y += 3;
    } else if ((y & 1) == 0 && (y + 1) < y_range && min_block_height >= 8) {
      // If we are on an even row, and the minimum height is 8 pixels, then the
      // current and below rows must contain the same prediction block. The
      // even-row guard mirrors the chroma variant: a pair starting on an odd
      // row could straddle two different blocks.
      use_filter_type = USE_DUAL;
      y += 1;
    }

    for (int x = 0; x < x_range;) {
      if (*tx_size == TX_INVALID) {
        // Nothing to filter at this position; advance by the smallest unit.
        params->filter_length = 0;
        *tx_size = TX_4X4;
      }

      filter_vert(p, dst_stride, params, cm->seq_params, use_filter_type);

      // advance the destination pointer
      const uint32_t advance_units = tx_size_wide_unit[*tx_size];
      x += advance_units;
      p += advance_units * MI_SIZE;
      params += advance_units;
      tx_size += advance_units;
    }
  }
}
1420
// Filters the vertical edges of a chroma plane for the unit that starts at
// (mi_row, mi_col). When joint_filter_chroma is set, plane_ptr[0] and
// plane_ptr[1] are filtered together with one shared parameter set;
// otherwise only plane_ptr is filtered. Rows are batched into quad or dual
// filter calls when the minimum height reported for the line allows it.
void av1_filter_block_plane_vert_opt_chroma(
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
    const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
    const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
    TX_SIZE *tx_buf, int plane, bool joint_filter_chroma,
    int num_mis_in_lpf_unit_height_log2) {
  const uint32_t ss_x = plane_ptr->subsampling_x;
  const uint32_t ss_y = plane_ptr->subsampling_y;
  const int stride = plane_ptr->dst.stride;

  // Frame size in MI units, taken from the plane size scaled back up and
  // rounded up to a whole MI_SIZE, then converted to this plane's resolution.
  const int frame_mi_cols =
      ((plane_ptr->dst.width << ss_x) + MI_SIZE - 1) >> MI_SIZE_LOG2;
  const int frame_mi_rows =
      ((plane_ptr->dst.height << ss_y) + MI_SIZE - 1) >> MI_SIZE_LOG2;
  const int rows_in_plane = ROUND_POWER_OF_TWO(frame_mi_rows, ss_y);
  const int cols_in_plane = ROUND_POWER_OF_TWO(frame_mi_cols, ss_x);

  const int num_rows =
      AOMMIN((int)(rows_in_plane - (mi_row >> ss_y)),
             ((1 << num_mis_in_lpf_unit_height_log2) >> ss_y));
  const int num_cols = AOMMIN((int)(cols_in_plane - (mi_col >> ss_x)),
                              (MAX_MIB_SIZE >> ss_x));

  const ptrdiff_t mode_step = (ptrdiff_t)1 << ss_x;
  // The column span is the same for every row.
  const uint32_t x_first = mi_col + (0 << ss_x);
  const uint32_t x_last = mi_col + (num_cols << ss_x);

  int y = 0;
  while (y < num_rows) {
    const uint32_t line_mi_row = mi_row + (y << ss_y);
    int min_height = tx_size_high[TX_64X64];
    set_lpf_parameters_for_line_chroma(params_buf, tx_buf, cm, xd, VERT_EDGE,
                                       x_first, line_mi_row, plane_ptr, x_last,
                                       mode_step, ss_x, ss_y, &min_height,
                                       plane, joint_filter_chroma);

    // Decide how many rows one filter call covers.
    int use_filter_type = USE_SINGLE;
    int rows_covered = 1;
    if ((y & 3) == 0 && (y + 3) < num_rows && min_height >= 16) {
      // On a row that is a multiple of 4 with a minimum height of 16 pixels,
      // this row and the 3 below it must belong to the same tx block, since a
      // dimension of 16 can only start every 4 MI units.
      use_filter_type = USE_QUAD;
      rows_covered = 4;
    } else if (y % 2 == 0 && (y + 1) < num_rows && min_height >= 8) {
      // On an even row with a minimum height of 8 pixels, this row and the
      // one below must belong to the same tx block: a dimension of 4 can
      // start every 2**0 units, 8 every 2**1 units, and so on.
      use_filter_type = USE_DUAL;
      rows_covered = 2;
    }

    const AV1_DEBLOCKING_PARAMETERS *line_params = params_buf;
    const int row_offset = y * MI_SIZE * stride;

    // Each step filters the vertical edge at the left of one transform block
    // and then skips that block's width in MI units.
    int x = 0;
    while (x < num_cols) {
      AV1_DEBLOCKING_PARAMETERS *const cur = params_buf + x;
      TX_SIZE *const cur_tx = tx_buf + x;
      if (*cur_tx == TX_INVALID) {
        // Nothing to filter here; still step forward by the smallest unit.
        cur->filter_length = 0;
        *cur_tx = TX_4X4;
      }

      const int offset = row_offset + x * MI_SIZE;
      if (joint_filter_chroma) {
        filter_vert_chroma(plane_ptr[0].dst.buf + offset,
                           plane_ptr[1].dst.buf + offset, stride,
                           line_params + x, cm->seq_params, use_filter_type);
      } else {
        filter_vert(plane_ptr->dst.buf + offset, stride, line_params + x,
                    cm->seq_params, use_filter_type);
      }

      x += tx_size_wide_unit[*cur_tx];
    }

    y += rows_covered;
  }
}
1506
filter_horz(uint8_t * dst,int dst_stride,const AV1_DEBLOCKING_PARAMETERS * params,const SequenceHeader * seq_params,USE_FILTER_TYPE use_filter_type)1507 static AOM_INLINE void filter_horz(uint8_t *dst, int dst_stride,
1508 const AV1_DEBLOCKING_PARAMETERS *params,
1509 const SequenceHeader *seq_params,
1510 USE_FILTER_TYPE use_filter_type) {
1511 const loop_filter_thresh *limits = params->lfthr;
1512 #if CONFIG_AV1_HIGHBITDEPTH
1513 const int use_highbitdepth = seq_params->use_highbitdepth;
1514 const aom_bit_depth_t bit_depth = seq_params->bit_depth;
1515 if (use_highbitdepth) {
1516 uint16_t *dst_shortptr = CONVERT_TO_SHORTPTR(dst);
1517 if (use_filter_type == USE_QUAD) {
1518 switch (params->filter_length) {
1519 // apply 4-tap filtering
1520 case 4:
1521 aom_highbd_lpf_horizontal_4_dual(
1522 dst_shortptr, dst_stride, limits->mblim, limits->lim,
1523 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1524 bit_depth);
1525 aom_highbd_lpf_horizontal_4_dual(
1526 dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim,
1527 limits->lim, limits->hev_thr, limits->mblim, limits->lim,
1528 limits->hev_thr, bit_depth);
1529 break;
1530 case 6: // apply 6-tap filter for chroma plane only
1531 aom_highbd_lpf_horizontal_6_dual(
1532 dst_shortptr, dst_stride, limits->mblim, limits->lim,
1533 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1534 bit_depth);
1535 aom_highbd_lpf_horizontal_6_dual(
1536 dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim,
1537 limits->lim, limits->hev_thr, limits->mblim, limits->lim,
1538 limits->hev_thr, bit_depth);
1539 break;
1540 // apply 8-tap filtering
1541 case 8:
1542 aom_highbd_lpf_horizontal_8_dual(
1543 dst_shortptr, dst_stride, limits->mblim, limits->lim,
1544 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1545 bit_depth);
1546 aom_highbd_lpf_horizontal_8_dual(
1547 dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim,
1548 limits->lim, limits->hev_thr, limits->mblim, limits->lim,
1549 limits->hev_thr, bit_depth);
1550 break;
1551 // apply 14-tap filtering
1552 case 14:
1553 aom_highbd_lpf_horizontal_14_dual(
1554 dst_shortptr, dst_stride, limits->mblim, limits->lim,
1555 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1556 bit_depth);
1557 aom_highbd_lpf_horizontal_14_dual(
1558 dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim,
1559 limits->lim, limits->hev_thr, limits->mblim, limits->lim,
1560 limits->hev_thr, bit_depth);
1561 break;
1562 // no filtering
1563 default: break;
1564 }
1565 } else if (use_filter_type == USE_DUAL) {
1566 switch (params->filter_length) {
1567 // apply 4-tap filtering
1568 case 4:
1569 aom_highbd_lpf_horizontal_4_dual(
1570 dst_shortptr, dst_stride, limits->mblim, limits->lim,
1571 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1572 bit_depth);
1573 break;
1574 case 6: // apply 6-tap filter for chroma plane only
1575 aom_highbd_lpf_horizontal_6_dual(
1576 dst_shortptr, dst_stride, limits->mblim, limits->lim,
1577 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1578 bit_depth);
1579 break;
1580 // apply 8-tap filtering
1581 case 8:
1582 aom_highbd_lpf_horizontal_8_dual(
1583 dst_shortptr, dst_stride, limits->mblim, limits->lim,
1584 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1585 bit_depth);
1586 break;
1587 // apply 14-tap filtering
1588 case 14:
1589 aom_highbd_lpf_horizontal_14_dual(
1590 dst_shortptr, dst_stride, limits->mblim, limits->lim,
1591 limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1592 bit_depth);
1593 break;
1594 // no filtering
1595 default: break;
1596 }
1597 } else {
1598 assert(use_filter_type == USE_SINGLE);
1599 switch (params->filter_length) {
1600 // apply 4-tap filtering
1601 case 4:
1602 aom_highbd_lpf_horizontal_4(dst_shortptr, dst_stride, limits->mblim,
1603 limits->lim, limits->hev_thr, bit_depth);
1604 break;
1605 case 6: // apply 6-tap filter for chroma plane only
1606 aom_highbd_lpf_horizontal_6(dst_shortptr, dst_stride, limits->mblim,
1607 limits->lim, limits->hev_thr, bit_depth);
1608 break;
1609 // apply 8-tap filtering
1610 case 8:
1611 aom_highbd_lpf_horizontal_8(dst_shortptr, dst_stride, limits->mblim,
1612 limits->lim, limits->hev_thr, bit_depth);
1613 break;
1614 // apply 14-tap filtering
1615 case 14:
1616 aom_highbd_lpf_horizontal_14(dst_shortptr, dst_stride, limits->mblim,
1617 limits->lim, limits->hev_thr, bit_depth);
1618 break;
1619 // no filtering
1620 default: break;
1621 }
1622 }
1623 return;
1624 }
1625 #endif // CONFIG_AV1_HIGHBITDEPTH
1626 if (use_filter_type == USE_QUAD) {
1627 // Only one set of loop filter parameters (mblim, lim and hev_thr) is
1628 // passed as argument to quad loop filter because quad loop filter is
1629 // called for those cases where all the 4 set of loop filter parameters
1630 // are equal.
1631 switch (params->filter_length) {
1632 // apply 4-tap filtering
1633 case 4:
1634 aom_lpf_horizontal_4_quad(dst, dst_stride, limits->mblim, limits->lim,
1635 limits->hev_thr);
1636 break;
1637 case 6: // apply 6-tap filter for chroma plane only
1638 aom_lpf_horizontal_6_quad(dst, dst_stride, limits->mblim, limits->lim,
1639 limits->hev_thr);
1640 break;
1641 // apply 8-tap filtering
1642 case 8:
1643 aom_lpf_horizontal_8_quad(dst, dst_stride, limits->mblim, limits->lim,
1644 limits->hev_thr);
1645 break;
1646 // apply 14-tap filtering
1647 case 14:
1648 aom_lpf_horizontal_14_quad(dst, dst_stride, limits->mblim, limits->lim,
1649 limits->hev_thr);
1650 break;
1651 // no filtering
1652 default: break;
1653 }
1654 } else if (use_filter_type == USE_DUAL) {
1655 switch (params->filter_length) {
1656 // apply 4-tap filtering
1657 case 4:
1658 aom_lpf_horizontal_4_dual(dst, dst_stride, limits->mblim, limits->lim,
1659 limits->hev_thr, limits->mblim, limits->lim,
1660 limits->hev_thr);
1661 break;
1662 case 6: // apply 6-tap filter for chroma plane only
1663 aom_lpf_horizontal_6_dual(dst, dst_stride, limits->mblim, limits->lim,
1664 limits->hev_thr, limits->mblim, limits->lim,
1665 limits->hev_thr);
1666 break;
1667 // apply 8-tap filtering
1668 case 8:
1669 aom_lpf_horizontal_8_dual(dst, dst_stride, limits->mblim, limits->lim,
1670 limits->hev_thr, limits->mblim, limits->lim,
1671 limits->hev_thr);
1672 break;
1673 // apply 14-tap filtering
1674 case 14:
1675 aom_lpf_horizontal_14_dual(dst, dst_stride, limits->mblim, limits->lim,
1676 limits->hev_thr, limits->mblim, limits->lim,
1677 limits->hev_thr);
1678 break;
1679 // no filtering
1680 default: break;
1681 }
1682 } else {
1683 assert(use_filter_type == USE_SINGLE);
1684 switch (params->filter_length) {
1685 // apply 4-tap filtering
1686 case 4:
1687 aom_lpf_horizontal_4(dst, dst_stride, limits->mblim, limits->lim,
1688 limits->hev_thr);
1689 break;
1690 case 6: // apply 6-tap filter for chroma plane only
1691 aom_lpf_horizontal_6(dst, dst_stride, limits->mblim, limits->lim,
1692 limits->hev_thr);
1693 break;
1694 // apply 8-tap filtering
1695 case 8:
1696 aom_lpf_horizontal_8(dst, dst_stride, limits->mblim, limits->lim,
1697 limits->hev_thr);
1698 break;
1699 // apply 14-tap filtering
1700 case 14:
1701 aom_lpf_horizontal_14(dst, dst_stride, limits->mblim, limits->lim,
1702 limits->hev_thr);
1703 break;
1704 // no filtering
1705 default: break;
1706 }
1707 }
1708 #if !CONFIG_AV1_HIGHBITDEPTH
1709 (void)seq_params;
1710 #endif // !CONFIG_AV1_HIGHBITDEPTH
1711 }
1712
filter_horz_chroma(uint8_t * u_dst,uint8_t * v_dst,int dst_stride,const AV1_DEBLOCKING_PARAMETERS * params,const SequenceHeader * seq_params,USE_FILTER_TYPE use_filter_type)1713 static AOM_INLINE void filter_horz_chroma(
1714 uint8_t *u_dst, uint8_t *v_dst, int dst_stride,
1715 const AV1_DEBLOCKING_PARAMETERS *params, const SequenceHeader *seq_params,
1716 USE_FILTER_TYPE use_filter_type) {
1717 const loop_filter_thresh *u_limits = params->lfthr;
1718 const loop_filter_thresh *v_limits = params->lfthr;
1719 #if CONFIG_AV1_HIGHBITDEPTH
1720 const int use_highbitdepth = seq_params->use_highbitdepth;
1721 const aom_bit_depth_t bit_depth = seq_params->bit_depth;
1722 if (use_highbitdepth) {
1723 uint16_t *u_dst_shortptr = CONVERT_TO_SHORTPTR(u_dst);
1724 uint16_t *v_dst_shortptr = CONVERT_TO_SHORTPTR(v_dst);
1725 if (use_filter_type == USE_QUAD) {
1726 switch (params->filter_length) {
1727 // apply 4-tap filtering
1728 case 4:
1729 aom_highbd_lpf_horizontal_4_dual(
1730 u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1731 u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1732 u_limits->hev_thr, bit_depth);
1733 aom_highbd_lpf_horizontal_4_dual(
1734 u_dst_shortptr + (2 * MI_SIZE), dst_stride, u_limits->mblim,
1735 u_limits->lim, u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1736 u_limits->hev_thr, bit_depth);
1737 aom_highbd_lpf_horizontal_4_dual(
1738 v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1739 v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1740 v_limits->hev_thr, bit_depth);
1741 aom_highbd_lpf_horizontal_4_dual(
1742 v_dst_shortptr + (2 * MI_SIZE), dst_stride, v_limits->mblim,
1743 v_limits->lim, v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1744 v_limits->hev_thr, bit_depth);
1745 break;
1746 case 6: // apply 6-tap filter for chroma plane only
1747 aom_highbd_lpf_horizontal_6_dual(
1748 u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1749 u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1750 u_limits->hev_thr, bit_depth);
1751 aom_highbd_lpf_horizontal_6_dual(
1752 u_dst_shortptr + (2 * MI_SIZE), dst_stride, u_limits->mblim,
1753 u_limits->lim, u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1754 u_limits->hev_thr, bit_depth);
1755 aom_highbd_lpf_horizontal_6_dual(
1756 v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1757 v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1758 v_limits->hev_thr, bit_depth);
1759 aom_highbd_lpf_horizontal_6_dual(
1760 v_dst_shortptr + (2 * MI_SIZE), dst_stride, v_limits->mblim,
1761 v_limits->lim, v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1762 v_limits->hev_thr, bit_depth);
1763 break;
1764 case 8:
1765 case 14: assert(0);
1766 // no filtering
1767 default: break;
1768 }
1769 } else if (use_filter_type == USE_DUAL) {
1770 switch (params->filter_length) {
1771 // apply 4-tap filtering
1772 case 4:
1773 aom_highbd_lpf_horizontal_4_dual(
1774 u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1775 u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1776 u_limits->hev_thr, bit_depth);
1777 aom_highbd_lpf_horizontal_4_dual(
1778 v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1779 v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1780 v_limits->hev_thr, bit_depth);
1781 break;
1782 case 6: // apply 6-tap filter for chroma plane only
1783 aom_highbd_lpf_horizontal_6_dual(
1784 u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1785 u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1786 u_limits->hev_thr, bit_depth);
1787 aom_highbd_lpf_horizontal_6_dual(
1788 v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1789 v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1790 v_limits->hev_thr, bit_depth);
1791 break;
1792 case 8:
1793 case 14: assert(0);
1794 // no filtering
1795 default: break;
1796 }
1797 } else {
1798 assert(use_filter_type == USE_SINGLE);
1799 switch (params->filter_length) {
1800 // apply 4-tap filtering
1801 case 4:
1802 aom_highbd_lpf_horizontal_4(u_dst_shortptr, dst_stride,
1803 u_limits->mblim, u_limits->lim,
1804 u_limits->hev_thr, bit_depth);
1805 aom_highbd_lpf_horizontal_4(v_dst_shortptr, dst_stride,
1806 v_limits->mblim, v_limits->lim,
1807 v_limits->hev_thr, bit_depth);
1808 break;
1809 case 6: // apply 6-tap filter for chroma plane only
1810 aom_highbd_lpf_horizontal_6(u_dst_shortptr, dst_stride,
1811 u_limits->mblim, u_limits->lim,
1812 u_limits->hev_thr, bit_depth);
1813 aom_highbd_lpf_horizontal_6(v_dst_shortptr, dst_stride,
1814 v_limits->mblim, v_limits->lim,
1815 v_limits->hev_thr, bit_depth);
1816 break;
1817 case 8:
1818 case 14: assert(0); break;
1819 // no filtering
1820 default: break;
1821 }
1822 }
1823 return;
1824 }
1825 #endif // CONFIG_AV1_HIGHBITDEPTH
1826 if (use_filter_type == USE_QUAD) {
1827 // Only one set of loop filter parameters (mblim, lim and hev_thr) is
1828 // passed as argument to quad loop filter because quad loop filter is
1829 // called for those cases where all the 4 set of loop filter parameters
1830 // are equal.
1831 switch (params->filter_length) {
1832 // apply 4-tap filtering
1833 case 4:
1834 aom_lpf_horizontal_4_quad(u_dst, dst_stride, u_limits->mblim,
1835 u_limits->lim, u_limits->hev_thr);
1836 aom_lpf_horizontal_4_quad(v_dst, dst_stride, v_limits->mblim,
1837 v_limits->lim, v_limits->hev_thr);
1838 break;
1839 case 6: // apply 6-tap filter for chroma plane only
1840 aom_lpf_horizontal_6_quad(u_dst, dst_stride, u_limits->mblim,
1841 u_limits->lim, u_limits->hev_thr);
1842 aom_lpf_horizontal_6_quad(v_dst, dst_stride, v_limits->mblim,
1843 v_limits->lim, v_limits->hev_thr);
1844 break;
1845 case 8:
1846 case 14: assert(0);
1847 // no filtering
1848 default: break;
1849 }
1850 } else if (use_filter_type == USE_DUAL) {
1851 switch (params->filter_length) {
1852 // apply 4-tap filtering
1853 case 4:
1854 aom_lpf_horizontal_4_dual(u_dst, dst_stride, u_limits->mblim,
1855 u_limits->lim, u_limits->hev_thr,
1856 u_limits->mblim, u_limits->lim,
1857 u_limits->hev_thr);
1858 aom_lpf_horizontal_4_dual(v_dst, dst_stride, v_limits->mblim,
1859 v_limits->lim, v_limits->hev_thr,
1860 v_limits->mblim, v_limits->lim,
1861 v_limits->hev_thr);
1862 break;
1863 case 6: // apply 6-tap filter for chroma plane only
1864 aom_lpf_horizontal_6_dual(u_dst, dst_stride, u_limits->mblim,
1865 u_limits->lim, u_limits->hev_thr,
1866 u_limits->mblim, u_limits->lim,
1867 u_limits->hev_thr);
1868 aom_lpf_horizontal_6_dual(v_dst, dst_stride, v_limits->mblim,
1869 v_limits->lim, v_limits->hev_thr,
1870 v_limits->mblim, v_limits->lim,
1871 v_limits->hev_thr);
1872 break;
1873 case 8:
1874 case 14: assert(0);
1875 // no filtering
1876 default: break;
1877 }
1878 } else {
1879 assert(use_filter_type == USE_SINGLE);
1880 switch (params->filter_length) {
1881 // apply 4-tap filtering
1882 case 4:
1883 aom_lpf_horizontal_4(u_dst, dst_stride, u_limits->mblim, u_limits->lim,
1884 u_limits->hev_thr);
1885 aom_lpf_horizontal_4(v_dst, dst_stride, v_limits->mblim, v_limits->lim,
1886 u_limits->hev_thr);
1887 break;
1888 case 6: // apply 6-tap filter for chroma plane only
1889 aom_lpf_horizontal_6(u_dst, dst_stride, u_limits->mblim, u_limits->lim,
1890 u_limits->hev_thr);
1891 aom_lpf_horizontal_6(v_dst, dst_stride, v_limits->mblim, v_limits->lim,
1892 v_limits->hev_thr);
1893 break;
1894 case 8:
1895 case 14: assert(0); break;
1896 // no filtering
1897 default: break;
1898 }
1899 }
1900 #if !CONFIG_AV1_HIGHBITDEPTH
1901 (void)seq_params;
1902 #endif // !CONFIG_AV1_HIGHBITDEPTH
1903 }
1904
av1_filter_block_plane_horz(const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const int plane,const MACROBLOCKD_PLANE * const plane_ptr,const uint32_t mi_row,const uint32_t mi_col)1905 void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
1906 const MACROBLOCKD *const xd, const int plane,
1907 const MACROBLOCKD_PLANE *const plane_ptr,
1908 const uint32_t mi_row, const uint32_t mi_col) {
1909 const uint32_t scale_horz = plane_ptr->subsampling_x;
1910 const uint32_t scale_vert = plane_ptr->subsampling_y;
1911 uint8_t *const dst_ptr = plane_ptr->dst.buf;
1912 const int dst_stride = plane_ptr->dst.stride;
1913 const int plane_mi_rows =
1914 ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
1915 const int plane_mi_cols =
1916 ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
1917 const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
1918 (MAX_MIB_SIZE >> scale_vert));
1919 const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
1920 (MAX_MIB_SIZE >> scale_horz));
1921 for (int x = 0; x < x_range; x++) {
1922 uint8_t *p = dst_ptr + x * MI_SIZE;
1923 for (int y = 0; y < y_range;) {
1924 // inner loop always filter vertical edges in a MI block. If MI size
1925 // is 8x8, it will first filter the vertical edge aligned with a 8x8
1926 // block. If 4x4 transform is used, it will then filter the internal
1927 // edge aligned with a 4x4 block
1928 const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
1929 const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
1930 uint32_t advance_units;
1931 TX_SIZE tx_size;
1932 AV1_DEBLOCKING_PARAMETERS params;
1933 memset(¶ms, 0, sizeof(params));
1934
1935 tx_size = set_lpf_parameters(
1936 ¶ms, (cm->mi_params.mi_stride << scale_vert), cm, xd, HORZ_EDGE,
1937 curr_x, curr_y, plane, plane_ptr);
1938 if (tx_size == TX_INVALID) {
1939 params.filter_length = 0;
1940 tx_size = TX_4X4;
1941 }
1942
1943 filter_horz(p, dst_stride, ¶ms, cm->seq_params, USE_SINGLE);
1944
1945 // advance the destination pointer
1946 advance_units = tx_size_high_unit[tx_size];
1947 y += advance_units;
1948 p += advance_units * dst_stride * MI_SIZE;
1949 }
1950 }
1951 }
1952
// Filters the horizontal edges of a plane inside one loop filter unit,
// grouping adjacent columns into dual or quad filter calls where possible.
// mi_row/mi_col are used without any subsampling shift and the per-column
// parameters come from set_lpf_parameters_for_line_luma().
//
//   cm          Common state; supplies mi_stride and seq_params.
//   xd          Passed through to set_lpf_parameters_for_line_luma().
//   plane_ptr   Plane descriptor; supplies dst.buf, dst.stride, dst.width and
//               dst.height. Pixel addresses are relative to dst.buf.
//   mi_row,
//   mi_col      Position of the unit in mi units.
//   params_buf,
//   tx_buf      Scratch buffers refilled for every processed column and then
//               consumed row by row. NOTE(review): presumably the caller sizes
//               them for at least one full column of the unit -- confirm.
//   num_mis_in_lpf_unit_height_log2
//               log2 of the unit height in mi units; caps the row range.
void av1_filter_block_plane_horz_opt(
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
    const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
    const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
    TX_SIZE *tx_buf, int num_mis_in_lpf_unit_height_log2) {
  uint8_t *const dst_ptr = plane_ptr->dst.buf;
  const int dst_stride = plane_ptr->dst.stride;
  // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned
  // to MI_SIZE.
  const int plane_mi_cols =
      CEIL_POWER_OF_TWO(plane_ptr->dst.width, MI_SIZE_LOG2);
  const int plane_mi_rows =
      CEIL_POWER_OF_TWO(plane_ptr->dst.height, MI_SIZE_LOG2);
  const int y_range = AOMMIN((int)(plane_mi_rows - mi_row),
                             (1 << num_mis_in_lpf_unit_height_log2));
  const int x_range = AOMMIN((int)(plane_mi_cols - mi_col), MAX_MIB_SIZE);

  const ptrdiff_t mode_step = cm->mi_params.mi_stride;
  for (int x = 0; x < x_range; x++) {
    const uint32_t curr_x = mi_col + x;
    const uint32_t y_start = mi_row;
    const uint32_t y_end = mi_row + y_range;
    // Start from the largest possible dimension; the helper lowers it to the
    // minimum found along this column.
    int min_block_width = block_size_high[BLOCK_128X128];
    set_lpf_parameters_for_line_luma(params_buf, tx_buf, cm, xd, HORZ_EDGE,
                                     curr_x, y_start, plane_ptr, y_end,
                                     mode_step, &min_block_width);

    AV1_DEBLOCKING_PARAMETERS *params = params_buf;
    TX_SIZE *tx_size = tx_buf;
    USE_FILTER_TYPE filter_type = USE_SINGLE;

    // Must be computed before x is advanced for the dual/quad cases below.
    uint8_t *p = dst_ptr + x * MI_SIZE;

    if ((x & 3) == 0 && (x + 3) < x_range && min_block_width >= 16) {
      // If we are on a col which is a multiple of 4, and the minimum width is
      // 16 pixels, then the current and right 3 cols must contain the same
      // prediction block. This is because dim 16 can only happen every unit of
      // 4 mi's.
      filter_type = USE_QUAD;
      x += 3;
    } else if ((x & 1) == 0 && (x + 1) < x_range && min_block_width >= 8) {
      // If we are on an even col, and the minimum width is 8 pixels, then the
      // current and right cols must contain the same prediction block. The
      // even-column check makes that alignment precondition explicit, matching
      // the chroma variant of this function.
      filter_type = USE_DUAL;
      x += 1;
    }

    for (int y = 0; y < y_range;) {
      if (*tx_size == TX_INVALID) {
        // No usable transform size: skip filtering here and advance by the
        // smallest unit.
        params->filter_length = 0;
        *tx_size = TX_4X4;
      }

      filter_horz(p, dst_stride, params, cm->seq_params, filter_type);

      // advance the destination pointer and the per-row buffers in lockstep
      const uint32_t advance_units = tx_size_high_unit[*tx_size];
      y += advance_units;
      p += advance_units * dst_stride * MI_SIZE;
      params += advance_units;
      tx_size += advance_units;
    }
  }
}
2015
// Filters the horizontal edges of a chroma plane (or of both chroma planes at
// once when joint_filter_chroma is set) inside one loop filter unit. Adjacent
// columns are merged into dual or quad filter calls when the minimum width
// reported for the column allows it.
//
// With joint_filter_chroma, plane_ptr[0] and plane_ptr[1] are filtered
// together; otherwise only plane_ptr[0] is filtered. params_buf and tx_buf
// are refilled for every processed column and then read row by row.
void av1_filter_block_plane_horz_opt_chroma(
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
    const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
    const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
    TX_SIZE *tx_buf, int plane, bool joint_filter_chroma,
    int num_mis_in_lpf_unit_height_log2) {
  const uint32_t ss_x = plane_ptr->subsampling_x;
  const uint32_t ss_y = plane_ptr->subsampling_y;
  const int stride = plane_ptr->dst.stride;

  // Derive the mi dimensions from the plane size scaled back up by the
  // subsampling and rounded up to MI_SIZE, then scale them back down to the
  // plane.
  const int full_mi_cols =
      ((plane_ptr->dst.width << ss_x) + MI_SIZE - 1) >> MI_SIZE_LOG2;
  const int full_mi_rows =
      ((plane_ptr->dst.height << ss_y) + MI_SIZE - 1) >> MI_SIZE_LOG2;
  const int rows_in_plane = ROUND_POWER_OF_TWO(full_mi_rows, ss_y);
  const int cols_in_plane = ROUND_POWER_OF_TWO(full_mi_cols, ss_x);

  // Clip the unit to what is left of the plane below/right of the start.
  const int unit_rows = (1 << num_mis_in_lpf_unit_height_log2) >> ss_y;
  const int unit_cols = MAX_MIB_SIZE >> ss_x;
  const int y_range =
      AOMMIN((int)(rows_in_plane - (mi_row >> ss_y)), unit_rows);
  const int x_range =
      AOMMIN((int)(cols_in_plane - (mi_col >> ss_x)), unit_cols);

  const ptrdiff_t mode_step = cm->mi_params.mi_stride << ss_y;
  const uint32_t y_start = mi_row;
  const uint32_t y_end = mi_row + (y_range << ss_y);

  int x = 0;
  while (x < x_range) {
    const uint32_t curr_x = mi_col + (x << ss_x);
    int min_width = tx_size_wide[TX_64X64];
    set_lpf_parameters_for_line_chroma(params_buf, tx_buf, cm, xd, HORZ_EDGE,
                                       curr_x, y_start, plane_ptr, y_end,
                                       mode_step, ss_x, ss_y, &min_width,
                                       plane, joint_filter_chroma);

    // A column that is a multiple of 4 with a minimum width of 16 pixels
    // shares its tx block with the 3 columns to its right (dim 16 can only
    // start every 4 mi's).
    const bool quad_ok =
        (x % 4 == 0) && (x + 3 < x_range) && (min_width >= 16);
    // An even column with a minimum width of 8 pixels shares its tx block
    // with the column to its right (dim 8 can only start every 2 mi's).
    const bool dual_ok = !(x & 1) && (x + 1 < x_range) && (min_width >= 8);

    USE_FILTER_TYPE use_filter_type = USE_SINGLE;
    int cols_done = 1;
    if (quad_ok) {
      use_filter_type = USE_QUAD;
      cols_done = 4;
    } else if (dual_ok) {
      use_filter_type = USE_DUAL;
      cols_done = 2;
    }

    // Walk down the column filtering one horizontal edge per step. The
    // parameter and tx buffers are indexed by the row, which advances by the
    // height of the transform block at that row.
    int y = 0;
    while (y < y_range) {
      AV1_DEBLOCKING_PARAMETERS *const row_params = &params_buf[y];
      TX_SIZE *const row_tx = &tx_buf[y];
      if (*row_tx == TX_INVALID) {
        // Nothing to filter here; step by the smallest unit.
        row_params->filter_length = 0;
        *row_tx = TX_4X4;
      }

      const int offset = y * MI_SIZE * stride + x * MI_SIZE;
      if (!joint_filter_chroma) {
        filter_horz(plane_ptr->dst.buf + offset, stride, row_params,
                    cm->seq_params, use_filter_type);
      } else {
        filter_horz_chroma(plane_ptr[0].dst.buf + offset,
                           plane_ptr[1].dst.buf + offset, stride, row_params,
                           cm->seq_params, use_filter_type);
      }

      y += tx_size_high_unit[*row_tx];
    }

    x += cols_done;
  }
}
2100