• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16 
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/mem.h"
20 #include "av1/common/av1_common_int.h"
21 #include "av1/common/av1_loopfilter.h"
22 #include "av1/common/reconinter.h"
23 #include "av1/common/seg_common.h"
24 
// Filter-type selector. The enumerators are not referenced in the visible
// part of this file.
// NOTE(review): presumably selects how many adjacent edges one filter call
// processes (one / two / four) -- confirm against the users of this type.
enum {
  USE_SINGLE = 0,
  USE_DUAL = 1,
  USE_QUAD = 2,
} UENUM1BYTE(USE_FILTER_TYPE);
30 
31 static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = {
32   { SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H },
33   { SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U },
34   { SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V }
35 };
36 
37 static const int delta_lf_id_lut[MAX_MB_PLANE][2] = { { 0, 1 },
38                                                       { 2, 2 },
39                                                       { 3, 3 } };
40 
// Maps a prediction mode to the index (0 or 1) of the loop-filter mode delta
// that applies to it. One entry per prediction mode, in enum order.
static const int mode_lf_lut[] = {
  // INTRA_MODES
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // INTER_MODES (GLOBALMV == 0)
  1, 1, 0, 1,
  // INTER_COMPOUND_MODES (GLOBAL_GLOBALMV == 0)
  1, 1, 1, 1, 1, 1, 0, 1,
};
46 
// Fills the `lim` and `mblim` threshold vectors of `lfi` for every filter
// level in [0, MAX_LOOP_FILTER], taking `sharpness_lvl` into account.
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
  // The inside limit is the level shifted right by 0, 1 or 2 bits depending on
  // whether the sharpness is 0, in 1..4, or above 4.
  const int shift = (sharpness_lvl > 0) + (sharpness_lvl > 4);

  for (int level = 0; level <= MAX_LOOP_FILTER; ++level) {
    int inside_limit = level >> shift;

    // A non-zero sharpness additionally caps the limit at (9 - sharpness).
    if (sharpness_lvl > 0) {
      inside_limit = AOMMIN(inside_limit, 9 - sharpness_lvl);
    }
    // The limit is never allowed to drop below 1.
    if (inside_limit < 1) inside_limit = 1;

    loop_filter_thresh *const thr = &lfi->lfthr[level];
    memset(thr->lim, inside_limit, SIMD_WIDTH);
    memset(thr->mblim, 2 * (level + 2) + inside_limit, SIMD_WIDTH);
  }
}
67 
av1_get_filter_level(const AV1_COMMON * cm,const loop_filter_info_n * lfi_n,const int dir_idx,int plane,const MB_MODE_INFO * mbmi)68 uint8_t av1_get_filter_level(const AV1_COMMON *cm,
69                              const loop_filter_info_n *lfi_n, const int dir_idx,
70                              int plane, const MB_MODE_INFO *mbmi) {
71   const int segment_id = mbmi->segment_id;
72   if (cm->delta_q_info.delta_lf_present_flag) {
73     int8_t delta_lf;
74     if (cm->delta_q_info.delta_lf_multi) {
75       const int delta_lf_idx = delta_lf_id_lut[plane][dir_idx];
76       delta_lf = mbmi->delta_lf[delta_lf_idx];
77     } else {
78       delta_lf = mbmi->delta_lf_from_base;
79     }
80     int base_level;
81     if (plane == 0)
82       base_level = cm->lf.filter_level[dir_idx];
83     else if (plane == 1)
84       base_level = cm->lf.filter_level_u;
85     else
86       base_level = cm->lf.filter_level_v;
87     int lvl_seg = clamp(delta_lf + base_level, 0, MAX_LOOP_FILTER);
88     assert(plane >= 0 && plane <= 2);
89     const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir_idx];
90     if (segfeature_active(&cm->seg, segment_id, seg_lf_feature_id)) {
91       const int data = get_segdata(&cm->seg, segment_id, seg_lf_feature_id);
92       lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
93     }
94 
95     if (cm->lf.mode_ref_delta_enabled) {
96       const int scale = 1 << (lvl_seg >> 5);
97       lvl_seg += cm->lf.ref_deltas[mbmi->ref_frame[0]] * scale;
98       if (mbmi->ref_frame[0] > INTRA_FRAME)
99         lvl_seg += cm->lf.mode_deltas[mode_lf_lut[mbmi->mode]] * scale;
100       lvl_seg = clamp(lvl_seg, 0, MAX_LOOP_FILTER);
101     }
102     return lvl_seg;
103   } else {
104     return lfi_n->lvl[plane][segment_id][dir_idx][mbmi->ref_frame[0]]
105                      [mode_lf_lut[mbmi->mode]];
106   }
107 }
108 
// Initialises the loop-filter threshold vectors in cm->lf_info: the
// sharpness-dependent limits and the hev threshold for every filter level.
void av1_loop_filter_init(AV1_COMMON *cm) {
  // mode_lf_lut must have exactly one entry per prediction mode.
  assert(MB_MODE_COUNT == NELEMENTS(mode_lf_lut));

  loop_filter_info_n *const info = &cm->lf_info;
  const struct loopfilter *const lf = &cm->lf;

  // Limits for the currently configured sharpness.
  update_sharpness(info, lf->sharpness_level);

  // The hev threshold vector of each level is filled with (level >> 4).
  for (int level = 0; level <= MAX_LOOP_FILTER; ++level) {
    memset(info->lfthr[level].hev_thr, level >> 4, SIMD_WIDTH);
  }
}
122 
123 // Update the loop filter for the current frame.
124 // This should be called before loop_filter_rows(),
125 // av1_loop_filter_frame() calls this function directly.
av1_loop_filter_frame_init(AV1_COMMON * cm,int plane_start,int plane_end)126 void av1_loop_filter_frame_init(AV1_COMMON *cm, int plane_start,
127                                 int plane_end) {
128   int filt_lvl[MAX_MB_PLANE], filt_lvl_r[MAX_MB_PLANE];
129   int plane;
130   int seg_id;
131   // n_shift is the multiplier for lf_deltas
132   // the multiplier is 1 for when filter_lvl is between 0 and 31;
133   // 2 when filter_lvl is between 32 and 63
134   loop_filter_info_n *const lfi = &cm->lf_info;
135   struct loopfilter *const lf = &cm->lf;
136   const struct segmentation *const seg = &cm->seg;
137 
138   // update sharpness limits
139   update_sharpness(lfi, lf->sharpness_level);
140 
141   filt_lvl[0] = cm->lf.filter_level[0];
142   filt_lvl[1] = cm->lf.filter_level_u;
143   filt_lvl[2] = cm->lf.filter_level_v;
144 
145   filt_lvl_r[0] = cm->lf.filter_level[1];
146   filt_lvl_r[1] = cm->lf.filter_level_u;
147   filt_lvl_r[2] = cm->lf.filter_level_v;
148 
149   assert(plane_start >= AOM_PLANE_Y);
150   assert(plane_end <= MAX_MB_PLANE);
151 
152   for (plane = plane_start; plane < plane_end; plane++) {
153     if (plane == 0 && !filt_lvl[0] && !filt_lvl_r[0])
154       break;
155     else if (plane == 1 && !filt_lvl[1])
156       continue;
157     else if (plane == 2 && !filt_lvl[2])
158       continue;
159 
160     for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
161       for (int dir = 0; dir < 2; ++dir) {
162         int lvl_seg = (dir == 0) ? filt_lvl[plane] : filt_lvl_r[plane];
163         const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir];
164         if (segfeature_active(seg, seg_id, seg_lf_feature_id)) {
165           const int data = get_segdata(&cm->seg, seg_id, seg_lf_feature_id);
166           lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
167         }
168 
169         if (!lf->mode_ref_delta_enabled) {
170           // we could get rid of this if we assume that deltas are set to
171           // zero when not in use; encoder always uses deltas
172           memset(lfi->lvl[plane][seg_id][dir], lvl_seg,
173                  sizeof(lfi->lvl[plane][seg_id][dir]));
174         } else {
175           int ref, mode;
176           const int scale = 1 << (lvl_seg >> 5);
177           const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
178           lfi->lvl[plane][seg_id][dir][INTRA_FRAME][0] =
179               clamp(intra_lvl, 0, MAX_LOOP_FILTER);
180 
181           for (ref = LAST_FRAME; ref < REF_FRAMES; ++ref) {
182             for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
183               const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
184                                     lf->mode_deltas[mode] * scale;
185               lfi->lvl[plane][seg_id][dir][ref][mode] =
186                   clamp(inter_lvl, 0, MAX_LOOP_FILTER);
187             }
188           }
189         }
190       }
191     }
192   }
193 }
194 
195 static AOM_FORCE_INLINE TX_SIZE
get_transform_size(const MACROBLOCKD * const xd,const MB_MODE_INFO * const mbmi,const int mi_row,const int mi_col,const int plane,const int ss_x,const int ss_y)196 get_transform_size(const MACROBLOCKD *const xd, const MB_MODE_INFO *const mbmi,
197                    const int mi_row, const int mi_col, const int plane,
198                    const int ss_x, const int ss_y) {
199   assert(mbmi != NULL);
200   if (xd && xd->lossless[mbmi->segment_id]) return TX_4X4;
201 
202   TX_SIZE tx_size = (plane == AOM_PLANE_Y)
203                         ? mbmi->tx_size
204                         : av1_get_max_uv_txsize(mbmi->bsize, ss_x, ss_y);
205   assert(tx_size < TX_SIZES_ALL);
206   if ((plane == AOM_PLANE_Y) && is_inter_block(mbmi) && !mbmi->skip_txfm) {
207     const BLOCK_SIZE sb_type = mbmi->bsize;
208     const int blk_row = mi_row & (mi_size_high[sb_type] - 1);
209     const int blk_col = mi_col & (mi_size_wide[sb_type] - 1);
210     const TX_SIZE mb_tx_size =
211         mbmi->inter_tx_size[av1_get_txb_size_index(sb_type, blk_row, blk_col)];
212     assert(mb_tx_size < TX_SIZES_ALL);
213     tx_size = mb_tx_size;
214   }
215 
216   return tx_size;
217 }
218 
219 static const int tx_dim_to_filter_length[TX_SIZES] = { 4, 8, 14, 14, 14 };
220 
221 // Return TX_SIZE from get_transform_size(), so it is plane and direction
222 // aware
set_lpf_parameters(AV1_DEBLOCKING_PARAMETERS * const params,const ptrdiff_t mode_step,const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const EDGE_DIR edge_dir,const uint32_t x,const uint32_t y,const int plane,const struct macroblockd_plane * const plane_ptr)223 static TX_SIZE set_lpf_parameters(
224     AV1_DEBLOCKING_PARAMETERS *const params, const ptrdiff_t mode_step,
225     const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
226     const EDGE_DIR edge_dir, const uint32_t x, const uint32_t y,
227     const int plane, const struct macroblockd_plane *const plane_ptr) {
228   // reset to initial values
229   params->filter_length = 0;
230 
231   // no deblocking is required
232   const uint32_t width = plane_ptr->dst.width;
233   const uint32_t height = plane_ptr->dst.height;
234   if ((width <= x) || (height <= y)) {
235     // just return the smallest transform unit size
236     return TX_4X4;
237   }
238 
239   const uint32_t scale_horz = plane_ptr->subsampling_x;
240   const uint32_t scale_vert = plane_ptr->subsampling_y;
241   // for sub8x8 block, chroma prediction mode is obtained from the bottom/right
242   // mi structure of the co-located 8x8 luma block. so for chroma plane, mi_row
243   // and mi_col should map to the bottom/right mi structure, i.e, both mi_row
244   // and mi_col should be odd number for chroma plane.
245   const int mi_row = scale_vert | ((y << scale_vert) >> MI_SIZE_LOG2);
246   const int mi_col = scale_horz | ((x << scale_horz) >> MI_SIZE_LOG2);
247   MB_MODE_INFO **mi =
248       cm->mi_params.mi_grid_base + mi_row * cm->mi_params.mi_stride + mi_col;
249   const MB_MODE_INFO *mbmi = mi[0];
250   // If current mbmi is not correctly setup, return an invalid value to stop
251   // filtering. One example is that if this tile is not coded, then its mbmi
252   // it not set up.
253   if (mbmi == NULL) return TX_INVALID;
254 
255   const TX_SIZE ts = get_transform_size(xd, mi[0], mi_row, mi_col, plane,
256                                         scale_horz, scale_vert);
257 
258   {
259     const uint32_t coord = (VERT_EDGE == edge_dir) ? (x) : (y);
260     const uint32_t transform_masks =
261         edge_dir == VERT_EDGE ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
262     const int32_t tu_edge = (coord & transform_masks) ? (0) : (1);
263 
264     if (!tu_edge) return ts;
265 
266     // prepare outer edge parameters. deblock the edge if it's an edge of a TU
267     {
268       const uint32_t curr_level =
269           av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
270       const int curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi);
271       uint32_t level = curr_level;
272       if (coord) {
273         {
274           const MB_MODE_INFO *const mi_prev = *(mi - mode_step);
275           if (mi_prev == NULL) return TX_INVALID;
276           const int pv_row =
277               (VERT_EDGE == edge_dir) ? (mi_row) : (mi_row - (1 << scale_vert));
278           const int pv_col =
279               (VERT_EDGE == edge_dir) ? (mi_col - (1 << scale_horz)) : (mi_col);
280           const TX_SIZE pv_ts = get_transform_size(
281               xd, mi_prev, pv_row, pv_col, plane, scale_horz, scale_vert);
282 
283           const uint32_t pv_lvl =
284               av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev);
285 
286           const int pv_skip_txfm =
287               mi_prev->skip_txfm && is_inter_block(mi_prev);
288           const BLOCK_SIZE bsize = get_plane_block_size(
289               mbmi->bsize, plane_ptr->subsampling_x, plane_ptr->subsampling_y);
290           assert(bsize < BLOCK_SIZES_ALL);
291           const int prediction_masks = edge_dir == VERT_EDGE
292                                            ? block_size_wide[bsize] - 1
293                                            : block_size_high[bsize] - 1;
294           const int32_t pu_edge = !(coord & prediction_masks);
295           // if the current and the previous blocks are skipped,
296           // deblock the edge if the edge belongs to a PU's edge only.
297           if ((curr_level || pv_lvl) &&
298               (!pv_skip_txfm || !curr_skipped || pu_edge)) {
299             const int dim = (VERT_EDGE == edge_dir)
300                                 ? AOMMIN(tx_size_wide_unit_log2[ts],
301                                          tx_size_wide_unit_log2[pv_ts])
302                                 : AOMMIN(tx_size_high_unit_log2[ts],
303                                          tx_size_high_unit_log2[pv_ts]);
304             if (plane) {
305               params->filter_length = (dim == 0) ? 4 : 6;
306             } else {
307               assert(dim < TX_SIZES);
308               assert(dim >= 0);
309               params->filter_length = tx_dim_to_filter_length[dim];
310             }
311 
312             // update the level if the current block is skipped,
313             // but the previous one is not
314             level = (curr_level) ? (curr_level) : (pv_lvl);
315           }
316         }
317       }
318       // prepare common parameters
319       if (params->filter_length) {
320         const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
321         params->lfthr = limits;
322       }
323     }
324   }
325 
326   return ts;
327 }
328 
329 static const uint32_t vert_filter_length_luma[TX_SIZES_ALL][TX_SIZES_ALL] = {
330   // TX_4X4
331   {
332       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
333   },
334   // TX_8X8
335   {
336       4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8,
337   },
338   // TX_16X16
339   {
340       4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
341   },
342   // TX_32X32
343   {
344       4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
345   },
346   // TX_64X64
347   {
348       4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
349   },
350   // TX_4X8
351   {
352       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
353   },
354   // TX_8X4
355   {
356       4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8,
357   },
358   // TX_8X16
359   {
360       4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8,
361   },
362   // TX_16X8
363   {
364       4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
365   },
366   // TX_16X32
367   {
368       4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
369   },
370   // TX_32X16
371   {
372       4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
373   },
374   // TX_32X64
375   {
376       4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
377   },
378   // TX_64X32
379   {
380       4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
381   },
382   // TX_4X16
383   {
384       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
385   },
386   // TX_16X4
387   {
388       4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
389   },
390   // TX_8X32
391   {
392       4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8,
393   },
394   // TX_32X8
395   {
396       4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
397   },
398   // TX_16X64
399   {
400       4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
401   },
402   // TX_64X16
403   {
404       4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14,
405   },
406 };
407 
408 static const uint32_t horz_filter_length_luma[TX_SIZES_ALL][TX_SIZES_ALL] = {
409   // TX_4X4
410   {
411       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
412   },
413   // TX_8X8
414   {
415       4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8,
416   },
417   // TX_16X16
418   {
419       4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
420   },
421   // TX_32X32
422   {
423       4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
424   },
425   // TX_64X64
426   {
427       4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
428   },
429   // TX_4X8
430   {
431       4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8,
432   },
433   // TX_8X4
434   {
435       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
436   },
437   // TX_8X16
438   {
439       4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
440   },
441   // TX_16X8
442   {
443       4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8,
444   },
445   // TX_16X32
446   {
447       4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
448   },
449   // TX_32X16
450   {
451       4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
452   },
453   // TX_32X64
454   {
455       4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
456   },
457   // TX_64X32
458   {
459       4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
460   },
461   // TX_4X16
462   {
463       4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
464   },
465   // TX_16X4
466   {
467       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
468   },
469   // TX_8X32
470   {
471       4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
472   },
473   // TX_32X8
474   {
475       4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8,
476   },
477   // TX_16X64
478   {
479       4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
480   },
481   // TX_64X16
482   {
483       4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14,
484   },
485 };
486 
487 static const uint32_t vert_filter_length_chroma[TX_SIZES_ALL][TX_SIZES_ALL] = {
488   // TX_4X4
489   {
490       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
491   },
492   // TX_8X8
493   {
494       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
495   },
496   // TX_16X16
497   {
498       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
499   },
500   // TX_32X32
501   {
502       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
503   },
504   // TX_64X64
505   {
506       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
507   },
508   // TX_4X8
509   {
510       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
511   },
512   // TX_8X4
513   {
514       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
515   },
516   // TX_8X16
517   {
518       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
519   },
520   // TX_16X8
521   {
522       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
523   },
524   // TX_16X32
525   {
526       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
527   },
528   // TX_32X16
529   {
530       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
531   },
532   // TX_32X64
533   {
534       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
535   },
536   // TX_64X32
537   {
538       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
539   },
540   // TX_4X16
541   {
542       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
543   },
544   // TX_16X4
545   {
546       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
547   },
548   // TX_8X32
549   {
550       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
551   },
552   // TX_32X8
553   {
554       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
555   },
556   // TX_16X64
557   {
558       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
559   },
560   // TX_64X16
561   {
562       4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6,
563   },
564 };
565 
566 static const uint32_t horz_filter_length_chroma[TX_SIZES_ALL][TX_SIZES_ALL] = {
567   // TX_4X4
568   {
569       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
570   },
571   // TX_8X8
572   {
573       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
574   },
575   // TX_16X16
576   {
577       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
578   },
579   // TX_32X32
580   {
581       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
582   },
583   // TX_64X64
584   {
585       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
586   },
587   // TX_4X8
588   {
589       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
590   },
591   // TX_8X4
592   {
593       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
594   },
595   // TX_8X16
596   {
597       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
598   },
599   // TX_16X8
600   {
601       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
602   },
603   // TX_16X32
604   {
605       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
606   },
607   // TX_32X16
608   {
609       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
610   },
611   // TX_32X64
612   {
613       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
614   },
615   // TX_64X32
616   {
617       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
618   },
619   // TX_4X16
620   {
621       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
622   },
623   // TX_16X4
624   {
625       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
626   },
627   // TX_8X32
628   {
629       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
630   },
631   // TX_32X8
632   {
633       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
634   },
635   // TX_16X64
636   {
637       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
638   },
639   // TX_64X16
640   {
641       4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6,
642   },
643 };
644 
set_one_param_for_line_luma(AV1_DEBLOCKING_PARAMETERS * const params,TX_SIZE * tx_size,const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const EDGE_DIR edge_dir,uint32_t mi_col,uint32_t mi_row,const struct macroblockd_plane * const plane_ptr,int coord,bool is_first_block,TX_SIZE prev_tx_size,const ptrdiff_t mode_step,int * min_dim)645 static AOM_FORCE_INLINE void set_one_param_for_line_luma(
646     AV1_DEBLOCKING_PARAMETERS *const params, TX_SIZE *tx_size,
647     const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
648     const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row,
649     const struct macroblockd_plane *const plane_ptr, int coord,
650     bool is_first_block, TX_SIZE prev_tx_size, const ptrdiff_t mode_step,
651     int *min_dim) {
652   (void)plane_ptr;
653   assert(mi_col << MI_SIZE_LOG2 < (uint32_t)plane_ptr->dst.width &&
654          mi_row << MI_SIZE_LOG2 < (uint32_t)plane_ptr->dst.height);
655   const int is_vert = edge_dir == VERT_EDGE;
656   // reset to initial values
657   params->filter_length = 0;
658 
659   MB_MODE_INFO **mi =
660       cm->mi_params.mi_grid_base + mi_row * cm->mi_params.mi_stride + mi_col;
661   const MB_MODE_INFO *mbmi = mi[0];
662   assert(mbmi);
663 
664   const TX_SIZE ts =
665       get_transform_size(xd, mi[0], mi_row, mi_col, AOM_PLANE_Y, 0, 0);
666 
667 #ifndef NDEBUG
668   const uint32_t transform_masks =
669       is_vert ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
670   const int32_t tu_edge = ((coord * MI_SIZE) & transform_masks) ? (0) : (1);
671   assert(tu_edge);
672 #endif  // NDEBUG
673   // If we are not the first block, then coord is always true, so
674   // !is_first_block is technically redundant. But we are keeping it here so the
675   // compiler can compile away this conditional if we pass in is_first_block :=
676   // false
677   bool curr_skipped = false;
678   if (!is_first_block || coord) {
679     const MB_MODE_INFO *const mi_prev = *(mi - mode_step);
680     const int pv_row = is_vert ? mi_row : (mi_row - 1);
681     const int pv_col = is_vert ? (mi_col - 1) : mi_col;
682     const TX_SIZE pv_ts =
683         is_first_block
684             ? get_transform_size(xd, mi_prev, pv_row, pv_col, AOM_PLANE_Y, 0, 0)
685             : prev_tx_size;
686     if (is_first_block) {
687       *min_dim = is_vert ? block_size_high[mi_prev->bsize]
688                          : block_size_wide[mi_prev->bsize];
689     }
690     assert(mi_prev);
691     uint8_t level =
692         av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_Y, mbmi);
693     if (!level) {
694       level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_Y,
695                                    mi_prev);
696     }
697 
698     const int32_t pu_edge = mi_prev != mbmi;
699 
700     // The quad loop filter assumes that all the transform blocks within a
701     // 8x16/16x8/16x16 prediction block are of the same size.
702     assert(IMPLIES(
703         !pu_edge && (mbmi->bsize >= BLOCK_8X16 && mbmi->bsize <= BLOCK_16X16),
704         pv_ts == ts));
705 
706     if (!pu_edge) {
707       curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi);
708     }
709     if ((pu_edge || !curr_skipped) && level) {
710       params->filter_length = is_vert ? vert_filter_length_luma[ts][pv_ts]
711                                       : horz_filter_length_luma[ts][pv_ts];
712 
713       // prepare common parameters
714       const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
715       params->lfthr = limits;
716     }
717   }
718   const int block_dim =
719       is_vert ? block_size_high[mbmi->bsize] : block_size_wide[mbmi->bsize];
720   *min_dim = AOMMIN(*min_dim, block_dim);
721 
722   *tx_size = ts;
723 }
724 
725 // Similar to set_lpf_parameters, but does so one row/col at a time to reduce
726 // calls to \ref get_transform_size and \ref av1_get_filter_level
set_lpf_parameters_for_line_luma(AV1_DEBLOCKING_PARAMETERS * const params_buf,TX_SIZE * tx_buf,const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const EDGE_DIR edge_dir,uint32_t mi_col,uint32_t mi_row,const struct macroblockd_plane * const plane_ptr,const uint32_t mi_range,const ptrdiff_t mode_step,int * min_dim)727 static AOM_FORCE_INLINE void set_lpf_parameters_for_line_luma(
728     AV1_DEBLOCKING_PARAMETERS *const params_buf, TX_SIZE *tx_buf,
729     const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
730     const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row,
731     const struct macroblockd_plane *const plane_ptr, const uint32_t mi_range,
732     const ptrdiff_t mode_step, int *min_dim) {
733   const int is_vert = edge_dir == VERT_EDGE;
734 
735   AV1_DEBLOCKING_PARAMETERS *params = params_buf;
736   TX_SIZE *tx_size = tx_buf;
737   uint32_t *counter_ptr = is_vert ? &mi_col : &mi_row;
738   TX_SIZE prev_tx_size = TX_INVALID;
739 
740   // Unroll the first iteration of the loop
741   set_one_param_for_line_luma(params, tx_size, cm, xd, edge_dir, mi_col, mi_row,
742                               plane_ptr, *counter_ptr, true, prev_tx_size,
743                               mode_step, min_dim);
744 
745   // Advance
746   int advance_units =
747       is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size];
748   prev_tx_size = *tx_size;
749   *counter_ptr += advance_units;
750   params += advance_units;
751   tx_size += advance_units;
752 
753   while (*counter_ptr < mi_range) {
754     set_one_param_for_line_luma(params, tx_size, cm, xd, edge_dir, mi_col,
755                                 mi_row, plane_ptr, *counter_ptr, false,
756                                 prev_tx_size, mode_step, min_dim);
757 
758     // Advance
759     advance_units =
760         is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size];
761     prev_tx_size = *tx_size;
762     *counter_ptr += advance_units;
763     params += advance_units;
764     tx_size += advance_units;
765   }
766 }
767 
set_one_param_for_line_chroma(AV1_DEBLOCKING_PARAMETERS * const params,TX_SIZE * tx_size,const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const EDGE_DIR edge_dir,uint32_t mi_col,uint32_t mi_row,int coord,bool is_first_block,TX_SIZE prev_tx_size,const struct macroblockd_plane * const plane_ptr,const ptrdiff_t mode_step,const int scale_horz,const int scale_vert,int * min_dim,int plane,int joint_filter_chroma)768 static AOM_FORCE_INLINE void set_one_param_for_line_chroma(
769     AV1_DEBLOCKING_PARAMETERS *const params, TX_SIZE *tx_size,
770     const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
771     const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row, int coord,
772     bool is_first_block, TX_SIZE prev_tx_size,
773     const struct macroblockd_plane *const plane_ptr, const ptrdiff_t mode_step,
774     const int scale_horz, const int scale_vert, int *min_dim, int plane,
775     int joint_filter_chroma) {
776   const int is_vert = edge_dir == VERT_EDGE;
777   (void)plane_ptr;
778   assert((mi_col << MI_SIZE_LOG2) <
779              (uint32_t)(plane_ptr->dst.width << scale_horz) &&
780          (mi_row << MI_SIZE_LOG2) <
781              (uint32_t)(plane_ptr->dst.height << scale_vert));
782   // reset to initial values
783   params->filter_length = 0;
784 
785   // for sub8x8 block, chroma prediction mode is obtained from the
786   // bottom/right mi structure of the co-located 8x8 luma block. so for chroma
787   // plane, mi_row and mi_col should map to the bottom/right mi structure,
788   // i.e, both mi_row and mi_col should be odd number for chroma plane.
789   mi_row |= scale_vert;
790   mi_col |= scale_horz;
791   MB_MODE_INFO **mi =
792       cm->mi_params.mi_grid_base + mi_row * cm->mi_params.mi_stride + mi_col;
793   const MB_MODE_INFO *mbmi = mi[0];
794   assert(mbmi);
795 
796   const TX_SIZE ts = get_transform_size(xd, mi[0], mi_row, mi_col, plane,
797                                         scale_horz, scale_vert);
798   *tx_size = ts;
799 
800 #ifndef NDEBUG
801   const uint32_t transform_masks =
802       is_vert ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
803   const int32_t tu_edge = ((coord * MI_SIZE) & transform_masks) ? (0) : (1);
804   assert(tu_edge);
805 #endif  // NDEBUG
806 
807   // If we are not the first block, then coord is always true, so
808   // !is_first_block is technically redundant. But we are keeping it here so the
809   // compiler can compile away this conditional if we pass in is_first_block :=
810   // false
811   bool curr_skipped = false;
812   if (!is_first_block || coord) {
813     const MB_MODE_INFO *const mi_prev = *(mi - mode_step);
814     assert(mi_prev);
815     const int pv_row = is_vert ? (mi_row) : (mi_row - (1 << scale_vert));
816     const int pv_col = is_vert ? (mi_col - (1 << scale_horz)) : (mi_col);
817     const TX_SIZE pv_ts =
818         is_first_block ? get_transform_size(xd, mi_prev, pv_row, pv_col, plane,
819                                             scale_horz, scale_vert)
820                        : prev_tx_size;
821     if (is_first_block) {
822       *min_dim = is_vert ? tx_size_high[pv_ts] : tx_size_wide[pv_ts];
823     }
824 
825     uint8_t level =
826         av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
827     if (!level) {
828       level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev);
829     }
830 #ifndef NDEBUG
831     if (joint_filter_chroma) {
832       uint8_t v_level =
833           av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_V, mbmi);
834       if (!v_level) {
835         v_level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_V,
836                                        mi_prev);
837       }
838       assert(level == v_level);
839     }
840 #else
841     (void)joint_filter_chroma;
842 #endif  // NDEBUG
843     const int32_t pu_edge = mi_prev != mbmi;
844 
845     if (!pu_edge) {
846       curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi);
847     }
848     // For realtime mode, u and v have the same level
849     if ((!curr_skipped || pu_edge) && level) {
850       params->filter_length = is_vert ? vert_filter_length_chroma[ts][pv_ts]
851                                       : horz_filter_length_chroma[ts][pv_ts];
852 
853       const loop_filter_thresh *const limits = cm->lf_info.lfthr;
854       params->lfthr = limits + level;
855     }
856   }
857   const int tx_dim = is_vert ? tx_size_high[ts] : tx_size_wide[ts];
858   *min_dim = AOMMIN(*min_dim, tx_dim);
859 }
860 
set_lpf_parameters_for_line_chroma(AV1_DEBLOCKING_PARAMETERS * const params_buf,TX_SIZE * tx_buf,const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const EDGE_DIR edge_dir,uint32_t mi_col,uint32_t mi_row,const struct macroblockd_plane * const plane_ptr,const uint32_t mi_range,const ptrdiff_t mode_step,const int scale_horz,const int scale_vert,int * min_dim,int plane,int joint_filter_chroma)861 static AOM_FORCE_INLINE void set_lpf_parameters_for_line_chroma(
862     AV1_DEBLOCKING_PARAMETERS *const params_buf, TX_SIZE *tx_buf,
863     const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
864     const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row,
865     const struct macroblockd_plane *const plane_ptr, const uint32_t mi_range,
866     const ptrdiff_t mode_step, const int scale_horz, const int scale_vert,
867     int *min_dim, int plane, int joint_filter_chroma) {
868   const int is_vert = edge_dir == VERT_EDGE;
869 
870   AV1_DEBLOCKING_PARAMETERS *params = params_buf;
871   TX_SIZE *tx_size = tx_buf;
872   uint32_t *counter_ptr = is_vert ? &mi_col : &mi_row;
873   const uint32_t scale = is_vert ? scale_horz : scale_vert;
874   TX_SIZE prev_tx_size = TX_INVALID;
875 
876   // Unroll the first iteration of the loop
877   set_one_param_for_line_chroma(params, tx_size, cm, xd, edge_dir, mi_col,
878                                 mi_row, *counter_ptr, true, prev_tx_size,
879                                 plane_ptr, mode_step, scale_horz, scale_vert,
880                                 min_dim, plane, joint_filter_chroma);
881 
882   // Advance
883   int advance_units =
884       is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size];
885   prev_tx_size = *tx_size;
886   *counter_ptr += advance_units << scale;
887   params += advance_units;
888   tx_size += advance_units;
889 
890   while (*counter_ptr < mi_range) {
891     set_one_param_for_line_chroma(params, tx_size, cm, xd, edge_dir, mi_col,
892                                   mi_row, *counter_ptr, false, prev_tx_size,
893                                   plane_ptr, mode_step, scale_horz, scale_vert,
894                                   min_dim, plane, joint_filter_chroma);
895 
896     // Advance
897     advance_units =
898         is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size];
899     prev_tx_size = *tx_size;
900     *counter_ptr += advance_units << scale;
901     params += advance_units;
902     tx_size += advance_units;
903   }
904 }
905 
filter_vert(uint8_t * dst,int dst_stride,const AV1_DEBLOCKING_PARAMETERS * params,const SequenceHeader * seq_params,USE_FILTER_TYPE use_filter_type)906 static AOM_INLINE void filter_vert(uint8_t *dst, int dst_stride,
907                                    const AV1_DEBLOCKING_PARAMETERS *params,
908                                    const SequenceHeader *seq_params,
909                                    USE_FILTER_TYPE use_filter_type) {
910   const loop_filter_thresh *limits = params->lfthr;
911 #if CONFIG_AV1_HIGHBITDEPTH
912   const int use_highbitdepth = seq_params->use_highbitdepth;
913   const aom_bit_depth_t bit_depth = seq_params->bit_depth;
914   if (use_highbitdepth) {
915     uint16_t *dst_shortptr = CONVERT_TO_SHORTPTR(dst);
916     if (use_filter_type == USE_QUAD) {
917       switch (params->filter_length) {
918         // apply 4-tap filtering
919         case 4:
920           aom_highbd_lpf_vertical_4_dual(
921               dst_shortptr, dst_stride, limits->mblim, limits->lim,
922               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
923               bit_depth);
924           aom_highbd_lpf_vertical_4_dual(
925               dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
926               limits->mblim, limits->lim, limits->hev_thr, limits->mblim,
927               limits->lim, limits->hev_thr, bit_depth);
928           break;
929         case 6:  // apply 6-tap filter for chroma plane only
930           aom_highbd_lpf_vertical_6_dual(
931               dst_shortptr, dst_stride, limits->mblim, limits->lim,
932               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
933               bit_depth);
934           aom_highbd_lpf_vertical_6_dual(
935               dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
936               limits->mblim, limits->lim, limits->hev_thr, limits->mblim,
937               limits->lim, limits->hev_thr, bit_depth);
938           break;
939         // apply 8-tap filtering
940         case 8:
941           aom_highbd_lpf_vertical_8_dual(
942               dst_shortptr, dst_stride, limits->mblim, limits->lim,
943               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
944               bit_depth);
945           aom_highbd_lpf_vertical_8_dual(
946               dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
947               limits->mblim, limits->lim, limits->hev_thr, limits->mblim,
948               limits->lim, limits->hev_thr, bit_depth);
949           break;
950         // apply 14-tap filtering
951         case 14:
952           aom_highbd_lpf_vertical_14_dual(
953               dst_shortptr, dst_stride, limits->mblim, limits->lim,
954               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
955               bit_depth);
956           aom_highbd_lpf_vertical_14_dual(
957               dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
958               limits->mblim, limits->lim, limits->hev_thr, limits->mblim,
959               limits->lim, limits->hev_thr, bit_depth);
960           break;
961         // no filtering
962         default: break;
963       }
964     } else if (use_filter_type == USE_DUAL) {
965       switch (params->filter_length) {
966         // apply 4-tap filtering
967         case 4:
968           aom_highbd_lpf_vertical_4_dual(
969               dst_shortptr, dst_stride, limits->mblim, limits->lim,
970               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
971               bit_depth);
972           break;
973         case 6:  // apply 6-tap filter for chroma plane only
974           aom_highbd_lpf_vertical_6_dual(
975               dst_shortptr, dst_stride, limits->mblim, limits->lim,
976               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
977               bit_depth);
978           break;
979         // apply 8-tap filtering
980         case 8:
981           aom_highbd_lpf_vertical_8_dual(
982               dst_shortptr, dst_stride, limits->mblim, limits->lim,
983               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
984               bit_depth);
985           break;
986         // apply 14-tap filtering
987         case 14:
988           aom_highbd_lpf_vertical_14_dual(
989               dst_shortptr, dst_stride, limits->mblim, limits->lim,
990               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
991               bit_depth);
992           break;
993         // no filtering
994         default: break;
995       }
996     } else {
997       assert(use_filter_type == USE_SINGLE);
998       switch (params->filter_length) {
999         // apply 4-tap filtering
1000         case 4:
1001           aom_highbd_lpf_vertical_4(dst_shortptr, dst_stride, limits->mblim,
1002                                     limits->lim, limits->hev_thr, bit_depth);
1003           break;
1004         case 6:  // apply 6-tap filter for chroma plane only
1005           aom_highbd_lpf_vertical_6(dst_shortptr, dst_stride, limits->mblim,
1006                                     limits->lim, limits->hev_thr, bit_depth);
1007           break;
1008         // apply 8-tap filtering
1009         case 8:
1010           aom_highbd_lpf_vertical_8(dst_shortptr, dst_stride, limits->mblim,
1011                                     limits->lim, limits->hev_thr, bit_depth);
1012           break;
1013         // apply 14-tap filtering
1014         case 14:
1015           aom_highbd_lpf_vertical_14(dst_shortptr, dst_stride, limits->mblim,
1016                                      limits->lim, limits->hev_thr, bit_depth);
1017           break;
1018         // no filtering
1019         default: break;
1020       }
1021     }
1022     return;
1023   }
1024 #endif  // CONFIG_AV1_HIGHBITDEPTH
1025   if (use_filter_type == USE_QUAD) {
1026     // Only one set of loop filter parameters (mblim, lim and hev_thr) is
1027     // passed as argument to quad loop filter because quad loop filter is
1028     // called for those cases where all the 4 set of loop filter parameters
1029     // are equal.
1030     switch (params->filter_length) {
1031       // apply 4-tap filtering
1032       case 4:
1033         aom_lpf_vertical_4_quad(dst, dst_stride, limits->mblim, limits->lim,
1034                                 limits->hev_thr);
1035         break;
1036       case 6:  // apply 6-tap filter for chroma plane only
1037         aom_lpf_vertical_6_quad(dst, dst_stride, limits->mblim, limits->lim,
1038                                 limits->hev_thr);
1039         break;
1040       // apply 8-tap filtering
1041       case 8:
1042         aom_lpf_vertical_8_quad(dst, dst_stride, limits->mblim, limits->lim,
1043                                 limits->hev_thr);
1044         break;
1045       // apply 14-tap filtering
1046       case 14:
1047         aom_lpf_vertical_14_quad(dst, dst_stride, limits->mblim, limits->lim,
1048                                  limits->hev_thr);
1049         break;
1050       // no filtering
1051       default: break;
1052     }
1053   } else if (use_filter_type == USE_DUAL) {
1054     switch (params->filter_length) {
1055       // apply 4-tap filtering
1056       case 4:
1057         aom_lpf_vertical_4_dual(dst, dst_stride, limits->mblim, limits->lim,
1058                                 limits->hev_thr, limits->mblim, limits->lim,
1059                                 limits->hev_thr);
1060         break;
1061       case 6:  // apply 6-tap filter for chroma plane only
1062         aom_lpf_vertical_6_dual(dst, dst_stride, limits->mblim, limits->lim,
1063                                 limits->hev_thr, limits->mblim, limits->lim,
1064                                 limits->hev_thr);
1065         break;
1066       // apply 8-tap filtering
1067       case 8:
1068         aom_lpf_vertical_8_dual(dst, dst_stride, limits->mblim, limits->lim,
1069                                 limits->hev_thr, limits->mblim, limits->lim,
1070                                 limits->hev_thr);
1071         break;
1072       // apply 14-tap filtering
1073       case 14:
1074         aom_lpf_vertical_14_dual(dst, dst_stride, limits->mblim, limits->lim,
1075                                  limits->hev_thr, limits->mblim, limits->lim,
1076                                  limits->hev_thr);
1077         break;
1078       // no filtering
1079       default: break;
1080     }
1081   } else {
1082     assert(use_filter_type == USE_SINGLE);
1083     switch (params->filter_length) {
1084       // apply 4-tap filtering
1085       case 4:
1086         aom_lpf_vertical_4(dst, dst_stride, limits->mblim, limits->lim,
1087                            limits->hev_thr);
1088         break;
1089       case 6:  // apply 6-tap filter for chroma plane only
1090         aom_lpf_vertical_6(dst, dst_stride, limits->mblim, limits->lim,
1091                            limits->hev_thr);
1092         break;
1093       // apply 8-tap filtering
1094       case 8:
1095         aom_lpf_vertical_8(dst, dst_stride, limits->mblim, limits->lim,
1096                            limits->hev_thr);
1097         break;
1098       // apply 14-tap filtering
1099       case 14:
1100         aom_lpf_vertical_14(dst, dst_stride, limits->mblim, limits->lim,
1101                             limits->hev_thr);
1102         break;
1103       // no filtering
1104       default: break;
1105     }
1106   }
1107 #if !CONFIG_AV1_HIGHBITDEPTH
1108   (void)seq_params;
1109 #endif  // !CONFIG_AV1_HIGHBITDEPTH
1110 }
1111 
filter_vert_chroma(uint8_t * u_dst,uint8_t * v_dst,int dst_stride,const AV1_DEBLOCKING_PARAMETERS * params,const SequenceHeader * seq_params,USE_FILTER_TYPE use_filter_type)1112 static AOM_INLINE void filter_vert_chroma(
1113     uint8_t *u_dst, uint8_t *v_dst, int dst_stride,
1114     const AV1_DEBLOCKING_PARAMETERS *params, const SequenceHeader *seq_params,
1115     USE_FILTER_TYPE use_filter_type) {
1116   const loop_filter_thresh *u_limits = params->lfthr;
1117   const loop_filter_thresh *v_limits = params->lfthr;
1118 #if CONFIG_AV1_HIGHBITDEPTH
1119   const int use_highbitdepth = seq_params->use_highbitdepth;
1120   const aom_bit_depth_t bit_depth = seq_params->bit_depth;
1121   if (use_highbitdepth) {
1122     uint16_t *u_dst_shortptr = CONVERT_TO_SHORTPTR(u_dst);
1123     uint16_t *v_dst_shortptr = CONVERT_TO_SHORTPTR(v_dst);
1124     if (use_filter_type == USE_QUAD) {
1125       switch (params->filter_length) {
1126         // apply 4-tap filtering
1127         case 4:
1128           aom_highbd_lpf_vertical_4_dual(
1129               u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1130               u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1131               u_limits->hev_thr, bit_depth);
1132           aom_highbd_lpf_vertical_4_dual(
1133               u_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
1134               u_limits->mblim, u_limits->lim, u_limits->hev_thr,
1135               u_limits->mblim, u_limits->lim, u_limits->hev_thr, bit_depth);
1136           aom_highbd_lpf_vertical_4_dual(
1137               v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1138               v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1139               v_limits->hev_thr, bit_depth);
1140           aom_highbd_lpf_vertical_4_dual(
1141               v_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
1142               v_limits->mblim, v_limits->lim, v_limits->hev_thr,
1143               v_limits->mblim, v_limits->lim, v_limits->hev_thr, bit_depth);
1144           break;
1145         case 6:  // apply 6-tap filter for chroma plane only
1146           aom_highbd_lpf_vertical_6_dual(
1147               u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1148               u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1149               u_limits->hev_thr, bit_depth);
1150           aom_highbd_lpf_vertical_6_dual(
1151               u_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
1152               u_limits->mblim, u_limits->lim, u_limits->hev_thr,
1153               u_limits->mblim, u_limits->lim, u_limits->hev_thr, bit_depth);
1154           aom_highbd_lpf_vertical_6_dual(
1155               v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1156               v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1157               v_limits->hev_thr, bit_depth);
1158           aom_highbd_lpf_vertical_6_dual(
1159               v_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
1160               v_limits->mblim, v_limits->lim, v_limits->hev_thr,
1161               v_limits->mblim, v_limits->lim, v_limits->hev_thr, bit_depth);
1162           break;
1163         case 8:
1164         case 14: assert(0);
1165         // no filtering
1166         default: break;
1167       }
1168     } else if (use_filter_type == USE_DUAL) {
1169       switch (params->filter_length) {
1170         // apply 4-tap filtering
1171         case 4:
1172           aom_highbd_lpf_vertical_4_dual(
1173               u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1174               u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1175               u_limits->hev_thr, bit_depth);
1176           aom_highbd_lpf_vertical_4_dual(
1177               v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1178               v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1179               v_limits->hev_thr, bit_depth);
1180           break;
1181         case 6:  // apply 6-tap filter for chroma plane only
1182           aom_highbd_lpf_vertical_6_dual(
1183               u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1184               u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1185               u_limits->hev_thr, bit_depth);
1186           aom_highbd_lpf_vertical_6_dual(
1187               v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1188               v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1189               v_limits->hev_thr, bit_depth);
1190           break;
1191         case 8:
1192         case 14: assert(0);
1193         // no filtering
1194         default: break;
1195       }
1196     } else {
1197       assert(use_filter_type == USE_SINGLE);
1198       switch (params->filter_length) {
1199         // apply 4-tap filtering
1200         case 4:
1201           aom_highbd_lpf_vertical_4(u_dst_shortptr, dst_stride, u_limits->mblim,
1202                                     u_limits->lim, u_limits->hev_thr,
1203                                     bit_depth);
1204           aom_highbd_lpf_vertical_4(v_dst_shortptr, dst_stride, v_limits->mblim,
1205                                     v_limits->lim, v_limits->hev_thr,
1206                                     bit_depth);
1207           break;
1208         case 6:  // apply 6-tap filter for chroma plane only
1209           aom_highbd_lpf_vertical_6(u_dst_shortptr, dst_stride, u_limits->mblim,
1210                                     u_limits->lim, u_limits->hev_thr,
1211                                     bit_depth);
1212           aom_highbd_lpf_vertical_6(v_dst_shortptr, dst_stride, v_limits->mblim,
1213                                     v_limits->lim, v_limits->hev_thr,
1214                                     bit_depth);
1215           break;
1216         case 8:
1217         case 14: assert(0); break;
1218         // no filtering
1219         default: break;
1220       }
1221     }
1222     return;
1223   }
1224 #endif  // CONFIG_AV1_HIGHBITDEPTH
1225   if (use_filter_type == USE_QUAD) {
1226     // Only one set of loop filter parameters (mblim, lim and hev_thr) is
1227     // passed as argument to quad loop filter because quad loop filter is
1228     // called for those cases where all the 4 set of loop filter parameters
1229     // are equal.
1230     switch (params->filter_length) {
1231       // apply 4-tap filtering
1232       case 4:
1233         aom_lpf_vertical_4_quad(u_dst, dst_stride, u_limits->mblim,
1234                                 u_limits->lim, u_limits->hev_thr);
1235         aom_lpf_vertical_4_quad(v_dst, dst_stride, v_limits->mblim,
1236                                 v_limits->lim, v_limits->hev_thr);
1237         break;
1238       case 6:  // apply 6-tap filter for chroma plane only
1239         aom_lpf_vertical_6_quad(u_dst, dst_stride, u_limits->mblim,
1240                                 u_limits->lim, u_limits->hev_thr);
1241         aom_lpf_vertical_6_quad(v_dst, dst_stride, v_limits->mblim,
1242                                 v_limits->lim, v_limits->hev_thr);
1243         break;
1244       case 8:
1245       case 14: assert(0);
1246       // no filtering
1247       default: break;
1248     }
1249   } else if (use_filter_type == USE_DUAL) {
1250     switch (params->filter_length) {
1251       // apply 4-tap filtering
1252       case 4:
1253         aom_lpf_vertical_4_dual(u_dst, dst_stride, u_limits->mblim,
1254                                 u_limits->lim, u_limits->hev_thr,
1255                                 u_limits->mblim, u_limits->lim,
1256                                 u_limits->hev_thr);
1257         aom_lpf_vertical_4_dual(v_dst, dst_stride, v_limits->mblim,
1258                                 v_limits->lim, v_limits->hev_thr,
1259                                 v_limits->mblim, v_limits->lim,
1260                                 v_limits->hev_thr);
1261         break;
1262       case 6:  // apply 6-tap filter for chroma plane only
1263         aom_lpf_vertical_6_dual(u_dst, dst_stride, u_limits->mblim,
1264                                 u_limits->lim, u_limits->hev_thr,
1265                                 u_limits->mblim, u_limits->lim,
1266                                 u_limits->hev_thr);
1267         aom_lpf_vertical_6_dual(v_dst, dst_stride, v_limits->mblim,
1268                                 v_limits->lim, v_limits->hev_thr,
1269                                 v_limits->mblim, v_limits->lim,
1270                                 v_limits->hev_thr);
1271         break;
1272       case 8:
1273       case 14: assert(0);
1274       // no filtering
1275       default: break;
1276     }
1277   } else {
1278     assert(use_filter_type == USE_SINGLE);
1279     switch (params->filter_length) {
1280       // apply 4-tap filtering
1281       case 4:
1282         aom_lpf_vertical_4(u_dst, dst_stride, u_limits->mblim, u_limits->lim,
1283                            u_limits->hev_thr);
1284         aom_lpf_vertical_4(v_dst, dst_stride, v_limits->mblim, v_limits->lim,
1285                            u_limits->hev_thr);
1286         break;
1287       case 6:  // apply 6-tap filter for chroma plane only
1288         aom_lpf_vertical_6(u_dst, dst_stride, u_limits->mblim, u_limits->lim,
1289                            u_limits->hev_thr);
1290         aom_lpf_vertical_6(v_dst, dst_stride, v_limits->mblim, v_limits->lim,
1291                            v_limits->hev_thr);
1292         break;
1293       case 8:
1294       case 14: assert(0); break;
1295       // no filtering
1296       default: break;
1297     }
1298   }
1299 #if !CONFIG_AV1_HIGHBITDEPTH
1300   (void)seq_params;
1301 #endif  // !CONFIG_AV1_HIGHBITDEPTH
1302 }
1303 
av1_filter_block_plane_vert(const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const int plane,const MACROBLOCKD_PLANE * const plane_ptr,const uint32_t mi_row,const uint32_t mi_col)1304 void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
1305                                  const MACROBLOCKD *const xd, const int plane,
1306                                  const MACROBLOCKD_PLANE *const plane_ptr,
1307                                  const uint32_t mi_row, const uint32_t mi_col) {
1308   const uint32_t scale_horz = plane_ptr->subsampling_x;
1309   const uint32_t scale_vert = plane_ptr->subsampling_y;
1310   uint8_t *const dst_ptr = plane_ptr->dst.buf;
1311   const int dst_stride = plane_ptr->dst.stride;
1312   const int plane_mi_rows =
1313       ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
1314   const int plane_mi_cols =
1315       ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
1316   const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
1317                              (MAX_MIB_SIZE >> scale_vert));
1318   const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
1319                              (MAX_MIB_SIZE >> scale_horz));
1320 
1321   for (int y = 0; y < y_range; y++) {
1322     uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
1323     for (int x = 0; x < x_range;) {
1324       // inner loop always filter vertical edges in a MI block. If MI size
1325       // is 8x8, it will filter the vertical edge aligned with a 8x8 block.
1326       // If 4x4 transform is used, it will then filter the internal edge
1327       //  aligned with a 4x4 block
1328       const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
1329       const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
1330       uint32_t advance_units;
1331       TX_SIZE tx_size;
1332       AV1_DEBLOCKING_PARAMETERS params;
1333       memset(&params, 0, sizeof(params));
1334 
1335       tx_size =
1336           set_lpf_parameters(&params, ((ptrdiff_t)1 << scale_horz), cm, xd,
1337                              VERT_EDGE, curr_x, curr_y, plane, plane_ptr);
1338       if (tx_size == TX_INVALID) {
1339         params.filter_length = 0;
1340         tx_size = TX_4X4;
1341       }
1342 
1343       filter_vert(p, dst_stride, &params, cm->seq_params, USE_SINGLE);
1344 
1345       // advance the destination pointer
1346       advance_units = tx_size_wide_unit[tx_size];
1347       x += advance_units;
1348       p += advance_units * MI_SIZE;
1349     }
1350   }
1351 }
1352 
// Filters the vertical edges of a plane inside the unit whose top-left
// mode-info position is (mi_row, mi_col), handling several rows per kernel
// call where possible. The parameters for each row come from
// set_lpf_parameters_for_line_luma().
//
// For each processed row, the filter parameters and transform sizes of the
// whole row are first written to params_buf / tx_buf, together with the
// minimum block height seen on that row. That minimum decides how many rows
// one pass over the buffers can cover:
//   - USE_QUAD (4 rows): row index is a multiple of 4, 3 more rows exist in
//     the unit and the minimum height is at least 16 pixels;
//   - USE_DUAL (2 rows): 1 more row exists and the minimum height is at least
//     8 pixels;
//   - USE_SINGLE (1 row) otherwise.
//
// The unit height is bounded by 1 << num_mis_in_lpf_unit_height_log2 (this
// follows the superblock size in use when 'pipeline_lpf_mt_with_enc' is
// enabled). The width is always bounded by MAX_MIB_SIZE, since the function
// is called for mi_col = 0, MAX_MIB_SIZE, 2 * MAX_MIB_SIZE, etc.
void av1_filter_block_plane_vert_opt(
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
    const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
    const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
    TX_SIZE *tx_buf, int num_mis_in_lpf_unit_height_log2) {
  uint8_t *const plane_buf = plane_ptr->dst.buf;
  const int stride = plane_ptr->dst.stride;

  // Plane size in mode-info units, from the frame dimension aligned up to
  // MI_SIZE.
  const int plane_mi_cols =
      CEIL_POWER_OF_TWO(plane_ptr->dst.width, MI_SIZE_LOG2);
  const int plane_mi_rows =
      CEIL_POWER_OF_TWO(plane_ptr->dst.height, MI_SIZE_LOG2);

  const int y_range = AOMMIN((int)(plane_mi_rows - mi_row),
                             (1 << num_mis_in_lpf_unit_height_log2));
  const int x_range = AOMMIN((int)(plane_mi_cols - mi_col), MAX_MIB_SIZE);

  const ptrdiff_t mode_step = 1;
  const uint32_t x_start = mi_col;
  const uint32_t x_end = mi_col + x_range;

  int y = 0;
  while (y < y_range) {
    const uint32_t curr_y = mi_row + y;
    int min_block_height = block_size_high[BLOCK_128X128];
    set_lpf_parameters_for_line_luma(params_buf, tx_buf, cm, xd, VERT_EDGE,
                                     x_start, curr_y, plane_ptr, x_end,
                                     mode_step, &min_block_height);

    // Decide how many rows this pass covers.
    USE_FILTER_TYPE filter_type = USE_SINGLE;
    int rows_covered = 1;
    if ((y & 3) == 0 && (y + 3) < y_range && min_block_height >= 16) {
      // On a row that is a multiple of 4 with every block at least 16 pixels
      // high, this row and the 3 rows below it share the same blocks, because
      // a dimension of 16 can only start every 4 mi units.
      filter_type = USE_QUAD;
      rows_covered = 4;
    } else if ((y + 1) < y_range && min_block_height >= 8) {
      filter_type = USE_DUAL;
      rows_covered = 2;
    }

    uint8_t *p = plane_buf + y * MI_SIZE * stride;
    AV1_DEBLOCKING_PARAMETERS *cur_params = params_buf;
    TX_SIZE *cur_tx = tx_buf;

    int x = 0;
    while (x < x_range) {
      if (*cur_tx == TX_INVALID) {
        // Nothing to filter here: disable the filter and step by one 4x4.
        cur_params->filter_length = 0;
        *cur_tx = TX_4X4;
      }

      filter_vert(p, stride, cur_params, cm->seq_params, filter_type);

      // Move to the next transform block on this row.
      const uint32_t advance_units = tx_size_wide_unit[*cur_tx];
      x += advance_units;
      p += advance_units * MI_SIZE;
      cur_params += advance_units;
      cur_tx += advance_units;
    }

    y += rows_covered;
  }
}
1420 
// Chroma counterpart of av1_filter_block_plane_vert_opt(): filters the
// vertical edges of a chroma plane inside the unit whose top-left mode-info
// position is (mi_row, mi_col).
//
// For each processed row, set_lpf_parameters_for_line_chroma() writes the
// filter parameters and transform sizes of the whole row to params_buf /
// tx_buf, together with the minimum transform height seen on that row. That
// minimum decides how many rows one pass over the buffers covers (USE_QUAD:
// 4 rows, USE_DUAL: 2 rows, USE_SINGLE: 1 row).
//
// When joint_filter_chroma is set, plane_ptr[0] and plane_ptr[1] are filtered
// together as the U and V planes through filter_vert_chroma(), using the same
// offset and the stride of plane_ptr[0]; otherwise only plane_ptr[0] is
// filtered through filter_vert().
//
// The unit height is bounded by 1 << num_mis_in_lpf_unit_height_log2 and the
// width by MAX_MIB_SIZE, both scaled down by the plane's subsampling.
void av1_filter_block_plane_vert_opt_chroma(
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
    const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
    const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
    TX_SIZE *tx_buf, int plane, bool joint_filter_chroma,
    int num_mis_in_lpf_unit_height_log2) {
  const uint32_t scale_horz = plane_ptr->subsampling_x;
  const uint32_t scale_vert = plane_ptr->subsampling_y;
  const int dst_stride = plane_ptr->dst.stride;
  // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned
  // to MI_SIZE.
  const int mi_cols =
      ((plane_ptr->dst.width << scale_horz) + MI_SIZE - 1) >> MI_SIZE_LOG2;
  const int mi_rows =
      ((plane_ptr->dst.height << scale_vert) + MI_SIZE - 1) >> MI_SIZE_LOG2;
  const int plane_mi_rows = ROUND_POWER_OF_TWO(mi_rows, scale_vert);
  const int plane_mi_cols = ROUND_POWER_OF_TWO(mi_cols, scale_horz);
  const int y_range =
      AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
             ((1 << num_mis_in_lpf_unit_height_log2) >> scale_vert));
  const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
                             (MAX_MIB_SIZE >> scale_horz));
  const ptrdiff_t mode_step = (ptrdiff_t)1 << scale_horz;

  for (int y = 0; y < y_range; y++) {
    // y and x_range count plane rows/columns; the line helper works in
    // luma mode-info units, hence the shifts by the subsampling.
    const uint32_t curr_y = mi_row + (y << scale_vert);
    const uint32_t x_start = mi_col;
    const uint32_t x_end = mi_col + (x_range << scale_horz);
    int min_height = tx_size_high[TX_64X64];
    set_lpf_parameters_for_line_chroma(params_buf, tx_buf, cm, xd, VERT_EDGE,
                                       x_start, curr_y, plane_ptr, x_end,
                                       mode_step, scale_horz, scale_vert,
                                       &min_height, plane, joint_filter_chroma);

    AV1_DEBLOCKING_PARAMETERS *params = params_buf;
    TX_SIZE *tx_size = tx_buf;
    // Same type as the parameter of filter_vert() / filter_vert_chroma().
    USE_FILTER_TYPE use_filter_type = USE_SINGLE;
    // Extra rows (beyond the loop's own y++) covered by this pass.
    int y_inc = 0;

    if ((y & 3) == 0 && (y + 3) < y_range && min_height >= 16) {
      // If we are on a row which is a multiple of 4, and the minimum height is
      // 16 pixels, then the current and below 3 rows must contain the same tx
      // block. This is because dim 16 can only happen every unit of 4 mi's.
      use_filter_type = USE_QUAD;
      y_inc = 3;
    } else if (y % 2 == 0 && (y + 1) < y_range && min_height >= 8) {
      // If we are on an even row, and the minimum height is 8 pixels, then the
      // current and below rows must contain the same tx block. This is because
      // dim 4 can only happen every unit of 2**0, and 8 every unit of 2**1,
      // etc.
      use_filter_type = USE_DUAL;
      y_inc = 1;
    }

    for (int x = 0; x < x_range;) {
      if (*tx_size == TX_INVALID) {
        // Nothing to filter here: disable the filter and step by one 4x4.
        params->filter_length = 0;
        *tx_size = TX_4X4;
      }

      const int offset = y * MI_SIZE * dst_stride + x * MI_SIZE;
      if (joint_filter_chroma) {
        uint8_t *u_dst = plane_ptr[0].dst.buf + offset;
        uint8_t *v_dst = plane_ptr[1].dst.buf + offset;
        filter_vert_chroma(u_dst, v_dst, dst_stride, params, cm->seq_params,
                           use_filter_type);
      } else {
        uint8_t *dst_ptr = plane_ptr->dst.buf + offset;
        filter_vert(dst_ptr, dst_stride, params, cm->seq_params,
                    use_filter_type);
      }

      // Move to the next transform block on this row.
      const uint32_t advance_units = tx_size_wide_unit[*tx_size];
      x += advance_units;
      params += advance_units;
      tx_size += advance_units;
    }
    y += y_inc;
  }
}
1506 
filter_horz(uint8_t * dst,int dst_stride,const AV1_DEBLOCKING_PARAMETERS * params,const SequenceHeader * seq_params,USE_FILTER_TYPE use_filter_type)1507 static AOM_INLINE void filter_horz(uint8_t *dst, int dst_stride,
1508                                    const AV1_DEBLOCKING_PARAMETERS *params,
1509                                    const SequenceHeader *seq_params,
1510                                    USE_FILTER_TYPE use_filter_type) {
1511   const loop_filter_thresh *limits = params->lfthr;
1512 #if CONFIG_AV1_HIGHBITDEPTH
1513   const int use_highbitdepth = seq_params->use_highbitdepth;
1514   const aom_bit_depth_t bit_depth = seq_params->bit_depth;
1515   if (use_highbitdepth) {
1516     uint16_t *dst_shortptr = CONVERT_TO_SHORTPTR(dst);
1517     if (use_filter_type == USE_QUAD) {
1518       switch (params->filter_length) {
1519         // apply 4-tap filtering
1520         case 4:
1521           aom_highbd_lpf_horizontal_4_dual(
1522               dst_shortptr, dst_stride, limits->mblim, limits->lim,
1523               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1524               bit_depth);
1525           aom_highbd_lpf_horizontal_4_dual(
1526               dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim,
1527               limits->lim, limits->hev_thr, limits->mblim, limits->lim,
1528               limits->hev_thr, bit_depth);
1529           break;
1530         case 6:  // apply 6-tap filter for chroma plane only
1531           aom_highbd_lpf_horizontal_6_dual(
1532               dst_shortptr, dst_stride, limits->mblim, limits->lim,
1533               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1534               bit_depth);
1535           aom_highbd_lpf_horizontal_6_dual(
1536               dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim,
1537               limits->lim, limits->hev_thr, limits->mblim, limits->lim,
1538               limits->hev_thr, bit_depth);
1539           break;
1540         // apply 8-tap filtering
1541         case 8:
1542           aom_highbd_lpf_horizontal_8_dual(
1543               dst_shortptr, dst_stride, limits->mblim, limits->lim,
1544               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1545               bit_depth);
1546           aom_highbd_lpf_horizontal_8_dual(
1547               dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim,
1548               limits->lim, limits->hev_thr, limits->mblim, limits->lim,
1549               limits->hev_thr, bit_depth);
1550           break;
1551         // apply 14-tap filtering
1552         case 14:
1553           aom_highbd_lpf_horizontal_14_dual(
1554               dst_shortptr, dst_stride, limits->mblim, limits->lim,
1555               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1556               bit_depth);
1557           aom_highbd_lpf_horizontal_14_dual(
1558               dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim,
1559               limits->lim, limits->hev_thr, limits->mblim, limits->lim,
1560               limits->hev_thr, bit_depth);
1561           break;
1562         // no filtering
1563         default: break;
1564       }
1565     } else if (use_filter_type == USE_DUAL) {
1566       switch (params->filter_length) {
1567         // apply 4-tap filtering
1568         case 4:
1569           aom_highbd_lpf_horizontal_4_dual(
1570               dst_shortptr, dst_stride, limits->mblim, limits->lim,
1571               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1572               bit_depth);
1573           break;
1574         case 6:  // apply 6-tap filter for chroma plane only
1575           aom_highbd_lpf_horizontal_6_dual(
1576               dst_shortptr, dst_stride, limits->mblim, limits->lim,
1577               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1578               bit_depth);
1579           break;
1580         // apply 8-tap filtering
1581         case 8:
1582           aom_highbd_lpf_horizontal_8_dual(
1583               dst_shortptr, dst_stride, limits->mblim, limits->lim,
1584               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1585               bit_depth);
1586           break;
1587         // apply 14-tap filtering
1588         case 14:
1589           aom_highbd_lpf_horizontal_14_dual(
1590               dst_shortptr, dst_stride, limits->mblim, limits->lim,
1591               limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1592               bit_depth);
1593           break;
1594         // no filtering
1595         default: break;
1596       }
1597     } else {
1598       assert(use_filter_type == USE_SINGLE);
1599       switch (params->filter_length) {
1600         // apply 4-tap filtering
1601         case 4:
1602           aom_highbd_lpf_horizontal_4(dst_shortptr, dst_stride, limits->mblim,
1603                                       limits->lim, limits->hev_thr, bit_depth);
1604           break;
1605         case 6:  // apply 6-tap filter for chroma plane only
1606           aom_highbd_lpf_horizontal_6(dst_shortptr, dst_stride, limits->mblim,
1607                                       limits->lim, limits->hev_thr, bit_depth);
1608           break;
1609         // apply 8-tap filtering
1610         case 8:
1611           aom_highbd_lpf_horizontal_8(dst_shortptr, dst_stride, limits->mblim,
1612                                       limits->lim, limits->hev_thr, bit_depth);
1613           break;
1614         // apply 14-tap filtering
1615         case 14:
1616           aom_highbd_lpf_horizontal_14(dst_shortptr, dst_stride, limits->mblim,
1617                                        limits->lim, limits->hev_thr, bit_depth);
1618           break;
1619         // no filtering
1620         default: break;
1621       }
1622     }
1623     return;
1624   }
1625 #endif  // CONFIG_AV1_HIGHBITDEPTH
1626   if (use_filter_type == USE_QUAD) {
1627     // Only one set of loop filter parameters (mblim, lim and hev_thr) is
1628     // passed as argument to quad loop filter because quad loop filter is
1629     // called for those cases where all the 4 set of loop filter parameters
1630     // are equal.
1631     switch (params->filter_length) {
1632       // apply 4-tap filtering
1633       case 4:
1634         aom_lpf_horizontal_4_quad(dst, dst_stride, limits->mblim, limits->lim,
1635                                   limits->hev_thr);
1636         break;
1637       case 6:  // apply 6-tap filter for chroma plane only
1638         aom_lpf_horizontal_6_quad(dst, dst_stride, limits->mblim, limits->lim,
1639                                   limits->hev_thr);
1640         break;
1641       // apply 8-tap filtering
1642       case 8:
1643         aom_lpf_horizontal_8_quad(dst, dst_stride, limits->mblim, limits->lim,
1644                                   limits->hev_thr);
1645         break;
1646       // apply 14-tap filtering
1647       case 14:
1648         aom_lpf_horizontal_14_quad(dst, dst_stride, limits->mblim, limits->lim,
1649                                    limits->hev_thr);
1650         break;
1651       // no filtering
1652       default: break;
1653     }
1654   } else if (use_filter_type == USE_DUAL) {
1655     switch (params->filter_length) {
1656       // apply 4-tap filtering
1657       case 4:
1658         aom_lpf_horizontal_4_dual(dst, dst_stride, limits->mblim, limits->lim,
1659                                   limits->hev_thr, limits->mblim, limits->lim,
1660                                   limits->hev_thr);
1661         break;
1662       case 6:  // apply 6-tap filter for chroma plane only
1663         aom_lpf_horizontal_6_dual(dst, dst_stride, limits->mblim, limits->lim,
1664                                   limits->hev_thr, limits->mblim, limits->lim,
1665                                   limits->hev_thr);
1666         break;
1667       // apply 8-tap filtering
1668       case 8:
1669         aom_lpf_horizontal_8_dual(dst, dst_stride, limits->mblim, limits->lim,
1670                                   limits->hev_thr, limits->mblim, limits->lim,
1671                                   limits->hev_thr);
1672         break;
1673       // apply 14-tap filtering
1674       case 14:
1675         aom_lpf_horizontal_14_dual(dst, dst_stride, limits->mblim, limits->lim,
1676                                    limits->hev_thr, limits->mblim, limits->lim,
1677                                    limits->hev_thr);
1678         break;
1679       // no filtering
1680       default: break;
1681     }
1682   } else {
1683     assert(use_filter_type == USE_SINGLE);
1684     switch (params->filter_length) {
1685       // apply 4-tap filtering
1686       case 4:
1687         aom_lpf_horizontal_4(dst, dst_stride, limits->mblim, limits->lim,
1688                              limits->hev_thr);
1689         break;
1690       case 6:  // apply 6-tap filter for chroma plane only
1691         aom_lpf_horizontal_6(dst, dst_stride, limits->mblim, limits->lim,
1692                              limits->hev_thr);
1693         break;
1694       // apply 8-tap filtering
1695       case 8:
1696         aom_lpf_horizontal_8(dst, dst_stride, limits->mblim, limits->lim,
1697                              limits->hev_thr);
1698         break;
1699       // apply 14-tap filtering
1700       case 14:
1701         aom_lpf_horizontal_14(dst, dst_stride, limits->mblim, limits->lim,
1702                               limits->hev_thr);
1703         break;
1704       // no filtering
1705       default: break;
1706     }
1707   }
1708 #if !CONFIG_AV1_HIGHBITDEPTH
1709   (void)seq_params;
1710 #endif  // !CONFIG_AV1_HIGHBITDEPTH
1711 }
1712 
filter_horz_chroma(uint8_t * u_dst,uint8_t * v_dst,int dst_stride,const AV1_DEBLOCKING_PARAMETERS * params,const SequenceHeader * seq_params,USE_FILTER_TYPE use_filter_type)1713 static AOM_INLINE void filter_horz_chroma(
1714     uint8_t *u_dst, uint8_t *v_dst, int dst_stride,
1715     const AV1_DEBLOCKING_PARAMETERS *params, const SequenceHeader *seq_params,
1716     USE_FILTER_TYPE use_filter_type) {
1717   const loop_filter_thresh *u_limits = params->lfthr;
1718   const loop_filter_thresh *v_limits = params->lfthr;
1719 #if CONFIG_AV1_HIGHBITDEPTH
1720   const int use_highbitdepth = seq_params->use_highbitdepth;
1721   const aom_bit_depth_t bit_depth = seq_params->bit_depth;
1722   if (use_highbitdepth) {
1723     uint16_t *u_dst_shortptr = CONVERT_TO_SHORTPTR(u_dst);
1724     uint16_t *v_dst_shortptr = CONVERT_TO_SHORTPTR(v_dst);
1725     if (use_filter_type == USE_QUAD) {
1726       switch (params->filter_length) {
1727         // apply 4-tap filtering
1728         case 4:
1729           aom_highbd_lpf_horizontal_4_dual(
1730               u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1731               u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1732               u_limits->hev_thr, bit_depth);
1733           aom_highbd_lpf_horizontal_4_dual(
1734               u_dst_shortptr + (2 * MI_SIZE), dst_stride, u_limits->mblim,
1735               u_limits->lim, u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1736               u_limits->hev_thr, bit_depth);
1737           aom_highbd_lpf_horizontal_4_dual(
1738               v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1739               v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1740               v_limits->hev_thr, bit_depth);
1741           aom_highbd_lpf_horizontal_4_dual(
1742               v_dst_shortptr + (2 * MI_SIZE), dst_stride, v_limits->mblim,
1743               v_limits->lim, v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1744               v_limits->hev_thr, bit_depth);
1745           break;
1746         case 6:  // apply 6-tap filter for chroma plane only
1747           aom_highbd_lpf_horizontal_6_dual(
1748               u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1749               u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1750               u_limits->hev_thr, bit_depth);
1751           aom_highbd_lpf_horizontal_6_dual(
1752               u_dst_shortptr + (2 * MI_SIZE), dst_stride, u_limits->mblim,
1753               u_limits->lim, u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1754               u_limits->hev_thr, bit_depth);
1755           aom_highbd_lpf_horizontal_6_dual(
1756               v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1757               v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1758               v_limits->hev_thr, bit_depth);
1759           aom_highbd_lpf_horizontal_6_dual(
1760               v_dst_shortptr + (2 * MI_SIZE), dst_stride, v_limits->mblim,
1761               v_limits->lim, v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1762               v_limits->hev_thr, bit_depth);
1763           break;
1764         case 8:
1765         case 14: assert(0);
1766         // no filtering
1767         default: break;
1768       }
1769     } else if (use_filter_type == USE_DUAL) {
1770       switch (params->filter_length) {
1771         // apply 4-tap filtering
1772         case 4:
1773           aom_highbd_lpf_horizontal_4_dual(
1774               u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1775               u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1776               u_limits->hev_thr, bit_depth);
1777           aom_highbd_lpf_horizontal_4_dual(
1778               v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1779               v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1780               v_limits->hev_thr, bit_depth);
1781           break;
1782         case 6:  // apply 6-tap filter for chroma plane only
1783           aom_highbd_lpf_horizontal_6_dual(
1784               u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1785               u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1786               u_limits->hev_thr, bit_depth);
1787           aom_highbd_lpf_horizontal_6_dual(
1788               v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1789               v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1790               v_limits->hev_thr, bit_depth);
1791           break;
1792         case 8:
1793         case 14: assert(0);
1794         // no filtering
1795         default: break;
1796       }
1797     } else {
1798       assert(use_filter_type == USE_SINGLE);
1799       switch (params->filter_length) {
1800         // apply 4-tap filtering
1801         case 4:
1802           aom_highbd_lpf_horizontal_4(u_dst_shortptr, dst_stride,
1803                                       u_limits->mblim, u_limits->lim,
1804                                       u_limits->hev_thr, bit_depth);
1805           aom_highbd_lpf_horizontal_4(v_dst_shortptr, dst_stride,
1806                                       v_limits->mblim, v_limits->lim,
1807                                       v_limits->hev_thr, bit_depth);
1808           break;
1809         case 6:  // apply 6-tap filter for chroma plane only
1810           aom_highbd_lpf_horizontal_6(u_dst_shortptr, dst_stride,
1811                                       u_limits->mblim, u_limits->lim,
1812                                       u_limits->hev_thr, bit_depth);
1813           aom_highbd_lpf_horizontal_6(v_dst_shortptr, dst_stride,
1814                                       v_limits->mblim, v_limits->lim,
1815                                       v_limits->hev_thr, bit_depth);
1816           break;
1817         case 8:
1818         case 14: assert(0); break;
1819         // no filtering
1820         default: break;
1821       }
1822     }
1823     return;
1824   }
1825 #endif  // CONFIG_AV1_HIGHBITDEPTH
1826   if (use_filter_type == USE_QUAD) {
1827     // Only one set of loop filter parameters (mblim, lim and hev_thr) is
1828     // passed as argument to quad loop filter because quad loop filter is
1829     // called for those cases where all the 4 set of loop filter parameters
1830     // are equal.
1831     switch (params->filter_length) {
1832       // apply 4-tap filtering
1833       case 4:
1834         aom_lpf_horizontal_4_quad(u_dst, dst_stride, u_limits->mblim,
1835                                   u_limits->lim, u_limits->hev_thr);
1836         aom_lpf_horizontal_4_quad(v_dst, dst_stride, v_limits->mblim,
1837                                   v_limits->lim, v_limits->hev_thr);
1838         break;
1839       case 6:  // apply 6-tap filter for chroma plane only
1840         aom_lpf_horizontal_6_quad(u_dst, dst_stride, u_limits->mblim,
1841                                   u_limits->lim, u_limits->hev_thr);
1842         aom_lpf_horizontal_6_quad(v_dst, dst_stride, v_limits->mblim,
1843                                   v_limits->lim, v_limits->hev_thr);
1844         break;
1845       case 8:
1846       case 14: assert(0);
1847       // no filtering
1848       default: break;
1849     }
1850   } else if (use_filter_type == USE_DUAL) {
1851     switch (params->filter_length) {
1852       // apply 4-tap filtering
1853       case 4:
1854         aom_lpf_horizontal_4_dual(u_dst, dst_stride, u_limits->mblim,
1855                                   u_limits->lim, u_limits->hev_thr,
1856                                   u_limits->mblim, u_limits->lim,
1857                                   u_limits->hev_thr);
1858         aom_lpf_horizontal_4_dual(v_dst, dst_stride, v_limits->mblim,
1859                                   v_limits->lim, v_limits->hev_thr,
1860                                   v_limits->mblim, v_limits->lim,
1861                                   v_limits->hev_thr);
1862         break;
1863       case 6:  // apply 6-tap filter for chroma plane only
1864         aom_lpf_horizontal_6_dual(u_dst, dst_stride, u_limits->mblim,
1865                                   u_limits->lim, u_limits->hev_thr,
1866                                   u_limits->mblim, u_limits->lim,
1867                                   u_limits->hev_thr);
1868         aom_lpf_horizontal_6_dual(v_dst, dst_stride, v_limits->mblim,
1869                                   v_limits->lim, v_limits->hev_thr,
1870                                   v_limits->mblim, v_limits->lim,
1871                                   v_limits->hev_thr);
1872         break;
1873       case 8:
1874       case 14: assert(0);
1875       // no filtering
1876       default: break;
1877     }
1878   } else {
1879     assert(use_filter_type == USE_SINGLE);
1880     switch (params->filter_length) {
1881       // apply 4-tap filtering
1882       case 4:
1883         aom_lpf_horizontal_4(u_dst, dst_stride, u_limits->mblim, u_limits->lim,
1884                              u_limits->hev_thr);
1885         aom_lpf_horizontal_4(v_dst, dst_stride, v_limits->mblim, v_limits->lim,
1886                              u_limits->hev_thr);
1887         break;
1888       case 6:  // apply 6-tap filter for chroma plane only
1889         aom_lpf_horizontal_6(u_dst, dst_stride, u_limits->mblim, u_limits->lim,
1890                              u_limits->hev_thr);
1891         aom_lpf_horizontal_6(v_dst, dst_stride, v_limits->mblim, v_limits->lim,
1892                              v_limits->hev_thr);
1893         break;
1894       case 8:
1895       case 14: assert(0); break;
1896       // no filtering
1897       default: break;
1898     }
1899   }
1900 #if !CONFIG_AV1_HIGHBITDEPTH
1901   (void)seq_params;
1902 #endif  // !CONFIG_AV1_HIGHBITDEPTH
1903 }
1904 
av1_filter_block_plane_horz(const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const int plane,const MACROBLOCKD_PLANE * const plane_ptr,const uint32_t mi_row,const uint32_t mi_col)1905 void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
1906                                  const MACROBLOCKD *const xd, const int plane,
1907                                  const MACROBLOCKD_PLANE *const plane_ptr,
1908                                  const uint32_t mi_row, const uint32_t mi_col) {
1909   const uint32_t scale_horz = plane_ptr->subsampling_x;
1910   const uint32_t scale_vert = plane_ptr->subsampling_y;
1911   uint8_t *const dst_ptr = plane_ptr->dst.buf;
1912   const int dst_stride = plane_ptr->dst.stride;
1913   const int plane_mi_rows =
1914       ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
1915   const int plane_mi_cols =
1916       ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
1917   const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
1918                              (MAX_MIB_SIZE >> scale_vert));
1919   const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
1920                              (MAX_MIB_SIZE >> scale_horz));
1921   for (int x = 0; x < x_range; x++) {
1922     uint8_t *p = dst_ptr + x * MI_SIZE;
1923     for (int y = 0; y < y_range;) {
1924       // inner loop always filter vertical edges in a MI block. If MI size
1925       // is 8x8, it will first filter the vertical edge aligned with a 8x8
1926       // block. If 4x4 transform is used, it will then filter the internal
1927       // edge aligned with a 4x4 block
1928       const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
1929       const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
1930       uint32_t advance_units;
1931       TX_SIZE tx_size;
1932       AV1_DEBLOCKING_PARAMETERS params;
1933       memset(&params, 0, sizeof(params));
1934 
1935       tx_size = set_lpf_parameters(
1936           &params, (cm->mi_params.mi_stride << scale_vert), cm, xd, HORZ_EDGE,
1937           curr_x, curr_y, plane, plane_ptr);
1938       if (tx_size == TX_INVALID) {
1939         params.filter_length = 0;
1940         tx_size = TX_4X4;
1941       }
1942 
1943       filter_horz(p, dst_stride, &params, cm->seq_params, USE_SINGLE);
1944 
1945       // advance the destination pointer
1946       advance_units = tx_size_high_unit[tx_size];
1947       y += advance_units;
1948       p += advance_units * dst_stride * MI_SIZE;
1949     }
1950   }
1951 }
1952 
// Deblocks the horizontal edges of a plane using the luma line-parameter
// helper (no subsampling is applied), for the region starting at
// (mi_row, mi_col).
//
// The region is at most MAX_MIB_SIZE mi columns wide and
// (1 << num_mis_in_lpf_unit_height_log2) mi rows tall, clipped to the plane
// dimensions rounded up to a whole MI_SIZE.
//
// For each column, set_lpf_parameters_for_line_luma() fills |params_buf| and
// |tx_buf| with one entry per mi row and reports the minimum block width seen
// in that column. When that width guarantees that the neighbouring columns
// belong to the same blocks, several columns are filtered with one call:
//   - USE_QUAD: column index is a multiple of 4 and min width >= 16;
//   - USE_DUAL: column index is even and min width >= 8;
//   - USE_SINGLE otherwise.
//
// NOTE(review): |params_buf| and |tx_buf| are presumably sized by the caller
// to hold at least one entry per mi row of the unit - confirm at call sites.
void av1_filter_block_plane_horz_opt(
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
    const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
    const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
    TX_SIZE *tx_buf, int num_mis_in_lpf_unit_height_log2) {
  uint8_t *const dst_ptr = plane_ptr->dst.buf;
  const int dst_stride = plane_ptr->dst.stride;
  // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned
  // to MI_SIZE.
  const int plane_mi_cols =
      CEIL_POWER_OF_TWO(plane_ptr->dst.width, MI_SIZE_LOG2);
  const int plane_mi_rows =
      CEIL_POWER_OF_TWO(plane_ptr->dst.height, MI_SIZE_LOG2);
  const int y_range = AOMMIN((int)(plane_mi_rows - mi_row),
                             (1 << num_mis_in_lpf_unit_height_log2));
  const int x_range = AOMMIN((int)(plane_mi_cols - mi_col), MAX_MIB_SIZE);

  const ptrdiff_t mode_step = cm->mi_params.mi_stride;
  for (int x = 0; x < x_range; x++) {
    const uint32_t curr_x = mi_col + x;
    const uint32_t y_start = mi_row;
    const uint32_t y_end = mi_row + y_range;
    int min_block_width = block_size_high[BLOCK_128X128];
    set_lpf_parameters_for_line_luma(params_buf, tx_buf, cm, xd, HORZ_EDGE,
                                     curr_x, y_start, plane_ptr, y_end,
                                     mode_step, &min_block_width);

    AV1_DEBLOCKING_PARAMETERS *params = params_buf;
    TX_SIZE *tx_size = tx_buf;
    USE_FILTER_TYPE filter_type = USE_SINGLE;

    // Must be taken before x is advanced below: this is the left-most column
    // of the group being filtered.
    uint8_t *p = dst_ptr + x * MI_SIZE;

    if ((x & 3) == 0 && (x + 3) < x_range && min_block_width >= 16) {
      // If we are on a col which is a multiple of 4, and the minimum width is
      // 16 pixels, then the current and right 3 cols must contain the same
      // prediction block. This is because dim 16 can only happen every unit of
      // 4 mi's.
      filter_type = USE_QUAD;
      x += 3;
    } else if ((x & 1) == 0 && (x + 1) < x_range && min_block_width >= 8) {
      // If we are on an even col, and the minimum width is 8 pixels, then the
      // current and right cols must contain the same prediction block. This
      // is because dim 8 can only happen every unit of 2 mi's. The parity
      // check matters: starting a pair on an odd column would straddle two
      // different blocks and filter the second with the first one's
      // parameters.
      filter_type = USE_DUAL;
      x += 1;
    }

    for (int y = 0; y < y_range;) {
      if (*tx_size == TX_INVALID) {
        // No usable transform size: skip filtering and step by one 4x4 unit.
        params->filter_length = 0;
        *tx_size = TX_4X4;
      }

      filter_horz(p, dst_stride, params, cm->seq_params, filter_type);

      // advance the destination pointer
      const uint32_t advance_units = tx_size_high_unit[*tx_size];
      y += advance_units;
      p += advance_units * dst_stride * MI_SIZE;
      params += advance_units;
      tx_size += advance_units;
    }
  }
}
2015 
// Deblocks the horizontal edges of a chroma plane (or of both chroma planes
// at once) for the region starting at (mi_row, mi_col).
//
// The region is at most MAX_MIB_SIZE mi columns wide and
// (1 << num_mis_in_lpf_unit_height_log2) mi rows tall, both scaled by the
// plane's subsampling and clipped to the frame.
//
// For each column, set_lpf_parameters_for_line_chroma() fills |params_buf|
// and |tx_buf| with one entry per plane mi row and reports the minimum
// transform width in that column. That width decides how many columns one
// kernel call covers (quad, dual or single).
//
// When |joint_filter_chroma| is true, plane_ptr[0] and plane_ptr[1] are
// filtered together with the same parameters; otherwise only |plane_ptr|
// itself is filtered.
void av1_filter_block_plane_horz_opt_chroma(
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
    const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
    const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
    TX_SIZE *tx_buf, int plane, bool joint_filter_chroma,
    int num_mis_in_lpf_unit_height_log2) {
  const uint32_t ss_x = plane_ptr->subsampling_x;
  const uint32_t ss_y = plane_ptr->subsampling_y;
  const int stride = plane_ptr->dst.stride;

  // Frame size in mi units, derived from the plane dimensions rounded up to a
  // whole MI_SIZE, then scaled back down to this plane's resolution.
  const int frame_mi_cols =
      ((plane_ptr->dst.width << ss_x) + MI_SIZE - 1) >> MI_SIZE_LOG2;
  const int frame_mi_rows =
      ((plane_ptr->dst.height << ss_y) + MI_SIZE - 1) >> MI_SIZE_LOG2;
  const int plane_mi_cols = ROUND_POWER_OF_TWO(frame_mi_cols, ss_x);
  const int plane_mi_rows = ROUND_POWER_OF_TWO(frame_mi_rows, ss_y);

  // Extent of the region to process, clipped to the frame boundary.
  const int unit_rows = (1 << num_mis_in_lpf_unit_height_log2) >> ss_y;
  const int unit_cols = MAX_MIB_SIZE >> ss_x;
  const int y_range =
      AOMMIN((int)(plane_mi_rows - (mi_row >> ss_y)), unit_rows);
  const int x_range =
      AOMMIN((int)(plane_mi_cols - (mi_col >> ss_x)), unit_cols);

  const ptrdiff_t mode_step = cm->mi_params.mi_stride << ss_y;

  int x = 0;
  while (x < x_range) {
    const uint32_t curr_x = mi_col + (x << ss_x);
    const uint32_t y_end = mi_row + (y_range << ss_y);
    int min_width = tx_size_wide[TX_64X64];
    set_lpf_parameters_for_line_chroma(params_buf, tx_buf, cm, xd, HORZ_EDGE,
                                       curr_x, mi_row, plane_ptr, y_end,
                                       mode_step, ss_x, ss_y, &min_width,
                                       plane, joint_filter_chroma);

    // Decide how many adjacent columns this pass can handle in one call.
    USE_FILTER_TYPE use_filter_type = USE_SINGLE;
    int cols_covered = 1;
    if ((x & 3) == 0 && (x + 3) < x_range && min_width >= 16) {
      // A column index that is a multiple of 4 with a minimum width of 16
      // pixels means this column and the 3 to its right share the same tx
      // blocks, since a 16-wide block can only start every 4 mi units.
      use_filter_type = USE_QUAD;
      cols_covered = 4;
    } else if ((x & 1) == 0 && (x + 1) < x_range && min_width >= 8) {
      // An even column index with a minimum width of 8 pixels means this
      // column and the one to its right share the same tx blocks, since an
      // 8-wide block can only start every 2 mi units.
      use_filter_type = USE_DUAL;
      cols_covered = 2;
    }

    // Walk down the column, filtering the top edge of each transform block.
    AV1_DEBLOCKING_PARAMETERS *cur_params = params_buf;
    TX_SIZE *cur_tx = tx_buf;
    int y = 0;
    while (y < y_range) {
      if (*cur_tx == TX_INVALID) {
        // No usable transform size: skip filtering and step by one 4x4 unit.
        cur_params->filter_length = 0;
        *cur_tx = TX_4X4;
      }

      const int offset = y * MI_SIZE * stride + x * MI_SIZE;
      if (!joint_filter_chroma) {
        uint8_t *dst_ptr = plane_ptr->dst.buf + offset;
        filter_horz(dst_ptr, stride, cur_params, cm->seq_params,
                    use_filter_type);
      } else {
        uint8_t *u_dst = plane_ptr[0].dst.buf + offset;
        uint8_t *v_dst = plane_ptr[1].dst.buf + offset;
        filter_horz_chroma(u_dst, v_dst, stride, cur_params, cm->seq_params,
                           use_filter_type);
      }

      // Jump to the first mi row of the next transform block.
      const int step = tx_size_high_unit[*cur_tx];
      y += step;
      cur_params += step;
      cur_tx += step;
    }

    x += cols_covered;
  }
}
2100