• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 #include <atomic>
15 
16 #include "src/post_filter.h"
17 #include "src/utils/blocking_counter.h"
18 
19 namespace libgav1 {
20 namespace {
21 
HevThresh(int level)22 constexpr uint8_t HevThresh(int level) { return DivideBy16(level); }
23 
24 // GetLoopFilterSize* functions depend on this exact ordering of the
25 // LoopFilterSize enums.
26 static_assert(dsp::kLoopFilterSize4 == 0, "");
27 static_assert(dsp::kLoopFilterSize6 == 1, "");
28 static_assert(dsp::kLoopFilterSize8 == 2, "");
29 static_assert(dsp::kLoopFilterSize14 == 3, "");
30 
GetLoopFilterSizeY(int filter_length)31 dsp::LoopFilterSize GetLoopFilterSizeY(int filter_length) {
32   // |filter_length| must be a power of 2.
33   assert((filter_length & (filter_length - 1)) == 0);
34   // This code is the branch free equivalent of:
35   //   if (filter_length == 4) return kLoopFilterSize4;
36   //   if (filter_length == 8) return kLoopFilterSize8;
37   //   return kLoopFilterSize14;
38   return static_cast<dsp::LoopFilterSize>(
39       MultiplyBy2(static_cast<int>(filter_length > 4)) +
40       static_cast<int>(filter_length > 8));
41 }
42 
GetLoopFilterSizeUV(int filter_length)43 constexpr dsp::LoopFilterSize GetLoopFilterSizeUV(int filter_length) {
44   // For U & V planes, size is kLoopFilterSize4 if |filter_length| is 4,
45   // otherwise size is kLoopFilterSize6.
46   return static_cast<dsp::LoopFilterSize>(filter_length != 4);
47 }
48 
NonBlockBorderNeedsFilter(const BlockParameters & bp,int filter_id,uint8_t * const level)49 bool NonBlockBorderNeedsFilter(const BlockParameters& bp, int filter_id,
50                                uint8_t* const level) {
51   if (bp.deblock_filter_level[filter_id] == 0 || (bp.skip && bp.is_inter)) {
52     return false;
53   }
54   *level = bp.deblock_filter_level[filter_id];
55   return true;
56 }
57 
58 // 7.14.5.
ComputeDeblockFilterLevelsHelper(const ObuFrameHeader & frame_header,int segment_id,int level_index,const int8_t delta_lf[kFrameLfCount],uint8_t deblock_filter_levels[kNumReferenceFrameTypes][2])59 void ComputeDeblockFilterLevelsHelper(
60     const ObuFrameHeader& frame_header, int segment_id, int level_index,
61     const int8_t delta_lf[kFrameLfCount],
62     uint8_t deblock_filter_levels[kNumReferenceFrameTypes][2]) {
63   const int delta = delta_lf[frame_header.delta_lf.multi ? level_index : 0];
64   uint8_t level = Clip3(frame_header.loop_filter.level[level_index] + delta, 0,
65                         kMaxLoopFilterValue);
66   const auto feature = static_cast<SegmentFeature>(
67       kSegmentFeatureLoopFilterYVertical + level_index);
68   level =
69       Clip3(level + frame_header.segmentation.feature_data[segment_id][feature],
70             0, kMaxLoopFilterValue);
71   if (!frame_header.loop_filter.delta_enabled) {
72     static_assert(sizeof(deblock_filter_levels[0][0]) == 1, "");
73     memset(deblock_filter_levels, level, kNumReferenceFrameTypes * 2);
74     return;
75   }
76   assert(frame_header.loop_filter.delta_enabled);
77   const int shift = level >> 5;
78   deblock_filter_levels[kReferenceFrameIntra][0] = Clip3(
79       level +
80           LeftShift(frame_header.loop_filter.ref_deltas[kReferenceFrameIntra],
81                     shift),
82       0, kMaxLoopFilterValue);
83   // deblock_filter_levels[kReferenceFrameIntra][1] is never used. So it does
84   // not have to be populated.
85   for (int reference_frame = kReferenceFrameIntra + 1;
86        reference_frame < kNumReferenceFrameTypes; ++reference_frame) {
87     for (int mode_id = 0; mode_id < 2; ++mode_id) {
88       deblock_filter_levels[reference_frame][mode_id] = Clip3(
89           level +
90               LeftShift(frame_header.loop_filter.ref_deltas[reference_frame] +
91                             frame_header.loop_filter.mode_deltas[mode_id],
92                         shift),
93           0, kMaxLoopFilterValue);
94     }
95   }
96 }
97 
98 }  // namespace
99 
ComputeDeblockFilterLevels(const int8_t delta_lf[kFrameLfCount],uint8_t deblock_filter_levels[kMaxSegments][kFrameLfCount][kNumReferenceFrameTypes][2]) const100 void PostFilter::ComputeDeblockFilterLevels(
101     const int8_t delta_lf[kFrameLfCount],
102     uint8_t deblock_filter_levels[kMaxSegments][kFrameLfCount]
103                                  [kNumReferenceFrameTypes][2]) const {
104   if (!DoDeblock()) return;
105   for (int segment_id = 0;
106        segment_id < (frame_header_.segmentation.enabled ? kMaxSegments : 1);
107        ++segment_id) {
108     int level_index = 0;
109     for (; level_index < 2; ++level_index) {
110       ComputeDeblockFilterLevelsHelper(
111           frame_header_, segment_id, level_index, delta_lf,
112           deblock_filter_levels[segment_id][level_index]);
113     }
114     for (; level_index < kFrameLfCount; ++level_index) {
115       if (frame_header_.loop_filter.level[level_index] != 0) {
116         ComputeDeblockFilterLevelsHelper(
117             frame_header_, segment_id, level_index, delta_lf,
118             deblock_filter_levels[segment_id][level_index]);
119       }
120     }
121   }
122 }
123 
GetHorizontalDeblockFilterEdgeInfo(int row4x4,int column4x4,uint8_t * level,int * step,int * filter_length) const124 bool PostFilter::GetHorizontalDeblockFilterEdgeInfo(int row4x4, int column4x4,
125                                                     uint8_t* level, int* step,
126                                                     int* filter_length) const {
127   *step = kTransformHeight[inter_transform_sizes_[row4x4][column4x4]];
128   if (row4x4 == 0) return false;
129 
130   const BlockParameters* bp = block_parameters_.Find(row4x4, column4x4);
131   const int row4x4_prev = row4x4 - 1;
132   assert(row4x4_prev >= 0);
133   const BlockParameters* bp_prev =
134       block_parameters_.Find(row4x4_prev, column4x4);
135 
136   if (bp == bp_prev) {
137     // Not a border.
138     if (!NonBlockBorderNeedsFilter(*bp, 1, level)) return false;
139   } else {
140     const uint8_t level_this = bp->deblock_filter_level[1];
141     *level = level_this;
142     if (level_this == 0) {
143       const uint8_t level_prev = bp_prev->deblock_filter_level[1];
144       if (level_prev == 0) return false;
145       *level = level_prev;
146     }
147   }
148   const int step_prev =
149       kTransformHeight[inter_transform_sizes_[row4x4_prev][column4x4]];
150   *filter_length = std::min(*step, step_prev);
151   return true;
152 }
153 
GetHorizontalDeblockFilterEdgeInfoUV(int row4x4,int column4x4,uint8_t * level_u,uint8_t * level_v,int * step,int * filter_length) const154 void PostFilter::GetHorizontalDeblockFilterEdgeInfoUV(
155     int row4x4, int column4x4, uint8_t* level_u, uint8_t* level_v, int* step,
156     int* filter_length) const {
157   const int subsampling_x = subsampling_x_[kPlaneU];
158   const int subsampling_y = subsampling_y_[kPlaneU];
159   row4x4 = GetDeblockPosition(row4x4, subsampling_y);
160   column4x4 = GetDeblockPosition(column4x4, subsampling_x);
161   const BlockParameters* bp = block_parameters_.Find(row4x4, column4x4);
162   *level_u = 0;
163   *level_v = 0;
164   *step = kTransformHeight[bp->uv_transform_size];
165   if (row4x4 == subsampling_y) {
166     return;
167   }
168 
169   bool need_filter_u = frame_header_.loop_filter.level[kPlaneU + 1] != 0;
170   bool need_filter_v = frame_header_.loop_filter.level[kPlaneV + 1] != 0;
171   assert(need_filter_u || need_filter_v);
172   const int filter_id_u =
173       kDeblockFilterLevelIndex[kPlaneU][kLoopFilterTypeHorizontal];
174   const int filter_id_v =
175       kDeblockFilterLevelIndex[kPlaneV][kLoopFilterTypeHorizontal];
176   const int row4x4_prev = row4x4 - (1 << subsampling_y);
177   assert(row4x4_prev >= 0);
178   const BlockParameters* bp_prev =
179       block_parameters_.Find(row4x4_prev, column4x4);
180 
181   if (bp == bp_prev) {
182     // Not a border.
183     const bool skip = bp->skip && bp->is_inter;
184     need_filter_u =
185         need_filter_u && bp->deblock_filter_level[filter_id_u] != 0 && !skip;
186     need_filter_v =
187         need_filter_v && bp->deblock_filter_level[filter_id_v] != 0 && !skip;
188     if (!need_filter_u && !need_filter_v) return;
189     if (need_filter_u) *level_u = bp->deblock_filter_level[filter_id_u];
190     if (need_filter_v) *level_v = bp->deblock_filter_level[filter_id_v];
191     *filter_length = *step;
192     return;
193   }
194 
195   // It is a border.
196   if (need_filter_u) {
197     const uint8_t level_u_this = bp->deblock_filter_level[filter_id_u];
198     *level_u = level_u_this;
199     if (level_u_this == 0) {
200       *level_u = bp_prev->deblock_filter_level[filter_id_u];
201     }
202   }
203   if (need_filter_v) {
204     const uint8_t level_v_this = bp->deblock_filter_level[filter_id_v];
205     *level_v = level_v_this;
206     if (level_v_this == 0) {
207       *level_v = bp_prev->deblock_filter_level[filter_id_v];
208     }
209   }
210   const int step_prev = kTransformHeight[bp_prev->uv_transform_size];
211   *filter_length = std::min(*step, step_prev);
212 }
213 
GetVerticalDeblockFilterEdgeInfo(int row4x4,int column4x4,BlockParameters * const * bp_ptr,uint8_t * level,int * step,int * filter_length) const214 bool PostFilter::GetVerticalDeblockFilterEdgeInfo(
215     int row4x4, int column4x4, BlockParameters* const* bp_ptr, uint8_t* level,
216     int* step, int* filter_length) const {
217   const BlockParameters* bp = *bp_ptr;
218   *step = kTransformWidth[inter_transform_sizes_[row4x4][column4x4]];
219   if (column4x4 == 0) return false;
220 
221   const int filter_id = 0;
222   const int column4x4_prev = column4x4 - 1;
223   assert(column4x4_prev >= 0);
224   const BlockParameters* bp_prev = *(bp_ptr - 1);
225   if (bp == bp_prev) {
226     // Not a border.
227     if (!NonBlockBorderNeedsFilter(*bp, filter_id, level)) return false;
228   } else {
229     // It is a border.
230     const uint8_t level_this = bp->deblock_filter_level[filter_id];
231     *level = level_this;
232     if (level_this == 0) {
233       const uint8_t level_prev = bp_prev->deblock_filter_level[filter_id];
234       if (level_prev == 0) return false;
235       *level = level_prev;
236     }
237   }
238   const int step_prev =
239       kTransformWidth[inter_transform_sizes_[row4x4][column4x4_prev]];
240   *filter_length = std::min(*step, step_prev);
241   return true;
242 }
243 
GetVerticalDeblockFilterEdgeInfoUV(int column4x4,BlockParameters * const * bp_ptr,uint8_t * level_u,uint8_t * level_v,int * step,int * filter_length) const244 void PostFilter::GetVerticalDeblockFilterEdgeInfoUV(
245     int column4x4, BlockParameters* const* bp_ptr, uint8_t* level_u,
246     uint8_t* level_v, int* step, int* filter_length) const {
247   const int subsampling_x = subsampling_x_[kPlaneU];
248   column4x4 = GetDeblockPosition(column4x4, subsampling_x);
249   const BlockParameters* bp = *bp_ptr;
250   *level_u = 0;
251   *level_v = 0;
252   *step = kTransformWidth[bp->uv_transform_size];
253   if (column4x4 == subsampling_x) {
254     return;
255   }
256 
257   bool need_filter_u = frame_header_.loop_filter.level[kPlaneU + 1] != 0;
258   bool need_filter_v = frame_header_.loop_filter.level[kPlaneV + 1] != 0;
259   assert(need_filter_u || need_filter_v);
260   const int filter_id_u =
261       kDeblockFilterLevelIndex[kPlaneU][kLoopFilterTypeVertical];
262   const int filter_id_v =
263       kDeblockFilterLevelIndex[kPlaneV][kLoopFilterTypeVertical];
264   const BlockParameters* bp_prev = *(bp_ptr - (1 << subsampling_x));
265 
266   if (bp == bp_prev) {
267     // Not a border.
268     const bool skip = bp->skip && bp->is_inter;
269     need_filter_u =
270         need_filter_u && bp->deblock_filter_level[filter_id_u] != 0 && !skip;
271     need_filter_v =
272         need_filter_v && bp->deblock_filter_level[filter_id_v] != 0 && !skip;
273     if (!need_filter_u && !need_filter_v) return;
274     if (need_filter_u) *level_u = bp->deblock_filter_level[filter_id_u];
275     if (need_filter_v) *level_v = bp->deblock_filter_level[filter_id_v];
276     *filter_length = *step;
277     return;
278   }
279 
280   // It is a border.
281   if (need_filter_u) {
282     const uint8_t level_u_this = bp->deblock_filter_level[filter_id_u];
283     *level_u = level_u_this;
284     if (level_u_this == 0) {
285       *level_u = bp_prev->deblock_filter_level[filter_id_u];
286     }
287   }
288   if (need_filter_v) {
289     const uint8_t level_v_this = bp->deblock_filter_level[filter_id_v];
290     *level_v = level_v_this;
291     if (level_v_this == 0) {
292       *level_v = bp_prev->deblock_filter_level[filter_id_v];
293     }
294   }
295   const int step_prev = kTransformWidth[bp_prev->uv_transform_size];
296   *filter_length = std::min(*step, step_prev);
297 }
298 
HorizontalDeblockFilter(int row4x4_start,int column4x4_start)299 void PostFilter::HorizontalDeblockFilter(int row4x4_start,
300                                          int column4x4_start) {
301   const int column_step = 1;
302   const size_t src_step = MultiplyBy4(pixel_size_);
303   const ptrdiff_t src_stride = frame_buffer_.stride(kPlaneY);
304   uint8_t* src = GetSourceBuffer(kPlaneY, row4x4_start, column4x4_start);
305   int row_step;
306   uint8_t level;
307   int filter_length;
308 
309   for (int column4x4 = 0; column4x4 < kNum4x4InLoopFilterUnit &&
310                           MultiplyBy4(column4x4_start + column4x4) < width_;
311        column4x4 += column_step, src += src_step) {
312     uint8_t* src_row = src;
313     for (int row4x4 = 0; row4x4 < kNum4x4InLoopFilterUnit &&
314                          MultiplyBy4(row4x4_start + row4x4) < height_;
315          row4x4 += row_step) {
316       const bool need_filter = GetHorizontalDeblockFilterEdgeInfo(
317           row4x4_start + row4x4, column4x4_start + column4x4, &level, &row_step,
318           &filter_length);
319       if (need_filter) {
320         const dsp::LoopFilterSize size = GetLoopFilterSizeY(filter_length);
321         dsp_.loop_filters[size][kLoopFilterTypeHorizontal](
322             src_row, src_stride, outer_thresh_[level], inner_thresh_[level],
323             HevThresh(level));
324       }
325       // TODO(chengchen): use shifts instead of multiplication.
326       src_row += row_step * src_stride;
327       row_step = DivideBy4(row_step);
328     }
329   }
330 
331   if (needs_chroma_deblock_) {
332     const int8_t subsampling_x = subsampling_x_[kPlaneU];
333     const int8_t subsampling_y = subsampling_y_[kPlaneU];
334     const int column_step = 1 << subsampling_x;
335     const ptrdiff_t src_stride_u = frame_buffer_.stride(kPlaneU);
336     const ptrdiff_t src_stride_v = frame_buffer_.stride(kPlaneV);
337     uint8_t* src_u = GetSourceBuffer(kPlaneU, row4x4_start, column4x4_start);
338     uint8_t* src_v = GetSourceBuffer(kPlaneV, row4x4_start, column4x4_start);
339     int row_step;
340     uint8_t level_u;
341     uint8_t level_v;
342     int filter_length;
343 
344     for (int column4x4 = 0; column4x4 < kNum4x4InLoopFilterUnit &&
345                             MultiplyBy4(column4x4_start + column4x4) < width_;
346          column4x4 += column_step, src_u += src_step, src_v += src_step) {
347       uint8_t* src_row_u = src_u;
348       uint8_t* src_row_v = src_v;
349       for (int row4x4 = 0; row4x4 < kNum4x4InLoopFilterUnit &&
350                            MultiplyBy4(row4x4_start + row4x4) < height_;
351            row4x4 += row_step) {
352         GetHorizontalDeblockFilterEdgeInfoUV(
353             row4x4_start + row4x4, column4x4_start + column4x4, &level_u,
354             &level_v, &row_step, &filter_length);
355         if (level_u != 0) {
356           const dsp::LoopFilterSize size = GetLoopFilterSizeUV(filter_length);
357           dsp_.loop_filters[size][kLoopFilterTypeHorizontal](
358               src_row_u, src_stride_u, outer_thresh_[level_u],
359               inner_thresh_[level_u], HevThresh(level_u));
360         }
361         if (level_v != 0) {
362           const dsp::LoopFilterSize size = GetLoopFilterSizeUV(filter_length);
363           dsp_.loop_filters[size][kLoopFilterTypeHorizontal](
364               src_row_v, src_stride_v, outer_thresh_[level_v],
365               inner_thresh_[level_v], HevThresh(level_v));
366         }
367         src_row_u += row_step * src_stride_u;
368         src_row_v += row_step * src_stride_v;
369         row_step = DivideBy4(row_step << subsampling_y);
370       }
371     }
372   }
373 }
374 
VerticalDeblockFilter(int row4x4_start,int column4x4_start)375 void PostFilter::VerticalDeblockFilter(int row4x4_start, int column4x4_start) {
376   const ptrdiff_t row_stride = MultiplyBy4(frame_buffer_.stride(kPlaneY));
377   const ptrdiff_t src_stride = frame_buffer_.stride(kPlaneY);
378   uint8_t* src = GetSourceBuffer(kPlaneY, row4x4_start, column4x4_start);
379   int column_step;
380   uint8_t level;
381   int filter_length;
382 
383   BlockParameters* const* bp_row_base =
384       block_parameters_.Address(row4x4_start, column4x4_start);
385   const int bp_stride = block_parameters_.columns4x4();
386   for (int row4x4 = 0; row4x4 < kNum4x4InLoopFilterUnit &&
387                        MultiplyBy4(row4x4_start + row4x4) < height_;
388        ++row4x4, src += row_stride, bp_row_base += bp_stride) {
389     uint8_t* src_row = src;
390     BlockParameters* const* bp = bp_row_base;
391     for (int column4x4 = 0; column4x4 < kNum4x4InLoopFilterUnit &&
392                             MultiplyBy4(column4x4_start + column4x4) < width_;
393          column4x4 += column_step, bp += column_step) {
394       const bool need_filter = GetVerticalDeblockFilterEdgeInfo(
395           row4x4_start + row4x4, column4x4_start + column4x4, bp, &level,
396           &column_step, &filter_length);
397       if (need_filter) {
398         const dsp::LoopFilterSize size = GetLoopFilterSizeY(filter_length);
399         dsp_.loop_filters[size][kLoopFilterTypeVertical](
400             src_row, src_stride, outer_thresh_[level], inner_thresh_[level],
401             HevThresh(level));
402       }
403       src_row += column_step * pixel_size_;
404       column_step = DivideBy4(column_step);
405     }
406   }
407 
408   if (needs_chroma_deblock_) {
409     const int8_t subsampling_x = subsampling_x_[kPlaneU];
410     const int8_t subsampling_y = subsampling_y_[kPlaneU];
411     const int row_step = 1 << subsampling_y;
412     uint8_t* src_u = GetSourceBuffer(kPlaneU, row4x4_start, column4x4_start);
413     uint8_t* src_v = GetSourceBuffer(kPlaneV, row4x4_start, column4x4_start);
414     const ptrdiff_t src_stride_u = frame_buffer_.stride(kPlaneU);
415     const ptrdiff_t src_stride_v = frame_buffer_.stride(kPlaneV);
416     const ptrdiff_t row_stride_u = MultiplyBy4(frame_buffer_.stride(kPlaneU));
417     const ptrdiff_t row_stride_v = MultiplyBy4(frame_buffer_.stride(kPlaneV));
418     const LoopFilterType type = kLoopFilterTypeVertical;
419     int column_step;
420     uint8_t level_u;
421     uint8_t level_v;
422     int filter_length;
423 
424     BlockParameters* const* bp_row_base = block_parameters_.Address(
425         GetDeblockPosition(row4x4_start, subsampling_y),
426         GetDeblockPosition(column4x4_start, subsampling_x));
427     const int bp_stride = block_parameters_.columns4x4() * row_step;
428     for (int row4x4 = 0; row4x4 < kNum4x4InLoopFilterUnit &&
429                          MultiplyBy4(row4x4_start + row4x4) < height_;
430          row4x4 += row_step, src_u += row_stride_u, src_v += row_stride_v,
431              bp_row_base += bp_stride) {
432       uint8_t* src_row_u = src_u;
433       uint8_t* src_row_v = src_v;
434       BlockParameters* const* bp = bp_row_base;
435       for (int column4x4 = 0; column4x4 < kNum4x4InLoopFilterUnit &&
436                               MultiplyBy4(column4x4_start + column4x4) < width_;
437            column4x4 += column_step, bp += column_step) {
438         GetVerticalDeblockFilterEdgeInfoUV(column4x4_start + column4x4, bp,
439                                            &level_u, &level_v, &column_step,
440                                            &filter_length);
441         if (level_u != 0) {
442           const dsp::LoopFilterSize size = GetLoopFilterSizeUV(filter_length);
443           dsp_.loop_filters[size][type](
444               src_row_u, src_stride_u, outer_thresh_[level_u],
445               inner_thresh_[level_u], HevThresh(level_u));
446         }
447         if (level_v != 0) {
448           const dsp::LoopFilterSize size = GetLoopFilterSizeUV(filter_length);
449           dsp_.loop_filters[size][type](
450               src_row_v, src_stride_v, outer_thresh_[level_v],
451               inner_thresh_[level_v], HevThresh(level_v));
452         }
453         src_row_u += column_step * pixel_size_;
454         src_row_v += column_step * pixel_size_;
455         column_step = DivideBy4(column_step << subsampling_x);
456       }
457     }
458   }
459 }
460 
ApplyDeblockFilterForOneSuperBlockRow(int row4x4_start,int sb4x4)461 void PostFilter::ApplyDeblockFilterForOneSuperBlockRow(int row4x4_start,
462                                                        int sb4x4) {
463   assert(row4x4_start >= 0);
464   assert(DoDeblock());
465   for (int y = 0; y < sb4x4; y += 16) {
466     const int row4x4 = row4x4_start + y;
467     if (row4x4 >= frame_header_.rows4x4) break;
468     int column4x4;
469     for (column4x4 = 0; column4x4 < frame_header_.columns4x4;
470          column4x4 += kNum4x4InLoopFilterUnit) {
471       // First apply vertical filtering
472       VerticalDeblockFilter(row4x4, column4x4);
473 
474       // Delay one superblock to apply horizontal filtering.
475       if (column4x4 != 0) {
476         HorizontalDeblockFilter(row4x4, column4x4 - kNum4x4InLoopFilterUnit);
477       }
478     }
479     // Horizontal filtering for the last 64x64 block.
480     HorizontalDeblockFilter(row4x4, column4x4 - kNum4x4InLoopFilterUnit);
481   }
482 }
483 
DeblockFilterWorker(int jobs_per_plane,const Plane *,int,std::atomic<int> * job_counter,DeblockFilter deblock_filter)484 void PostFilter::DeblockFilterWorker(int jobs_per_plane,
485                                      const Plane* /*planes*/,
486                                      int /*num_planes*/,
487                                      std::atomic<int>* job_counter,
488                                      DeblockFilter deblock_filter) {
489   const int total_jobs = jobs_per_plane;
490   int job_index;
491   while ((job_index = job_counter->fetch_add(1, std::memory_order_relaxed)) <
492          total_jobs) {
493     const int row_unit = job_index % jobs_per_plane;
494     const int row4x4 = row_unit * kNum4x4InLoopFilterUnit;
495     for (int column4x4 = 0; column4x4 < frame_header_.columns4x4;
496          column4x4 += kNum4x4InLoopFilterUnit) {
497       (this->*deblock_filter)(row4x4, column4x4);
498     }
499   }
500 }
501 
ApplyDeblockFilterThreaded()502 void PostFilter::ApplyDeblockFilterThreaded() {
503   const int jobs_per_plane = DivideBy16(frame_header_.rows4x4 + 15);
504   const int num_workers = thread_pool_->num_threads();
505   std::array<Plane, kMaxPlanes> planes;
506   planes[0] = kPlaneY;
507   int num_planes = 1;
508   for (int plane = kPlaneU; plane < planes_; ++plane) {
509     if (frame_header_.loop_filter.level[plane + 1] != 0) {
510       planes[num_planes++] = static_cast<Plane>(plane);
511     }
512   }
513   // The vertical filters are not dependent on each other. So simply schedule
514   // them for all possible rows.
515   //
516   // The horizontal filter for a row/column depends on the vertical filter being
517   // finished for the blocks to the top and to the right. To work around
518   // this synchronization, we simply wait for the vertical filter to finish for
519   // all rows. Now, the horizontal filters can also be scheduled
520   // unconditionally similar to the vertical filters.
521   //
522   // The only synchronization involved is to know when the each directional
523   // filter is complete for the entire frame.
524   for (const auto& type :
525        {kLoopFilterTypeVertical, kLoopFilterTypeHorizontal}) {
526     const DeblockFilter deblock_filter = deblock_filter_func_[type];
527     std::atomic<int> job_counter(0);
528     BlockingCounter pending_workers(num_workers);
529     for (int i = 0; i < num_workers; ++i) {
530       thread_pool_->Schedule([this, jobs_per_plane, &planes, num_planes,
531                               &job_counter, deblock_filter,
532                               &pending_workers]() {
533         DeblockFilterWorker(jobs_per_plane, planes.data(), num_planes,
534                             &job_counter, deblock_filter);
535         pending_workers.Decrement();
536       });
537     }
538     // Run the jobs on the current thread.
539     DeblockFilterWorker(jobs_per_plane, planes.data(), num_planes, &job_counter,
540                         deblock_filter);
541     // Wait for the threadpool jobs to finish.
542     pending_workers.Wait();
543   }
544 }
545 
ApplyDeblockFilter(LoopFilterType loop_filter_type,int row4x4_start,int column4x4_start,int column4x4_end,int sb4x4)546 void PostFilter::ApplyDeblockFilter(LoopFilterType loop_filter_type,
547                                     int row4x4_start, int column4x4_start,
548                                     int column4x4_end, int sb4x4) {
549   assert(row4x4_start >= 0);
550   assert(DoDeblock());
551 
552   column4x4_end = std::min(column4x4_end, frame_header_.columns4x4);
553   if (column4x4_start >= column4x4_end) return;
554 
555   const DeblockFilter deblock_filter = deblock_filter_func_[loop_filter_type];
556   const int sb_height4x4 =
557       std::min(sb4x4, frame_header_.rows4x4 - row4x4_start);
558   for (int y = 0; y < sb_height4x4; y += kNum4x4InLoopFilterUnit) {
559     const int row4x4 = row4x4_start + y;
560     for (int column4x4 = column4x4_start; column4x4 < column4x4_end;
561          column4x4 += kNum4x4InLoopFilterUnit) {
562       (this->*deblock_filter)(row4x4, column4x4);
563     }
564   }
565 }
566 
567 }  // namespace libgav1
568