• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/post_filter.h"
16 
17 #include <algorithm>
18 #include <array>
19 #include <atomic>
20 #include <cassert>
21 #include <cstddef>
22 #include <cstdint>
23 #include <cstring>
24 
25 #include "src/dsp/constants.h"
26 #include "src/dsp/dsp.h"
27 #include "src/utils/array_2d.h"
28 #include "src/utils/blocking_counter.h"
29 #include "src/utils/common.h"
30 #include "src/utils/compiler_attributes.h"
31 #include "src/utils/constants.h"
32 #include "src/utils/memory.h"
33 #include "src/utils/types.h"
34 
35 namespace libgav1 {
36 namespace {
37 
38 // Import all the constants in the anonymous namespace.
39 #include "src/post_filter/deblock_thresholds.inc"
40 
41 // Row indices of loop restoration border. This is used to populate the
42 // |loop_restoration_border_| when either cdef is on or multithreading is
43 // enabled. The dimension is subsampling_y.
44 constexpr int kLoopRestorationBorderRows[2] = {54, 26};
45 
46 }  // namespace
47 
PostFilter(const ObuFrameHeader & frame_header,const ObuSequenceHeader & sequence_header,FrameScratchBuffer * const frame_scratch_buffer,YuvBuffer * const frame_buffer,const dsp::Dsp * dsp,int do_post_filter_mask)48 PostFilter::PostFilter(const ObuFrameHeader& frame_header,
49                        const ObuSequenceHeader& sequence_header,
50                        FrameScratchBuffer* const frame_scratch_buffer,
51                        YuvBuffer* const frame_buffer, const dsp::Dsp* dsp,
52                        int do_post_filter_mask)
53     : frame_header_(frame_header),
54       loop_restoration_(frame_header.loop_restoration),
55       dsp_(*dsp),
56       bitdepth_(sequence_header.color_config.bitdepth),
57       subsampling_x_{0, sequence_header.color_config.subsampling_x,
58                      sequence_header.color_config.subsampling_x},
59       subsampling_y_{0, sequence_header.color_config.subsampling_y,
60                      sequence_header.color_config.subsampling_y},
61       planes_(sequence_header.color_config.is_monochrome ? kMaxPlanesMonochrome
62                                                          : kMaxPlanes),
63       pixel_size_log2_(static_cast<int>((bitdepth_ == 8) ? sizeof(uint8_t)
64                                                          : sizeof(uint16_t)) -
65                        1),
66       inner_thresh_(kInnerThresh[frame_header.loop_filter.sharpness]),
67       outer_thresh_(kOuterThresh[frame_header.loop_filter.sharpness]),
68       needs_chroma_deblock_(frame_header.loop_filter.level[kPlaneU + 1] != 0 ||
69                             frame_header.loop_filter.level[kPlaneV + 1] != 0),
70       do_cdef_(DoCdef(frame_header, do_post_filter_mask)),
71       do_deblock_(DoDeblock(frame_header, do_post_filter_mask)),
72       do_restoration_(
73           DoRestoration(loop_restoration_, do_post_filter_mask, planes_)),
74       do_superres_(DoSuperRes(frame_header, do_post_filter_mask)),
75       cdef_index_(frame_scratch_buffer->cdef_index),
76       cdef_skip_(frame_scratch_buffer->cdef_skip),
77       inter_transform_sizes_(frame_scratch_buffer->inter_transform_sizes),
78       restoration_info_(&frame_scratch_buffer->loop_restoration_info),
79       superres_coefficients_{
80           frame_scratch_buffer->superres_coefficients[kPlaneTypeY].get(),
81           frame_scratch_buffer
82               ->superres_coefficients
83                   [(sequence_header.color_config.is_monochrome ||
84                     sequence_header.color_config.subsampling_x == 0)
85                        ? kPlaneTypeY
86                        : kPlaneTypeUV]
87               .get()},
88       superres_line_buffer_(frame_scratch_buffer->superres_line_buffer),
89       block_parameters_(frame_scratch_buffer->block_parameters_holder),
90       frame_buffer_(*frame_buffer),
91       cdef_border_(frame_scratch_buffer->cdef_border),
92       loop_restoration_border_(frame_scratch_buffer->loop_restoration_border),
93       thread_pool_(
94           frame_scratch_buffer->threading_strategy.post_filter_thread_pool()) {
95   const int8_t zero_delta_lf[kFrameLfCount] = {};
96   ComputeDeblockFilterLevels(zero_delta_lf, deblock_filter_levels_);
97   if (DoSuperRes()) {
98     int plane = kPlaneY;
99     const int width = frame_header_.width;
100     const int upscaled_width_fh = frame_header_.upscaled_width;
101     do {
102       const int downscaled_width =
103           SubsampledValue(width, subsampling_x_[plane]);
104       const int upscaled_width =
105           SubsampledValue(upscaled_width_fh, subsampling_x_[plane]);
106       const int superres_width = downscaled_width << kSuperResScaleBits;
107       super_res_info_[plane].step =
108           (superres_width + upscaled_width / 2) / upscaled_width;
109       const int error =
110           super_res_info_[plane].step * upscaled_width - superres_width;
111       super_res_info_[plane].initial_subpixel_x =
112           ((-((upscaled_width - downscaled_width) << (kSuperResScaleBits - 1)) +
113             DivideBy2(upscaled_width)) /
114                upscaled_width +
115            (1 << (kSuperResExtraBits - 1)) - error / 2) &
116           kSuperResScaleMask;
117       super_res_info_[plane].upscaled_width = upscaled_width;
118     } while (++plane < planes_);
119     if (dsp->super_res_coefficients != nullptr) {
120       int plane = kPlaneY;
121       const int number_loops = (superres_coefficients_[kPlaneTypeY] ==
122                                 superres_coefficients_[kPlaneTypeUV])
123                                    ? kMaxPlanesMonochrome
124                                    : static_cast<int>(kNumPlaneTypes);
125       do {
126         dsp->super_res_coefficients(super_res_info_[plane].upscaled_width,
127                                     super_res_info_[plane].initial_subpixel_x,
128                                     super_res_info_[plane].step,
129                                     superres_coefficients_[plane]);
130       } while (++plane < number_loops);
131     }
132   }
133   int plane = kPlaneY;
134   do {
135     loop_restoration_buffer_[plane] = frame_buffer_.data(plane);
136     cdef_buffer_[plane] = frame_buffer_.data(plane);
137     superres_buffer_[plane] = frame_buffer_.data(plane);
138     source_buffer_[plane] = frame_buffer_.data(plane);
139   } while (++plane < planes_);
140   if (DoCdef() || DoRestoration() || DoSuperRes()) {
141     plane = kPlaneY;
142     const int pixel_size_log2 = pixel_size_log2_;
143     do {
144       int horizontal_shift = 0;
145       int vertical_shift = 0;
146       if (DoRestoration() &&
147           loop_restoration_.type[plane] != kLoopRestorationTypeNone) {
148         horizontal_shift += frame_buffer_.alignment();
149         if (!DoCdef() && thread_pool_ == nullptr) {
150           vertical_shift += kRestorationVerticalBorder;
151         }
152         superres_buffer_[plane] +=
153             vertical_shift * frame_buffer_.stride(plane) +
154             (horizontal_shift << pixel_size_log2);
155       }
156       if (DoSuperRes()) {
157         vertical_shift += kSuperResVerticalBorder;
158       }
159       cdef_buffer_[plane] += vertical_shift * frame_buffer_.stride(plane) +
160                              (horizontal_shift << pixel_size_log2);
161       if (DoCdef() && thread_pool_ == nullptr) {
162         horizontal_shift += frame_buffer_.alignment();
163         vertical_shift += kCdefBorder;
164       }
165       assert(horizontal_shift <= frame_buffer_.right_border(plane));
166       assert(vertical_shift <= frame_buffer_.bottom_border(plane));
167       source_buffer_[plane] += vertical_shift * frame_buffer_.stride(plane) +
168                                (horizontal_shift << pixel_size_log2);
169     } while (++plane < planes_);
170   }
171 }
172 
173 // The following example illustrates how ExtendFrame() extends a frame.
174 // Suppose the frame width is 8 and height is 4, and left, right, top, and
175 // bottom are all equal to 3.
176 //
177 // Before:
178 //
179 //       ABCDEFGH
180 //       IJKLMNOP
181 //       QRSTUVWX
182 //       YZabcdef
183 //
184 // After:
185 //
186 //   AAA|ABCDEFGH|HHH  [3]
187 //   AAA|ABCDEFGH|HHH
188 //   AAA|ABCDEFGH|HHH
189 //   ---+--------+---
190 //   AAA|ABCDEFGH|HHH  [1]
191 //   III|IJKLMNOP|PPP
192 //   QQQ|QRSTUVWX|XXX
193 //   YYY|YZabcdef|fff
194 //   ---+--------+---
195 //   YYY|YZabcdef|fff  [2]
196 //   YYY|YZabcdef|fff
197 //   YYY|YZabcdef|fff
198 //
199 // ExtendFrame() first extends the rows to the left and to the right[1]. Then
200 // it copies the extended last row to the bottom borders[2]. Finally it copies
201 // the extended first row to the top borders[3].
202 // static
203 template <typename Pixel>
ExtendFrame(Pixel * const frame_start,const int width,const int height,const ptrdiff_t stride,const int left,const int right,const int top,const int bottom)204 void PostFilter::ExtendFrame(Pixel* const frame_start, const int width,
205                              const int height, const ptrdiff_t stride,
206                              const int left, const int right, const int top,
207                              const int bottom) {
208   Pixel* src = frame_start;
209   // Copy to left and right borders.
210   int y = height;
211   do {
212     ExtendLine<Pixel>(src, width, left, right);
213     src += stride;
214   } while (--y != 0);
215   // Copy to bottom borders. For performance we copy |stride| pixels
216   // (including some padding pixels potentially) in each row, ending at the
217   // bottom right border pixel. In the diagram the asterisks indicate padding
218   // pixels.
219   //
220   // |<--- stride --->|
221   // **YYY|YZabcdef|fff <-- Copy from the extended last row.
222   // -----+--------+---
223   // **YYY|YZabcdef|fff
224   // **YYY|YZabcdef|fff
225   // **YYY|YZabcdef|fff <-- bottom right border pixel
226   assert(src == frame_start + height * stride);
227   Pixel* dst = src - left;
228   src = dst - stride;
229   for (int y = 0; y < bottom; ++y) {
230     memcpy(dst, src, sizeof(Pixel) * stride);
231     dst += stride;
232   }
233   // Copy to top borders. For performance we copy |stride| pixels (including
234   // some padding pixels potentially) in each row, starting from the top left
235   // border pixel. In the diagram the asterisks indicate padding pixels.
236   //
237   // +-- top left border pixel
238   // |
239   // v
240   // AAA|ABCDEFGH|HHH**
241   // AAA|ABCDEFGH|HHH**
242   // AAA|ABCDEFGH|HHH**
243   // ---+--------+-----
244   // AAA|ABCDEFGH|HHH** <-- Copy from the extended first row.
245   // |<--- stride --->|
246   src = frame_start - left;
247   dst = frame_start - left - top * stride;
248   for (int y = 0; y < top; ++y) {
249     memcpy(dst, src, sizeof(Pixel) * stride);
250     dst += stride;
251   }
252 }
253 
254 template void PostFilter::ExtendFrame<uint8_t>(uint8_t* const frame_start,
255                                                const int width,
256                                                const int height,
257                                                const ptrdiff_t stride,
258                                                const int left, const int right,
259                                                const int top, const int bottom);
260 
261 #if LIBGAV1_MAX_BITDEPTH >= 10
262 template void PostFilter::ExtendFrame<uint16_t>(
263     uint16_t* const frame_start, const int width, const int height,
264     const ptrdiff_t stride, const int left, const int right, const int top,
265     const int bottom);
266 #endif
267 
ExtendFrameBoundary(uint8_t * const frame_start,const int width,const int height,const ptrdiff_t stride,const int left,const int right,const int top,const int bottom) const268 void PostFilter::ExtendFrameBoundary(uint8_t* const frame_start,
269                                      const int width, const int height,
270                                      const ptrdiff_t stride, const int left,
271                                      const int right, const int top,
272                                      const int bottom) const {
273 #if LIBGAV1_MAX_BITDEPTH >= 10
274   if (bitdepth_ >= 10) {
275     ExtendFrame<uint16_t>(reinterpret_cast<uint16_t*>(frame_start), width,
276                           height, stride >> 1, left, right, top, bottom);
277     return;
278   }
279 #endif
280   ExtendFrame<uint8_t>(frame_start, width, height, stride, left, right, top,
281                        bottom);
282 }
283 
ExtendBordersForReferenceFrame()284 void PostFilter::ExtendBordersForReferenceFrame() {
285   if (frame_header_.refresh_frame_flags == 0) return;
286   const int upscaled_width = frame_header_.upscaled_width;
287   const int height = frame_header_.height;
288   int plane = kPlaneY;
289   do {
290     const int plane_width =
291         SubsampledValue(upscaled_width, subsampling_x_[plane]);
292     const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
293     assert(frame_buffer_.left_border(plane) >= kMinLeftBorderPixels &&
294            frame_buffer_.right_border(plane) >= kMinRightBorderPixels &&
295            frame_buffer_.top_border(plane) >= kMinTopBorderPixels &&
296            frame_buffer_.bottom_border(plane) >= kMinBottomBorderPixels);
297     // plane subsampling_x_ left_border
298     //   Y        N/A         64, 48
299     //  U,V        0          64, 48
300     //  U,V        1          32, 16
301     assert(frame_buffer_.left_border(plane) >= 16);
302     // The |left| argument to ExtendFrameBoundary() must be at least
303     // kMinLeftBorderPixels (13) for warp.
304     static_assert(16 >= kMinLeftBorderPixels, "");
305     ExtendFrameBoundary(
306         frame_buffer_.data(plane), plane_width, plane_height,
307         frame_buffer_.stride(plane), frame_buffer_.left_border(plane),
308         frame_buffer_.right_border(plane), frame_buffer_.top_border(plane),
309         frame_buffer_.bottom_border(plane));
310   } while (++plane < planes_);
311 }
312 
CopyDeblockedPixels(Plane plane,int row4x4)313 void PostFilter::CopyDeblockedPixels(Plane plane, int row4x4) {
314   const ptrdiff_t src_stride = frame_buffer_.stride(plane);
315   const uint8_t* const src = GetSourceBuffer(plane, row4x4, 0);
316   const int row_offset = DivideBy4(row4x4);
317   const ptrdiff_t dst_stride = loop_restoration_border_.stride(plane);
318   uint8_t* dst = loop_restoration_border_.data(plane) + row_offset * dst_stride;
319   const int num_pixels = SubsampledValue(MultiplyBy4(frame_header_.columns4x4),
320                                          subsampling_x_[plane]);
321   const int row_width = num_pixels << pixel_size_log2_;
322   int last_valid_row = -1;
323   const int plane_height =
324       SubsampledValue(frame_header_.height, subsampling_y_[plane]);
325   int row = kLoopRestorationBorderRows[subsampling_y_[plane]];
326   const int absolute_row = (MultiplyBy4(row4x4) >> subsampling_y_[plane]) + row;
327   for (int i = 0; i < 4; ++i, ++row) {
328     if (absolute_row + i >= plane_height) {
329       if (last_valid_row == -1) break;
330       // If we run out of rows, copy the last valid row (mimics the bottom
331       // border extension).
332       row = last_valid_row;
333     }
334     memcpy(dst, src + row * src_stride, row_width);
335     last_valid_row = row;
336     dst += dst_stride;
337   }
338 }
339 
CopyBordersForOneSuperBlockRow(int row4x4,int sb4x4,bool for_loop_restoration)340 void PostFilter::CopyBordersForOneSuperBlockRow(int row4x4, int sb4x4,
341                                                 bool for_loop_restoration) {
342   // Number of rows to be subtracted from the start position described by
343   // row4x4. We always lag by 8 rows (to account for in-loop post filters).
344   const int row_offset = (row4x4 == 0) ? 0 : 8;
345   // Number of rows to be subtracted from the height described by sb4x4.
346   const int height_offset = (row4x4 == 0) ? 8 : 0;
347   // If cdef is off and post filter multithreading is off, then loop restoration
348   // needs 2 extra rows for the bottom border in each plane.
349   const int extra_rows =
350       (for_loop_restoration && thread_pool_ == nullptr && !DoCdef()) ? 2 : 0;
351   const int upscaled_width = frame_header_.upscaled_width;
352   const int height = frame_header_.height;
353   int plane = kPlaneY;
354   do {
355     const int plane_width =
356         SubsampledValue(upscaled_width, subsampling_x_[plane]);
357     const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
358     const int row = (MultiplyBy4(row4x4) - row_offset) >> subsampling_y_[plane];
359     assert(row >= 0);
360     if (row >= plane_height) break;
361     const int num_rows =
362         std::min(SubsampledValue(MultiplyBy4(sb4x4) - height_offset,
363                                  subsampling_y_[plane]) +
364                      extra_rows,
365                  plane_height - row);
366     // We only need to track the progress of the Y plane since the progress of
367     // the U and V planes will be inferred from the progress of the Y plane.
368     if (!for_loop_restoration && plane == kPlaneY) {
369       progress_row_ = row + num_rows;
370     }
371     const bool copy_bottom = row + num_rows == plane_height;
372     const ptrdiff_t stride = frame_buffer_.stride(plane);
373     uint8_t* const start = (for_loop_restoration ? superres_buffer_[plane]
374                                                  : frame_buffer_.data(plane)) +
375                            row * stride;
376 #if LIBGAV1_MSAN
377     const int right_padding =
378         (frame_buffer_.stride(plane) >> static_cast<int>(bitdepth_ > 8)) -
379         ((frame_buffer_.left_border(plane) + frame_buffer_.width(plane) +
380           frame_buffer_.right_border(plane)));
381     const int padded_right_border_size =
382         frame_buffer_.right_border(plane) + right_padding;
383     // The optimized loop restoration code may read into the next row's left
384     // border depending on the start of the last superblock and the size of the
385     // right border. This is safe as the post filter is applied after
386     // reconstruction is complete and the threaded implementations do not read
387     // from the left border.
388     const int left_border_overread =
389         (for_loop_restoration && padded_right_border_size < 64)
390             ? 63 - padded_right_border_size
391             : 0;
392     assert(!for_loop_restoration || left_border_overread == 0 ||
393            (frame_buffer_.bottom_border(plane) > 0 &&
394             left_border_overread <= frame_buffer_.left_border(plane)));
395     const int left_border = (for_loop_restoration && left_border_overread == 0)
396                                 ? kRestorationHorizontalBorder
397                                 : frame_buffer_.left_border(plane);
398     // The optimized loop restoration code will overread the visible frame
399     // buffer into the right border. Extend the right boundary further to
400     // prevent msan warnings.
401     const int right_border = for_loop_restoration
402                                  ? std::min(padded_right_border_size, 63)
403                                  : frame_buffer_.right_border(plane);
404 #else
405     const int left_border = for_loop_restoration
406                                 ? kRestorationHorizontalBorder
407                                 : frame_buffer_.left_border(plane);
408     const int right_border = for_loop_restoration
409                                  ? kRestorationHorizontalBorder
410                                  : frame_buffer_.right_border(plane);
411 #endif
412     const int top_border =
413         (row == 0) ? (for_loop_restoration ? kRestorationVerticalBorder
414                                            : frame_buffer_.top_border(plane))
415                    : 0;
416     const int bottom_border =
417         copy_bottom
418             ? (for_loop_restoration ? kRestorationVerticalBorder
419                                     : frame_buffer_.bottom_border(plane))
420             : 0;
421     ExtendFrameBoundary(start, plane_width, num_rows, stride, left_border,
422                         right_border, top_border, bottom_border);
423   } while (++plane < planes_);
424 }
425 
SetupLoopRestorationBorder(const int row4x4)426 void PostFilter::SetupLoopRestorationBorder(const int row4x4) {
427   assert(row4x4 >= 0);
428   assert(!DoCdef());
429   assert(DoRestoration());
430   const int upscaled_width = frame_header_.upscaled_width;
431   const int height = frame_header_.height;
432   int plane = kPlaneY;
433   do {
434     if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
435       continue;
436     }
437     const int row_offset = DivideBy4(row4x4);
438     const int num_pixels =
439         SubsampledValue(upscaled_width, subsampling_x_[plane]);
440     const int row_width = num_pixels << pixel_size_log2_;
441     const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
442     const int row = kLoopRestorationBorderRows[subsampling_y_[plane]];
443     const int absolute_row =
444         (MultiplyBy4(row4x4) >> subsampling_y_[plane]) + row;
445     const ptrdiff_t src_stride = frame_buffer_.stride(plane);
446     const uint8_t* src =
447         GetSuperResBuffer(static_cast<Plane>(plane), row4x4, 0) +
448         row * src_stride;
449     const ptrdiff_t dst_stride = loop_restoration_border_.stride(plane);
450     uint8_t* dst =
451         loop_restoration_border_.data(plane) + row_offset * dst_stride;
452     for (int i = 0; i < 4; ++i) {
453       memcpy(dst, src, row_width);
454 #if LIBGAV1_MAX_BITDEPTH >= 10
455       if (bitdepth_ >= 10) {
456         ExtendLine<uint16_t>(dst, num_pixels, kRestorationHorizontalBorder,
457                              kRestorationHorizontalBorder);
458       } else  // NOLINT.
459 #endif
460         ExtendLine<uint8_t>(dst, num_pixels, kRestorationHorizontalBorder,
461                             kRestorationHorizontalBorder);
462       // If we run out of rows, copy the last valid row (mimics the bottom
463       // border extension).
464       if (absolute_row + i < plane_height - 1) src += src_stride;
465       dst += dst_stride;
466     }
467   } while (++plane < planes_);
468 }
469 
SetupLoopRestorationBorder(int row4x4_start,int sb4x4)470 void PostFilter::SetupLoopRestorationBorder(int row4x4_start, int sb4x4) {
471   assert(row4x4_start >= 0);
472   assert(DoCdef());
473   assert(DoRestoration());
474   for (int sb_y = 0; sb_y < sb4x4; sb_y += 16) {
475     const int row4x4 = row4x4_start + sb_y;
476     const int row_offset_start = DivideBy4(row4x4);
477     const std::array<uint8_t*, kMaxPlanes> dst = {
478         loop_restoration_border_.data(kPlaneY) +
479             row_offset_start * static_cast<ptrdiff_t>(
480                                    loop_restoration_border_.stride(kPlaneY)),
481         loop_restoration_border_.data(kPlaneU) +
482             row_offset_start * static_cast<ptrdiff_t>(
483                                    loop_restoration_border_.stride(kPlaneU)),
484         loop_restoration_border_.data(kPlaneV) +
485             row_offset_start * static_cast<ptrdiff_t>(
486                                    loop_restoration_border_.stride(kPlaneV))};
487     // If SuperRes is enabled, then we apply SuperRes for the rows to be copied
488     // directly with |loop_restoration_border_| as the destination. Otherwise,
489     // we simply copy the rows.
490     if (DoSuperRes()) {
491       std::array<uint8_t*, kMaxPlanes> src;
492       std::array<int, kMaxPlanes> rows;
493       const int height = frame_header_.height;
494       int plane = kPlaneY;
495       do {
496         if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
497           rows[plane] = 0;
498           continue;
499         }
500         const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
501         const int row = kLoopRestorationBorderRows[subsampling_y_[plane]];
502         const int absolute_row =
503             (MultiplyBy4(row4x4) >> subsampling_y_[plane]) + row;
504         src[plane] = GetSourceBuffer(static_cast<Plane>(plane), row4x4, 0) +
505                      row * static_cast<ptrdiff_t>(frame_buffer_.stride(plane));
506         rows[plane] = Clip3(plane_height - absolute_row, 0, 4);
507       } while (++plane < planes_);
508       ApplySuperRes(src, rows, /*line_buffer_row=*/-1, dst,
509                     /*dst_is_loop_restoration_border=*/true);
510       // If we run out of rows, copy the last valid row (mimics the bottom
511       // border extension).
512       plane = kPlaneY;
513       do {
514         if (rows[plane] == 0 || rows[plane] >= 4) continue;
515         const ptrdiff_t stride = loop_restoration_border_.stride(plane);
516         uint8_t* dst_line = dst[plane] + rows[plane] * stride;
517         const uint8_t* const src_line = dst_line - stride;
518         const int upscaled_width = super_res_info_[plane].upscaled_width
519                                    << pixel_size_log2_;
520         for (int i = rows[plane]; i < 4; ++i) {
521           memcpy(dst_line, src_line, upscaled_width);
522           dst_line += stride;
523         }
524       } while (++plane < planes_);
525     } else {
526       int plane = kPlaneY;
527       do {
528         CopyDeblockedPixels(static_cast<Plane>(plane), row4x4);
529       } while (++plane < planes_);
530     }
531     // Extend the left and right boundaries needed for loop restoration.
532     const int upscaled_width = frame_header_.upscaled_width;
533     int plane = kPlaneY;
534     do {
535       if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
536         continue;
537       }
538       uint8_t* dst_line = dst[plane];
539       const int plane_width =
540           SubsampledValue(upscaled_width, subsampling_x_[plane]);
541       for (int i = 0; i < 4; ++i) {
542 #if LIBGAV1_MAX_BITDEPTH >= 10
543         if (bitdepth_ >= 10) {
544           ExtendLine<uint16_t>(dst_line, plane_width,
545                                kRestorationHorizontalBorder,
546                                kRestorationHorizontalBorder);
547         } else  // NOLINT.
548 #endif
549         {
550           ExtendLine<uint8_t>(dst_line, plane_width,
551                               kRestorationHorizontalBorder,
552                               kRestorationHorizontalBorder);
553         }
554         dst_line += loop_restoration_border_.stride(plane);
555       }
556     } while (++plane < planes_);
557   }
558 }
559 
RunJobs(WorkerFunction worker)560 void PostFilter::RunJobs(WorkerFunction worker) {
561   std::atomic<int> row4x4(0);
562   const int num_workers = thread_pool_->num_threads();
563   BlockingCounter pending_workers(num_workers);
564   for (int i = 0; i < num_workers; ++i) {
565     thread_pool_->Schedule([this, &row4x4, &pending_workers, worker]() {
566       (this->*worker)(&row4x4);
567       pending_workers.Decrement();
568     });
569   }
570   // Run the jobs on the current thread.
571   (this->*worker)(&row4x4);
572   // Wait for the threadpool jobs to finish.
573   pending_workers.Wait();
574 }
575 
ApplyFilteringThreaded()576 void PostFilter::ApplyFilteringThreaded() {
577   if (DoDeblock()) {
578     RunJobs(&PostFilter::DeblockFilterWorker<kLoopFilterTypeVertical>);
579     RunJobs(&PostFilter::DeblockFilterWorker<kLoopFilterTypeHorizontal>);
580   }
581   if (DoCdef() && DoRestoration()) {
582     for (int row4x4 = 0; row4x4 < frame_header_.rows4x4;
583          row4x4 += kNum4x4InLoopFilterUnit) {
584       SetupLoopRestorationBorder(row4x4, kNum4x4InLoopFilterUnit);
585     }
586   }
587   if (DoCdef()) {
588     for (int row4x4 = 0; row4x4 < frame_header_.rows4x4;
589          row4x4 += kNum4x4InLoopFilterUnit) {
590       SetupCdefBorder(row4x4);
591     }
592     RunJobs(&PostFilter::ApplyCdefWorker);
593   }
594   if (DoSuperRes()) ApplySuperResThreaded();
595   if (DoRestoration()) {
596     if (!DoCdef()) {
597       int row4x4 = 0;
598       do {
599         SetupLoopRestorationBorder(row4x4);
600         row4x4 += kNum4x4InLoopFilterUnit;
601       } while (row4x4 < frame_header_.rows4x4);
602     }
603     RunJobs(&PostFilter::ApplyLoopRestorationWorker);
604   }
605   ExtendBordersForReferenceFrame();
606 }
607 
ApplyFilteringForOneSuperBlockRow(int row4x4,int sb4x4,bool is_last_row,bool do_deblock)608 int PostFilter::ApplyFilteringForOneSuperBlockRow(int row4x4, int sb4x4,
609                                                   bool is_last_row,
610                                                   bool do_deblock) {
611   if (row4x4 < 0) return -1;
612   if (DoDeblock() && do_deblock) {
613     VerticalDeblockFilter(row4x4, row4x4 + sb4x4, 0, frame_header_.columns4x4);
614     HorizontalDeblockFilter(row4x4, row4x4 + sb4x4, 0,
615                             frame_header_.columns4x4);
616   }
617   if (DoRestoration() && DoCdef()) {
618     SetupLoopRestorationBorder(row4x4, sb4x4);
619   }
620   if (DoCdef()) {
621     ApplyCdefForOneSuperBlockRow(row4x4, sb4x4, is_last_row);
622   }
623   if (DoSuperRes()) {
624     ApplySuperResForOneSuperBlockRow(row4x4, sb4x4, is_last_row);
625   }
626   if (DoRestoration()) {
627     CopyBordersForOneSuperBlockRow(row4x4, sb4x4, true);
628     ApplyLoopRestoration(row4x4, sb4x4);
629     if (is_last_row) {
630       // Loop restoration operates with a lag of 8 rows. So make sure to cover
631       // all the rows of the last superblock row.
632       CopyBordersForOneSuperBlockRow(row4x4 + sb4x4, 16, true);
633       ApplyLoopRestoration(row4x4 + sb4x4, 16);
634     }
635   }
636   if (frame_header_.refresh_frame_flags != 0 && DoBorderExtensionInLoop()) {
637     CopyBordersForOneSuperBlockRow(row4x4, sb4x4, false);
638     if (is_last_row) {
639       CopyBordersForOneSuperBlockRow(row4x4 + sb4x4, 16, false);
640     }
641   }
642   if (is_last_row && !DoBorderExtensionInLoop()) {
643     ExtendBordersForReferenceFrame();
644   }
645   return is_last_row ? frame_header_.height : progress_row_;
646 }
647 
648 }  // namespace libgav1
649