1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/post_filter.h"
16
17 #include <algorithm>
18 #include <array>
19 #include <atomic>
20 #include <cassert>
21 #include <cstddef>
22 #include <cstdint>
23 #include <cstring>
24
25 #include "src/dsp/constants.h"
26 #include "src/dsp/dsp.h"
27 #include "src/utils/array_2d.h"
28 #include "src/utils/blocking_counter.h"
29 #include "src/utils/common.h"
30 #include "src/utils/compiler_attributes.h"
31 #include "src/utils/constants.h"
32 #include "src/utils/memory.h"
33 #include "src/utils/types.h"
34
35 namespace libgav1 {
36 namespace {
37
38 // Import all the constants in the anonymous namespace.
39 #include "src/post_filter/deblock_thresholds.inc"
40
// Row offset of the first of the four rows that get saved into
// |loop_restoration_border_|. The border is populated from these rows when
// either cdef is on or multithreading is enabled. The table is indexed by
// subsampling_y.
constexpr int kLoopRestorationBorderRows[2] = {/*subsampling_y=0*/ 54,
                                               /*subsampling_y=1*/ 26};
45
46 } // namespace
47
PostFilter(const ObuFrameHeader & frame_header,const ObuSequenceHeader & sequence_header,FrameScratchBuffer * const frame_scratch_buffer,YuvBuffer * const frame_buffer,const dsp::Dsp * dsp,int do_post_filter_mask)48 PostFilter::PostFilter(const ObuFrameHeader& frame_header,
49 const ObuSequenceHeader& sequence_header,
50 FrameScratchBuffer* const frame_scratch_buffer,
51 YuvBuffer* const frame_buffer, const dsp::Dsp* dsp,
52 int do_post_filter_mask)
53 : frame_header_(frame_header),
54 loop_restoration_(frame_header.loop_restoration),
55 dsp_(*dsp),
56 bitdepth_(sequence_header.color_config.bitdepth),
57 subsampling_x_{0, sequence_header.color_config.subsampling_x,
58 sequence_header.color_config.subsampling_x},
59 subsampling_y_{0, sequence_header.color_config.subsampling_y,
60 sequence_header.color_config.subsampling_y},
61 planes_(sequence_header.color_config.is_monochrome ? kMaxPlanesMonochrome
62 : kMaxPlanes),
63 pixel_size_log2_(static_cast<int>((bitdepth_ == 8) ? sizeof(uint8_t)
64 : sizeof(uint16_t)) -
65 1),
66 inner_thresh_(kInnerThresh[frame_header.loop_filter.sharpness]),
67 outer_thresh_(kOuterThresh[frame_header.loop_filter.sharpness]),
68 needs_chroma_deblock_(frame_header.loop_filter.level[kPlaneU + 1] != 0 ||
69 frame_header.loop_filter.level[kPlaneV + 1] != 0),
70 do_cdef_(DoCdef(frame_header, do_post_filter_mask)),
71 do_deblock_(DoDeblock(frame_header, do_post_filter_mask)),
72 do_restoration_(
73 DoRestoration(loop_restoration_, do_post_filter_mask, planes_)),
74 do_superres_(DoSuperRes(frame_header, do_post_filter_mask)),
75 cdef_index_(frame_scratch_buffer->cdef_index),
76 cdef_skip_(frame_scratch_buffer->cdef_skip),
77 inter_transform_sizes_(frame_scratch_buffer->inter_transform_sizes),
78 restoration_info_(&frame_scratch_buffer->loop_restoration_info),
79 superres_coefficients_{
80 frame_scratch_buffer->superres_coefficients[kPlaneTypeY].get(),
81 frame_scratch_buffer
82 ->superres_coefficients
83 [(sequence_header.color_config.is_monochrome ||
84 sequence_header.color_config.subsampling_x == 0)
85 ? kPlaneTypeY
86 : kPlaneTypeUV]
87 .get()},
88 superres_line_buffer_(frame_scratch_buffer->superres_line_buffer),
89 block_parameters_(frame_scratch_buffer->block_parameters_holder),
90 frame_buffer_(*frame_buffer),
91 cdef_border_(frame_scratch_buffer->cdef_border),
92 loop_restoration_border_(frame_scratch_buffer->loop_restoration_border),
93 thread_pool_(
94 frame_scratch_buffer->threading_strategy.post_filter_thread_pool()) {
95 const int8_t zero_delta_lf[kFrameLfCount] = {};
96 ComputeDeblockFilterLevels(zero_delta_lf, deblock_filter_levels_);
97 if (DoSuperRes()) {
98 int plane = kPlaneY;
99 const int width = frame_header_.width;
100 const int upscaled_width_fh = frame_header_.upscaled_width;
101 do {
102 const int downscaled_width =
103 SubsampledValue(width, subsampling_x_[plane]);
104 const int upscaled_width =
105 SubsampledValue(upscaled_width_fh, subsampling_x_[plane]);
106 const int superres_width = downscaled_width << kSuperResScaleBits;
107 super_res_info_[plane].step =
108 (superres_width + upscaled_width / 2) / upscaled_width;
109 const int error =
110 super_res_info_[plane].step * upscaled_width - superres_width;
111 super_res_info_[plane].initial_subpixel_x =
112 ((-((upscaled_width - downscaled_width) << (kSuperResScaleBits - 1)) +
113 DivideBy2(upscaled_width)) /
114 upscaled_width +
115 (1 << (kSuperResExtraBits - 1)) - error / 2) &
116 kSuperResScaleMask;
117 super_res_info_[plane].upscaled_width = upscaled_width;
118 } while (++plane < planes_);
119 if (dsp->super_res_coefficients != nullptr) {
120 int plane = kPlaneY;
121 const int number_loops = (superres_coefficients_[kPlaneTypeY] ==
122 superres_coefficients_[kPlaneTypeUV])
123 ? kMaxPlanesMonochrome
124 : static_cast<int>(kNumPlaneTypes);
125 do {
126 dsp->super_res_coefficients(super_res_info_[plane].upscaled_width,
127 super_res_info_[plane].initial_subpixel_x,
128 super_res_info_[plane].step,
129 superres_coefficients_[plane]);
130 } while (++plane < number_loops);
131 }
132 }
133 int plane = kPlaneY;
134 do {
135 loop_restoration_buffer_[plane] = frame_buffer_.data(plane);
136 cdef_buffer_[plane] = frame_buffer_.data(plane);
137 superres_buffer_[plane] = frame_buffer_.data(plane);
138 source_buffer_[plane] = frame_buffer_.data(plane);
139 } while (++plane < planes_);
140 if (DoCdef() || DoRestoration() || DoSuperRes()) {
141 plane = kPlaneY;
142 const int pixel_size_log2 = pixel_size_log2_;
143 do {
144 int horizontal_shift = 0;
145 int vertical_shift = 0;
146 if (DoRestoration() &&
147 loop_restoration_.type[plane] != kLoopRestorationTypeNone) {
148 horizontal_shift += frame_buffer_.alignment();
149 if (!DoCdef() && thread_pool_ == nullptr) {
150 vertical_shift += kRestorationVerticalBorder;
151 }
152 superres_buffer_[plane] +=
153 vertical_shift * frame_buffer_.stride(plane) +
154 (horizontal_shift << pixel_size_log2);
155 }
156 if (DoSuperRes()) {
157 vertical_shift += kSuperResVerticalBorder;
158 }
159 cdef_buffer_[plane] += vertical_shift * frame_buffer_.stride(plane) +
160 (horizontal_shift << pixel_size_log2);
161 if (DoCdef() && thread_pool_ == nullptr) {
162 horizontal_shift += frame_buffer_.alignment();
163 vertical_shift += kCdefBorder;
164 }
165 assert(horizontal_shift <= frame_buffer_.right_border(plane));
166 assert(vertical_shift <= frame_buffer_.bottom_border(plane));
167 source_buffer_[plane] += vertical_shift * frame_buffer_.stride(plane) +
168 (horizontal_shift << pixel_size_log2);
169 } while (++plane < planes_);
170 }
171 }
172
173 // The following example illustrates how ExtendFrame() extends a frame.
174 // Suppose the frame width is 8 and height is 4, and left, right, top, and
175 // bottom are all equal to 3.
176 //
177 // Before:
178 //
179 // ABCDEFGH
180 // IJKLMNOP
181 // QRSTUVWX
182 // YZabcdef
183 //
184 // After:
185 //
186 // AAA|ABCDEFGH|HHH [3]
187 // AAA|ABCDEFGH|HHH
188 // AAA|ABCDEFGH|HHH
189 // ---+--------+---
190 // AAA|ABCDEFGH|HHH [1]
191 // III|IJKLMNOP|PPP
192 // QQQ|QRSTUVWX|XXX
193 // YYY|YZabcdef|fff
194 // ---+--------+---
195 // YYY|YZabcdef|fff [2]
196 // YYY|YZabcdef|fff
197 // YYY|YZabcdef|fff
198 //
199 // ExtendFrame() first extends the rows to the left and to the right[1]. Then
200 // it copies the extended last row to the bottom borders[2]. Finally it copies
201 // the extended first row to the top borders[3].
202 // static
203 template <typename Pixel>
ExtendFrame(Pixel * const frame_start,const int width,const int height,const ptrdiff_t stride,const int left,const int right,const int top,const int bottom)204 void PostFilter::ExtendFrame(Pixel* const frame_start, const int width,
205 const int height, const ptrdiff_t stride,
206 const int left, const int right, const int top,
207 const int bottom) {
208 Pixel* src = frame_start;
209 // Copy to left and right borders.
210 int y = height;
211 do {
212 ExtendLine<Pixel>(src, width, left, right);
213 src += stride;
214 } while (--y != 0);
215 // Copy to bottom borders. For performance we copy |stride| pixels
216 // (including some padding pixels potentially) in each row, ending at the
217 // bottom right border pixel. In the diagram the asterisks indicate padding
218 // pixels.
219 //
220 // |<--- stride --->|
221 // **YYY|YZabcdef|fff <-- Copy from the extended last row.
222 // -----+--------+---
223 // **YYY|YZabcdef|fff
224 // **YYY|YZabcdef|fff
225 // **YYY|YZabcdef|fff <-- bottom right border pixel
226 assert(src == frame_start + height * stride);
227 Pixel* dst = src - left;
228 src = dst - stride;
229 for (int y = 0; y < bottom; ++y) {
230 memcpy(dst, src, sizeof(Pixel) * stride);
231 dst += stride;
232 }
233 // Copy to top borders. For performance we copy |stride| pixels (including
234 // some padding pixels potentially) in each row, starting from the top left
235 // border pixel. In the diagram the asterisks indicate padding pixels.
236 //
237 // +-- top left border pixel
238 // |
239 // v
240 // AAA|ABCDEFGH|HHH**
241 // AAA|ABCDEFGH|HHH**
242 // AAA|ABCDEFGH|HHH**
243 // ---+--------+-----
244 // AAA|ABCDEFGH|HHH** <-- Copy from the extended first row.
245 // |<--- stride --->|
246 src = frame_start - left;
247 dst = frame_start - left - top * stride;
248 for (int y = 0; y < top; ++y) {
249 memcpy(dst, src, sizeof(Pixel) * stride);
250 dst += stride;
251 }
252 }
253
254 template void PostFilter::ExtendFrame<uint8_t>(uint8_t* const frame_start,
255 const int width,
256 const int height,
257 const ptrdiff_t stride,
258 const int left, const int right,
259 const int top, const int bottom);
260
261 #if LIBGAV1_MAX_BITDEPTH >= 10
262 template void PostFilter::ExtendFrame<uint16_t>(
263 uint16_t* const frame_start, const int width, const int height,
264 const ptrdiff_t stride, const int left, const int right, const int top,
265 const int bottom);
266 #endif
267
ExtendFrameBoundary(uint8_t * const frame_start,const int width,const int height,const ptrdiff_t stride,const int left,const int right,const int top,const int bottom) const268 void PostFilter::ExtendFrameBoundary(uint8_t* const frame_start,
269 const int width, const int height,
270 const ptrdiff_t stride, const int left,
271 const int right, const int top,
272 const int bottom) const {
273 #if LIBGAV1_MAX_BITDEPTH >= 10
274 if (bitdepth_ >= 10) {
275 ExtendFrame<uint16_t>(reinterpret_cast<uint16_t*>(frame_start), width,
276 height, stride >> 1, left, right, top, bottom);
277 return;
278 }
279 #endif
280 ExtendFrame<uint8_t>(frame_start, width, height, stride, left, right, top,
281 bottom);
282 }
283
ExtendBordersForReferenceFrame()284 void PostFilter::ExtendBordersForReferenceFrame() {
285 if (frame_header_.refresh_frame_flags == 0) return;
286 const int upscaled_width = frame_header_.upscaled_width;
287 const int height = frame_header_.height;
288 int plane = kPlaneY;
289 do {
290 const int plane_width =
291 SubsampledValue(upscaled_width, subsampling_x_[plane]);
292 const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
293 assert(frame_buffer_.left_border(plane) >= kMinLeftBorderPixels &&
294 frame_buffer_.right_border(plane) >= kMinRightBorderPixels &&
295 frame_buffer_.top_border(plane) >= kMinTopBorderPixels &&
296 frame_buffer_.bottom_border(plane) >= kMinBottomBorderPixels);
297 // plane subsampling_x_ left_border
298 // Y N/A 64, 48
299 // U,V 0 64, 48
300 // U,V 1 32, 16
301 assert(frame_buffer_.left_border(plane) >= 16);
302 // The |left| argument to ExtendFrameBoundary() must be at least
303 // kMinLeftBorderPixels (13) for warp.
304 static_assert(16 >= kMinLeftBorderPixels, "");
305 ExtendFrameBoundary(
306 frame_buffer_.data(plane), plane_width, plane_height,
307 frame_buffer_.stride(plane), frame_buffer_.left_border(plane),
308 frame_buffer_.right_border(plane), frame_buffer_.top_border(plane),
309 frame_buffer_.bottom_border(plane));
310 } while (++plane < planes_);
311 }
312
CopyDeblockedPixels(Plane plane,int row4x4)313 void PostFilter::CopyDeblockedPixels(Plane plane, int row4x4) {
314 const ptrdiff_t src_stride = frame_buffer_.stride(plane);
315 const uint8_t* const src = GetSourceBuffer(plane, row4x4, 0);
316 const int row_offset = DivideBy4(row4x4);
317 const ptrdiff_t dst_stride = loop_restoration_border_.stride(plane);
318 uint8_t* dst = loop_restoration_border_.data(plane) + row_offset * dst_stride;
319 const int num_pixels = SubsampledValue(MultiplyBy4(frame_header_.columns4x4),
320 subsampling_x_[plane]);
321 const int row_width = num_pixels << pixel_size_log2_;
322 int last_valid_row = -1;
323 const int plane_height =
324 SubsampledValue(frame_header_.height, subsampling_y_[plane]);
325 int row = kLoopRestorationBorderRows[subsampling_y_[plane]];
326 const int absolute_row = (MultiplyBy4(row4x4) >> subsampling_y_[plane]) + row;
327 for (int i = 0; i < 4; ++i, ++row) {
328 if (absolute_row + i >= plane_height) {
329 if (last_valid_row == -1) break;
330 // If we run out of rows, copy the last valid row (mimics the bottom
331 // border extension).
332 row = last_valid_row;
333 }
334 memcpy(dst, src + row * src_stride, row_width);
335 last_valid_row = row;
336 dst += dst_stride;
337 }
338 }
339
CopyBordersForOneSuperBlockRow(int row4x4,int sb4x4,bool for_loop_restoration)340 void PostFilter::CopyBordersForOneSuperBlockRow(int row4x4, int sb4x4,
341 bool for_loop_restoration) {
342 // Number of rows to be subtracted from the start position described by
343 // row4x4. We always lag by 8 rows (to account for in-loop post filters).
344 const int row_offset = (row4x4 == 0) ? 0 : 8;
345 // Number of rows to be subtracted from the height described by sb4x4.
346 const int height_offset = (row4x4 == 0) ? 8 : 0;
347 // If cdef is off and post filter multithreading is off, then loop restoration
348 // needs 2 extra rows for the bottom border in each plane.
349 const int extra_rows =
350 (for_loop_restoration && thread_pool_ == nullptr && !DoCdef()) ? 2 : 0;
351 const int upscaled_width = frame_header_.upscaled_width;
352 const int height = frame_header_.height;
353 int plane = kPlaneY;
354 do {
355 const int plane_width =
356 SubsampledValue(upscaled_width, subsampling_x_[plane]);
357 const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
358 const int row = (MultiplyBy4(row4x4) - row_offset) >> subsampling_y_[plane];
359 assert(row >= 0);
360 if (row >= plane_height) break;
361 const int num_rows =
362 std::min(SubsampledValue(MultiplyBy4(sb4x4) - height_offset,
363 subsampling_y_[plane]) +
364 extra_rows,
365 plane_height - row);
366 // We only need to track the progress of the Y plane since the progress of
367 // the U and V planes will be inferred from the progress of the Y plane.
368 if (!for_loop_restoration && plane == kPlaneY) {
369 progress_row_ = row + num_rows;
370 }
371 const bool copy_bottom = row + num_rows == plane_height;
372 const ptrdiff_t stride = frame_buffer_.stride(plane);
373 uint8_t* const start = (for_loop_restoration ? superres_buffer_[plane]
374 : frame_buffer_.data(plane)) +
375 row * stride;
376 #if LIBGAV1_MSAN
377 const int right_padding =
378 (frame_buffer_.stride(plane) >> static_cast<int>(bitdepth_ > 8)) -
379 ((frame_buffer_.left_border(plane) + frame_buffer_.width(plane) +
380 frame_buffer_.right_border(plane)));
381 const int padded_right_border_size =
382 frame_buffer_.right_border(plane) + right_padding;
383 // The optimized loop restoration code may read into the next row's left
384 // border depending on the start of the last superblock and the size of the
385 // right border. This is safe as the post filter is applied after
386 // reconstruction is complete and the threaded implementations do not read
387 // from the left border.
388 const int left_border_overread =
389 (for_loop_restoration && padded_right_border_size < 64)
390 ? 63 - padded_right_border_size
391 : 0;
392 assert(!for_loop_restoration || left_border_overread == 0 ||
393 (frame_buffer_.bottom_border(plane) > 0 &&
394 left_border_overread <= frame_buffer_.left_border(plane)));
395 const int left_border = (for_loop_restoration && left_border_overread == 0)
396 ? kRestorationHorizontalBorder
397 : frame_buffer_.left_border(plane);
398 // The optimized loop restoration code will overread the visible frame
399 // buffer into the right border. Extend the right boundary further to
400 // prevent msan warnings.
401 const int right_border = for_loop_restoration
402 ? std::min(padded_right_border_size, 63)
403 : frame_buffer_.right_border(plane);
404 #else
405 const int left_border = for_loop_restoration
406 ? kRestorationHorizontalBorder
407 : frame_buffer_.left_border(plane);
408 const int right_border = for_loop_restoration
409 ? kRestorationHorizontalBorder
410 : frame_buffer_.right_border(plane);
411 #endif
412 const int top_border =
413 (row == 0) ? (for_loop_restoration ? kRestorationVerticalBorder
414 : frame_buffer_.top_border(plane))
415 : 0;
416 const int bottom_border =
417 copy_bottom
418 ? (for_loop_restoration ? kRestorationVerticalBorder
419 : frame_buffer_.bottom_border(plane))
420 : 0;
421 ExtendFrameBoundary(start, plane_width, num_rows, stride, left_border,
422 right_border, top_border, bottom_border);
423 } while (++plane < planes_);
424 }
425
SetupLoopRestorationBorder(const int row4x4)426 void PostFilter::SetupLoopRestorationBorder(const int row4x4) {
427 assert(row4x4 >= 0);
428 assert(!DoCdef());
429 assert(DoRestoration());
430 const int upscaled_width = frame_header_.upscaled_width;
431 const int height = frame_header_.height;
432 int plane = kPlaneY;
433 do {
434 if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
435 continue;
436 }
437 const int row_offset = DivideBy4(row4x4);
438 const int num_pixels =
439 SubsampledValue(upscaled_width, subsampling_x_[plane]);
440 const int row_width = num_pixels << pixel_size_log2_;
441 const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
442 const int row = kLoopRestorationBorderRows[subsampling_y_[plane]];
443 const int absolute_row =
444 (MultiplyBy4(row4x4) >> subsampling_y_[plane]) + row;
445 const ptrdiff_t src_stride = frame_buffer_.stride(plane);
446 const uint8_t* src =
447 GetSuperResBuffer(static_cast<Plane>(plane), row4x4, 0) +
448 row * src_stride;
449 const ptrdiff_t dst_stride = loop_restoration_border_.stride(plane);
450 uint8_t* dst =
451 loop_restoration_border_.data(plane) + row_offset * dst_stride;
452 for (int i = 0; i < 4; ++i) {
453 memcpy(dst, src, row_width);
454 #if LIBGAV1_MAX_BITDEPTH >= 10
455 if (bitdepth_ >= 10) {
456 ExtendLine<uint16_t>(dst, num_pixels, kRestorationHorizontalBorder,
457 kRestorationHorizontalBorder);
458 } else // NOLINT.
459 #endif
460 ExtendLine<uint8_t>(dst, num_pixels, kRestorationHorizontalBorder,
461 kRestorationHorizontalBorder);
462 // If we run out of rows, copy the last valid row (mimics the bottom
463 // border extension).
464 if (absolute_row + i < plane_height - 1) src += src_stride;
465 dst += dst_stride;
466 }
467 } while (++plane < planes_);
468 }
469
SetupLoopRestorationBorder(int row4x4_start,int sb4x4)470 void PostFilter::SetupLoopRestorationBorder(int row4x4_start, int sb4x4) {
471 assert(row4x4_start >= 0);
472 assert(DoCdef());
473 assert(DoRestoration());
474 for (int sb_y = 0; sb_y < sb4x4; sb_y += 16) {
475 const int row4x4 = row4x4_start + sb_y;
476 const int row_offset_start = DivideBy4(row4x4);
477 const std::array<uint8_t*, kMaxPlanes> dst = {
478 loop_restoration_border_.data(kPlaneY) +
479 row_offset_start * static_cast<ptrdiff_t>(
480 loop_restoration_border_.stride(kPlaneY)),
481 loop_restoration_border_.data(kPlaneU) +
482 row_offset_start * static_cast<ptrdiff_t>(
483 loop_restoration_border_.stride(kPlaneU)),
484 loop_restoration_border_.data(kPlaneV) +
485 row_offset_start * static_cast<ptrdiff_t>(
486 loop_restoration_border_.stride(kPlaneV))};
487 // If SuperRes is enabled, then we apply SuperRes for the rows to be copied
488 // directly with |loop_restoration_border_| as the destination. Otherwise,
489 // we simply copy the rows.
490 if (DoSuperRes()) {
491 std::array<uint8_t*, kMaxPlanes> src;
492 std::array<int, kMaxPlanes> rows;
493 const int height = frame_header_.height;
494 int plane = kPlaneY;
495 do {
496 if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
497 rows[plane] = 0;
498 continue;
499 }
500 const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
501 const int row = kLoopRestorationBorderRows[subsampling_y_[plane]];
502 const int absolute_row =
503 (MultiplyBy4(row4x4) >> subsampling_y_[plane]) + row;
504 src[plane] = GetSourceBuffer(static_cast<Plane>(plane), row4x4, 0) +
505 row * static_cast<ptrdiff_t>(frame_buffer_.stride(plane));
506 rows[plane] = Clip3(plane_height - absolute_row, 0, 4);
507 } while (++plane < planes_);
508 ApplySuperRes(src, rows, /*line_buffer_row=*/-1, dst,
509 /*dst_is_loop_restoration_border=*/true);
510 // If we run out of rows, copy the last valid row (mimics the bottom
511 // border extension).
512 plane = kPlaneY;
513 do {
514 if (rows[plane] == 0 || rows[plane] >= 4) continue;
515 const ptrdiff_t stride = loop_restoration_border_.stride(plane);
516 uint8_t* dst_line = dst[plane] + rows[plane] * stride;
517 const uint8_t* const src_line = dst_line - stride;
518 const int upscaled_width = super_res_info_[plane].upscaled_width
519 << pixel_size_log2_;
520 for (int i = rows[plane]; i < 4; ++i) {
521 memcpy(dst_line, src_line, upscaled_width);
522 dst_line += stride;
523 }
524 } while (++plane < planes_);
525 } else {
526 int plane = kPlaneY;
527 do {
528 CopyDeblockedPixels(static_cast<Plane>(plane), row4x4);
529 } while (++plane < planes_);
530 }
531 // Extend the left and right boundaries needed for loop restoration.
532 const int upscaled_width = frame_header_.upscaled_width;
533 int plane = kPlaneY;
534 do {
535 if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
536 continue;
537 }
538 uint8_t* dst_line = dst[plane];
539 const int plane_width =
540 SubsampledValue(upscaled_width, subsampling_x_[plane]);
541 for (int i = 0; i < 4; ++i) {
542 #if LIBGAV1_MAX_BITDEPTH >= 10
543 if (bitdepth_ >= 10) {
544 ExtendLine<uint16_t>(dst_line, plane_width,
545 kRestorationHorizontalBorder,
546 kRestorationHorizontalBorder);
547 } else // NOLINT.
548 #endif
549 {
550 ExtendLine<uint8_t>(dst_line, plane_width,
551 kRestorationHorizontalBorder,
552 kRestorationHorizontalBorder);
553 }
554 dst_line += loop_restoration_border_.stride(plane);
555 }
556 } while (++plane < planes_);
557 }
558 }
559
RunJobs(WorkerFunction worker)560 void PostFilter::RunJobs(WorkerFunction worker) {
561 std::atomic<int> row4x4(0);
562 const int num_workers = thread_pool_->num_threads();
563 BlockingCounter pending_workers(num_workers);
564 for (int i = 0; i < num_workers; ++i) {
565 thread_pool_->Schedule([this, &row4x4, &pending_workers, worker]() {
566 (this->*worker)(&row4x4);
567 pending_workers.Decrement();
568 });
569 }
570 // Run the jobs on the current thread.
571 (this->*worker)(&row4x4);
572 // Wait for the threadpool jobs to finish.
573 pending_workers.Wait();
574 }
575
ApplyFilteringThreaded()576 void PostFilter::ApplyFilteringThreaded() {
577 if (DoDeblock()) {
578 RunJobs(&PostFilter::DeblockFilterWorker<kLoopFilterTypeVertical>);
579 RunJobs(&PostFilter::DeblockFilterWorker<kLoopFilterTypeHorizontal>);
580 }
581 if (DoCdef() && DoRestoration()) {
582 for (int row4x4 = 0; row4x4 < frame_header_.rows4x4;
583 row4x4 += kNum4x4InLoopFilterUnit) {
584 SetupLoopRestorationBorder(row4x4, kNum4x4InLoopFilterUnit);
585 }
586 }
587 if (DoCdef()) {
588 for (int row4x4 = 0; row4x4 < frame_header_.rows4x4;
589 row4x4 += kNum4x4InLoopFilterUnit) {
590 SetupCdefBorder(row4x4);
591 }
592 RunJobs(&PostFilter::ApplyCdefWorker);
593 }
594 if (DoSuperRes()) ApplySuperResThreaded();
595 if (DoRestoration()) {
596 if (!DoCdef()) {
597 int row4x4 = 0;
598 do {
599 SetupLoopRestorationBorder(row4x4);
600 row4x4 += kNum4x4InLoopFilterUnit;
601 } while (row4x4 < frame_header_.rows4x4);
602 }
603 RunJobs(&PostFilter::ApplyLoopRestorationWorker);
604 }
605 ExtendBordersForReferenceFrame();
606 }
607
ApplyFilteringForOneSuperBlockRow(int row4x4,int sb4x4,bool is_last_row,bool do_deblock)608 int PostFilter::ApplyFilteringForOneSuperBlockRow(int row4x4, int sb4x4,
609 bool is_last_row,
610 bool do_deblock) {
611 if (row4x4 < 0) return -1;
612 if (DoDeblock() && do_deblock) {
613 VerticalDeblockFilter(row4x4, row4x4 + sb4x4, 0, frame_header_.columns4x4);
614 HorizontalDeblockFilter(row4x4, row4x4 + sb4x4, 0,
615 frame_header_.columns4x4);
616 }
617 if (DoRestoration() && DoCdef()) {
618 SetupLoopRestorationBorder(row4x4, sb4x4);
619 }
620 if (DoCdef()) {
621 ApplyCdefForOneSuperBlockRow(row4x4, sb4x4, is_last_row);
622 }
623 if (DoSuperRes()) {
624 ApplySuperResForOneSuperBlockRow(row4x4, sb4x4, is_last_row);
625 }
626 if (DoRestoration()) {
627 CopyBordersForOneSuperBlockRow(row4x4, sb4x4, true);
628 ApplyLoopRestoration(row4x4, sb4x4);
629 if (is_last_row) {
630 // Loop restoration operates with a lag of 8 rows. So make sure to cover
631 // all the rows of the last superblock row.
632 CopyBordersForOneSuperBlockRow(row4x4 + sb4x4, 16, true);
633 ApplyLoopRestoration(row4x4 + sb4x4, 16);
634 }
635 }
636 if (frame_header_.refresh_frame_flags != 0 && DoBorderExtensionInLoop()) {
637 CopyBordersForOneSuperBlockRow(row4x4, sb4x4, false);
638 if (is_last_row) {
639 CopyBordersForOneSuperBlockRow(row4x4 + sb4x4, 16, false);
640 }
641 }
642 if (is_last_row && !DoBorderExtensionInLoop()) {
643 ExtendBordersForReferenceFrame();
644 }
645 return is_last_row ? frame_header_.height : progress_row_;
646 }
647
648 } // namespace libgav1
649