1 // Copyright 2020 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 #include "src/post_filter.h"
15 #include "src/utils/blocking_counter.h"
16
17 namespace libgav1 {
18
ApplySuperRes(const std::array<uint8_t *,kMaxPlanes> & src,const std::array<int,kMaxPlanes> & rows,const int line_buffer_row,const std::array<uint8_t *,kMaxPlanes> & dst,bool dst_is_loop_restoration_border)19 void PostFilter::ApplySuperRes(const std::array<uint8_t*, kMaxPlanes>& src,
20 const std::array<int, kMaxPlanes>& rows,
21 const int line_buffer_row,
22 const std::array<uint8_t*, kMaxPlanes>& dst,
23 bool dst_is_loop_restoration_border /*=false*/) {
24 int plane = kPlaneY;
25 do {
26 const int plane_width =
27 MultiplyBy4(frame_header_.columns4x4) >> subsampling_x_[plane];
28 #if LIBGAV1_MAX_BITDEPTH >= 10
29 if (bitdepth_ >= 10) {
30 auto* input = reinterpret_cast<uint16_t*>(src[plane]);
31 auto* output = reinterpret_cast<uint16_t*>(dst[plane]);
32 const ptrdiff_t input_stride =
33 frame_buffer_.stride(plane) / sizeof(uint16_t);
34 const ptrdiff_t output_stride =
35 (dst_is_loop_restoration_border
36 ? loop_restoration_border_.stride(plane)
37 : frame_buffer_.stride(plane)) /
38 sizeof(uint16_t);
39 if (rows[plane] > 0) {
40 dsp_.super_res(superres_coefficients_[static_cast<int>(plane != 0)],
41 input, input_stride, rows[plane], plane_width,
42 super_res_info_[plane].upscaled_width,
43 super_res_info_[plane].initial_subpixel_x,
44 super_res_info_[plane].step, output, output_stride);
45 }
46 // In the multi-threaded case, the |superres_line_buffer_| holds the last
47 // input row. Apply SuperRes for that row.
48 if (line_buffer_row >= 0) {
49 auto* const line_buffer_start =
50 reinterpret_cast<uint16_t*>(superres_line_buffer_.data(plane)) +
51 line_buffer_row * superres_line_buffer_.stride(plane) /
52 sizeof(uint16_t) +
53 kSuperResHorizontalBorder;
54 dsp_.super_res(superres_coefficients_[static_cast<int>(plane != 0)],
55 line_buffer_start, /*source_stride=*/0,
56 /*height=*/1, plane_width,
57 super_res_info_[plane].upscaled_width,
58 super_res_info_[plane].initial_subpixel_x,
59 super_res_info_[plane].step,
60 output + rows[plane] * output_stride, /*dest_stride=*/0);
61 }
62 continue;
63 }
64 #endif // LIBGAV1_MAX_BITDEPTH >= 10
65 uint8_t* input = src[plane];
66 uint8_t* output = dst[plane];
67 const ptrdiff_t input_stride = frame_buffer_.stride(plane);
68 const ptrdiff_t output_stride = dst_is_loop_restoration_border
69 ? loop_restoration_border_.stride(plane)
70 : frame_buffer_.stride(plane);
71 if (rows[plane] > 0) {
72 dsp_.super_res(superres_coefficients_[static_cast<int>(plane != 0)],
73 input, input_stride, rows[plane], plane_width,
74 super_res_info_[plane].upscaled_width,
75 super_res_info_[plane].initial_subpixel_x,
76 super_res_info_[plane].step, output, output_stride);
77 }
78 // In the multi-threaded case, the |superres_line_buffer_| holds the last
79 // input row. Apply SuperRes for that row.
80 if (line_buffer_row >= 0) {
81 uint8_t* const line_buffer_start =
82 superres_line_buffer_.data(plane) +
83 line_buffer_row * superres_line_buffer_.stride(plane) +
84 kSuperResHorizontalBorder;
85 dsp_.super_res(
86 superres_coefficients_[static_cast<int>(plane != 0)],
87 line_buffer_start, /*source_stride=*/0,
88 /*height=*/1, plane_width, super_res_info_[plane].upscaled_width,
89 super_res_info_[plane].initial_subpixel_x,
90 super_res_info_[plane].step, output + rows[plane] * output_stride,
91 /*dest_stride=*/0);
92 }
93 } while (++plane < planes_);
94 }
95
ApplySuperResForOneSuperBlockRow(int row4x4_start,int sb4x4,bool is_last_row)96 void PostFilter::ApplySuperResForOneSuperBlockRow(int row4x4_start, int sb4x4,
97 bool is_last_row) {
98 assert(row4x4_start >= 0);
99 assert(DoSuperRes());
100 // If not doing cdef, then LR needs two rows of border with superres applied.
101 const int num_rows_extra = (DoCdef() || !DoRestoration()) ? 0 : 2;
102 std::array<uint8_t*, kMaxPlanes> src;
103 std::array<uint8_t*, kMaxPlanes> dst;
104 std::array<int, kMaxPlanes> rows;
105 const int num_rows4x4 =
106 std::min(sb4x4, frame_header_.rows4x4 - row4x4_start) -
107 (is_last_row ? 0 : 2);
108 if (row4x4_start > 0) {
109 const int row4x4 = row4x4_start - 2;
110 int plane = kPlaneY;
111 do {
112 const int row =
113 (MultiplyBy4(row4x4) >> subsampling_y_[plane]) + num_rows_extra;
114 const ptrdiff_t row_offset = row * frame_buffer_.stride(plane);
115 src[plane] = cdef_buffer_[plane] + row_offset;
116 dst[plane] = superres_buffer_[plane] + row_offset;
117 // Note that the |num_rows_extra| subtraction is done after the value is
118 // subsampled since we always need to work on |num_rows_extra| extra rows
119 // irrespective of the plane subsampling.
120 // Apply superres for the last 8-|num_rows_extra| rows of the previous
121 // superblock.
122 rows[plane] = (8 >> subsampling_y_[plane]) - num_rows_extra;
123 // Apply superres for the current superblock row (except for the last
124 // 8-|num_rows_extra| rows).
125 rows[plane] += (MultiplyBy4(num_rows4x4) >> subsampling_y_[plane]) +
126 (is_last_row ? 0 : num_rows_extra);
127 } while (++plane < planes_);
128 } else {
129 // Apply superres for the current superblock row (except for the last
130 // 8-|num_rows_extra| rows).
131 int plane = kPlaneY;
132 do {
133 const ptrdiff_t row_offset =
134 (MultiplyBy4(row4x4_start) >> subsampling_y_[plane]) *
135 frame_buffer_.stride(plane);
136 src[plane] = cdef_buffer_[plane] + row_offset;
137 dst[plane] = superres_buffer_[plane] + row_offset;
138 // Note that the |num_rows_extra| addition is done after the value is
139 // subsampled since we always need to work on |num_rows_extra| extra rows
140 // irrespective of the plane subsampling.
141 rows[plane] = (MultiplyBy4(num_rows4x4) >> subsampling_y_[plane]) +
142 (is_last_row ? 0 : num_rows_extra);
143 } while (++plane < planes_);
144 }
145 ApplySuperRes(src, rows, /*line_buffer_row=*/-1, dst);
146 }
147
ApplySuperResThreaded()148 void PostFilter::ApplySuperResThreaded() {
149 int num_threads = thread_pool_->num_threads() + 1;
150 // The number of rows that will be processed by each thread in the thread pool
151 // (other than the current thread).
152 int thread_pool_rows = height_ / num_threads;
153 thread_pool_rows = std::max(thread_pool_rows, 1);
154 // Make rows of Y plane even when there is subsampling for the other planes.
155 if ((thread_pool_rows & 1) != 0 && subsampling_y_[kPlaneU] != 0) {
156 ++thread_pool_rows;
157 }
158 // Adjust the number of threads to what we really need.
159 num_threads = Clip3(height_ / thread_pool_rows, 1, num_threads);
160 // For the current thread, we round up to process all the remaining rows.
161 int current_thread_rows = height_ - thread_pool_rows * (num_threads - 1);
162 // Make rows of Y plane even when there is subsampling for the other planes.
163 if ((current_thread_rows & 1) != 0 && subsampling_y_[kPlaneU] != 0) {
164 ++current_thread_rows;
165 }
166 assert(current_thread_rows > 0);
167 BlockingCounter pending_workers(num_threads - 1);
168 for (int line_buffer_row = 0, row_start = 0; line_buffer_row < num_threads;
169 ++line_buffer_row, row_start += thread_pool_rows) {
170 std::array<uint8_t*, kMaxPlanes> src;
171 std::array<uint8_t*, kMaxPlanes> dst;
172 std::array<int, kMaxPlanes> rows;
173 int plane = kPlaneY;
174 const int pixel_size_log2 = pixel_size_log2_;
175 do {
176 src[plane] =
177 GetBufferOffset(cdef_buffer_[plane], frame_buffer_.stride(plane),
178 static_cast<Plane>(plane), row_start, 0);
179 dst[plane] =
180 GetBufferOffset(superres_buffer_[plane], frame_buffer_.stride(plane),
181 static_cast<Plane>(plane), row_start, 0);
182 rows[plane] =
183 (((line_buffer_row < num_threads - 1) ? thread_pool_rows
184 : current_thread_rows) >>
185 subsampling_y_[plane]) -
186 1;
187 const int plane_width =
188 MultiplyBy4(frame_header_.columns4x4) >> subsampling_x_[plane];
189 uint8_t* const input =
190 src[plane] + rows[plane] * frame_buffer_.stride(plane);
191 uint8_t* const line_buffer_start =
192 superres_line_buffer_.data(plane) +
193 line_buffer_row * superres_line_buffer_.stride(plane) +
194 (kSuperResHorizontalBorder << pixel_size_log2);
195 memcpy(line_buffer_start, input, plane_width << pixel_size_log2);
196 } while (++plane < planes_);
197 if (line_buffer_row < num_threads - 1) {
198 thread_pool_->Schedule(
199 [this, src, rows, line_buffer_row, dst, &pending_workers]() {
200 ApplySuperRes(src, rows, line_buffer_row, dst);
201 pending_workers.Decrement();
202 });
203 } else {
204 ApplySuperRes(src, rows, line_buffer_row, dst);
205 }
206 }
207 // Wait for the threadpool jobs to finish.
208 pending_workers.Wait();
209 }
210
211 } // namespace libgav1
212