1 // Copyright 2020 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <algorithm>
16 #include <array>
17
18 #include "src/post_filter.h"
19 #include "src/utils/blocking_counter.h"
20
21 namespace libgav1 {
22
ApplySuperRes(const std::array<uint8_t *,kMaxPlanes> & src,const std::array<int,kMaxPlanes> & rows,const int line_buffer_row,const std::array<uint8_t *,kMaxPlanes> & dst,bool dst_is_loop_restoration_border)23 void PostFilter::ApplySuperRes(const std::array<uint8_t*, kMaxPlanes>& src,
24 const std::array<int, kMaxPlanes>& rows,
25 const int line_buffer_row,
26 const std::array<uint8_t*, kMaxPlanes>& dst,
27 bool dst_is_loop_restoration_border /*=false*/) {
28 int plane = kPlaneY;
29 do {
30 const int plane_width =
31 MultiplyBy4(frame_header_.columns4x4) >> subsampling_x_[plane];
32 #if LIBGAV1_MAX_BITDEPTH >= 10
33 if (bitdepth_ >= 10) {
34 auto* input = reinterpret_cast<uint16_t*>(src[plane]);
35 auto* output = reinterpret_cast<uint16_t*>(dst[plane]);
36 const ptrdiff_t input_stride =
37 frame_buffer_.stride(plane) / sizeof(uint16_t);
38 const ptrdiff_t output_stride =
39 (dst_is_loop_restoration_border
40 ? loop_restoration_border_.stride(plane)
41 : frame_buffer_.stride(plane)) /
42 sizeof(uint16_t);
43 if (rows[plane] > 0) {
44 dsp_.super_res(superres_coefficients_[static_cast<int>(plane != 0)],
45 input, input_stride, rows[plane], plane_width,
46 super_res_info_[plane].upscaled_width,
47 super_res_info_[plane].initial_subpixel_x,
48 super_res_info_[plane].step, output, output_stride);
49 }
50 // In the multi-threaded case, the |superres_line_buffer_| holds the last
51 // input row. Apply SuperRes for that row.
52 if (line_buffer_row >= 0) {
53 auto* const line_buffer_start =
54 reinterpret_cast<uint16_t*>(superres_line_buffer_.data(plane)) +
55 line_buffer_row * superres_line_buffer_.stride(plane) /
56 sizeof(uint16_t) +
57 kSuperResHorizontalBorder;
58 dsp_.super_res(superres_coefficients_[static_cast<int>(plane != 0)],
59 line_buffer_start, /*source_stride=*/0,
60 /*height=*/1, plane_width,
61 super_res_info_[plane].upscaled_width,
62 super_res_info_[plane].initial_subpixel_x,
63 super_res_info_[plane].step,
64 output + rows[plane] * output_stride, /*dest_stride=*/0);
65 }
66 continue;
67 }
68 #endif // LIBGAV1_MAX_BITDEPTH >= 10
69 uint8_t* input = src[plane];
70 uint8_t* output = dst[plane];
71 const ptrdiff_t input_stride = frame_buffer_.stride(plane);
72 const ptrdiff_t output_stride = dst_is_loop_restoration_border
73 ? loop_restoration_border_.stride(plane)
74 : frame_buffer_.stride(plane);
75 if (rows[plane] > 0) {
76 dsp_.super_res(superres_coefficients_[static_cast<int>(plane != 0)],
77 input, input_stride, rows[plane], plane_width,
78 super_res_info_[plane].upscaled_width,
79 super_res_info_[plane].initial_subpixel_x,
80 super_res_info_[plane].step, output, output_stride);
81 }
82 // In the multi-threaded case, the |superres_line_buffer_| holds the last
83 // input row. Apply SuperRes for that row.
84 if (line_buffer_row >= 0) {
85 uint8_t* const line_buffer_start =
86 superres_line_buffer_.data(plane) +
87 line_buffer_row * superres_line_buffer_.stride(plane) +
88 kSuperResHorizontalBorder;
89 dsp_.super_res(
90 superres_coefficients_[static_cast<int>(plane != 0)],
91 line_buffer_start, /*source_stride=*/0,
92 /*height=*/1, plane_width, super_res_info_[plane].upscaled_width,
93 super_res_info_[plane].initial_subpixel_x,
94 super_res_info_[plane].step, output + rows[plane] * output_stride,
95 /*dest_stride=*/0);
96 }
97 } while (++plane < planes_);
98 }
99
ApplySuperResForOneSuperBlockRow(int row4x4_start,int sb4x4,bool is_last_row)100 void PostFilter::ApplySuperResForOneSuperBlockRow(int row4x4_start, int sb4x4,
101 bool is_last_row) {
102 assert(row4x4_start >= 0);
103 assert(DoSuperRes());
104 // If not doing cdef, then LR needs two rows of border with superres applied.
105 const int num_rows_extra = (DoCdef() || !DoRestoration()) ? 0 : 2;
106 std::array<uint8_t*, kMaxPlanes> src;
107 std::array<uint8_t*, kMaxPlanes> dst;
108 std::array<int, kMaxPlanes> rows;
109 const int num_rows4x4 =
110 std::min(sb4x4, frame_header_.rows4x4 - row4x4_start) -
111 (is_last_row ? 0 : 2);
112 if (row4x4_start > 0) {
113 const int row4x4 = row4x4_start - 2;
114 int plane = kPlaneY;
115 do {
116 const int row =
117 (MultiplyBy4(row4x4) >> subsampling_y_[plane]) + num_rows_extra;
118 const ptrdiff_t row_offset = row * frame_buffer_.stride(plane);
119 src[plane] = cdef_buffer_[plane] + row_offset;
120 dst[plane] = superres_buffer_[plane] + row_offset;
121 // Note that the |num_rows_extra| subtraction is done after the value is
122 // subsampled since we always need to work on |num_rows_extra| extra rows
123 // irrespective of the plane subsampling.
124 // Apply superres for the last 8-|num_rows_extra| rows of the previous
125 // superblock.
126 rows[plane] = (8 >> subsampling_y_[plane]) - num_rows_extra;
127 // Apply superres for the current superblock row (except for the last
128 // 8-|num_rows_extra| rows).
129 rows[plane] += (MultiplyBy4(num_rows4x4) >> subsampling_y_[plane]) +
130 (is_last_row ? 0 : num_rows_extra);
131 } while (++plane < planes_);
132 } else {
133 // Apply superres for the current superblock row (except for the last
134 // 8-|num_rows_extra| rows).
135 int plane = kPlaneY;
136 do {
137 const ptrdiff_t row_offset =
138 (MultiplyBy4(row4x4_start) >> subsampling_y_[plane]) *
139 frame_buffer_.stride(plane);
140 src[plane] = cdef_buffer_[plane] + row_offset;
141 dst[plane] = superres_buffer_[plane] + row_offset;
142 // Note that the |num_rows_extra| addition is done after the value is
143 // subsampled since we always need to work on |num_rows_extra| extra rows
144 // irrespective of the plane subsampling.
145 rows[plane] = (MultiplyBy4(num_rows4x4) >> subsampling_y_[plane]) +
146 (is_last_row ? 0 : num_rows_extra);
147 } while (++plane < planes_);
148 }
149 ApplySuperRes(src, rows, /*line_buffer_row=*/-1, dst);
150 }
151
ApplySuperResThreaded()152 void PostFilter::ApplySuperResThreaded() {
153 int num_threads = thread_pool_->num_threads() + 1;
154 // The number of rows that will be processed by each thread in the thread pool
155 // (other than the current thread).
156 int thread_pool_rows = frame_header_.height / num_threads;
157 thread_pool_rows = std::max(thread_pool_rows, 1);
158 // Make rows of Y plane even when there is subsampling for the other planes.
159 if ((thread_pool_rows & 1) != 0 && subsampling_y_[kPlaneU] != 0) {
160 ++thread_pool_rows;
161 }
162 // Adjust the number of threads to what we really need.
163 num_threads = Clip3(frame_header_.height / thread_pool_rows, 1, num_threads);
164 // For the current thread, we round up to process all the remaining rows.
165 int current_thread_rows =
166 frame_header_.height - thread_pool_rows * (num_threads - 1);
167 // Make rows of Y plane even when there is subsampling for the other planes.
168 if ((current_thread_rows & 1) != 0 && subsampling_y_[kPlaneU] != 0) {
169 ++current_thread_rows;
170 }
171 assert(current_thread_rows > 0);
172 BlockingCounter pending_workers(num_threads - 1);
173 for (int line_buffer_row = 0, row_start = 0; line_buffer_row < num_threads;
174 ++line_buffer_row, row_start += thread_pool_rows) {
175 std::array<uint8_t*, kMaxPlanes> src;
176 std::array<uint8_t*, kMaxPlanes> dst;
177 std::array<int, kMaxPlanes> rows;
178 int plane = kPlaneY;
179 const int pixel_size_log2 = pixel_size_log2_;
180 do {
181 src[plane] =
182 GetBufferOffset(cdef_buffer_[plane], frame_buffer_.stride(plane),
183 static_cast<Plane>(plane), row_start, 0);
184 dst[plane] =
185 GetBufferOffset(superres_buffer_[plane], frame_buffer_.stride(plane),
186 static_cast<Plane>(plane), row_start, 0);
187 rows[plane] =
188 (((line_buffer_row < num_threads - 1) ? thread_pool_rows
189 : current_thread_rows) >>
190 subsampling_y_[plane]) -
191 1;
192 const int plane_width =
193 MultiplyBy4(frame_header_.columns4x4) >> subsampling_x_[plane];
194 uint8_t* const input =
195 src[plane] + rows[plane] * frame_buffer_.stride(plane);
196 uint8_t* const line_buffer_start =
197 superres_line_buffer_.data(plane) +
198 line_buffer_row * superres_line_buffer_.stride(plane) +
199 (kSuperResHorizontalBorder << pixel_size_log2);
200 memcpy(line_buffer_start, input, plane_width << pixel_size_log2);
201 } while (++plane < planes_);
202 if (line_buffer_row < num_threads - 1) {
203 thread_pool_->Schedule(
204 [this, src, rows, line_buffer_row, dst, &pending_workers]() {
205 ApplySuperRes(src, rows, line_buffer_row, dst);
206 pending_workers.Decrement();
207 });
208 } else {
209 ApplySuperRes(src, rows, line_buffer_row, dst);
210 }
211 }
212 // Wait for the threadpool jobs to finish.
213 pending_workers.Wait();
214 }
215
216 } // namespace libgav1
217