• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <algorithm>
16 #include <array>
17 
18 #include "src/post_filter.h"
19 #include "src/utils/blocking_counter.h"
20 
21 namespace libgav1 {
22 
ApplySuperRes(const std::array<uint8_t *,kMaxPlanes> & src,const std::array<int,kMaxPlanes> & rows,const int line_buffer_row,const std::array<uint8_t *,kMaxPlanes> & dst,bool dst_is_loop_restoration_border)23 void PostFilter::ApplySuperRes(const std::array<uint8_t*, kMaxPlanes>& src,
24                                const std::array<int, kMaxPlanes>& rows,
25                                const int line_buffer_row,
26                                const std::array<uint8_t*, kMaxPlanes>& dst,
27                                bool dst_is_loop_restoration_border /*=false*/) {
28   int plane = kPlaneY;
29   do {
30     const int plane_width =
31         MultiplyBy4(frame_header_.columns4x4) >> subsampling_x_[plane];
32 #if LIBGAV1_MAX_BITDEPTH >= 10
33     if (bitdepth_ >= 10) {
34       auto* input = reinterpret_cast<uint16_t*>(src[plane]);
35       auto* output = reinterpret_cast<uint16_t*>(dst[plane]);
36       const ptrdiff_t input_stride =
37           frame_buffer_.stride(plane) / sizeof(uint16_t);
38       const ptrdiff_t output_stride =
39           (dst_is_loop_restoration_border
40                ? loop_restoration_border_.stride(plane)
41                : frame_buffer_.stride(plane)) /
42           sizeof(uint16_t);
43       if (rows[plane] > 0) {
44         dsp_.super_res(superres_coefficients_[static_cast<int>(plane != 0)],
45                        input, input_stride, rows[plane], plane_width,
46                        super_res_info_[plane].upscaled_width,
47                        super_res_info_[plane].initial_subpixel_x,
48                        super_res_info_[plane].step, output, output_stride);
49       }
50       // In the multi-threaded case, the |superres_line_buffer_| holds the last
51       // input row. Apply SuperRes for that row.
52       if (line_buffer_row >= 0) {
53         auto* const line_buffer_start =
54             reinterpret_cast<uint16_t*>(superres_line_buffer_.data(plane)) +
55             line_buffer_row * superres_line_buffer_.stride(plane) /
56                 sizeof(uint16_t) +
57             kSuperResHorizontalBorder;
58         dsp_.super_res(superres_coefficients_[static_cast<int>(plane != 0)],
59                        line_buffer_start, /*source_stride=*/0,
60                        /*height=*/1, plane_width,
61                        super_res_info_[plane].upscaled_width,
62                        super_res_info_[plane].initial_subpixel_x,
63                        super_res_info_[plane].step,
64                        output + rows[plane] * output_stride, /*dest_stride=*/0);
65       }
66       continue;
67     }
68 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
69     uint8_t* input = src[plane];
70     uint8_t* output = dst[plane];
71     const ptrdiff_t input_stride = frame_buffer_.stride(plane);
72     const ptrdiff_t output_stride = dst_is_loop_restoration_border
73                                         ? loop_restoration_border_.stride(plane)
74                                         : frame_buffer_.stride(plane);
75     if (rows[plane] > 0) {
76       dsp_.super_res(superres_coefficients_[static_cast<int>(plane != 0)],
77                      input, input_stride, rows[plane], plane_width,
78                      super_res_info_[plane].upscaled_width,
79                      super_res_info_[plane].initial_subpixel_x,
80                      super_res_info_[plane].step, output, output_stride);
81     }
82     // In the multi-threaded case, the |superres_line_buffer_| holds the last
83     // input row. Apply SuperRes for that row.
84     if (line_buffer_row >= 0) {
85       uint8_t* const line_buffer_start =
86           superres_line_buffer_.data(plane) +
87           line_buffer_row * superres_line_buffer_.stride(plane) +
88           kSuperResHorizontalBorder;
89       dsp_.super_res(
90           superres_coefficients_[static_cast<int>(plane != 0)],
91           line_buffer_start, /*source_stride=*/0,
92           /*height=*/1, plane_width, super_res_info_[plane].upscaled_width,
93           super_res_info_[plane].initial_subpixel_x,
94           super_res_info_[plane].step, output + rows[plane] * output_stride,
95           /*dest_stride=*/0);
96     }
97   } while (++plane < planes_);
98 }
99 
ApplySuperResForOneSuperBlockRow(int row4x4_start,int sb4x4,bool is_last_row)100 void PostFilter::ApplySuperResForOneSuperBlockRow(int row4x4_start, int sb4x4,
101                                                   bool is_last_row) {
102   assert(row4x4_start >= 0);
103   assert(DoSuperRes());
104   // If not doing cdef, then LR needs two rows of border with superres applied.
105   const int num_rows_extra = (DoCdef() || !DoRestoration()) ? 0 : 2;
106   std::array<uint8_t*, kMaxPlanes> src;
107   std::array<uint8_t*, kMaxPlanes> dst;
108   std::array<int, kMaxPlanes> rows;
109   const int num_rows4x4 =
110       std::min(sb4x4, frame_header_.rows4x4 - row4x4_start) -
111       (is_last_row ? 0 : 2);
112   if (row4x4_start > 0) {
113     const int row4x4 = row4x4_start - 2;
114     int plane = kPlaneY;
115     do {
116       const int row =
117           (MultiplyBy4(row4x4) >> subsampling_y_[plane]) + num_rows_extra;
118       const ptrdiff_t row_offset = row * frame_buffer_.stride(plane);
119       src[plane] = cdef_buffer_[plane] + row_offset;
120       dst[plane] = superres_buffer_[plane] + row_offset;
121       // Note that the |num_rows_extra| subtraction is done after the value is
122       // subsampled since we always need to work on |num_rows_extra| extra rows
123       // irrespective of the plane subsampling.
124       // Apply superres for the last 8-|num_rows_extra| rows of the previous
125       // superblock.
126       rows[plane] = (8 >> subsampling_y_[plane]) - num_rows_extra;
127       // Apply superres for the current superblock row (except for the last
128       // 8-|num_rows_extra| rows).
129       rows[plane] += (MultiplyBy4(num_rows4x4) >> subsampling_y_[plane]) +
130                      (is_last_row ? 0 : num_rows_extra);
131     } while (++plane < planes_);
132   } else {
133     // Apply superres for the current superblock row (except for the last
134     // 8-|num_rows_extra| rows).
135     int plane = kPlaneY;
136     do {
137       const ptrdiff_t row_offset =
138           (MultiplyBy4(row4x4_start) >> subsampling_y_[plane]) *
139           frame_buffer_.stride(plane);
140       src[plane] = cdef_buffer_[plane] + row_offset;
141       dst[plane] = superres_buffer_[plane] + row_offset;
142       // Note that the |num_rows_extra| addition is done after the value is
143       // subsampled since we always need to work on |num_rows_extra| extra rows
144       // irrespective of the plane subsampling.
145       rows[plane] = (MultiplyBy4(num_rows4x4) >> subsampling_y_[plane]) +
146                     (is_last_row ? 0 : num_rows_extra);
147     } while (++plane < planes_);
148   }
149   ApplySuperRes(src, rows, /*line_buffer_row=*/-1, dst);
150 }
151 
ApplySuperResThreaded()152 void PostFilter::ApplySuperResThreaded() {
153   int num_threads = thread_pool_->num_threads() + 1;
154   // The number of rows that will be processed by each thread in the thread pool
155   // (other than the current thread).
156   int thread_pool_rows = frame_header_.height / num_threads;
157   thread_pool_rows = std::max(thread_pool_rows, 1);
158   // Make rows of Y plane even when there is subsampling for the other planes.
159   if ((thread_pool_rows & 1) != 0 && subsampling_y_[kPlaneU] != 0) {
160     ++thread_pool_rows;
161   }
162   // Adjust the number of threads to what we really need.
163   num_threads = Clip3(frame_header_.height / thread_pool_rows, 1, num_threads);
164   // For the current thread, we round up to process all the remaining rows.
165   int current_thread_rows =
166       frame_header_.height - thread_pool_rows * (num_threads - 1);
167   // Make rows of Y plane even when there is subsampling for the other planes.
168   if ((current_thread_rows & 1) != 0 && subsampling_y_[kPlaneU] != 0) {
169     ++current_thread_rows;
170   }
171   assert(current_thread_rows > 0);
172   BlockingCounter pending_workers(num_threads - 1);
173   for (int line_buffer_row = 0, row_start = 0; line_buffer_row < num_threads;
174        ++line_buffer_row, row_start += thread_pool_rows) {
175     std::array<uint8_t*, kMaxPlanes> src;
176     std::array<uint8_t*, kMaxPlanes> dst;
177     std::array<int, kMaxPlanes> rows;
178     int plane = kPlaneY;
179     const int pixel_size_log2 = pixel_size_log2_;
180     do {
181       src[plane] =
182           GetBufferOffset(cdef_buffer_[plane], frame_buffer_.stride(plane),
183                           static_cast<Plane>(plane), row_start, 0);
184       dst[plane] =
185           GetBufferOffset(superres_buffer_[plane], frame_buffer_.stride(plane),
186                           static_cast<Plane>(plane), row_start, 0);
187       rows[plane] =
188           (((line_buffer_row < num_threads - 1) ? thread_pool_rows
189                                                 : current_thread_rows) >>
190            subsampling_y_[plane]) -
191           1;
192       const int plane_width =
193           MultiplyBy4(frame_header_.columns4x4) >> subsampling_x_[plane];
194       uint8_t* const input =
195           src[plane] + rows[plane] * frame_buffer_.stride(plane);
196       uint8_t* const line_buffer_start =
197           superres_line_buffer_.data(plane) +
198           line_buffer_row * superres_line_buffer_.stride(plane) +
199           (kSuperResHorizontalBorder << pixel_size_log2);
200       memcpy(line_buffer_start, input, plane_width << pixel_size_log2);
201     } while (++plane < planes_);
202     if (line_buffer_row < num_threads - 1) {
203       thread_pool_->Schedule(
204           [this, src, rows, line_buffer_row, dst, &pending_workers]() {
205             ApplySuperRes(src, rows, line_buffer_row, dst);
206             pending_workers.Decrement();
207           });
208     } else {
209       ApplySuperRes(src, rows, line_buffer_row, dst);
210     }
211   }
212   // Wait for the threadpool jobs to finish.
213   pending_workers.Wait();
214 }
215 
216 }  // namespace libgav1
217