1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/decoder_impl.h"
16
17 #include <algorithm>
18 #include <atomic>
19 #include <cassert>
20 #include <iterator>
21 #include <new>
22 #include <utility>
23
24 #include "src/dsp/common.h"
25 #include "src/dsp/constants.h"
26 #include "src/dsp/dsp.h"
27 #include "src/film_grain.h"
28 #include "src/frame_buffer_utils.h"
29 #include "src/frame_scratch_buffer.h"
30 #include "src/loop_restoration_info.h"
31 #include "src/obu_parser.h"
32 #include "src/post_filter.h"
33 #include "src/prediction_mask.h"
34 #include "src/threading_strategy.h"
35 #include "src/utils/blocking_counter.h"
36 #include "src/utils/common.h"
37 #include "src/utils/constants.h"
38 #include "src/utils/logging.h"
39 #include "src/utils/raw_bit_reader.h"
40 #include "src/utils/segmentation.h"
41 #include "src/utils/threadpool.h"
42 #include "src/yuv_buffer.h"
43
44 namespace libgav1 {
45 namespace {
46
// Upper bound on a block's width, expressed in units of 4x4 samples.
constexpr int kMaxBlockWidth4x4{32};
// Upper bound on a block's height, expressed in units of 4x4 samples.
constexpr int kMaxBlockHeight4x4{32};
49
50 // Computes the bottom border size in pixels. If CDEF, loop restoration or
51 // SuperRes is enabled, adds extra border pixels to facilitate those steps to
52 // happen nearly in-place (a few extra rows instead of an entire frame buffer).
53 // The logic in this function should match the corresponding logic for
54 // |vertical_shift| in the PostFilter constructor.
GetBottomBorderPixels(const bool do_cdef,const bool do_restoration,const bool do_superres,const int subsampling_y)55 int GetBottomBorderPixels(const bool do_cdef, const bool do_restoration,
56 const bool do_superres, const int subsampling_y) {
57 int extra_border = 0;
58 if (do_cdef) {
59 extra_border += kCdefBorder;
60 } else if (do_restoration) {
61 // If CDEF is enabled, loop restoration is safe without extra border.
62 extra_border += kRestorationVerticalBorder;
63 }
64 if (do_superres) extra_border += kSuperResVerticalBorder;
65 // Double the number of extra bottom border pixels if the bottom border will
66 // be subsampled.
67 extra_border <<= subsampling_y;
68 return Align(kBorderPixels + extra_border, 2); // Must be a multiple of 2.
69 }
70
71 // Sets |frame_scratch_buffer->tile_decoding_failed| to true (while holding on
72 // to |frame_scratch_buffer->superblock_row_mutex|) and notifies the first
73 // |count| condition variables in
74 // |frame_scratch_buffer->superblock_row_progress_condvar|.
SetFailureAndNotifyAll(FrameScratchBuffer * const frame_scratch_buffer,int count)75 void SetFailureAndNotifyAll(FrameScratchBuffer* const frame_scratch_buffer,
76 int count) {
77 {
78 std::lock_guard<std::mutex> lock(
79 frame_scratch_buffer->superblock_row_mutex);
80 frame_scratch_buffer->tile_decoding_failed = true;
81 }
82 std::condition_variable* const condvars =
83 frame_scratch_buffer->superblock_row_progress_condvar.get();
84 for (int i = 0; i < count; ++i) {
85 condvars[i].notify_one();
86 }
87 }
88
89 // Helper class that releases the frame scratch buffer in the destructor.
90 class FrameScratchBufferReleaser {
91 public:
FrameScratchBufferReleaser(FrameScratchBufferPool * frame_scratch_buffer_pool,std::unique_ptr<FrameScratchBuffer> * frame_scratch_buffer)92 FrameScratchBufferReleaser(
93 FrameScratchBufferPool* frame_scratch_buffer_pool,
94 std::unique_ptr<FrameScratchBuffer>* frame_scratch_buffer)
95 : frame_scratch_buffer_pool_(frame_scratch_buffer_pool),
96 frame_scratch_buffer_(frame_scratch_buffer) {}
~FrameScratchBufferReleaser()97 ~FrameScratchBufferReleaser() {
98 frame_scratch_buffer_pool_->Release(std::move(*frame_scratch_buffer_));
99 }
100
101 private:
102 FrameScratchBufferPool* const frame_scratch_buffer_pool_;
103 std::unique_ptr<FrameScratchBuffer>* const frame_scratch_buffer_;
104 };
105
106 // Sets the |frame|'s segmentation map for two cases. The third case is handled
107 // in Tile::DecodeBlock().
SetSegmentationMap(const ObuFrameHeader & frame_header,const SegmentationMap * prev_segment_ids,RefCountedBuffer * const frame)108 void SetSegmentationMap(const ObuFrameHeader& frame_header,
109 const SegmentationMap* prev_segment_ids,
110 RefCountedBuffer* const frame) {
111 if (!frame_header.segmentation.enabled) {
112 // All segment_id's are 0.
113 frame->segmentation_map()->Clear();
114 } else if (!frame_header.segmentation.update_map) {
115 // Copy from prev_segment_ids.
116 if (prev_segment_ids == nullptr) {
117 // Treat a null prev_segment_ids pointer as if it pointed to a
118 // segmentation map containing all 0s.
119 frame->segmentation_map()->Clear();
120 } else {
121 frame->segmentation_map()->CopyFrom(*prev_segment_ids);
122 }
123 }
124 }
125
DecodeTilesNonFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter)126 StatusCode DecodeTilesNonFrameParallel(
127 const ObuSequenceHeader& sequence_header,
128 const ObuFrameHeader& frame_header,
129 const Vector<std::unique_ptr<Tile>>& tiles,
130 FrameScratchBuffer* const frame_scratch_buffer,
131 PostFilter* const post_filter) {
132 // Decode in superblock row order.
133 const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
134 std::unique_ptr<TileScratchBuffer> tile_scratch_buffer =
135 frame_scratch_buffer->tile_scratch_buffer_pool.Get();
136 if (tile_scratch_buffer == nullptr) return kLibgav1StatusOutOfMemory;
137 for (int row4x4 = 0; row4x4 < frame_header.rows4x4;
138 row4x4 += block_width4x4) {
139 for (const auto& tile_ptr : tiles) {
140 if (!tile_ptr->ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
141 row4x4, tile_scratch_buffer.get())) {
142 return kLibgav1StatusUnknownError;
143 }
144 }
145 post_filter->ApplyFilteringForOneSuperBlockRow(
146 row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
147 /*do_deblock=*/true);
148 }
149 frame_scratch_buffer->tile_scratch_buffer_pool.Release(
150 std::move(tile_scratch_buffer));
151 return kStatusOk;
152 }
153
DecodeTilesThreadedNonFrameParallel(const Vector<std::unique_ptr<Tile>> & tiles,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,BlockingCounterWithStatus * const pending_tiles)154 StatusCode DecodeTilesThreadedNonFrameParallel(
155 const Vector<std::unique_ptr<Tile>>& tiles,
156 FrameScratchBuffer* const frame_scratch_buffer,
157 PostFilter* const post_filter,
158 BlockingCounterWithStatus* const pending_tiles) {
159 ThreadingStrategy& threading_strategy =
160 frame_scratch_buffer->threading_strategy;
161 const int num_workers = threading_strategy.tile_thread_count();
162 BlockingCounterWithStatus pending_workers(num_workers);
163 std::atomic<int> tile_counter(0);
164 const int tile_count = static_cast<int>(tiles.size());
165 bool tile_decoding_failed = false;
166 // Submit tile decoding jobs to the thread pool.
167 for (int i = 0; i < num_workers; ++i) {
168 threading_strategy.tile_thread_pool()->Schedule([&tiles, tile_count,
169 &tile_counter,
170 &pending_workers,
171 &pending_tiles]() {
172 bool failed = false;
173 int index;
174 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
175 tile_count) {
176 if (!failed) {
177 const auto& tile_ptr = tiles[index];
178 if (!tile_ptr->ParseAndDecode()) {
179 LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
180 failed = true;
181 }
182 } else {
183 pending_tiles->Decrement(false);
184 }
185 }
186 pending_workers.Decrement(!failed);
187 });
188 }
189 // Have the current thread partake in tile decoding.
190 int index;
191 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
192 tile_count) {
193 if (!tile_decoding_failed) {
194 const auto& tile_ptr = tiles[index];
195 if (!tile_ptr->ParseAndDecode()) {
196 LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
197 tile_decoding_failed = true;
198 }
199 } else {
200 pending_tiles->Decrement(false);
201 }
202 }
203 // Wait until all the workers are done. This ensures that all the tiles have
204 // been parsed.
205 tile_decoding_failed |= !pending_workers.Wait();
206 // Wait until all the tiles have been decoded.
207 tile_decoding_failed |= !pending_tiles->Wait();
208 if (tile_decoding_failed) return kStatusUnknownError;
209 assert(threading_strategy.post_filter_thread_pool() != nullptr);
210 post_filter->ApplyFilteringThreaded();
211 return kStatusOk;
212 }
213
DecodeTilesFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,const SymbolDecoderContext & saved_symbol_decoder_context,const SegmentationMap * const prev_segment_ids,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,RefCountedBuffer * const current_frame)214 StatusCode DecodeTilesFrameParallel(
215 const ObuSequenceHeader& sequence_header,
216 const ObuFrameHeader& frame_header,
217 const Vector<std::unique_ptr<Tile>>& tiles,
218 const SymbolDecoderContext& saved_symbol_decoder_context,
219 const SegmentationMap* const prev_segment_ids,
220 FrameScratchBuffer* const frame_scratch_buffer,
221 PostFilter* const post_filter, RefCountedBuffer* const current_frame) {
222 // Parse the frame.
223 for (const auto& tile : tiles) {
224 if (!tile->Parse()) {
225 LIBGAV1_DLOG(ERROR, "Failed to parse tile number: %d\n", tile->number());
226 return kStatusUnknownError;
227 }
228 }
229 if (frame_header.enable_frame_end_update_cdf) {
230 frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
231 }
232 current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
233 SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
234 // Mark frame as parsed.
235 current_frame->SetFrameState(kFrameStateParsed);
236 std::unique_ptr<TileScratchBuffer> tile_scratch_buffer =
237 frame_scratch_buffer->tile_scratch_buffer_pool.Get();
238 if (tile_scratch_buffer == nullptr) {
239 return kStatusOutOfMemory;
240 }
241 const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
242 // Decode in superblock row order (inter prediction in the Tile class will
243 // block until the required superblocks in the reference frame are decoded).
244 for (int row4x4 = 0; row4x4 < frame_header.rows4x4;
245 row4x4 += block_width4x4) {
246 for (const auto& tile_ptr : tiles) {
247 if (!tile_ptr->ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
248 row4x4, tile_scratch_buffer.get())) {
249 LIBGAV1_DLOG(ERROR, "Failed to decode tile number: %d\n",
250 tile_ptr->number());
251 return kStatusUnknownError;
252 }
253 }
254 const int progress_row = post_filter->ApplyFilteringForOneSuperBlockRow(
255 row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
256 /*do_deblock=*/true);
257 if (progress_row >= 0) {
258 current_frame->SetProgress(progress_row);
259 }
260 }
261 // Mark frame as decoded (we no longer care about row-level progress since the
262 // entire frame has been decoded).
263 current_frame->SetFrameState(kFrameStateDecoded);
264 frame_scratch_buffer->tile_scratch_buffer_pool.Release(
265 std::move(tile_scratch_buffer));
266 return kStatusOk;
267 }
268
269 // Helper function used by DecodeTilesThreadedFrameParallel. Applies the
270 // deblocking filter for tile boundaries for the superblock row at |row4x4|.
ApplyDeblockingFilterForTileBoundaries(PostFilter * const post_filter,const std::unique_ptr<Tile> * tile_row_base,const ObuFrameHeader & frame_header,int row4x4,int block_width4x4,int tile_columns,bool decode_entire_tiles_in_worker_threads)271 void ApplyDeblockingFilterForTileBoundaries(
272 PostFilter* const post_filter, const std::unique_ptr<Tile>* tile_row_base,
273 const ObuFrameHeader& frame_header, int row4x4, int block_width4x4,
274 int tile_columns, bool decode_entire_tiles_in_worker_threads) {
275 // Apply vertical deblock filtering for the first 64 columns of each tile.
276 for (int tile_column = 0; tile_column < tile_columns; ++tile_column) {
277 const Tile& tile = *tile_row_base[tile_column];
278 post_filter->ApplyDeblockFilter(
279 kLoopFilterTypeVertical, row4x4, tile.column4x4_start(),
280 tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
281 }
282 if (decode_entire_tiles_in_worker_threads &&
283 row4x4 == tile_row_base[0]->row4x4_start()) {
284 // This is the first superblock row of a tile row. In this case, apply
285 // horizontal deblock filtering for the entire superblock row.
286 post_filter->ApplyDeblockFilter(kLoopFilterTypeHorizontal, row4x4, 0,
287 frame_header.columns4x4, block_width4x4);
288 } else {
289 // Apply horizontal deblock filtering for the first 64 columns of the
290 // first tile.
291 const Tile& first_tile = *tile_row_base[0];
292 post_filter->ApplyDeblockFilter(
293 kLoopFilterTypeHorizontal, row4x4, first_tile.column4x4_start(),
294 first_tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
295 // Apply horizontal deblock filtering for the last 64 columns of the
296 // previous tile and the first 64 columns of the current tile.
297 for (int tile_column = 1; tile_column < tile_columns; ++tile_column) {
298 const Tile& tile = *tile_row_base[tile_column];
299 // If the previous tile has more than 64 columns, then include those
300 // for the horizontal deblock.
301 const Tile& previous_tile = *tile_row_base[tile_column - 1];
302 const int column4x4_start =
303 tile.column4x4_start() -
304 ((tile.column4x4_start() - kNum4x4InLoopFilterUnit !=
305 previous_tile.column4x4_start())
306 ? kNum4x4InLoopFilterUnit
307 : 0);
308 post_filter->ApplyDeblockFilter(
309 kLoopFilterTypeHorizontal, row4x4, column4x4_start,
310 tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
311 }
312 // Apply horizontal deblock filtering for the last 64 columns of the
313 // last tile.
314 const Tile& last_tile = *tile_row_base[tile_columns - 1];
315 // Identify the last column4x4 value and do horizontal filtering for
316 // that column4x4. The value of last column4x4 is the nearest multiple
317 // of 16 that is before tile.column4x4_end().
318 const int column4x4_start = (last_tile.column4x4_end() - 1) & ~15;
319 // If column4x4_start is the same as tile.column4x4_start() then it
320 // means that the last tile has <= 64 columns. So there is nothing left
321 // to deblock (since it was already deblocked in the loop above).
322 if (column4x4_start != last_tile.column4x4_start()) {
323 post_filter->ApplyDeblockFilter(
324 kLoopFilterTypeHorizontal, row4x4, column4x4_start,
325 last_tile.column4x4_end(), block_width4x4);
326 }
327 }
328 }
329
330 // Helper function used by DecodeTilesThreadedFrameParallel. Decodes the
331 // superblock row starting at |row4x4| for tile at index |tile_index| in the
332 // list of tiles |tiles|. If the decoding is successful, then it does the
333 // following:
334 // * Schedule the next superblock row in the current tile column for decoding
335 // (the next superblock row may be in a different tile than the current
336 // one).
337 // * If an entire superblock row of the frame has been decoded, it notifies
338 // the waiters (if there are any).
DecodeSuperBlockRowInTile(const Vector<std::unique_ptr<Tile>> & tiles,size_t tile_index,int row4x4,const int superblock_size4x4,const int tile_columns,const int superblock_rows,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,BlockingCounter * const pending_jobs)339 void DecodeSuperBlockRowInTile(
340 const Vector<std::unique_ptr<Tile>>& tiles, size_t tile_index, int row4x4,
341 const int superblock_size4x4, const int tile_columns,
342 const int superblock_rows, FrameScratchBuffer* const frame_scratch_buffer,
343 PostFilter* const post_filter, BlockingCounter* const pending_jobs) {
344 std::unique_ptr<TileScratchBuffer> scratch_buffer =
345 frame_scratch_buffer->tile_scratch_buffer_pool.Get();
346 if (scratch_buffer == nullptr) {
347 SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
348 return;
349 }
350 Tile& tile = *tiles[tile_index];
351 const bool ok = tile.ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
352 row4x4, scratch_buffer.get());
353 frame_scratch_buffer->tile_scratch_buffer_pool.Release(
354 std::move(scratch_buffer));
355 if (!ok) {
356 SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
357 return;
358 }
359 if (post_filter->DoDeblock()) {
360 // Apply vertical deblock filtering for all the columns in this tile except
361 // for the first 64 columns.
362 post_filter->ApplyDeblockFilter(
363 kLoopFilterTypeVertical, row4x4,
364 tile.column4x4_start() + kNum4x4InLoopFilterUnit, tile.column4x4_end(),
365 superblock_size4x4);
366 // Apply horizontal deblock filtering for all the columns in this tile
367 // except for the first and the last 64 columns.
368 // Note about the last tile of each row: For the last tile, column4x4_end
369 // may not be a multiple of 16. In that case it is still okay to simply
370 // subtract 16 since ApplyDeblockFilter() will only do the filters in
371 // increments of 64 columns (or 32 columns for chroma with subsampling).
372 post_filter->ApplyDeblockFilter(
373 kLoopFilterTypeHorizontal, row4x4,
374 tile.column4x4_start() + kNum4x4InLoopFilterUnit,
375 tile.column4x4_end() - kNum4x4InLoopFilterUnit, superblock_size4x4);
376 }
377 const int superblock_size4x4_log2 = FloorLog2(superblock_size4x4);
378 const int index = row4x4 >> superblock_size4x4_log2;
379 int* const superblock_row_progress =
380 frame_scratch_buffer->superblock_row_progress.get();
381 std::condition_variable* const superblock_row_progress_condvar =
382 frame_scratch_buffer->superblock_row_progress_condvar.get();
383 bool notify;
384 {
385 std::lock_guard<std::mutex> lock(
386 frame_scratch_buffer->superblock_row_mutex);
387 notify = ++superblock_row_progress[index] == tile_columns;
388 }
389 if (notify) {
390 // We are done decoding this superblock row. Notify the post filtering
391 // thread.
392 superblock_row_progress_condvar[index].notify_one();
393 }
394 // Schedule the next superblock row (if one exists).
395 ThreadPool& thread_pool =
396 *frame_scratch_buffer->threading_strategy.thread_pool();
397 const int next_row4x4 = row4x4 + superblock_size4x4;
398 if (!tile.IsRow4x4Inside(next_row4x4)) {
399 tile_index += tile_columns;
400 }
401 if (tile_index >= tiles.size()) return;
402 pending_jobs->IncrementBy(1);
403 thread_pool.Schedule([&tiles, tile_index, next_row4x4, superblock_size4x4,
404 tile_columns, superblock_rows, frame_scratch_buffer,
405 post_filter, pending_jobs]() {
406 DecodeSuperBlockRowInTile(tiles, tile_index, next_row4x4,
407 superblock_size4x4, tile_columns, superblock_rows,
408 frame_scratch_buffer, post_filter, pending_jobs);
409 pending_jobs->Decrement();
410 });
411 }
412
DecodeTilesThreadedFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,const SymbolDecoderContext & saved_symbol_decoder_context,const SegmentationMap * const prev_segment_ids,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,RefCountedBuffer * const current_frame)413 StatusCode DecodeTilesThreadedFrameParallel(
414 const ObuSequenceHeader& sequence_header,
415 const ObuFrameHeader& frame_header,
416 const Vector<std::unique_ptr<Tile>>& tiles,
417 const SymbolDecoderContext& saved_symbol_decoder_context,
418 const SegmentationMap* const prev_segment_ids,
419 FrameScratchBuffer* const frame_scratch_buffer,
420 PostFilter* const post_filter, RefCountedBuffer* const current_frame) {
421 // Parse the frame.
422 ThreadPool& thread_pool =
423 *frame_scratch_buffer->threading_strategy.thread_pool();
424 std::atomic<int> tile_counter(0);
425 const int tile_count = static_cast<int>(tiles.size());
426 const int num_workers = thread_pool.num_threads();
427 BlockingCounterWithStatus parse_workers(num_workers);
428 // Submit tile parsing jobs to the thread pool.
429 for (int i = 0; i < num_workers; ++i) {
430 thread_pool.Schedule([&tiles, tile_count, &tile_counter, &parse_workers]() {
431 bool failed = false;
432 int index;
433 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
434 tile_count) {
435 if (!failed) {
436 const auto& tile_ptr = tiles[index];
437 if (!tile_ptr->Parse()) {
438 LIBGAV1_DLOG(ERROR, "Error parsing tile #%d", tile_ptr->number());
439 failed = true;
440 }
441 }
442 }
443 parse_workers.Decrement(!failed);
444 });
445 }
446
447 // Have the current thread participate in parsing.
448 bool failed = false;
449 int index;
450 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
451 tile_count) {
452 if (!failed) {
453 const auto& tile_ptr = tiles[index];
454 if (!tile_ptr->Parse()) {
455 LIBGAV1_DLOG(ERROR, "Error parsing tile #%d", tile_ptr->number());
456 failed = true;
457 }
458 }
459 }
460
461 // Wait until all the parse workers are done. This ensures that all the tiles
462 // have been parsed.
463 if (!parse_workers.Wait() || failed) {
464 return kLibgav1StatusUnknownError;
465 }
466 if (frame_header.enable_frame_end_update_cdf) {
467 frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
468 }
469 current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
470 SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
471 current_frame->SetFrameState(kFrameStateParsed);
472
473 // Decode the frame.
474 const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
475 const int block_width4x4_log2 =
476 sequence_header.use_128x128_superblock ? 5 : 4;
477 const int superblock_rows =
478 (frame_header.rows4x4 + block_width4x4 - 1) >> block_width4x4_log2;
479 if (!frame_scratch_buffer->superblock_row_progress.Resize(superblock_rows) ||
480 !frame_scratch_buffer->superblock_row_progress_condvar.Resize(
481 superblock_rows)) {
482 return kLibgav1StatusOutOfMemory;
483 }
484 int* const superblock_row_progress =
485 frame_scratch_buffer->superblock_row_progress.get();
486 memset(superblock_row_progress, 0,
487 superblock_rows * sizeof(superblock_row_progress[0]));
488 frame_scratch_buffer->tile_decoding_failed = false;
489 const int tile_columns = frame_header.tile_info.tile_columns;
490 const bool decode_entire_tiles_in_worker_threads =
491 num_workers >= tile_columns;
492 BlockingCounter pending_jobs(
493 decode_entire_tiles_in_worker_threads ? num_workers : tile_columns);
494 if (decode_entire_tiles_in_worker_threads) {
495 // Submit tile decoding jobs to the thread pool.
496 tile_counter = 0;
497 for (int i = 0; i < num_workers; ++i) {
498 thread_pool.Schedule([&tiles, tile_count, &tile_counter, &pending_jobs,
499 frame_scratch_buffer, superblock_rows]() {
500 bool failed = false;
501 int index;
502 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
503 tile_count) {
504 if (failed) continue;
505 const auto& tile_ptr = tiles[index];
506 if (!tile_ptr->Decode(
507 &frame_scratch_buffer->superblock_row_mutex,
508 frame_scratch_buffer->superblock_row_progress.get(),
509 frame_scratch_buffer->superblock_row_progress_condvar
510 .get())) {
511 LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
512 failed = true;
513 SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
514 }
515 }
516 pending_jobs.Decrement();
517 });
518 }
519 } else {
520 // Schedule the jobs for first tile row.
521 for (int tile_index = 0; tile_index < tile_columns; ++tile_index) {
522 thread_pool.Schedule([&tiles, tile_index, block_width4x4, tile_columns,
523 superblock_rows, frame_scratch_buffer, post_filter,
524 &pending_jobs]() {
525 DecodeSuperBlockRowInTile(
526 tiles, tile_index, 0, block_width4x4, tile_columns, superblock_rows,
527 frame_scratch_buffer, post_filter, &pending_jobs);
528 pending_jobs.Decrement();
529 });
530 }
531 }
532
533 // Current thread will do the post filters.
534 std::condition_variable* const superblock_row_progress_condvar =
535 frame_scratch_buffer->superblock_row_progress_condvar.get();
536 const std::unique_ptr<Tile>* tile_row_base = &tiles[0];
537 for (int row4x4 = 0, index = 0; row4x4 < frame_header.rows4x4;
538 row4x4 += block_width4x4, ++index) {
539 if (!tile_row_base[0]->IsRow4x4Inside(row4x4)) {
540 tile_row_base += tile_columns;
541 }
542 {
543 std::unique_lock<std::mutex> lock(
544 frame_scratch_buffer->superblock_row_mutex);
545 while (superblock_row_progress[index] != tile_columns &&
546 !frame_scratch_buffer->tile_decoding_failed) {
547 superblock_row_progress_condvar[index].wait(lock);
548 }
549 if (frame_scratch_buffer->tile_decoding_failed) break;
550 }
551 if (post_filter->DoDeblock()) {
552 // Apply deblocking filter for the tile boundaries of this superblock row.
553 // The deblocking filter for the internal blocks will be applied in the
554 // tile worker threads. In this thread, we will only have to apply
555 // deblocking filter for the tile boundaries.
556 ApplyDeblockingFilterForTileBoundaries(
557 post_filter, tile_row_base, frame_header, row4x4, block_width4x4,
558 tile_columns, decode_entire_tiles_in_worker_threads);
559 }
560 // Apply all the post filters other than deblocking.
561 const int progress_row = post_filter->ApplyFilteringForOneSuperBlockRow(
562 row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
563 /*do_deblock=*/false);
564 if (progress_row >= 0) {
565 current_frame->SetProgress(progress_row);
566 }
567 }
568 // Wait until all the pending jobs are done. This ensures that all the tiles
569 // have been decoded and wrapped up.
570 pending_jobs.Wait();
571 {
572 std::lock_guard<std::mutex> lock(
573 frame_scratch_buffer->superblock_row_mutex);
574 if (frame_scratch_buffer->tile_decoding_failed) {
575 return kLibgav1StatusUnknownError;
576 }
577 }
578
579 current_frame->SetFrameState(kFrameStateDecoded);
580 return kStatusOk;
581 }
582
583 } // namespace
584
585 // static
Create(const DecoderSettings * settings,std::unique_ptr<DecoderImpl> * output)586 StatusCode DecoderImpl::Create(const DecoderSettings* settings,
587 std::unique_ptr<DecoderImpl>* output) {
588 if (settings->threads <= 0) {
589 LIBGAV1_DLOG(ERROR, "Invalid settings->threads: %d.", settings->threads);
590 return kStatusInvalidArgument;
591 }
592 if (settings->frame_parallel) {
593 if (settings->release_input_buffer == nullptr) {
594 LIBGAV1_DLOG(ERROR,
595 "release_input_buffer callback must not be null when "
596 "frame_parallel is true.");
597 return kStatusInvalidArgument;
598 }
599 }
600 std::unique_ptr<DecoderImpl> impl(new (std::nothrow) DecoderImpl(settings));
601 if (impl == nullptr) {
602 LIBGAV1_DLOG(ERROR, "Failed to allocate DecoderImpl.");
603 return kStatusOutOfMemory;
604 }
605 const StatusCode status = impl->Init();
606 if (status != kStatusOk) return status;
607 *output = std::move(impl);
608 return kStatusOk;
609 }
610
DecoderImpl(const DecoderSettings * settings)611 DecoderImpl::DecoderImpl(const DecoderSettings* settings)
612 : buffer_pool_(settings->on_frame_buffer_size_changed,
613 settings->get_frame_buffer, settings->release_frame_buffer,
614 settings->callback_private_data),
615 settings_(*settings) {
616 dsp::DspInit();
617 }
618
~DecoderImpl()619 DecoderImpl::~DecoderImpl() {
620 // Clean up and wait until all the threads have stopped. We just have to pass
621 // in a dummy status that is not kStatusOk or kStatusTryAgain to trigger the
622 // path that clears all the threads and structs.
623 SignalFailure(kStatusUnknownError);
624 // Release any other frame buffer references that we may be holding on to.
625 ReleaseOutputFrame();
626 output_frame_queue_.Clear();
627 for (auto& reference_frame : state_.reference_frame) {
628 reference_frame = nullptr;
629 }
630 }
631
Init()632 StatusCode DecoderImpl::Init() {
633 if (!output_frame_queue_.Init(kMaxLayers)) {
634 LIBGAV1_DLOG(ERROR, "output_frame_queue_.Init() failed.");
635 return kStatusOutOfMemory;
636 }
637 return kStatusOk;
638 }
639
InitializeFrameThreadPoolAndTemporalUnitQueue(const uint8_t * data,size_t size)640 StatusCode DecoderImpl::InitializeFrameThreadPoolAndTemporalUnitQueue(
641 const uint8_t* data, size_t size) {
642 is_frame_parallel_ = false;
643 if (settings_.frame_parallel) {
644 DecoderState state;
645 std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
646 data, size, settings_.operating_point, &buffer_pool_, &state));
647 if (obu == nullptr) {
648 LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
649 return kStatusOutOfMemory;
650 }
651 RefCountedBufferPtr current_frame;
652 const StatusCode status = obu->ParseOneFrame(¤t_frame);
653 if (status != kStatusOk) {
654 LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
655 return status;
656 }
657 current_frame = nullptr;
658 // We assume that the first frame that was parsed will contain the frame
659 // header. This assumption is usually true in practice. So we will simply
660 // not use frame parallel mode if this is not the case.
661 if (settings_.threads > 1 &&
662 !InitializeThreadPoolsForFrameParallel(
663 settings_.threads, obu->frame_header().tile_info.tile_count,
664 obu->frame_header().tile_info.tile_columns, &frame_thread_pool_,
665 &frame_scratch_buffer_pool_)) {
666 return kStatusOutOfMemory;
667 }
668 }
669 const int max_allowed_frames =
670 (frame_thread_pool_ != nullptr) ? frame_thread_pool_->num_threads() : 1;
671 assert(max_allowed_frames > 0);
672 if (!temporal_units_.Init(max_allowed_frames)) {
673 LIBGAV1_DLOG(ERROR, "temporal_units_.Init() failed.");
674 return kStatusOutOfMemory;
675 }
676 is_frame_parallel_ = frame_thread_pool_ != nullptr;
677 return kStatusOk;
678 }
679
EnqueueFrame(const uint8_t * data,size_t size,int64_t user_private_data,void * buffer_private_data)680 StatusCode DecoderImpl::EnqueueFrame(const uint8_t* data, size_t size,
681 int64_t user_private_data,
682 void* buffer_private_data) {
683 if (data == nullptr || size == 0) return kStatusInvalidArgument;
684 if (HasFailure()) return kStatusUnknownError;
685 if (!seen_first_frame_) {
686 seen_first_frame_ = true;
687 const StatusCode status =
688 InitializeFrameThreadPoolAndTemporalUnitQueue(data, size);
689 if (status != kStatusOk) {
690 return SignalFailure(status);
691 }
692 }
693 if (temporal_units_.Full()) {
694 return kStatusTryAgain;
695 }
696 if (is_frame_parallel_) {
697 return ParseAndSchedule(data, size, user_private_data, buffer_private_data);
698 }
699 TemporalUnit temporal_unit(data, size, user_private_data,
700 buffer_private_data);
701 temporal_units_.Push(std::move(temporal_unit));
702 return kStatusOk;
703 }
704
SignalFailure(StatusCode status)705 StatusCode DecoderImpl::SignalFailure(StatusCode status) {
706 if (status == kStatusOk || status == kStatusTryAgain) return status;
707 // Set the |failure_status_| first so that any pending jobs in
708 // |frame_thread_pool_| will exit right away when the thread pool is being
709 // released below.
710 {
711 std::lock_guard<std::mutex> lock(mutex_);
712 failure_status_ = status;
713 }
714 // Make sure all waiting threads exit.
715 buffer_pool_.Abort();
716 frame_thread_pool_ = nullptr;
717 while (!temporal_units_.Empty()) {
718 if (settings_.release_input_buffer != nullptr) {
719 settings_.release_input_buffer(
720 settings_.callback_private_data,
721 temporal_units_.Front().buffer_private_data);
722 }
723 temporal_units_.Pop();
724 }
725 return status;
726 }
727
728 // DequeueFrame() follows the following policy to avoid holding unnecessary
729 // frame buffer references in output_frame_: output_frame_ must be null when
730 // DequeueFrame() returns false.
DequeueFrame(const DecoderBuffer ** out_ptr)731 StatusCode DecoderImpl::DequeueFrame(const DecoderBuffer** out_ptr) {
732 if (out_ptr == nullptr) {
733 LIBGAV1_DLOG(ERROR, "Invalid argument: out_ptr == nullptr.");
734 return kStatusInvalidArgument;
735 }
736 // We assume a call to DequeueFrame() indicates that the caller is no longer
737 // using the previous output frame, so we can release it.
738 ReleaseOutputFrame();
739 if (temporal_units_.Empty()) {
740 // No input frames to decode.
741 *out_ptr = nullptr;
742 return kStatusNothingToDequeue;
743 }
744 TemporalUnit& temporal_unit = temporal_units_.Front();
745 if (!is_frame_parallel_) {
746 // If |output_frame_queue_| is not empty, then return the first frame from
747 // that queue.
748 if (!output_frame_queue_.Empty()) {
749 RefCountedBufferPtr frame = std::move(output_frame_queue_.Front());
750 output_frame_queue_.Pop();
751 buffer_.user_private_data = temporal_unit.user_private_data;
752 if (output_frame_queue_.Empty()) {
753 temporal_units_.Pop();
754 }
755 const StatusCode status = CopyFrameToOutputBuffer(frame);
756 if (status != kStatusOk) {
757 return status;
758 }
759 *out_ptr = &buffer_;
760 return kStatusOk;
761 }
762 // Decode the next available temporal unit and return.
763 const StatusCode status = DecodeTemporalUnit(temporal_unit, out_ptr);
764 if (status != kStatusOk) {
765 // In case of failure, discard all the output frames that we may be
766 // holding on references to.
767 output_frame_queue_.Clear();
768 }
769 if (settings_.release_input_buffer != nullptr) {
770 settings_.release_input_buffer(settings_.callback_private_data,
771 temporal_unit.buffer_private_data);
772 }
773 if (output_frame_queue_.Empty()) {
774 temporal_units_.Pop();
775 }
776 return status;
777 }
778 {
779 std::unique_lock<std::mutex> lock(mutex_);
780 if (settings_.blocking_dequeue) {
781 while (!temporal_unit.decoded && failure_status_ == kStatusOk) {
782 decoded_condvar_.wait(lock);
783 }
784 } else {
785 if (!temporal_unit.decoded && failure_status_ == kStatusOk) {
786 return kStatusTryAgain;
787 }
788 }
789 if (failure_status_ != kStatusOk) {
790 const StatusCode failure_status = failure_status_;
791 lock.unlock();
792 return SignalFailure(failure_status);
793 }
794 }
795 if (settings_.release_input_buffer != nullptr &&
796 !temporal_unit.released_input_buffer) {
797 temporal_unit.released_input_buffer = true;
798 settings_.release_input_buffer(settings_.callback_private_data,
799 temporal_unit.buffer_private_data);
800 }
801 if (temporal_unit.status != kStatusOk) {
802 temporal_units_.Pop();
803 return SignalFailure(temporal_unit.status);
804 }
805 if (!temporal_unit.has_displayable_frame) {
806 *out_ptr = nullptr;
807 temporal_units_.Pop();
808 return kStatusOk;
809 }
810 assert(temporal_unit.output_layer_count > 0);
811 StatusCode status = CopyFrameToOutputBuffer(
812 temporal_unit.output_layers[temporal_unit.output_layer_count - 1].frame);
813 temporal_unit.output_layers[temporal_unit.output_layer_count - 1].frame =
814 nullptr;
815 if (status != kStatusOk) {
816 temporal_units_.Pop();
817 return SignalFailure(status);
818 }
819 buffer_.user_private_data = temporal_unit.user_private_data;
820 *out_ptr = &buffer_;
821 if (--temporal_unit.output_layer_count == 0) {
822 temporal_units_.Pop();
823 }
824 return kStatusOk;
825 }
826
ParseAndSchedule(const uint8_t * data,size_t size,int64_t user_private_data,void * buffer_private_data)827 StatusCode DecoderImpl::ParseAndSchedule(const uint8_t* data, size_t size,
828 int64_t user_private_data,
829 void* buffer_private_data) {
830 TemporalUnit temporal_unit(data, size, user_private_data,
831 buffer_private_data);
832 std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
833 temporal_unit.data, temporal_unit.size, settings_.operating_point,
834 &buffer_pool_, &state_));
835 if (obu == nullptr) {
836 LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
837 return kStatusOutOfMemory;
838 }
839 if (has_sequence_header_) {
840 obu->set_sequence_header(sequence_header_);
841 }
842 StatusCode status;
843 int position_in_temporal_unit = 0;
844 while (obu->HasData()) {
845 RefCountedBufferPtr current_frame;
846 status = obu->ParseOneFrame(¤t_frame);
847 if (status != kStatusOk) {
848 LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
849 return status;
850 }
851 if (!MaybeInitializeQuantizerMatrix(obu->frame_header())) {
852 LIBGAV1_DLOG(ERROR, "InitializeQuantizerMatrix() failed.");
853 return kStatusOutOfMemory;
854 }
855 if (!MaybeInitializeWedgeMasks(obu->frame_header().frame_type)) {
856 LIBGAV1_DLOG(ERROR, "InitializeWedgeMasks() failed.");
857 return kStatusOutOfMemory;
858 }
859 if (IsNewSequenceHeader(*obu)) {
860 const ObuSequenceHeader& sequence_header = obu->sequence_header();
861 const Libgav1ImageFormat image_format =
862 ComposeImageFormat(sequence_header.color_config.is_monochrome,
863 sequence_header.color_config.subsampling_x,
864 sequence_header.color_config.subsampling_y);
865 const int max_bottom_border = GetBottomBorderPixels(
866 /*do_cdef=*/true, /*do_restoration=*/true,
867 /*do_superres=*/true, sequence_header.color_config.subsampling_y);
868 // TODO(vigneshv): This may not be the right place to call this callback
869 // for the frame parallel case. Investigate and fix it.
870 if (!buffer_pool_.OnFrameBufferSizeChanged(
871 sequence_header.color_config.bitdepth, image_format,
872 sequence_header.max_frame_width, sequence_header.max_frame_height,
873 kBorderPixels, kBorderPixels, kBorderPixels, max_bottom_border)) {
874 LIBGAV1_DLOG(ERROR, "buffer_pool_.OnFrameBufferSizeChanged failed.");
875 return kStatusUnknownError;
876 }
877 }
878 // This can happen when there are multiple spatial/temporal layers and if
879 // all the layers are outside the current operating point.
880 if (current_frame == nullptr) {
881 continue;
882 }
883 // Note that we cannot set EncodedFrame.temporal_unit here. It will be set
884 // in the code below after |temporal_unit| is std::move'd into the
885 // |temporal_units_| queue.
886 if (!temporal_unit.frames.emplace_back(obu.get(), state_, current_frame,
887 position_in_temporal_unit++)) {
888 LIBGAV1_DLOG(ERROR, "temporal_unit.frames.emplace_back failed.");
889 return kStatusOutOfMemory;
890 }
891 state_.UpdateReferenceFrames(current_frame,
892 obu->frame_header().refresh_frame_flags);
893 }
894 // This function cannot fail after this point. So it is okay to move the
895 // |temporal_unit| into |temporal_units_| queue.
896 temporal_units_.Push(std::move(temporal_unit));
897 if (temporal_units_.Back().frames.empty()) {
898 std::lock_guard<std::mutex> lock(mutex_);
899 temporal_units_.Back().has_displayable_frame = false;
900 temporal_units_.Back().decoded = true;
901 return kStatusOk;
902 }
903 for (auto& frame : temporal_units_.Back().frames) {
904 EncodedFrame* const encoded_frame = &frame;
905 encoded_frame->temporal_unit = &temporal_units_.Back();
906 frame_thread_pool_->Schedule([this, encoded_frame]() {
907 if (HasFailure()) return;
908 const StatusCode status = DecodeFrame(encoded_frame);
909 encoded_frame->state = {};
910 encoded_frame->frame = nullptr;
911 TemporalUnit& temporal_unit = *encoded_frame->temporal_unit;
912 std::lock_guard<std::mutex> lock(mutex_);
913 if (failure_status_ != kStatusOk) return;
914 // temporal_unit's status defaults to kStatusOk. So we need to set it only
915 // on error. If |failure_status_| is not kStatusOk at this point, it means
916 // that there has already been a failure. So we don't care about this
917 // subsequent failure. We will simply return the error code of the first
918 // failure.
919 if (status != kStatusOk) {
920 temporal_unit.status = status;
921 if (failure_status_ == kStatusOk) {
922 failure_status_ = status;
923 }
924 }
925 temporal_unit.decoded =
926 ++temporal_unit.decoded_count == temporal_unit.frames.size();
927 if (temporal_unit.decoded && settings_.output_all_layers &&
928 temporal_unit.output_layer_count > 1) {
929 std::sort(
930 temporal_unit.output_layers,
931 temporal_unit.output_layers + temporal_unit.output_layer_count);
932 }
933 if (temporal_unit.decoded || failure_status_ != kStatusOk) {
934 decoded_condvar_.notify_one();
935 }
936 });
937 }
938 return kStatusOk;
939 }
940
DecodeFrame(EncodedFrame * const encoded_frame)941 StatusCode DecoderImpl::DecodeFrame(EncodedFrame* const encoded_frame) {
942 const ObuSequenceHeader& sequence_header = encoded_frame->sequence_header;
943 const ObuFrameHeader& frame_header = encoded_frame->frame_header;
944 RefCountedBufferPtr current_frame = std::move(encoded_frame->frame);
945
946 std::unique_ptr<FrameScratchBuffer> frame_scratch_buffer =
947 frame_scratch_buffer_pool_.Get();
948 if (frame_scratch_buffer == nullptr) {
949 LIBGAV1_DLOG(ERROR, "Error when getting FrameScratchBuffer.");
950 return kStatusOutOfMemory;
951 }
952 // |frame_scratch_buffer| will be released when this local variable goes out
953 // of scope (i.e.) on any return path in this function.
954 FrameScratchBufferReleaser frame_scratch_buffer_releaser(
955 &frame_scratch_buffer_pool_, &frame_scratch_buffer);
956
957 StatusCode status;
958 if (!frame_header.show_existing_frame) {
959 if (encoded_frame->tile_buffers.empty()) {
960 // This means that the last call to ParseOneFrame() did not actually
961 // have any tile groups. This could happen in rare cases (for example,
962 // if there is a Metadata OBU after the TileGroup OBU). We currently do
963 // not have a reason to handle those cases, so we simply continue.
964 return kStatusOk;
965 }
966 status = DecodeTiles(sequence_header, frame_header,
967 encoded_frame->tile_buffers, encoded_frame->state,
968 frame_scratch_buffer.get(), current_frame.get());
969 if (status != kStatusOk) {
970 return status;
971 }
972 } else {
973 if (!current_frame->WaitUntilDecoded()) {
974 return kStatusUnknownError;
975 }
976 }
977 if (!frame_header.show_frame && !frame_header.show_existing_frame) {
978 // This frame is not displayable. Not an error.
979 return kStatusOk;
980 }
981 RefCountedBufferPtr film_grain_frame;
982 status = ApplyFilmGrain(
983 sequence_header, frame_header, current_frame, &film_grain_frame,
984 frame_scratch_buffer->threading_strategy.thread_pool());
985 if (status != kStatusOk) {
986 return status;
987 }
988
989 TemporalUnit& temporal_unit = *encoded_frame->temporal_unit;
990 std::lock_guard<std::mutex> lock(mutex_);
991 if (temporal_unit.has_displayable_frame && !settings_.output_all_layers) {
992 assert(temporal_unit.output_frame_position >= 0);
993 // A displayable frame was already found in this temporal unit. This can
994 // happen if there are multiple spatial/temporal layers. Since
995 // |settings_.output_all_layers| is false, we will output only the last
996 // displayable frame.
997 if (temporal_unit.output_frame_position >
998 encoded_frame->position_in_temporal_unit) {
999 return kStatusOk;
1000 }
1001 // Replace any output frame that we may have seen before with the current
1002 // frame.
1003 assert(temporal_unit.output_layer_count == 1);
1004 --temporal_unit.output_layer_count;
1005 }
1006 temporal_unit.has_displayable_frame = true;
1007 temporal_unit.output_layers[temporal_unit.output_layer_count].frame =
1008 std::move(film_grain_frame);
1009 temporal_unit.output_layers[temporal_unit.output_layer_count]
1010 .position_in_temporal_unit = encoded_frame->position_in_temporal_unit;
1011 ++temporal_unit.output_layer_count;
1012 temporal_unit.output_frame_position =
1013 encoded_frame->position_in_temporal_unit;
1014 return kStatusOk;
1015 }
1016
DecodeTemporalUnit(const TemporalUnit & temporal_unit,const DecoderBuffer ** out_ptr)1017 StatusCode DecoderImpl::DecodeTemporalUnit(const TemporalUnit& temporal_unit,
1018 const DecoderBuffer** out_ptr) {
1019 std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
1020 temporal_unit.data, temporal_unit.size, settings_.operating_point,
1021 &buffer_pool_, &state_));
1022 if (obu == nullptr) {
1023 LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
1024 return kStatusOutOfMemory;
1025 }
1026 if (has_sequence_header_) {
1027 obu->set_sequence_header(sequence_header_);
1028 }
1029 StatusCode status;
1030 std::unique_ptr<FrameScratchBuffer> frame_scratch_buffer =
1031 frame_scratch_buffer_pool_.Get();
1032 if (frame_scratch_buffer == nullptr) {
1033 LIBGAV1_DLOG(ERROR, "Error when getting FrameScratchBuffer.");
1034 return kStatusOutOfMemory;
1035 }
1036 // |frame_scratch_buffer| will be released when this local variable goes out
1037 // of scope (i.e.) on any return path in this function.
1038 FrameScratchBufferReleaser frame_scratch_buffer_releaser(
1039 &frame_scratch_buffer_pool_, &frame_scratch_buffer);
1040
1041 while (obu->HasData()) {
1042 RefCountedBufferPtr current_frame;
1043 status = obu->ParseOneFrame(¤t_frame);
1044 if (status != kStatusOk) {
1045 LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
1046 return status;
1047 }
1048 if (!MaybeInitializeQuantizerMatrix(obu->frame_header())) {
1049 LIBGAV1_DLOG(ERROR, "InitializeQuantizerMatrix() failed.");
1050 return kStatusOutOfMemory;
1051 }
1052 if (!MaybeInitializeWedgeMasks(obu->frame_header().frame_type)) {
1053 LIBGAV1_DLOG(ERROR, "InitializeWedgeMasks() failed.");
1054 return kStatusOutOfMemory;
1055 }
1056 if (IsNewSequenceHeader(*obu)) {
1057 const ObuSequenceHeader& sequence_header = obu->sequence_header();
1058 const Libgav1ImageFormat image_format =
1059 ComposeImageFormat(sequence_header.color_config.is_monochrome,
1060 sequence_header.color_config.subsampling_x,
1061 sequence_header.color_config.subsampling_y);
1062 const int max_bottom_border = GetBottomBorderPixels(
1063 /*do_cdef=*/true, /*do_restoration=*/true,
1064 /*do_superres=*/true, sequence_header.color_config.subsampling_y);
1065 if (!buffer_pool_.OnFrameBufferSizeChanged(
1066 sequence_header.color_config.bitdepth, image_format,
1067 sequence_header.max_frame_width, sequence_header.max_frame_height,
1068 kBorderPixels, kBorderPixels, kBorderPixels, max_bottom_border)) {
1069 LIBGAV1_DLOG(ERROR, "buffer_pool_.OnFrameBufferSizeChanged failed.");
1070 return kStatusUnknownError;
1071 }
1072 }
1073 if (!obu->frame_header().show_existing_frame) {
1074 if (obu->tile_buffers().empty()) {
1075 // This means that the last call to ParseOneFrame() did not actually
1076 // have any tile groups. This could happen in rare cases (for example,
1077 // if there is a Metadata OBU after the TileGroup OBU). We currently do
1078 // not have a reason to handle those cases, so we simply continue.
1079 continue;
1080 }
1081 status = DecodeTiles(obu->sequence_header(), obu->frame_header(),
1082 obu->tile_buffers(), state_,
1083 frame_scratch_buffer.get(), current_frame.get());
1084 if (status != kStatusOk) {
1085 return status;
1086 }
1087 }
1088 state_.UpdateReferenceFrames(current_frame,
1089 obu->frame_header().refresh_frame_flags);
1090 if (obu->frame_header().show_frame ||
1091 obu->frame_header().show_existing_frame) {
1092 if (!output_frame_queue_.Empty() && !settings_.output_all_layers) {
1093 // There is more than one displayable frame in the current operating
1094 // point and |settings_.output_all_layers| is false. In this case, we
1095 // simply return the last displayable frame as the output frame and
1096 // ignore the rest.
1097 assert(output_frame_queue_.Size() == 1);
1098 output_frame_queue_.Pop();
1099 }
1100 RefCountedBufferPtr film_grain_frame;
1101 status = ApplyFilmGrain(
1102 obu->sequence_header(), obu->frame_header(), current_frame,
1103 &film_grain_frame,
1104 frame_scratch_buffer->threading_strategy.film_grain_thread_pool());
1105 if (status != kStatusOk) return status;
1106 output_frame_queue_.Push(std::move(film_grain_frame));
1107 }
1108 }
1109 if (output_frame_queue_.Empty()) {
1110 // No displayable frame in the temporal unit. Not an error.
1111 *out_ptr = nullptr;
1112 return kStatusOk;
1113 }
1114 status = CopyFrameToOutputBuffer(output_frame_queue_.Front());
1115 output_frame_queue_.Pop();
1116 if (status != kStatusOk) {
1117 return status;
1118 }
1119 buffer_.user_private_data = temporal_unit.user_private_data;
1120 *out_ptr = &buffer_;
1121 return kStatusOk;
1122 }
1123
CopyFrameToOutputBuffer(const RefCountedBufferPtr & frame)1124 StatusCode DecoderImpl::CopyFrameToOutputBuffer(
1125 const RefCountedBufferPtr& frame) {
1126 YuvBuffer* yuv_buffer = frame->buffer();
1127
1128 buffer_.chroma_sample_position = frame->chroma_sample_position();
1129
1130 if (yuv_buffer->is_monochrome()) {
1131 buffer_.image_format = kImageFormatMonochrome400;
1132 } else {
1133 if (yuv_buffer->subsampling_x() == 0 && yuv_buffer->subsampling_y() == 0) {
1134 buffer_.image_format = kImageFormatYuv444;
1135 } else if (yuv_buffer->subsampling_x() == 1 &&
1136 yuv_buffer->subsampling_y() == 0) {
1137 buffer_.image_format = kImageFormatYuv422;
1138 } else if (yuv_buffer->subsampling_x() == 1 &&
1139 yuv_buffer->subsampling_y() == 1) {
1140 buffer_.image_format = kImageFormatYuv420;
1141 } else {
1142 LIBGAV1_DLOG(ERROR,
1143 "Invalid chroma subsampling values: cannot determine buffer "
1144 "image format.");
1145 return kStatusInvalidArgument;
1146 }
1147 }
1148 buffer_.color_range = sequence_header_.color_config.color_range;
1149 buffer_.color_primary = sequence_header_.color_config.color_primary;
1150 buffer_.transfer_characteristics =
1151 sequence_header_.color_config.transfer_characteristics;
1152 buffer_.matrix_coefficients =
1153 sequence_header_.color_config.matrix_coefficients;
1154
1155 buffer_.bitdepth = yuv_buffer->bitdepth();
1156 const int num_planes =
1157 yuv_buffer->is_monochrome() ? kMaxPlanesMonochrome : kMaxPlanes;
1158 int plane = kPlaneY;
1159 for (; plane < num_planes; ++plane) {
1160 buffer_.stride[plane] = yuv_buffer->stride(plane);
1161 buffer_.plane[plane] = yuv_buffer->data(plane);
1162 buffer_.displayed_width[plane] = yuv_buffer->width(plane);
1163 buffer_.displayed_height[plane] = yuv_buffer->height(plane);
1164 }
1165 for (; plane < kMaxPlanes; ++plane) {
1166 buffer_.stride[plane] = 0;
1167 buffer_.plane[plane] = nullptr;
1168 buffer_.displayed_width[plane] = 0;
1169 buffer_.displayed_height[plane] = 0;
1170 }
1171 buffer_.spatial_id = frame->spatial_id();
1172 buffer_.temporal_id = frame->temporal_id();
1173 buffer_.buffer_private_data = frame->buffer_private_data();
1174 output_frame_ = frame;
1175 return kStatusOk;
1176 }
1177
ReleaseOutputFrame()1178 void DecoderImpl::ReleaseOutputFrame() {
1179 for (auto& plane : buffer_.plane) {
1180 plane = nullptr;
1181 }
1182 output_frame_ = nullptr;
1183 }
1184
DecodeTiles(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<TileBuffer> & tile_buffers,const DecoderState & state,FrameScratchBuffer * const frame_scratch_buffer,RefCountedBuffer * const current_frame)1185 StatusCode DecoderImpl::DecodeTiles(
1186 const ObuSequenceHeader& sequence_header,
1187 const ObuFrameHeader& frame_header, const Vector<TileBuffer>& tile_buffers,
1188 const DecoderState& state, FrameScratchBuffer* const frame_scratch_buffer,
1189 RefCountedBuffer* const current_frame) {
1190 frame_scratch_buffer->tile_scratch_buffer_pool.Reset(
1191 sequence_header.color_config.bitdepth);
1192 if (!frame_scratch_buffer->loop_restoration_info.Reset(
1193 &frame_header.loop_restoration, frame_header.upscaled_width,
1194 frame_header.height, sequence_header.color_config.subsampling_x,
1195 sequence_header.color_config.subsampling_y,
1196 sequence_header.color_config.is_monochrome)) {
1197 LIBGAV1_DLOG(ERROR,
1198 "Failed to allocate memory for loop restoration info units.");
1199 return kStatusOutOfMemory;
1200 }
1201 ThreadingStrategy& threading_strategy =
1202 frame_scratch_buffer->threading_strategy;
1203 if (!is_frame_parallel_ &&
1204 !threading_strategy.Reset(frame_header, settings_.threads)) {
1205 return kStatusOutOfMemory;
1206 }
1207 const bool do_cdef =
1208 PostFilter::DoCdef(frame_header, settings_.post_filter_mask);
1209 const int num_planes = sequence_header.color_config.is_monochrome
1210 ? kMaxPlanesMonochrome
1211 : kMaxPlanes;
1212 const bool do_restoration = PostFilter::DoRestoration(
1213 frame_header.loop_restoration, settings_.post_filter_mask, num_planes);
1214 const bool do_superres =
1215 PostFilter::DoSuperRes(frame_header, settings_.post_filter_mask);
1216 // Use kBorderPixels for the left, right, and top borders. Only the bottom
1217 // border may need to be bigger. Cdef border is needed only if we apply Cdef
1218 // without multithreading.
1219 const int bottom_border = GetBottomBorderPixels(
1220 do_cdef && threading_strategy.post_filter_thread_pool() == nullptr,
1221 do_restoration, do_superres, sequence_header.color_config.subsampling_y);
1222 current_frame->set_chroma_sample_position(
1223 sequence_header.color_config.chroma_sample_position);
1224 if (!current_frame->Realloc(sequence_header.color_config.bitdepth,
1225 sequence_header.color_config.is_monochrome,
1226 frame_header.upscaled_width, frame_header.height,
1227 sequence_header.color_config.subsampling_x,
1228 sequence_header.color_config.subsampling_y,
1229 /*left_border=*/kBorderPixels,
1230 /*right_border=*/kBorderPixels,
1231 /*top_border=*/kBorderPixels, bottom_border)) {
1232 LIBGAV1_DLOG(ERROR, "Failed to allocate memory for the decoder buffer.");
1233 return kStatusOutOfMemory;
1234 }
1235 if (sequence_header.enable_cdef) {
1236 if (!frame_scratch_buffer->cdef_index.Reset(
1237 DivideBy16(frame_header.rows4x4 + kMaxBlockHeight4x4),
1238 DivideBy16(frame_header.columns4x4 + kMaxBlockWidth4x4),
1239 /*zero_initialize=*/false)) {
1240 LIBGAV1_DLOG(ERROR, "Failed to allocate memory for cdef index.");
1241 return kStatusOutOfMemory;
1242 }
1243 }
1244 if (!frame_scratch_buffer->inter_transform_sizes.Reset(
1245 frame_header.rows4x4 + kMaxBlockHeight4x4,
1246 frame_header.columns4x4 + kMaxBlockWidth4x4,
1247 /*zero_initialize=*/false)) {
1248 LIBGAV1_DLOG(ERROR, "Failed to allocate memory for inter_transform_sizes.");
1249 return kStatusOutOfMemory;
1250 }
1251 if (frame_header.use_ref_frame_mvs) {
1252 if (!frame_scratch_buffer->motion_field.mv.Reset(
1253 DivideBy2(frame_header.rows4x4), DivideBy2(frame_header.columns4x4),
1254 /*zero_initialize=*/false) ||
1255 !frame_scratch_buffer->motion_field.reference_offset.Reset(
1256 DivideBy2(frame_header.rows4x4), DivideBy2(frame_header.columns4x4),
1257 /*zero_initialize=*/false)) {
1258 LIBGAV1_DLOG(ERROR,
1259 "Failed to allocate memory for temporal motion vectors.");
1260 return kStatusOutOfMemory;
1261 }
1262
1263 // For each motion vector, only mv[0] needs to be initialized to
1264 // kInvalidMvValue, mv[1] is not necessary to be initialized and can be
1265 // set to an arbitrary value. For simplicity, mv[1] is set to 0.
1266 // The following memory initialization of contiguous memory is very fast. It
1267 // is not recommended to make the initialization multi-threaded, unless the
1268 // memory which needs to be initialized in each thread is still contiguous.
1269 MotionVector invalid_mv;
1270 invalid_mv.mv[0] = kInvalidMvValue;
1271 invalid_mv.mv[1] = 0;
1272 MotionVector* const motion_field_mv =
1273 &frame_scratch_buffer->motion_field.mv[0][0];
1274 std::fill(motion_field_mv,
1275 motion_field_mv + frame_scratch_buffer->motion_field.mv.size(),
1276 invalid_mv);
1277 }
1278
1279 // The addition of kMaxBlockHeight4x4 and kMaxBlockWidth4x4 is necessary so
1280 // that the block parameters cache can be filled in for the last row/column
1281 // without having to check for boundary conditions.
1282 if (!frame_scratch_buffer->block_parameters_holder.Reset(
1283 frame_header.rows4x4 + kMaxBlockHeight4x4,
1284 frame_header.columns4x4 + kMaxBlockWidth4x4)) {
1285 return kStatusOutOfMemory;
1286 }
1287 const dsp::Dsp* const dsp =
1288 dsp::GetDspTable(sequence_header.color_config.bitdepth);
1289 if (dsp == nullptr) {
1290 LIBGAV1_DLOG(ERROR, "Failed to get the dsp table for bitdepth %d.",
1291 sequence_header.color_config.bitdepth);
1292 return kStatusInternalError;
1293 }
1294
1295 const int tile_count = frame_header.tile_info.tile_count;
1296 assert(tile_count >= 1);
1297 Vector<std::unique_ptr<Tile>> tiles;
1298 if (!tiles.reserve(tile_count)) {
1299 LIBGAV1_DLOG(ERROR, "tiles.reserve(%d) failed.\n", tile_count);
1300 return kStatusOutOfMemory;
1301 }
1302
1303 if (threading_strategy.row_thread_pool(0) != nullptr || is_frame_parallel_) {
1304 if (frame_scratch_buffer->residual_buffer_pool == nullptr) {
1305 frame_scratch_buffer->residual_buffer_pool.reset(
1306 new (std::nothrow) ResidualBufferPool(
1307 sequence_header.use_128x128_superblock,
1308 sequence_header.color_config.subsampling_x,
1309 sequence_header.color_config.subsampling_y,
1310 sequence_header.color_config.bitdepth == 8 ? sizeof(int16_t)
1311 : sizeof(int32_t)));
1312 if (frame_scratch_buffer->residual_buffer_pool == nullptr) {
1313 LIBGAV1_DLOG(ERROR, "Failed to allocate residual buffer.\n");
1314 return kStatusOutOfMemory;
1315 }
1316 } else {
1317 frame_scratch_buffer->residual_buffer_pool->Reset(
1318 sequence_header.use_128x128_superblock,
1319 sequence_header.color_config.subsampling_x,
1320 sequence_header.color_config.subsampling_y,
1321 sequence_header.color_config.bitdepth == 8 ? sizeof(int16_t)
1322 : sizeof(int32_t));
1323 }
1324 }
1325
1326 if (threading_strategy.post_filter_thread_pool() != nullptr && do_cdef) {
1327 // We need to store 4 rows per 64x64 unit.
1328 const int num_units =
1329 MultiplyBy4(RightShiftWithCeiling(frame_header.rows4x4, 4));
1330 // subsampling_y is set to zero irrespective of the actual frame's
1331 // subsampling since we need to store exactly |num_units| rows of the loop
1332 // restoration border pixels.
1333 if (!frame_scratch_buffer->cdef_border.Realloc(
1334 sequence_header.color_config.bitdepth,
1335 sequence_header.color_config.is_monochrome,
1336 MultiplyBy4(frame_header.columns4x4), num_units,
1337 sequence_header.color_config.subsampling_x,
1338 /*subsampling_y=*/0, kBorderPixels, kBorderPixels, kBorderPixels,
1339 kBorderPixels, nullptr, nullptr, nullptr)) {
1340 return kStatusOutOfMemory;
1341 }
1342 }
1343
1344 if (do_restoration &&
1345 (do_cdef || threading_strategy.post_filter_thread_pool() != nullptr)) {
1346 // We need to store 4 rows per 64x64 unit.
1347 const int num_units =
1348 MultiplyBy4(RightShiftWithCeiling(frame_header.rows4x4, 4));
1349 // subsampling_y is set to zero irrespective of the actual frame's
1350 // subsampling since we need to store exactly |num_units| rows of the loop
1351 // restoration border pixels.
1352 if (!frame_scratch_buffer->loop_restoration_border.Realloc(
1353 sequence_header.color_config.bitdepth,
1354 sequence_header.color_config.is_monochrome,
1355 frame_header.upscaled_width, num_units,
1356 sequence_header.color_config.subsampling_x,
1357 /*subsampling_y=*/0, kBorderPixels, kBorderPixels, kBorderPixels,
1358 kBorderPixels, nullptr, nullptr, nullptr)) {
1359 return kStatusOutOfMemory;
1360 }
1361 }
1362
1363 if (do_superres) {
1364 const int pixel_size = sequence_header.color_config.bitdepth == 8
1365 ? sizeof(uint8_t)
1366 : sizeof(uint16_t);
1367 if (!frame_scratch_buffer->superres_coefficients[kPlaneTypeY].Resize(
1368 kSuperResFilterTaps * Align(frame_header.upscaled_width, 16) *
1369 pixel_size)) {
1370 LIBGAV1_DLOG(ERROR,
1371 "Failed to Resize superres_coefficients[kPlaneTypeY].");
1372 return kStatusOutOfMemory;
1373 }
1374 if (!sequence_header.color_config.is_monochrome &&
1375 sequence_header.color_config.subsampling_x != 0 &&
1376 !frame_scratch_buffer->superres_coefficients[kPlaneTypeUV].Resize(
1377 kSuperResFilterTaps *
1378 Align(SubsampledValue(frame_header.upscaled_width, 1), 16) *
1379 pixel_size)) {
1380 LIBGAV1_DLOG(ERROR,
1381 "Failed to Resize superres_coefficients[kPlaneTypeUV].");
1382 return kStatusOutOfMemory;
1383 }
1384 }
1385
1386 if (do_superres && threading_strategy.post_filter_thread_pool() != nullptr) {
1387 const int num_threads =
1388 threading_strategy.post_filter_thread_pool()->num_threads() + 1;
1389 // subsampling_y is set to zero irrespective of the actual frame's
1390 // subsampling since we need to store exactly |num_threads| rows of the
1391 // down-scaled pixels.
1392 // Left and right borders are for line extension. They are doubled for the Y
1393 // plane to make sure the U and V planes have enough space after possible
1394 // subsampling.
1395 if (!frame_scratch_buffer->superres_line_buffer.Realloc(
1396 sequence_header.color_config.bitdepth,
1397 sequence_header.color_config.is_monochrome,
1398 MultiplyBy4(frame_header.columns4x4), num_threads,
1399 sequence_header.color_config.subsampling_x,
1400 /*subsampling_y=*/0, 2 * kSuperResHorizontalBorder,
1401 2 * (kSuperResHorizontalBorder + kSuperResHorizontalPadding), 0, 0,
1402 nullptr, nullptr, nullptr)) {
1403 LIBGAV1_DLOG(ERROR, "Failed to resize superres line buffer.\n");
1404 return kStatusOutOfMemory;
1405 }
1406 }
1407
1408 PostFilter post_filter(frame_header, sequence_header, frame_scratch_buffer,
1409 current_frame->buffer(), dsp,
1410 settings_.post_filter_mask);
1411
1412 if (is_frame_parallel_ && !IsIntraFrame(frame_header.frame_type)) {
1413 // We can parse the current frame if all the reference frames have been
1414 // parsed.
1415 for (const int index : frame_header.reference_frame_index) {
1416 if (!state.reference_frame[index]->WaitUntilParsed()) {
1417 return kStatusUnknownError;
1418 }
1419 }
1420 }
1421
1422 // If prev_segment_ids is a null pointer, it is treated as if it pointed to
1423 // a segmentation map containing all 0s.
1424 const SegmentationMap* prev_segment_ids = nullptr;
1425 if (frame_header.primary_reference_frame == kPrimaryReferenceNone) {
1426 frame_scratch_buffer->symbol_decoder_context.Initialize(
1427 frame_header.quantizer.base_index);
1428 } else {
1429 const int index =
1430 frame_header
1431 .reference_frame_index[frame_header.primary_reference_frame];
1432 assert(index != -1);
1433 const RefCountedBuffer* prev_frame = state.reference_frame[index].get();
1434 frame_scratch_buffer->symbol_decoder_context = prev_frame->FrameContext();
1435 if (frame_header.segmentation.enabled &&
1436 prev_frame->columns4x4() == frame_header.columns4x4 &&
1437 prev_frame->rows4x4() == frame_header.rows4x4) {
1438 prev_segment_ids = prev_frame->segmentation_map();
1439 }
1440 }
1441
1442 // The Tile class must make use of a separate buffer to store the unfiltered
1443 // pixels for the intra prediction of the next superblock row. This is done
1444 // only when one of the following conditions are true:
1445 // * is_frame_parallel_ is true.
1446 // * settings_.threads == 1.
1447 // In the non-frame-parallel multi-threaded case, we do not run the post
1448 // filters in the decode loop. So this buffer need not be used.
1449 const bool use_intra_prediction_buffer =
1450 is_frame_parallel_ || settings_.threads == 1;
1451 if (use_intra_prediction_buffer) {
1452 if (!frame_scratch_buffer->intra_prediction_buffers.Resize(
1453 frame_header.tile_info.tile_rows)) {
1454 LIBGAV1_DLOG(ERROR, "Failed to Resize intra_prediction_buffers.");
1455 return kStatusOutOfMemory;
1456 }
1457 IntraPredictionBuffer* const intra_prediction_buffers =
1458 frame_scratch_buffer->intra_prediction_buffers.get();
1459 for (int plane = kPlaneY; plane < num_planes; ++plane) {
1460 const int subsampling =
1461 (plane == kPlaneY) ? 0 : sequence_header.color_config.subsampling_x;
1462 const size_t intra_prediction_buffer_size =
1463 ((MultiplyBy4(frame_header.columns4x4) >> subsampling) *
1464 (sequence_header.color_config.bitdepth == 8 ? sizeof(uint8_t)
1465 : sizeof(uint16_t)));
1466 for (int tile_row = 0; tile_row < frame_header.tile_info.tile_rows;
1467 ++tile_row) {
1468 if (!intra_prediction_buffers[tile_row][plane].Resize(
1469 intra_prediction_buffer_size)) {
1470 LIBGAV1_DLOG(ERROR,
1471 "Failed to allocate intra prediction buffer for tile "
1472 "row %d plane %d.\n",
1473 tile_row, plane);
1474 return kStatusOutOfMemory;
1475 }
1476 }
1477 }
1478 }
1479
1480 SymbolDecoderContext saved_symbol_decoder_context;
1481 BlockingCounterWithStatus pending_tiles(tile_count);
1482 for (int tile_number = 0; tile_number < tile_count; ++tile_number) {
1483 std::unique_ptr<Tile> tile = Tile::Create(
1484 tile_number, tile_buffers[tile_number].data,
1485 tile_buffers[tile_number].size, sequence_header, frame_header,
1486 current_frame, state, frame_scratch_buffer, wedge_masks_,
1487 quantizer_matrix_, &saved_symbol_decoder_context, prev_segment_ids,
1488 &post_filter, dsp, threading_strategy.row_thread_pool(tile_number),
1489 &pending_tiles, is_frame_parallel_, use_intra_prediction_buffer);
1490 if (tile == nullptr) {
1491 LIBGAV1_DLOG(ERROR, "Failed to create tile.");
1492 return kStatusOutOfMemory;
1493 }
1494 tiles.push_back_unchecked(std::move(tile));
1495 }
1496 assert(tiles.size() == static_cast<size_t>(tile_count));
1497 if (is_frame_parallel_) {
1498 if (frame_scratch_buffer->threading_strategy.thread_pool() == nullptr) {
1499 return DecodeTilesFrameParallel(
1500 sequence_header, frame_header, tiles, saved_symbol_decoder_context,
1501 prev_segment_ids, frame_scratch_buffer, &post_filter, current_frame);
1502 }
1503 return DecodeTilesThreadedFrameParallel(
1504 sequence_header, frame_header, tiles, saved_symbol_decoder_context,
1505 prev_segment_ids, frame_scratch_buffer, &post_filter, current_frame);
1506 }
1507 StatusCode status;
1508 if (settings_.threads == 1) {
1509 status = DecodeTilesNonFrameParallel(sequence_header, frame_header, tiles,
1510 frame_scratch_buffer, &post_filter);
1511 } else {
1512 status = DecodeTilesThreadedNonFrameParallel(tiles, frame_scratch_buffer,
1513 &post_filter, &pending_tiles);
1514 }
1515 if (status != kStatusOk) return status;
1516 if (frame_header.enable_frame_end_update_cdf) {
1517 frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
1518 }
1519 current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
1520 SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
1521 return kStatusOk;
1522 }
1523
ApplyFilmGrain(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const RefCountedBufferPtr & displayable_frame,RefCountedBufferPtr * film_grain_frame,ThreadPool * thread_pool)1524 StatusCode DecoderImpl::ApplyFilmGrain(
1525 const ObuSequenceHeader& sequence_header,
1526 const ObuFrameHeader& frame_header,
1527 const RefCountedBufferPtr& displayable_frame,
1528 RefCountedBufferPtr* film_grain_frame, ThreadPool* thread_pool) {
1529 if (!sequence_header.film_grain_params_present ||
1530 !displayable_frame->film_grain_params().apply_grain ||
1531 (settings_.post_filter_mask & 0x10) == 0) {
1532 *film_grain_frame = displayable_frame;
1533 return kStatusOk;
1534 }
1535 if (!frame_header.show_existing_frame &&
1536 frame_header.refresh_frame_flags == 0) {
1537 // If show_existing_frame is true, then the current frame is a previously
1538 // saved reference frame. If refresh_frame_flags is nonzero, then the
1539 // state_.UpdateReferenceFrames() call above has saved the current frame as
1540 // a reference frame. Therefore, if both of these conditions are false, then
1541 // the current frame is not saved as a reference frame. displayable_frame
1542 // should hold the only reference to the current frame.
1543 assert(displayable_frame.use_count() == 1);
1544 // Add film grain noise in place.
1545 *film_grain_frame = displayable_frame;
1546 } else {
1547 *film_grain_frame = buffer_pool_.GetFreeBuffer();
1548 if (*film_grain_frame == nullptr) {
1549 LIBGAV1_DLOG(ERROR,
1550 "Could not get film_grain_frame from the buffer pool.");
1551 return kStatusResourceExhausted;
1552 }
1553 if (!(*film_grain_frame)
1554 ->Realloc(displayable_frame->buffer()->bitdepth(),
1555 displayable_frame->buffer()->is_monochrome(),
1556 displayable_frame->upscaled_width(),
1557 displayable_frame->frame_height(),
1558 displayable_frame->buffer()->subsampling_x(),
1559 displayable_frame->buffer()->subsampling_y(),
1560 kBorderPixelsFilmGrain, kBorderPixelsFilmGrain,
1561 kBorderPixelsFilmGrain, kBorderPixelsFilmGrain)) {
1562 LIBGAV1_DLOG(ERROR, "film_grain_frame->Realloc() failed.");
1563 return kStatusOutOfMemory;
1564 }
1565 (*film_grain_frame)
1566 ->set_chroma_sample_position(
1567 displayable_frame->chroma_sample_position());
1568 (*film_grain_frame)->set_spatial_id(displayable_frame->spatial_id());
1569 (*film_grain_frame)->set_temporal_id(displayable_frame->temporal_id());
1570 }
1571 const bool color_matrix_is_identity =
1572 sequence_header.color_config.matrix_coefficients ==
1573 kMatrixCoefficientsIdentity;
1574 assert(displayable_frame->buffer()->stride(kPlaneU) ==
1575 displayable_frame->buffer()->stride(kPlaneV));
1576 const int input_stride_uv = displayable_frame->buffer()->stride(kPlaneU);
1577 assert((*film_grain_frame)->buffer()->stride(kPlaneU) ==
1578 (*film_grain_frame)->buffer()->stride(kPlaneV));
1579 const int output_stride_uv = (*film_grain_frame)->buffer()->stride(kPlaneU);
1580 #if LIBGAV1_MAX_BITDEPTH >= 10
1581 if (displayable_frame->buffer()->bitdepth() > 8) {
1582 FilmGrain<10> film_grain(displayable_frame->film_grain_params(),
1583 displayable_frame->buffer()->is_monochrome(),
1584 color_matrix_is_identity,
1585 displayable_frame->buffer()->subsampling_x(),
1586 displayable_frame->buffer()->subsampling_y(),
1587 displayable_frame->upscaled_width(),
1588 displayable_frame->frame_height(), thread_pool);
1589 if (!film_grain.AddNoise(
1590 displayable_frame->buffer()->data(kPlaneY),
1591 displayable_frame->buffer()->stride(kPlaneY),
1592 displayable_frame->buffer()->data(kPlaneU),
1593 displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1594 (*film_grain_frame)->buffer()->data(kPlaneY),
1595 (*film_grain_frame)->buffer()->stride(kPlaneY),
1596 (*film_grain_frame)->buffer()->data(kPlaneU),
1597 (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1598 LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1599 return kStatusOutOfMemory;
1600 }
1601 return kStatusOk;
1602 }
1603 #endif // LIBGAV1_MAX_BITDEPTH >= 10
1604 FilmGrain<8> film_grain(displayable_frame->film_grain_params(),
1605 displayable_frame->buffer()->is_monochrome(),
1606 color_matrix_is_identity,
1607 displayable_frame->buffer()->subsampling_x(),
1608 displayable_frame->buffer()->subsampling_y(),
1609 displayable_frame->upscaled_width(),
1610 displayable_frame->frame_height(), thread_pool);
1611 if (!film_grain.AddNoise(
1612 displayable_frame->buffer()->data(kPlaneY),
1613 displayable_frame->buffer()->stride(kPlaneY),
1614 displayable_frame->buffer()->data(kPlaneU),
1615 displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1616 (*film_grain_frame)->buffer()->data(kPlaneY),
1617 (*film_grain_frame)->buffer()->stride(kPlaneY),
1618 (*film_grain_frame)->buffer()->data(kPlaneU),
1619 (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1620 LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1621 return kStatusOutOfMemory;
1622 }
1623 return kStatusOk;
1624 }
1625
IsNewSequenceHeader(const ObuParser & obu)1626 bool DecoderImpl::IsNewSequenceHeader(const ObuParser& obu) {
1627 if (std::find_if(obu.obu_headers().begin(), obu.obu_headers().end(),
1628 [](const ObuHeader& obu_header) {
1629 return obu_header.type == kObuSequenceHeader;
1630 }) == obu.obu_headers().end()) {
1631 return false;
1632 }
1633 const ObuSequenceHeader sequence_header = obu.sequence_header();
1634 const bool sequence_header_changed =
1635 !has_sequence_header_ ||
1636 sequence_header_.color_config.bitdepth !=
1637 sequence_header.color_config.bitdepth ||
1638 sequence_header_.color_config.is_monochrome !=
1639 sequence_header.color_config.is_monochrome ||
1640 sequence_header_.color_config.subsampling_x !=
1641 sequence_header.color_config.subsampling_x ||
1642 sequence_header_.color_config.subsampling_y !=
1643 sequence_header.color_config.subsampling_y ||
1644 sequence_header_.max_frame_width != sequence_header.max_frame_width ||
1645 sequence_header_.max_frame_height != sequence_header.max_frame_height;
1646 sequence_header_ = sequence_header;
1647 has_sequence_header_ = true;
1648 return sequence_header_changed;
1649 }
1650
MaybeInitializeWedgeMasks(FrameType frame_type)1651 bool DecoderImpl::MaybeInitializeWedgeMasks(FrameType frame_type) {
1652 if (IsIntraFrame(frame_type) || wedge_masks_initialized_) {
1653 return true;
1654 }
1655 if (!GenerateWedgeMask(&wedge_masks_)) {
1656 return false;
1657 }
1658 wedge_masks_initialized_ = true;
1659 return true;
1660 }
1661
MaybeInitializeQuantizerMatrix(const ObuFrameHeader & frame_header)1662 bool DecoderImpl::MaybeInitializeQuantizerMatrix(
1663 const ObuFrameHeader& frame_header) {
1664 if (quantizer_matrix_initialized_ || !frame_header.quantizer.use_matrix) {
1665 return true;
1666 }
1667 if (!InitializeQuantizerMatrix(&quantizer_matrix_)) {
1668 return false;
1669 }
1670 quantizer_matrix_initialized_ = true;
1671 return true;
1672 }
1673
1674 } // namespace libgav1
1675