1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/decoder_impl.h"
16
17 #include <algorithm>
18 #include <atomic>
19 #include <cassert>
20 #include <iterator>
21 #include <new>
22 #include <utility>
23
24 #include "src/dsp/common.h"
25 #include "src/dsp/constants.h"
26 #include "src/dsp/dsp.h"
27 #include "src/film_grain.h"
28 #include "src/frame_buffer_utils.h"
29 #include "src/frame_scratch_buffer.h"
30 #include "src/loop_restoration_info.h"
31 #include "src/obu_parser.h"
32 #include "src/post_filter.h"
33 #include "src/prediction_mask.h"
34 #include "src/threading_strategy.h"
35 #include "src/utils/blocking_counter.h"
36 #include "src/utils/common.h"
37 #include "src/utils/constants.h"
38 #include "src/utils/logging.h"
39 #include "src/utils/raw_bit_reader.h"
40 #include "src/utils/segmentation.h"
41 #include "src/utils/threadpool.h"
42 #include "src/yuv_buffer.h"
43
44 namespace libgav1 {
45 namespace {
46
// Upper bounds on block dimensions, expressed in units of 4x4 blocks.
// NOTE(review): presumably 32 units * 4 pixels = 128 pixels; neither constant
// is referenced in the portion of the file reviewed here -- confirm usage.
47 constexpr int kMaxBlockWidth4x4 = 32;
48 constexpr int kMaxBlockHeight4x4 = 32;
49
50 // Computes the bottom border size in pixels. If CDEF, loop restoration or
51 // SuperRes is enabled, adds extra border pixels to facilitate those steps to
52 // happen nearly in-place (a few extra rows instead of an entire frame buffer).
53 // The logic in this function should match the corresponding logic for
54 // |vertical_shift| in the PostFilter constructor.
GetBottomBorderPixels(const bool do_cdef,const bool do_restoration,const bool do_superres,const int subsampling_y)55 int GetBottomBorderPixels(const bool do_cdef, const bool do_restoration,
56 const bool do_superres, const int subsampling_y) {
57 int extra_border = 0;
58 if (do_cdef) {
59 extra_border += kCdefBorder;
60 } else if (do_restoration) {
61 // If CDEF is enabled, loop restoration is safe without extra border.
62 extra_border += kRestorationVerticalBorder;
63 }
64 if (do_superres) extra_border += kSuperResVerticalBorder;
65 // Double the number of extra bottom border pixels if the bottom border will
66 // be subsampled.
67 extra_border <<= subsampling_y;
68 return Align(kBorderPixels + extra_border, 2); // Must be a multiple of 2.
69 }
70
71 // Sets |frame_scratch_buffer->tile_decoding_failed| to true (while holding on
72 // to |frame_scratch_buffer->superblock_row_mutex|) and notifies the first
73 // |count| condition variables in
74 // |frame_scratch_buffer->superblock_row_progress_condvar|.
SetFailureAndNotifyAll(FrameScratchBuffer * const frame_scratch_buffer,int count)75 void SetFailureAndNotifyAll(FrameScratchBuffer* const frame_scratch_buffer,
76 int count) {
77 {
78 std::lock_guard<std::mutex> lock(
79 frame_scratch_buffer->superblock_row_mutex);
80 frame_scratch_buffer->tile_decoding_failed = true;
81 }
82 std::condition_variable* const condvars =
83 frame_scratch_buffer->superblock_row_progress_condvar.get();
84 for (int i = 0; i < count; ++i) {
85 condvars[i].notify_one();
86 }
87 }
88
89 // Helper class that releases the frame scratch buffer in the destructor.
90 class FrameScratchBufferReleaser {
91 public:
FrameScratchBufferReleaser(FrameScratchBufferPool * frame_scratch_buffer_pool,std::unique_ptr<FrameScratchBuffer> * frame_scratch_buffer)92 FrameScratchBufferReleaser(
93 FrameScratchBufferPool* frame_scratch_buffer_pool,
94 std::unique_ptr<FrameScratchBuffer>* frame_scratch_buffer)
95 : frame_scratch_buffer_pool_(frame_scratch_buffer_pool),
96 frame_scratch_buffer_(frame_scratch_buffer) {}
~FrameScratchBufferReleaser()97 ~FrameScratchBufferReleaser() {
98 frame_scratch_buffer_pool_->Release(std::move(*frame_scratch_buffer_));
99 }
100
101 private:
102 FrameScratchBufferPool* const frame_scratch_buffer_pool_;
103 std::unique_ptr<FrameScratchBuffer>* const frame_scratch_buffer_;
104 };
105
106 // Sets the |frame|'s segmentation map for two cases. The third case is handled
107 // in Tile::DecodeBlock().
SetSegmentationMap(const ObuFrameHeader & frame_header,const SegmentationMap * prev_segment_ids,RefCountedBuffer * const frame)108 void SetSegmentationMap(const ObuFrameHeader& frame_header,
109 const SegmentationMap* prev_segment_ids,
110 RefCountedBuffer* const frame) {
111 if (!frame_header.segmentation.enabled) {
112 // All segment_id's are 0.
113 frame->segmentation_map()->Clear();
114 } else if (!frame_header.segmentation.update_map) {
115 // Copy from prev_segment_ids.
116 if (prev_segment_ids == nullptr) {
117 // Treat a null prev_segment_ids pointer as if it pointed to a
118 // segmentation map containing all 0s.
119 frame->segmentation_map()->Clear();
120 } else {
121 frame->segmentation_map()->CopyFrom(*prev_segment_ids);
122 }
123 }
124 }
125
DecodeTilesNonFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter)126 StatusCode DecodeTilesNonFrameParallel(
127 const ObuSequenceHeader& sequence_header,
128 const ObuFrameHeader& frame_header,
129 const Vector<std::unique_ptr<Tile>>& tiles,
130 FrameScratchBuffer* const frame_scratch_buffer,
131 PostFilter* const post_filter) {
132 // Decode in superblock row order.
133 const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
134 std::unique_ptr<TileScratchBuffer> tile_scratch_buffer =
135 frame_scratch_buffer->tile_scratch_buffer_pool.Get();
136 if (tile_scratch_buffer == nullptr) return kLibgav1StatusOutOfMemory;
137 for (int row4x4 = 0; row4x4 < frame_header.rows4x4;
138 row4x4 += block_width4x4) {
139 for (const auto& tile_ptr : tiles) {
140 if (!tile_ptr->ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
141 row4x4, tile_scratch_buffer.get())) {
142 return kLibgav1StatusUnknownError;
143 }
144 }
145 post_filter->ApplyFilteringForOneSuperBlockRow(
146 row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
147 /*do_deblock=*/true);
148 }
149 frame_scratch_buffer->tile_scratch_buffer_pool.Release(
150 std::move(tile_scratch_buffer));
151 return kStatusOk;
152 }
153
DecodeTilesThreadedNonFrameParallel(const Vector<std::unique_ptr<Tile>> & tiles,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,BlockingCounterWithStatus * const pending_tiles)154 StatusCode DecodeTilesThreadedNonFrameParallel(
155 const Vector<std::unique_ptr<Tile>>& tiles,
156 FrameScratchBuffer* const frame_scratch_buffer,
157 PostFilter* const post_filter,
158 BlockingCounterWithStatus* const pending_tiles) {
159 ThreadingStrategy& threading_strategy =
160 frame_scratch_buffer->threading_strategy;
161 const int num_workers = threading_strategy.tile_thread_count();
162 BlockingCounterWithStatus pending_workers(num_workers);
163 std::atomic<int> tile_counter(0);
164 const int tile_count = static_cast<int>(tiles.size());
165 bool tile_decoding_failed = false;
166 // Submit tile decoding jobs to the thread pool.
167 for (int i = 0; i < num_workers; ++i) {
168 threading_strategy.tile_thread_pool()->Schedule([&tiles, tile_count,
169 &tile_counter,
170 &pending_workers,
171 &pending_tiles]() {
172 bool failed = false;
173 int index;
174 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
175 tile_count) {
176 if (!failed) {
177 const auto& tile_ptr = tiles[index];
178 if (!tile_ptr->ParseAndDecode()) {
179 LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
180 failed = true;
181 }
182 } else {
183 pending_tiles->Decrement(false);
184 }
185 }
186 pending_workers.Decrement(!failed);
187 });
188 }
189 // Have the current thread partake in tile decoding.
190 int index;
191 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
192 tile_count) {
193 if (!tile_decoding_failed) {
194 const auto& tile_ptr = tiles[index];
195 if (!tile_ptr->ParseAndDecode()) {
196 LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
197 tile_decoding_failed = true;
198 }
199 } else {
200 pending_tiles->Decrement(false);
201 }
202 }
203 // Wait until all the workers are done. This ensures that all the tiles have
204 // been parsed.
205 tile_decoding_failed |= !pending_workers.Wait();
206 // Wait until all the tiles have been decoded.
207 tile_decoding_failed |= !pending_tiles->Wait();
208 if (tile_decoding_failed) return kStatusUnknownError;
209 assert(threading_strategy.post_filter_thread_pool() != nullptr);
210 post_filter->ApplyFilteringThreaded();
211 return kStatusOk;
212 }
213
DecodeTilesFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,const SymbolDecoderContext & saved_symbol_decoder_context,const SegmentationMap * const prev_segment_ids,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,RefCountedBuffer * const current_frame)214 StatusCode DecodeTilesFrameParallel(
215 const ObuSequenceHeader& sequence_header,
216 const ObuFrameHeader& frame_header,
217 const Vector<std::unique_ptr<Tile>>& tiles,
218 const SymbolDecoderContext& saved_symbol_decoder_context,
219 const SegmentationMap* const prev_segment_ids,
220 FrameScratchBuffer* const frame_scratch_buffer,
221 PostFilter* const post_filter, RefCountedBuffer* const current_frame) {
222 // Parse the frame.
223 for (const auto& tile : tiles) {
224 if (!tile->Parse()) {
225 LIBGAV1_DLOG(ERROR, "Failed to parse tile number: %d\n", tile->number());
226 return kStatusUnknownError;
227 }
228 }
229 if (frame_header.enable_frame_end_update_cdf) {
230 frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
231 }
232 current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
233 SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
234 // Mark frame as parsed.
235 current_frame->SetFrameState(kFrameStateParsed);
236 std::unique_ptr<TileScratchBuffer> tile_scratch_buffer =
237 frame_scratch_buffer->tile_scratch_buffer_pool.Get();
238 if (tile_scratch_buffer == nullptr) {
239 return kStatusOutOfMemory;
240 }
241 const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
242 // Decode in superblock row order (inter prediction in the Tile class will
243 // block until the required superblocks in the reference frame are decoded).
244 for (int row4x4 = 0; row4x4 < frame_header.rows4x4;
245 row4x4 += block_width4x4) {
246 for (const auto& tile_ptr : tiles) {
247 if (!tile_ptr->ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
248 row4x4, tile_scratch_buffer.get())) {
249 LIBGAV1_DLOG(ERROR, "Failed to decode tile number: %d\n",
250 tile_ptr->number());
251 return kStatusUnknownError;
252 }
253 }
254 const int progress_row = post_filter->ApplyFilteringForOneSuperBlockRow(
255 row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
256 /*do_deblock=*/true);
257 if (progress_row >= 0) {
258 current_frame->SetProgress(progress_row);
259 }
260 }
261 // Mark frame as decoded (we no longer care about row-level progress since the
262 // entire frame has been decoded).
263 current_frame->SetFrameState(kFrameStateDecoded);
264 frame_scratch_buffer->tile_scratch_buffer_pool.Release(
265 std::move(tile_scratch_buffer));
266 return kStatusOk;
267 }
268
269 // Helper function used by DecodeTilesThreadedFrameParallel. Applies the
270 // deblocking filter for tile boundaries for the superblock row at |row4x4|.
ApplyDeblockingFilterForTileBoundaries(PostFilter * const post_filter,const std::unique_ptr<Tile> * tile_row_base,const ObuFrameHeader & frame_header,int row4x4,int block_width4x4,int tile_columns,bool decode_entire_tiles_in_worker_threads)271 void ApplyDeblockingFilterForTileBoundaries(
272 PostFilter* const post_filter, const std::unique_ptr<Tile>* tile_row_base,
273 const ObuFrameHeader& frame_header, int row4x4, int block_width4x4,
274 int tile_columns, bool decode_entire_tiles_in_worker_threads) {
275 // Apply vertical deblock filtering for the first 64 columns of each tile.
276 for (int tile_column = 0; tile_column < tile_columns; ++tile_column) {
277 const Tile& tile = *tile_row_base[tile_column];
278 post_filter->ApplyDeblockFilter(
279 kLoopFilterTypeVertical, row4x4, tile.column4x4_start(),
280 tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
281 }
282 if (decode_entire_tiles_in_worker_threads &&
283 row4x4 == tile_row_base[0]->row4x4_start()) {
284 // This is the first superblock row of a tile row. In this case, apply
285 // horizontal deblock filtering for the entire superblock row.
286 post_filter->ApplyDeblockFilter(kLoopFilterTypeHorizontal, row4x4, 0,
287 frame_header.columns4x4, block_width4x4);
288 } else {
289 // Apply horizontal deblock filtering for the first 64 columns of the
290 // first tile.
291 const Tile& first_tile = *tile_row_base[0];
292 post_filter->ApplyDeblockFilter(
293 kLoopFilterTypeHorizontal, row4x4, first_tile.column4x4_start(),
294 first_tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
295 // Apply horizontal deblock filtering for the last 64 columns of the
296 // previous tile and the first 64 columns of the current tile.
297 for (int tile_column = 1; tile_column < tile_columns; ++tile_column) {
298 const Tile& tile = *tile_row_base[tile_column];
299 // If the previous tile has more than 64 columns, then include those
300 // for the horizontal deblock.
301 const Tile& previous_tile = *tile_row_base[tile_column - 1];
302 const int column4x4_start =
303 tile.column4x4_start() -
304 ((tile.column4x4_start() - kNum4x4InLoopFilterUnit !=
305 previous_tile.column4x4_start())
306 ? kNum4x4InLoopFilterUnit
307 : 0);
308 post_filter->ApplyDeblockFilter(
309 kLoopFilterTypeHorizontal, row4x4, column4x4_start,
310 tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
311 }
312 // Apply horizontal deblock filtering for the last 64 columns of the
313 // last tile.
314 const Tile& last_tile = *tile_row_base[tile_columns - 1];
315 // Identify the last column4x4 value and do horizontal filtering for
316 // that column4x4. The value of last column4x4 is the nearest multiple
317 // of 16 that is before tile.column4x4_end().
318 const int column4x4_start = (last_tile.column4x4_end() - 1) & ~15;
319 // If column4x4_start is the same as tile.column4x4_start() then it
320 // means that the last tile has <= 64 columns. So there is nothing left
321 // to deblock (since it was already deblocked in the loop above).
322 if (column4x4_start != last_tile.column4x4_start()) {
323 post_filter->ApplyDeblockFilter(
324 kLoopFilterTypeHorizontal, row4x4, column4x4_start,
325 last_tile.column4x4_end(), block_width4x4);
326 }
327 }
328 }
329
330 // Helper function used by DecodeTilesThreadedFrameParallel. Decodes the
331 // superblock row starting at |row4x4| for tile at index |tile_index| in the
332 // list of tiles |tiles|. If the decoding is successful, then it does the
333 // following:
334 // * Schedule the next superblock row in the current tile column for decoding
335 // (the next superblock row may be in a different tile than the current
336 // one).
337 // * If an entire superblock row of the frame has been decoded, it notifies
338 // the waiters (if there are any).
DecodeSuperBlockRowInTile(const Vector<std::unique_ptr<Tile>> & tiles,size_t tile_index,int row4x4,const int superblock_size4x4,const int tile_columns,const int superblock_rows,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,BlockingCounter * const pending_jobs)339 void DecodeSuperBlockRowInTile(
340 const Vector<std::unique_ptr<Tile>>& tiles, size_t tile_index, int row4x4,
341 const int superblock_size4x4, const int tile_columns,
342 const int superblock_rows, FrameScratchBuffer* const frame_scratch_buffer,
343 PostFilter* const post_filter, BlockingCounter* const pending_jobs) {
344 std::unique_ptr<TileScratchBuffer> scratch_buffer =
345 frame_scratch_buffer->tile_scratch_buffer_pool.Get();
346 if (scratch_buffer == nullptr) {
347 SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
348 return;
349 }
350 Tile& tile = *tiles[tile_index];
351 const bool ok = tile.ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
352 row4x4, scratch_buffer.get());
353 frame_scratch_buffer->tile_scratch_buffer_pool.Release(
354 std::move(scratch_buffer));
355 if (!ok) {
356 SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
357 return;
358 }
359 if (post_filter->DoDeblock()) {
360 // Apply vertical deblock filtering for all the columns in this tile except
361 // for the first 64 columns.
362 post_filter->ApplyDeblockFilter(
363 kLoopFilterTypeVertical, row4x4,
364 tile.column4x4_start() + kNum4x4InLoopFilterUnit, tile.column4x4_end(),
365 superblock_size4x4);
366 // Apply horizontal deblock filtering for all the columns in this tile
367 // except for the first and the last 64 columns.
368 // Note about the last tile of each row: For the last tile, column4x4_end
369 // may not be a multiple of 16. In that case it is still okay to simply
370 // subtract 16 since ApplyDeblockFilter() will only do the filters in
371 // increments of 64 columns (or 32 columns for chroma with subsampling).
372 post_filter->ApplyDeblockFilter(
373 kLoopFilterTypeHorizontal, row4x4,
374 tile.column4x4_start() + kNum4x4InLoopFilterUnit,
375 tile.column4x4_end() - kNum4x4InLoopFilterUnit, superblock_size4x4);
376 }
377 const int superblock_size4x4_log2 = FloorLog2(superblock_size4x4);
378 const int index = row4x4 >> superblock_size4x4_log2;
379 int* const superblock_row_progress =
380 frame_scratch_buffer->superblock_row_progress.get();
381 std::condition_variable* const superblock_row_progress_condvar =
382 frame_scratch_buffer->superblock_row_progress_condvar.get();
383 bool notify;
384 {
385 std::lock_guard<std::mutex> lock(
386 frame_scratch_buffer->superblock_row_mutex);
387 notify = ++superblock_row_progress[index] == tile_columns;
388 }
389 if (notify) {
390 // We are done decoding this superblock row. Notify the post filtering
391 // thread.
392 superblock_row_progress_condvar[index].notify_one();
393 }
394 // Schedule the next superblock row (if one exists).
395 ThreadPool& thread_pool =
396 *frame_scratch_buffer->threading_strategy.thread_pool();
397 const int next_row4x4 = row4x4 + superblock_size4x4;
398 if (!tile.IsRow4x4Inside(next_row4x4)) {
399 tile_index += tile_columns;
400 }
401 if (tile_index >= tiles.size()) return;
402 pending_jobs->IncrementBy(1);
403 thread_pool.Schedule([&tiles, tile_index, next_row4x4, superblock_size4x4,
404 tile_columns, superblock_rows, frame_scratch_buffer,
405 post_filter, pending_jobs]() {
406 DecodeSuperBlockRowInTile(tiles, tile_index, next_row4x4,
407 superblock_size4x4, tile_columns, superblock_rows,
408 frame_scratch_buffer, post_filter, pending_jobs);
409 pending_jobs->Decrement();
410 });
411 }
412
DecodeTilesThreadedFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,const SymbolDecoderContext & saved_symbol_decoder_context,const SegmentationMap * const prev_segment_ids,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,RefCountedBuffer * const current_frame)413 StatusCode DecodeTilesThreadedFrameParallel(
414 const ObuSequenceHeader& sequence_header,
415 const ObuFrameHeader& frame_header,
416 const Vector<std::unique_ptr<Tile>>& tiles,
417 const SymbolDecoderContext& saved_symbol_decoder_context,
418 const SegmentationMap* const prev_segment_ids,
419 FrameScratchBuffer* const frame_scratch_buffer,
420 PostFilter* const post_filter, RefCountedBuffer* const current_frame) {
421 // Parse the frame.
422 ThreadPool& thread_pool =
423 *frame_scratch_buffer->threading_strategy.thread_pool();
424 std::atomic<int> tile_counter(0);
425 const int tile_count = static_cast<int>(tiles.size());
426 const int num_workers = thread_pool.num_threads();
427 BlockingCounterWithStatus parse_workers(num_workers);
428 // Submit tile parsing jobs to the thread pool.
429 for (int i = 0; i < num_workers; ++i) {
430 thread_pool.Schedule([&tiles, tile_count, &tile_counter, &parse_workers]() {
431 bool failed = false;
432 int index;
433 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
434 tile_count) {
435 if (!failed) {
436 const auto& tile_ptr = tiles[index];
437 if (!tile_ptr->Parse()) {
438 LIBGAV1_DLOG(ERROR, "Error parsing tile #%d", tile_ptr->number());
439 failed = true;
440 }
441 }
442 }
443 parse_workers.Decrement(!failed);
444 });
445 }
446
447 // Have the current thread participate in parsing.
448 bool failed = false;
449 int index;
450 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
451 tile_count) {
452 if (!failed) {
453 const auto& tile_ptr = tiles[index];
454 if (!tile_ptr->Parse()) {
455 LIBGAV1_DLOG(ERROR, "Error parsing tile #%d", tile_ptr->number());
456 failed = true;
457 }
458 }
459 }
460
461 // Wait until all the parse workers are done. This ensures that all the tiles
462 // have been parsed.
463 if (!parse_workers.Wait() || failed) {
464 return kLibgav1StatusUnknownError;
465 }
466 if (frame_header.enable_frame_end_update_cdf) {
467 frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
468 }
469 current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
470 SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
471 current_frame->SetFrameState(kFrameStateParsed);
472
473 // Decode the frame.
474 const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
475 const int block_width4x4_log2 =
476 sequence_header.use_128x128_superblock ? 5 : 4;
477 const int superblock_rows =
478 (frame_header.rows4x4 + block_width4x4 - 1) >> block_width4x4_log2;
479 if (!frame_scratch_buffer->superblock_row_progress.Resize(superblock_rows) ||
480 !frame_scratch_buffer->superblock_row_progress_condvar.Resize(
481 superblock_rows)) {
482 return kLibgav1StatusOutOfMemory;
483 }
484 int* const superblock_row_progress =
485 frame_scratch_buffer->superblock_row_progress.get();
486 memset(superblock_row_progress, 0,
487 superblock_rows * sizeof(superblock_row_progress[0]));
488 frame_scratch_buffer->tile_decoding_failed = false;
489 const int tile_columns = frame_header.tile_info.tile_columns;
490 const bool decode_entire_tiles_in_worker_threads =
491 num_workers >= tile_columns;
492 BlockingCounter pending_jobs(
493 decode_entire_tiles_in_worker_threads ? num_workers : tile_columns);
494 if (decode_entire_tiles_in_worker_threads) {
495 // Submit tile decoding jobs to the thread pool.
496 tile_counter = 0;
497 for (int i = 0; i < num_workers; ++i) {
498 thread_pool.Schedule([&tiles, tile_count, &tile_counter, &pending_jobs,
499 frame_scratch_buffer, superblock_rows]() {
500 bool failed = false;
501 int index;
502 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
503 tile_count) {
504 if (failed) continue;
505 const auto& tile_ptr = tiles[index];
506 if (!tile_ptr->Decode(
507 &frame_scratch_buffer->superblock_row_mutex,
508 frame_scratch_buffer->superblock_row_progress.get(),
509 frame_scratch_buffer->superblock_row_progress_condvar
510 .get())) {
511 LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
512 failed = true;
513 SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
514 }
515 }
516 pending_jobs.Decrement();
517 });
518 }
519 } else {
520 // Schedule the jobs for first tile row.
521 for (int tile_index = 0; tile_index < tile_columns; ++tile_index) {
522 thread_pool.Schedule([&tiles, tile_index, block_width4x4, tile_columns,
523 superblock_rows, frame_scratch_buffer, post_filter,
524 &pending_jobs]() {
525 DecodeSuperBlockRowInTile(
526 tiles, tile_index, 0, block_width4x4, tile_columns, superblock_rows,
527 frame_scratch_buffer, post_filter, &pending_jobs);
528 pending_jobs.Decrement();
529 });
530 }
531 }
532
533 // Current thread will do the post filters.
534 std::condition_variable* const superblock_row_progress_condvar =
535 frame_scratch_buffer->superblock_row_progress_condvar.get();
536 const std::unique_ptr<Tile>* tile_row_base = &tiles[0];
537 for (int row4x4 = 0, index = 0; row4x4 < frame_header.rows4x4;
538 row4x4 += block_width4x4, ++index) {
539 if (!tile_row_base[0]->IsRow4x4Inside(row4x4)) {
540 tile_row_base += tile_columns;
541 }
542 {
543 std::unique_lock<std::mutex> lock(
544 frame_scratch_buffer->superblock_row_mutex);
545 while (superblock_row_progress[index] != tile_columns &&
546 !frame_scratch_buffer->tile_decoding_failed) {
547 superblock_row_progress_condvar[index].wait(lock);
548 }
549 if (frame_scratch_buffer->tile_decoding_failed) break;
550 }
551 if (post_filter->DoDeblock()) {
552 // Apply deblocking filter for the tile boundaries of this superblock row.
553 // The deblocking filter for the internal blocks will be applied in the
554 // tile worker threads. In this thread, we will only have to apply
555 // deblocking filter for the tile boundaries.
556 ApplyDeblockingFilterForTileBoundaries(
557 post_filter, tile_row_base, frame_header, row4x4, block_width4x4,
558 tile_columns, decode_entire_tiles_in_worker_threads);
559 }
560 // Apply all the post filters other than deblocking.
561 const int progress_row = post_filter->ApplyFilteringForOneSuperBlockRow(
562 row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
563 /*do_deblock=*/false);
564 if (progress_row >= 0) {
565 current_frame->SetProgress(progress_row);
566 }
567 }
568 // Wait until all the pending jobs are done. This ensures that all the tiles
569 // have been decoded and wrapped up.
570 pending_jobs.Wait();
571 {
572 std::lock_guard<std::mutex> lock(
573 frame_scratch_buffer->superblock_row_mutex);
574 if (frame_scratch_buffer->tile_decoding_failed) {
575 return kLibgav1StatusUnknownError;
576 }
577 }
578
579 current_frame->SetFrameState(kFrameStateDecoded);
580 return kStatusOk;
581 }
582
583 } // namespace
584
585 // static
Create(const DecoderSettings * settings,std::unique_ptr<DecoderImpl> * output)586 StatusCode DecoderImpl::Create(const DecoderSettings* settings,
587 std::unique_ptr<DecoderImpl>* output) {
588 if (settings->threads <= 0) {
589 LIBGAV1_DLOG(ERROR, "Invalid settings->threads: %d.", settings->threads);
590 return kStatusInvalidArgument;
591 }
592 if (settings->frame_parallel) {
593 if (settings->release_input_buffer == nullptr) {
594 LIBGAV1_DLOG(ERROR,
595 "release_input_buffer callback must not be null when "
596 "frame_parallel is true.");
597 return kStatusInvalidArgument;
598 }
599 }
600 std::unique_ptr<DecoderImpl> impl(new (std::nothrow) DecoderImpl(settings));
601 if (impl == nullptr) {
602 LIBGAV1_DLOG(ERROR, "Failed to allocate DecoderImpl.");
603 return kStatusOutOfMemory;
604 }
605 const StatusCode status = impl->Init();
606 if (status != kStatusOk) return status;
607 *output = std::move(impl);
608 return kStatusOk;
609 }
610
DecoderImpl(const DecoderSettings * settings)611 DecoderImpl::DecoderImpl(const DecoderSettings* settings)
612 : buffer_pool_(settings->on_frame_buffer_size_changed,
613 settings->get_frame_buffer, settings->release_frame_buffer,
614 settings->callback_private_data),
615 settings_(*settings) {
616 dsp::DspInit();
617 }
618
~DecoderImpl()619 DecoderImpl::~DecoderImpl() {
620 // Clean up and wait until all the threads have stopped. We just have to pass
621 // in a dummy status that is not kStatusOk or kStatusTryAgain to trigger the
622 // path that clears all the threads and structs.
623 SignalFailure(kStatusUnknownError);
624 // Release any other frame buffer references that we may be holding on to.
625 ReleaseOutputFrame();
626 output_frame_queue_.Clear();
627 for (auto& reference_frame : state_.reference_frame) {
628 reference_frame = nullptr;
629 }
630 }
631
Init()632 StatusCode DecoderImpl::Init() {
633 if (!output_frame_queue_.Init(kMaxLayers)) {
634 LIBGAV1_DLOG(ERROR, "output_frame_queue_.Init() failed.");
635 return kStatusOutOfMemory;
636 }
637 return kStatusOk;
638 }
639
InitializeFrameThreadPoolAndTemporalUnitQueue(const uint8_t * data,size_t size)640 StatusCode DecoderImpl::InitializeFrameThreadPoolAndTemporalUnitQueue(
641 const uint8_t* data, size_t size) {
642 is_frame_parallel_ = false;
643 if (settings_.frame_parallel) {
644 DecoderState state;
645 std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
646 data, size, settings_.operating_point, &buffer_pool_, &state));
647 if (obu == nullptr) {
648 LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
649 return kStatusOutOfMemory;
650 }
651 RefCountedBufferPtr current_frame;
652 const StatusCode status = obu->ParseOneFrame(¤t_frame);
653 if (status != kStatusOk) {
654 LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
655 return status;
656 }
657 current_frame = nullptr;
658 // We assume that the first frame that was parsed will contain the frame
659 // header. This assumption is usually true in practice. So we will simply
660 // not use frame parallel mode if this is not the case.
661 if (settings_.threads > 1 &&
662 !InitializeThreadPoolsForFrameParallel(
663 settings_.threads, obu->frame_header().tile_info.tile_count,
664 obu->frame_header().tile_info.tile_columns, &frame_thread_pool_,
665 &frame_scratch_buffer_pool_)) {
666 return kStatusOutOfMemory;
667 }
668 }
669 const int max_allowed_frames =
670 (frame_thread_pool_ != nullptr) ? frame_thread_pool_->num_threads() : 1;
671 assert(max_allowed_frames > 0);
672 if (!temporal_units_.Init(max_allowed_frames)) {
673 LIBGAV1_DLOG(ERROR, "temporal_units_.Init() failed.");
674 return kStatusOutOfMemory;
675 }
676 is_frame_parallel_ = frame_thread_pool_ != nullptr;
677 return kStatusOk;
678 }
679
EnqueueFrame(const uint8_t * data,size_t size,int64_t user_private_data,void * buffer_private_data)680 StatusCode DecoderImpl::EnqueueFrame(const uint8_t* data, size_t size,
681 int64_t user_private_data,
682 void* buffer_private_data) {
683 if (data == nullptr || size == 0) return kStatusInvalidArgument;
684 if (HasFailure()) return kStatusUnknownError;
685 if (!seen_first_frame_) {
686 seen_first_frame_ = true;
687 const StatusCode status =
688 InitializeFrameThreadPoolAndTemporalUnitQueue(data, size);
689 if (status != kStatusOk) {
690 return SignalFailure(status);
691 }
692 }
693 if (temporal_units_.Full()) {
694 return kStatusTryAgain;
695 }
696 if (is_frame_parallel_) {
697 return ParseAndSchedule(data, size, user_private_data, buffer_private_data);
698 }
699 TemporalUnit temporal_unit(data, size, user_private_data,
700 buffer_private_data);
701 temporal_units_.Push(std::move(temporal_unit));
702 return kStatusOk;
703 }
704
SignalFailure(StatusCode status)705 StatusCode DecoderImpl::SignalFailure(StatusCode status) {
706 if (status == kStatusOk || status == kStatusTryAgain) return status;
707 // Set the |failure_status_| first so that any pending jobs in
708 // |frame_thread_pool_| will exit right away when the thread pool is being
709 // released below.
710 {
711 std::lock_guard<std::mutex> lock(mutex_);
712 failure_status_ = status;
713 }
714 // Make sure all waiting threads exit.
715 buffer_pool_.Abort();
716 frame_thread_pool_ = nullptr;
717 while (!temporal_units_.Empty()) {
718 if (settings_.release_input_buffer != nullptr) {
719 settings_.release_input_buffer(
720 settings_.callback_private_data,
721 temporal_units_.Front().buffer_private_data);
722 }
723 temporal_units_.Pop();
724 }
725 return status;
726 }
727
728 // DequeueFrame() follows the following policy to avoid holding unnecessary
729 // frame buffer references in output_frame_: output_frame_ must be null when
730 // DequeueFrame() returns false.
DequeueFrame(const DecoderBuffer ** out_ptr)731 StatusCode DecoderImpl::DequeueFrame(const DecoderBuffer** out_ptr) {
732 if (out_ptr == nullptr) {
733 LIBGAV1_DLOG(ERROR, "Invalid argument: out_ptr == nullptr.");
734 return kStatusInvalidArgument;
735 }
736 // We assume a call to DequeueFrame() indicates that the caller is no longer
737 // using the previous output frame, so we can release it.
738 ReleaseOutputFrame();
739 if (temporal_units_.Empty()) {
740 // No input frames to decode.
741 *out_ptr = nullptr;
742 return kStatusNothingToDequeue;
743 }
744 TemporalUnit& temporal_unit = temporal_units_.Front();
745 if (!is_frame_parallel_) {
746 // If |output_frame_queue_| is not empty, then return the first frame from
747 // that queue.
748 if (!output_frame_queue_.Empty()) {
749 RefCountedBufferPtr frame = std::move(output_frame_queue_.Front());
750 output_frame_queue_.Pop();
751 buffer_.user_private_data = temporal_unit.user_private_data;
752 if (output_frame_queue_.Empty()) {
753 temporal_units_.Pop();
754 }
755 const StatusCode status = CopyFrameToOutputBuffer(frame);
756 if (status != kStatusOk) {
757 return status;
758 }
759 *out_ptr = &buffer_;
760 return kStatusOk;
761 }
762 // Decode the next available temporal unit and return.
763 const StatusCode status = DecodeTemporalUnit(temporal_unit, out_ptr);
764 if (status != kStatusOk) {
765 // In case of failure, discard all the output frames that we may be
766 // holding on references to.
767 output_frame_queue_.Clear();
768 }
769 if (settings_.release_input_buffer != nullptr) {
770 settings_.release_input_buffer(settings_.callback_private_data,
771 temporal_unit.buffer_private_data);
772 }
773 if (output_frame_queue_.Empty()) {
774 temporal_units_.Pop();
775 }
776 return status;
777 }
778 {
779 std::unique_lock<std::mutex> lock(mutex_);
780 if (settings_.blocking_dequeue) {
781 while (!temporal_unit.decoded && failure_status_ == kStatusOk) {
782 decoded_condvar_.wait(lock);
783 }
784 } else {
785 if (!temporal_unit.decoded && failure_status_ == kStatusOk) {
786 return kStatusTryAgain;
787 }
788 }
789 if (failure_status_ != kStatusOk) {
790 const StatusCode failure_status = failure_status_;
791 lock.unlock();
792 return SignalFailure(failure_status);
793 }
794 }
795 if (settings_.release_input_buffer != nullptr &&
796 !temporal_unit.released_input_buffer) {
797 temporal_unit.released_input_buffer = true;
798 settings_.release_input_buffer(settings_.callback_private_data,
799 temporal_unit.buffer_private_data);
800 }
801 if (temporal_unit.status != kStatusOk) {
802 temporal_units_.Pop();
803 return SignalFailure(temporal_unit.status);
804 }
805 if (!temporal_unit.has_displayable_frame) {
806 *out_ptr = nullptr;
807 temporal_units_.Pop();
808 return kStatusOk;
809 }
810 assert(temporal_unit.output_layer_count > 0);
811 StatusCode status = CopyFrameToOutputBuffer(
812 temporal_unit.output_layers[temporal_unit.output_layer_count - 1].frame);
813 temporal_unit.output_layers[temporal_unit.output_layer_count - 1].frame =
814 nullptr;
815 if (status != kStatusOk) {
816 temporal_units_.Pop();
817 return SignalFailure(status);
818 }
819 buffer_.user_private_data = temporal_unit.user_private_data;
820 *out_ptr = &buffer_;
821 if (--temporal_unit.output_layer_count == 0) {
822 temporal_units_.Pop();
823 }
824 return kStatusOk;
825 }
826
ParseAndSchedule(const uint8_t * data,size_t size,int64_t user_private_data,void * buffer_private_data)827 StatusCode DecoderImpl::ParseAndSchedule(const uint8_t* data, size_t size,
828 int64_t user_private_data,
829 void* buffer_private_data) {
830 TemporalUnit temporal_unit(data, size, user_private_data,
831 buffer_private_data);
832 std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
833 temporal_unit.data, temporal_unit.size, settings_.operating_point,
834 &buffer_pool_, &state_));
835 if (obu == nullptr) {
836 LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
837 return kStatusOutOfMemory;
838 }
839 if (has_sequence_header_) {
840 obu->set_sequence_header(sequence_header_);
841 }
842 StatusCode status;
843 int position_in_temporal_unit = 0;
844 while (obu->HasData()) {
845 RefCountedBufferPtr current_frame;
846 status = obu->ParseOneFrame(¤t_frame);
847 if (status != kStatusOk) {
848 LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
849 return status;
850 }
851 if (!MaybeInitializeQuantizerMatrix(obu->frame_header())) {
852 LIBGAV1_DLOG(ERROR, "InitializeQuantizerMatrix() failed.");
853 return kStatusOutOfMemory;
854 }
855 if (!MaybeInitializeWedgeMasks(obu->frame_header().frame_type)) {
856 LIBGAV1_DLOG(ERROR, "InitializeWedgeMasks() failed.");
857 return kStatusOutOfMemory;
858 }
859 if (IsNewSequenceHeader(*obu)) {
860 const ObuSequenceHeader& sequence_header = obu->sequence_header();
861 const Libgav1ImageFormat image_format =
862 ComposeImageFormat(sequence_header.color_config.is_monochrome,
863 sequence_header.color_config.subsampling_x,
864 sequence_header.color_config.subsampling_y);
865 const int max_bottom_border = GetBottomBorderPixels(
866 /*do_cdef=*/true, /*do_restoration=*/true,
867 /*do_superres=*/true, sequence_header.color_config.subsampling_y);
868 // TODO(vigneshv): This may not be the right place to call this callback
869 // for the frame parallel case. Investigate and fix it.
870 if (!buffer_pool_.OnFrameBufferSizeChanged(
871 sequence_header.color_config.bitdepth, image_format,
872 sequence_header.max_frame_width, sequence_header.max_frame_height,
873 kBorderPixels, kBorderPixels, kBorderPixels, max_bottom_border)) {
874 LIBGAV1_DLOG(ERROR, "buffer_pool_.OnFrameBufferSizeChanged failed.");
875 return kStatusUnknownError;
876 }
877 }
878 // This can happen when there are multiple spatial/temporal layers and if
879 // all the layers are outside the current operating point.
880 if (current_frame == nullptr) {
881 continue;
882 }
883 // Note that we cannot set EncodedFrame.temporal_unit here. It will be set
884 // in the code below after |temporal_unit| is std::move'd into the
885 // |temporal_units_| queue.
886 if (!temporal_unit.frames.emplace_back(obu.get(), state_, current_frame,
887 position_in_temporal_unit++)) {
888 LIBGAV1_DLOG(ERROR, "temporal_unit.frames.emplace_back failed.");
889 return kStatusOutOfMemory;
890 }
891 state_.UpdateReferenceFrames(current_frame,
892 obu->frame_header().refresh_frame_flags);
893 }
894 // This function cannot fail after this point. So it is okay to move the
895 // |temporal_unit| into |temporal_units_| queue.
896 temporal_units_.Push(std::move(temporal_unit));
897 if (temporal_units_.Back().frames.empty()) {
898 std::lock_guard<std::mutex> lock(mutex_);
899 temporal_units_.Back().has_displayable_frame = false;
900 temporal_units_.Back().decoded = true;
901 return kStatusOk;
902 }
903 for (auto& frame : temporal_units_.Back().frames) {
904 EncodedFrame* const encoded_frame = &frame;
905 encoded_frame->temporal_unit = &temporal_units_.Back();
906 frame_thread_pool_->Schedule([this, encoded_frame]() {
907 if (HasFailure()) return;
908 const StatusCode status = DecodeFrame(encoded_frame);
909 encoded_frame->state = {};
910 encoded_frame->frame = nullptr;
911 TemporalUnit& temporal_unit = *encoded_frame->temporal_unit;
912 std::lock_guard<std::mutex> lock(mutex_);
913 if (failure_status_ != kStatusOk) return;
914 // temporal_unit's status defaults to kStatusOk. So we need to set it only
915 // on error. If |failure_status_| is not kStatusOk at this point, it means
916 // that there has already been a failure. So we don't care about this
917 // subsequent failure. We will simply return the error code of the first
918 // failure.
919 if (status != kStatusOk) {
920 temporal_unit.status = status;
921 if (failure_status_ == kStatusOk) {
922 failure_status_ = status;
923 }
924 }
925 temporal_unit.decoded =
926 ++temporal_unit.decoded_count == temporal_unit.frames.size();
927 if (temporal_unit.decoded && settings_.output_all_layers &&
928 temporal_unit.output_layer_count > 1) {
929 std::sort(
930 temporal_unit.output_layers,
931 temporal_unit.output_layers + temporal_unit.output_layer_count);
932 }
933 if (temporal_unit.decoded || failure_status_ != kStatusOk) {
934 decoded_condvar_.notify_one();
935 }
936 });
937 }
938 return kStatusOk;
939 }
940
DecodeFrame(EncodedFrame * const encoded_frame)941 StatusCode DecoderImpl::DecodeFrame(EncodedFrame* const encoded_frame) {
942 const ObuSequenceHeader& sequence_header = encoded_frame->sequence_header;
943 const ObuFrameHeader& frame_header = encoded_frame->frame_header;
944 RefCountedBufferPtr current_frame = std::move(encoded_frame->frame);
945
946 std::unique_ptr<FrameScratchBuffer> frame_scratch_buffer =
947 frame_scratch_buffer_pool_.Get();
948 if (frame_scratch_buffer == nullptr) {
949 LIBGAV1_DLOG(ERROR, "Error when getting FrameScratchBuffer.");
950 return kStatusOutOfMemory;
951 }
952 // |frame_scratch_buffer| will be released when this local variable goes out
953 // of scope (i.e.) on any return path in this function.
954 FrameScratchBufferReleaser frame_scratch_buffer_releaser(
955 &frame_scratch_buffer_pool_, &frame_scratch_buffer);
956
957 StatusCode status;
958 if (!frame_header.show_existing_frame) {
959 if (encoded_frame->tile_buffers.empty()) {
960 // This means that the last call to ParseOneFrame() did not actually
961 // have any tile groups. This could happen in rare cases (for example,
962 // if there is a Metadata OBU after the TileGroup OBU). We currently do
963 // not have a reason to handle those cases, so we simply continue.
964 return kStatusOk;
965 }
966 status = DecodeTiles(sequence_header, frame_header,
967 encoded_frame->tile_buffers, encoded_frame->state,
968 frame_scratch_buffer.get(), current_frame.get());
969 if (status != kStatusOk) {
970 return status;
971 }
972 } else {
973 if (!current_frame->WaitUntilDecoded()) {
974 return kStatusUnknownError;
975 }
976 }
977 if (!frame_header.show_frame && !frame_header.show_existing_frame) {
978 // This frame is not displayable. Not an error.
979 return kStatusOk;
980 }
981 RefCountedBufferPtr film_grain_frame;
982 status = ApplyFilmGrain(
983 sequence_header, frame_header, current_frame, &film_grain_frame,
984 frame_scratch_buffer->threading_strategy.thread_pool());
985 if (status != kStatusOk) {
986 return status;
987 }
988
989 TemporalUnit& temporal_unit = *encoded_frame->temporal_unit;
990 std::lock_guard<std::mutex> lock(mutex_);
991 if (temporal_unit.has_displayable_frame && !settings_.output_all_layers) {
992 assert(temporal_unit.output_frame_position >= 0);
993 // A displayable frame was already found in this temporal unit. This can
994 // happen if there are multiple spatial/temporal layers. Since
995 // |settings_.output_all_layers| is false, we will output only the last
996 // displayable frame.
997 if (temporal_unit.output_frame_position >
998 encoded_frame->position_in_temporal_unit) {
999 return kStatusOk;
1000 }
1001 // Replace any output frame that we may have seen before with the current
1002 // frame.
1003 assert(temporal_unit.output_layer_count == 1);
1004 --temporal_unit.output_layer_count;
1005 }
1006 temporal_unit.has_displayable_frame = true;
1007 temporal_unit.output_layers[temporal_unit.output_layer_count].frame =
1008 std::move(film_grain_frame);
1009 temporal_unit.output_layers[temporal_unit.output_layer_count]
1010 .position_in_temporal_unit = encoded_frame->position_in_temporal_unit;
1011 ++temporal_unit.output_layer_count;
1012 temporal_unit.output_frame_position =
1013 encoded_frame->position_in_temporal_unit;
1014 return kStatusOk;
1015 }
1016
DecodeTemporalUnit(const TemporalUnit & temporal_unit,const DecoderBuffer ** out_ptr)1017 StatusCode DecoderImpl::DecodeTemporalUnit(const TemporalUnit& temporal_unit,
1018 const DecoderBuffer** out_ptr) {
1019 std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
1020 temporal_unit.data, temporal_unit.size, settings_.operating_point,
1021 &buffer_pool_, &state_));
1022 if (obu == nullptr) {
1023 LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
1024 return kStatusOutOfMemory;
1025 }
1026 if (has_sequence_header_) {
1027 obu->set_sequence_header(sequence_header_);
1028 }
1029 StatusCode status;
1030 std::unique_ptr<FrameScratchBuffer> frame_scratch_buffer =
1031 frame_scratch_buffer_pool_.Get();
1032 if (frame_scratch_buffer == nullptr) {
1033 LIBGAV1_DLOG(ERROR, "Error when getting FrameScratchBuffer.");
1034 return kStatusOutOfMemory;
1035 }
1036 // |frame_scratch_buffer| will be released when this local variable goes out
1037 // of scope (i.e.) on any return path in this function.
1038 FrameScratchBufferReleaser frame_scratch_buffer_releaser(
1039 &frame_scratch_buffer_pool_, &frame_scratch_buffer);
1040
1041 while (obu->HasData()) {
1042 RefCountedBufferPtr current_frame;
1043 status = obu->ParseOneFrame(¤t_frame);
1044 if (status != kStatusOk) {
1045 LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
1046 return status;
1047 }
1048 if (!MaybeInitializeQuantizerMatrix(obu->frame_header())) {
1049 LIBGAV1_DLOG(ERROR, "InitializeQuantizerMatrix() failed.");
1050 return kStatusOutOfMemory;
1051 }
1052 if (!MaybeInitializeWedgeMasks(obu->frame_header().frame_type)) {
1053 LIBGAV1_DLOG(ERROR, "InitializeWedgeMasks() failed.");
1054 return kStatusOutOfMemory;
1055 }
1056 if (IsNewSequenceHeader(*obu)) {
1057 const ObuSequenceHeader& sequence_header = obu->sequence_header();
1058 const Libgav1ImageFormat image_format =
1059 ComposeImageFormat(sequence_header.color_config.is_monochrome,
1060 sequence_header.color_config.subsampling_x,
1061 sequence_header.color_config.subsampling_y);
1062 const int max_bottom_border = GetBottomBorderPixels(
1063 /*do_cdef=*/true, /*do_restoration=*/true,
1064 /*do_superres=*/true, sequence_header.color_config.subsampling_y);
1065 if (!buffer_pool_.OnFrameBufferSizeChanged(
1066 sequence_header.color_config.bitdepth, image_format,
1067 sequence_header.max_frame_width, sequence_header.max_frame_height,
1068 kBorderPixels, kBorderPixels, kBorderPixels, max_bottom_border)) {
1069 LIBGAV1_DLOG(ERROR, "buffer_pool_.OnFrameBufferSizeChanged failed.");
1070 return kStatusUnknownError;
1071 }
1072 }
1073 if (!obu->frame_header().show_existing_frame) {
1074 if (obu->tile_buffers().empty()) {
1075 // This means that the last call to ParseOneFrame() did not actually
1076 // have any tile groups. This could happen in rare cases (for example,
1077 // if there is a Metadata OBU after the TileGroup OBU). We currently do
1078 // not have a reason to handle those cases, so we simply continue.
1079 continue;
1080 }
1081 status = DecodeTiles(obu->sequence_header(), obu->frame_header(),
1082 obu->tile_buffers(), state_,
1083 frame_scratch_buffer.get(), current_frame.get());
1084 if (status != kStatusOk) {
1085 return status;
1086 }
1087 }
1088 state_.UpdateReferenceFrames(current_frame,
1089 obu->frame_header().refresh_frame_flags);
1090 if (obu->frame_header().show_frame ||
1091 obu->frame_header().show_existing_frame) {
1092 if (!output_frame_queue_.Empty() && !settings_.output_all_layers) {
1093 // There is more than one displayable frame in the current operating
1094 // point and |settings_.output_all_layers| is false. In this case, we
1095 // simply return the last displayable frame as the output frame and
1096 // ignore the rest.
1097 assert(output_frame_queue_.Size() == 1);
1098 output_frame_queue_.Pop();
1099 }
1100 RefCountedBufferPtr film_grain_frame;
1101 status = ApplyFilmGrain(
1102 obu->sequence_header(), obu->frame_header(), current_frame,
1103 &film_grain_frame,
1104 frame_scratch_buffer->threading_strategy.film_grain_thread_pool());
1105 if (status != kStatusOk) return status;
1106 output_frame_queue_.Push(std::move(film_grain_frame));
1107 }
1108 }
1109 if (output_frame_queue_.Empty()) {
1110 // No displayable frame in the temporal unit. Not an error.
1111 *out_ptr = nullptr;
1112 return kStatusOk;
1113 }
1114 status = CopyFrameToOutputBuffer(output_frame_queue_.Front());
1115 output_frame_queue_.Pop();
1116 if (status != kStatusOk) {
1117 return status;
1118 }
1119 buffer_.user_private_data = temporal_unit.user_private_data;
1120 *out_ptr = &buffer_;
1121 return kStatusOk;
1122 }
1123
CopyFrameToOutputBuffer(const RefCountedBufferPtr & frame)1124 StatusCode DecoderImpl::CopyFrameToOutputBuffer(
1125 const RefCountedBufferPtr& frame) {
1126 YuvBuffer* yuv_buffer = frame->buffer();
1127
1128 buffer_.chroma_sample_position = frame->chroma_sample_position();
1129
1130 if (yuv_buffer->is_monochrome()) {
1131 buffer_.image_format = kImageFormatMonochrome400;
1132 } else {
1133 if (yuv_buffer->subsampling_x() == 0 && yuv_buffer->subsampling_y() == 0) {
1134 buffer_.image_format = kImageFormatYuv444;
1135 } else if (yuv_buffer->subsampling_x() == 1 &&
1136 yuv_buffer->subsampling_y() == 0) {
1137 buffer_.image_format = kImageFormatYuv422;
1138 } else if (yuv_buffer->subsampling_x() == 1 &&
1139 yuv_buffer->subsampling_y() == 1) {
1140 buffer_.image_format = kImageFormatYuv420;
1141 } else {
1142 LIBGAV1_DLOG(ERROR,
1143 "Invalid chroma subsampling values: cannot determine buffer "
1144 "image format.");
1145 return kStatusInvalidArgument;
1146 }
1147 }
1148 buffer_.color_range = sequence_header_.color_config.color_range;
1149 buffer_.color_primary = sequence_header_.color_config.color_primary;
1150 buffer_.transfer_characteristics =
1151 sequence_header_.color_config.transfer_characteristics;
1152 buffer_.matrix_coefficients =
1153 sequence_header_.color_config.matrix_coefficients;
1154
1155 buffer_.bitdepth = yuv_buffer->bitdepth();
1156 const int num_planes =
1157 yuv_buffer->is_monochrome() ? kMaxPlanesMonochrome : kMaxPlanes;
1158 int plane = kPlaneY;
1159 for (; plane < num_planes; ++plane) {
1160 buffer_.stride[plane] = yuv_buffer->stride(plane);
1161 buffer_.plane[plane] = yuv_buffer->data(plane);
1162 buffer_.displayed_width[plane] = yuv_buffer->width(plane);
1163 buffer_.displayed_height[plane] = yuv_buffer->height(plane);
1164 }
1165 for (; plane < kMaxPlanes; ++plane) {
1166 buffer_.stride[plane] = 0;
1167 buffer_.plane[plane] = nullptr;
1168 buffer_.displayed_width[plane] = 0;
1169 buffer_.displayed_height[plane] = 0;
1170 }
1171 buffer_.spatial_id = frame->spatial_id();
1172 buffer_.temporal_id = frame->temporal_id();
1173 buffer_.buffer_private_data = frame->buffer_private_data();
1174 if (frame->hdr_cll_set()) {
1175 buffer_.has_hdr_cll = 1;
1176 buffer_.hdr_cll = frame->hdr_cll();
1177 } else {
1178 buffer_.has_hdr_cll = 0;
1179 }
1180 if (frame->hdr_mdcv_set()) {
1181 buffer_.has_hdr_mdcv = 1;
1182 buffer_.hdr_mdcv = frame->hdr_mdcv();
1183 } else {
1184 buffer_.has_hdr_mdcv = 0;
1185 }
1186 if (frame->itut_t35_set()) {
1187 buffer_.has_itut_t35 = 1;
1188 buffer_.itut_t35 = frame->itut_t35();
1189 } else {
1190 buffer_.has_itut_t35 = 0;
1191 }
1192 output_frame_ = frame;
1193 return kStatusOk;
1194 }
1195
ReleaseOutputFrame()1196 void DecoderImpl::ReleaseOutputFrame() {
1197 for (auto& plane : buffer_.plane) {
1198 plane = nullptr;
1199 }
1200 output_frame_ = nullptr;
1201 }
1202
DecodeTiles(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<TileBuffer> & tile_buffers,const DecoderState & state,FrameScratchBuffer * const frame_scratch_buffer,RefCountedBuffer * const current_frame)1203 StatusCode DecoderImpl::DecodeTiles(
1204 const ObuSequenceHeader& sequence_header,
1205 const ObuFrameHeader& frame_header, const Vector<TileBuffer>& tile_buffers,
1206 const DecoderState& state, FrameScratchBuffer* const frame_scratch_buffer,
1207 RefCountedBuffer* const current_frame) {
1208 frame_scratch_buffer->tile_scratch_buffer_pool.Reset(
1209 sequence_header.color_config.bitdepth);
1210 if (!frame_scratch_buffer->loop_restoration_info.Reset(
1211 &frame_header.loop_restoration, frame_header.upscaled_width,
1212 frame_header.height, sequence_header.color_config.subsampling_x,
1213 sequence_header.color_config.subsampling_y,
1214 sequence_header.color_config.is_monochrome)) {
1215 LIBGAV1_DLOG(ERROR,
1216 "Failed to allocate memory for loop restoration info units.");
1217 return kStatusOutOfMemory;
1218 }
1219 ThreadingStrategy& threading_strategy =
1220 frame_scratch_buffer->threading_strategy;
1221 if (!is_frame_parallel_ &&
1222 !threading_strategy.Reset(frame_header, settings_.threads)) {
1223 return kStatusOutOfMemory;
1224 }
1225 const bool do_cdef =
1226 PostFilter::DoCdef(frame_header, settings_.post_filter_mask);
1227 const int num_planes = sequence_header.color_config.is_monochrome
1228 ? kMaxPlanesMonochrome
1229 : kMaxPlanes;
1230 const bool do_restoration = PostFilter::DoRestoration(
1231 frame_header.loop_restoration, settings_.post_filter_mask, num_planes);
1232 const bool do_superres =
1233 PostFilter::DoSuperRes(frame_header, settings_.post_filter_mask);
1234 // Use kBorderPixels for the left, right, and top borders. Only the bottom
1235 // border may need to be bigger. Cdef border is needed only if we apply Cdef
1236 // without multithreading.
1237 const int bottom_border = GetBottomBorderPixels(
1238 do_cdef && threading_strategy.post_filter_thread_pool() == nullptr,
1239 do_restoration, do_superres, sequence_header.color_config.subsampling_y);
1240 current_frame->set_chroma_sample_position(
1241 sequence_header.color_config.chroma_sample_position);
1242 if (!current_frame->Realloc(sequence_header.color_config.bitdepth,
1243 sequence_header.color_config.is_monochrome,
1244 frame_header.upscaled_width, frame_header.height,
1245 sequence_header.color_config.subsampling_x,
1246 sequence_header.color_config.subsampling_y,
1247 /*left_border=*/kBorderPixels,
1248 /*right_border=*/kBorderPixels,
1249 /*top_border=*/kBorderPixels, bottom_border)) {
1250 LIBGAV1_DLOG(ERROR, "Failed to allocate memory for the decoder buffer.");
1251 return kStatusOutOfMemory;
1252 }
1253 if (frame_header.cdef.bits > 0) {
1254 if (!frame_scratch_buffer->cdef_index.Reset(
1255 DivideBy16(frame_header.rows4x4 + kMaxBlockHeight4x4),
1256 DivideBy16(frame_header.columns4x4 + kMaxBlockWidth4x4),
1257 /*zero_initialize=*/false)) {
1258 LIBGAV1_DLOG(ERROR, "Failed to allocate memory for cdef index.");
1259 return kStatusOutOfMemory;
1260 }
1261 }
1262 if (do_cdef) {
1263 if (!frame_scratch_buffer->cdef_skip.Reset(
1264 DivideBy2(frame_header.rows4x4 + kMaxBlockHeight4x4),
1265 DivideBy16(frame_header.columns4x4 + kMaxBlockWidth4x4),
1266 /*zero_initialize=*/true)) {
1267 LIBGAV1_DLOG(ERROR, "Failed to allocate memory for cdef skip.");
1268 return kStatusOutOfMemory;
1269 }
1270 }
1271 if (!frame_scratch_buffer->inter_transform_sizes.Reset(
1272 frame_header.rows4x4 + kMaxBlockHeight4x4,
1273 frame_header.columns4x4 + kMaxBlockWidth4x4,
1274 /*zero_initialize=*/false)) {
1275 LIBGAV1_DLOG(ERROR, "Failed to allocate memory for inter_transform_sizes.");
1276 return kStatusOutOfMemory;
1277 }
1278 if (frame_header.use_ref_frame_mvs) {
1279 if (!frame_scratch_buffer->motion_field.mv.Reset(
1280 DivideBy2(frame_header.rows4x4), DivideBy2(frame_header.columns4x4),
1281 /*zero_initialize=*/false) ||
1282 !frame_scratch_buffer->motion_field.reference_offset.Reset(
1283 DivideBy2(frame_header.rows4x4), DivideBy2(frame_header.columns4x4),
1284 /*zero_initialize=*/false)) {
1285 LIBGAV1_DLOG(ERROR,
1286 "Failed to allocate memory for temporal motion vectors.");
1287 return kStatusOutOfMemory;
1288 }
1289
1290 // For each motion vector, only mv[0] needs to be initialized to
1291 // kInvalidMvValue, mv[1] is not necessary to be initialized and can be
1292 // set to an arbitrary value. For simplicity, mv[1] is set to 0.
1293 // The following memory initialization of contiguous memory is very fast. It
1294 // is not recommended to make the initialization multi-threaded, unless the
1295 // memory which needs to be initialized in each thread is still contiguous.
1296 MotionVector invalid_mv;
1297 invalid_mv.mv[0] = kInvalidMvValue;
1298 invalid_mv.mv[1] = 0;
1299 MotionVector* const motion_field_mv =
1300 &frame_scratch_buffer->motion_field.mv[0][0];
1301 std::fill(motion_field_mv,
1302 motion_field_mv + frame_scratch_buffer->motion_field.mv.size(),
1303 invalid_mv);
1304 }
1305
1306 // The addition of kMaxBlockHeight4x4 and kMaxBlockWidth4x4 is necessary so
1307 // that the block parameters cache can be filled in for the last row/column
1308 // without having to check for boundary conditions.
1309 if (!frame_scratch_buffer->block_parameters_holder.Reset(
1310 frame_header.rows4x4 + kMaxBlockHeight4x4,
1311 frame_header.columns4x4 + kMaxBlockWidth4x4)) {
1312 return kStatusOutOfMemory;
1313 }
1314 const dsp::Dsp* const dsp =
1315 dsp::GetDspTable(sequence_header.color_config.bitdepth);
1316 if (dsp == nullptr) {
1317 LIBGAV1_DLOG(ERROR, "Failed to get the dsp table for bitdepth %d.",
1318 sequence_header.color_config.bitdepth);
1319 return kStatusInternalError;
1320 }
1321
1322 const int tile_count = frame_header.tile_info.tile_count;
1323 assert(tile_count >= 1);
1324 Vector<std::unique_ptr<Tile>> tiles;
1325 if (!tiles.reserve(tile_count)) {
1326 LIBGAV1_DLOG(ERROR, "tiles.reserve(%d) failed.\n", tile_count);
1327 return kStatusOutOfMemory;
1328 }
1329
1330 if (threading_strategy.row_thread_pool(0) != nullptr || is_frame_parallel_) {
1331 if (frame_scratch_buffer->residual_buffer_pool == nullptr) {
1332 frame_scratch_buffer->residual_buffer_pool.reset(
1333 new (std::nothrow) ResidualBufferPool(
1334 sequence_header.use_128x128_superblock,
1335 sequence_header.color_config.subsampling_x,
1336 sequence_header.color_config.subsampling_y,
1337 sequence_header.color_config.bitdepth == 8 ? sizeof(int16_t)
1338 : sizeof(int32_t)));
1339 if (frame_scratch_buffer->residual_buffer_pool == nullptr) {
1340 LIBGAV1_DLOG(ERROR, "Failed to allocate residual buffer.\n");
1341 return kStatusOutOfMemory;
1342 }
1343 } else {
1344 frame_scratch_buffer->residual_buffer_pool->Reset(
1345 sequence_header.use_128x128_superblock,
1346 sequence_header.color_config.subsampling_x,
1347 sequence_header.color_config.subsampling_y,
1348 sequence_header.color_config.bitdepth == 8 ? sizeof(int16_t)
1349 : sizeof(int32_t));
1350 }
1351 }
1352
1353 if (threading_strategy.post_filter_thread_pool() != nullptr && do_cdef) {
1354 // We need to store 4 rows per 64x64 unit.
1355 const int num_units =
1356 MultiplyBy4(RightShiftWithCeiling(frame_header.rows4x4, 4));
1357 // subsampling_y is set to zero irrespective of the actual frame's
1358 // subsampling since we need to store exactly |num_units| rows of the loop
1359 // restoration border pixels.
1360 if (!frame_scratch_buffer->cdef_border.Realloc(
1361 sequence_header.color_config.bitdepth,
1362 sequence_header.color_config.is_monochrome,
1363 MultiplyBy4(frame_header.columns4x4), num_units,
1364 sequence_header.color_config.subsampling_x,
1365 /*subsampling_y=*/0, kBorderPixels, kBorderPixels, kBorderPixels,
1366 kBorderPixels, nullptr, nullptr, nullptr)) {
1367 return kStatusOutOfMemory;
1368 }
1369 }
1370
1371 if (do_restoration &&
1372 (do_cdef || threading_strategy.post_filter_thread_pool() != nullptr)) {
1373 // We need to store 4 rows per 64x64 unit.
1374 const int num_units =
1375 MultiplyBy4(RightShiftWithCeiling(frame_header.rows4x4, 4));
1376 // subsampling_y is set to zero irrespective of the actual frame's
1377 // subsampling since we need to store exactly |num_units| rows of the loop
1378 // restoration border pixels.
1379 if (!frame_scratch_buffer->loop_restoration_border.Realloc(
1380 sequence_header.color_config.bitdepth,
1381 sequence_header.color_config.is_monochrome,
1382 frame_header.upscaled_width, num_units,
1383 sequence_header.color_config.subsampling_x,
1384 /*subsampling_y=*/0, kBorderPixels, kBorderPixels, kBorderPixels,
1385 kBorderPixels, nullptr, nullptr, nullptr)) {
1386 return kStatusOutOfMemory;
1387 }
1388 }
1389
1390 if (do_superres) {
1391 const int pixel_size = sequence_header.color_config.bitdepth == 8
1392 ? sizeof(uint8_t)
1393 : sizeof(uint16_t);
1394 const int coefficients_size = kSuperResFilterTaps *
1395 Align(frame_header.upscaled_width, 16) *
1396 pixel_size;
1397 if (!frame_scratch_buffer->superres_coefficients[kPlaneTypeY].Resize(
1398 coefficients_size)) {
1399 LIBGAV1_DLOG(ERROR,
1400 "Failed to Resize superres_coefficients[kPlaneTypeY].");
1401 return kStatusOutOfMemory;
1402 }
1403 #if LIBGAV1_MSAN
1404 // Quiet SuperRes_NEON() msan warnings.
1405 memset(frame_scratch_buffer->superres_coefficients[kPlaneTypeY].get(), 0,
1406 coefficients_size);
1407 #endif
1408 const int uv_coefficients_size =
1409 kSuperResFilterTaps *
1410 Align(SubsampledValue(frame_header.upscaled_width, 1), 16) * pixel_size;
1411 if (!sequence_header.color_config.is_monochrome &&
1412 sequence_header.color_config.subsampling_x != 0 &&
1413 !frame_scratch_buffer->superres_coefficients[kPlaneTypeUV].Resize(
1414 uv_coefficients_size)) {
1415 LIBGAV1_DLOG(ERROR,
1416 "Failed to Resize superres_coefficients[kPlaneTypeUV].");
1417 return kStatusOutOfMemory;
1418 }
1419 #if LIBGAV1_MSAN
1420 if (!sequence_header.color_config.is_monochrome &&
1421 sequence_header.color_config.subsampling_x != 0) {
1422 // Quiet SuperRes_NEON() msan warnings.
1423 memset(frame_scratch_buffer->superres_coefficients[kPlaneTypeUV].get(), 0,
1424 uv_coefficients_size);
1425 }
1426 #endif
1427 }
1428
1429 if (do_superres && threading_strategy.post_filter_thread_pool() != nullptr) {
1430 const int num_threads =
1431 threading_strategy.post_filter_thread_pool()->num_threads() + 1;
1432 // subsampling_y is set to zero irrespective of the actual frame's
1433 // subsampling since we need to store exactly |num_threads| rows of the
1434 // down-scaled pixels.
1435 // Left and right borders are for line extension. They are doubled for the Y
1436 // plane to make sure the U and V planes have enough space after possible
1437 // subsampling.
1438 if (!frame_scratch_buffer->superres_line_buffer.Realloc(
1439 sequence_header.color_config.bitdepth,
1440 sequence_header.color_config.is_monochrome,
1441 MultiplyBy4(frame_header.columns4x4), num_threads,
1442 sequence_header.color_config.subsampling_x,
1443 /*subsampling_y=*/0, 2 * kSuperResHorizontalBorder,
1444 2 * (kSuperResHorizontalBorder + kSuperResHorizontalPadding), 0, 0,
1445 nullptr, nullptr, nullptr)) {
1446 LIBGAV1_DLOG(ERROR, "Failed to resize superres line buffer.\n");
1447 return kStatusOutOfMemory;
1448 }
1449 }
1450
1451 if (is_frame_parallel_ && !IsIntraFrame(frame_header.frame_type)) {
1452 // We can parse the current frame if all the reference frames have been
1453 // parsed.
1454 for (const int index : frame_header.reference_frame_index) {
1455 if (!state.reference_frame[index]->WaitUntilParsed()) {
1456 return kStatusUnknownError;
1457 }
1458 }
1459 }
1460
1461 // If prev_segment_ids is a null pointer, it is treated as if it pointed to
1462 // a segmentation map containing all 0s.
1463 const SegmentationMap* prev_segment_ids = nullptr;
1464 if (frame_header.primary_reference_frame == kPrimaryReferenceNone) {
1465 frame_scratch_buffer->symbol_decoder_context.Initialize(
1466 frame_header.quantizer.base_index);
1467 } else {
1468 const int index =
1469 frame_header
1470 .reference_frame_index[frame_header.primary_reference_frame];
1471 assert(index != -1);
1472 const RefCountedBuffer* prev_frame = state.reference_frame[index].get();
1473 frame_scratch_buffer->symbol_decoder_context = prev_frame->FrameContext();
1474 if (frame_header.segmentation.enabled &&
1475 prev_frame->columns4x4() == frame_header.columns4x4 &&
1476 prev_frame->rows4x4() == frame_header.rows4x4) {
1477 prev_segment_ids = prev_frame->segmentation_map();
1478 }
1479 }
1480
1481 // The Tile class must make use of a separate buffer to store the unfiltered
1482 // pixels for the intra prediction of the next superblock row. This is done
1483 // only when one of the following conditions are true:
1484 // * is_frame_parallel_ is true.
1485 // * settings_.threads == 1.
1486 // In the non-frame-parallel multi-threaded case, we do not run the post
1487 // filters in the decode loop. So this buffer need not be used.
1488 const bool use_intra_prediction_buffer =
1489 is_frame_parallel_ || settings_.threads == 1;
1490 if (use_intra_prediction_buffer) {
1491 if (!frame_scratch_buffer->intra_prediction_buffers.Resize(
1492 frame_header.tile_info.tile_rows)) {
1493 LIBGAV1_DLOG(ERROR, "Failed to Resize intra_prediction_buffers.");
1494 return kStatusOutOfMemory;
1495 }
1496 IntraPredictionBuffer* const intra_prediction_buffers =
1497 frame_scratch_buffer->intra_prediction_buffers.get();
1498 for (int plane = kPlaneY; plane < num_planes; ++plane) {
1499 const int subsampling =
1500 (plane == kPlaneY) ? 0 : sequence_header.color_config.subsampling_x;
1501 const size_t intra_prediction_buffer_size =
1502 ((MultiplyBy4(frame_header.columns4x4) >> subsampling) *
1503 (sequence_header.color_config.bitdepth == 8 ? sizeof(uint8_t)
1504 : sizeof(uint16_t)));
1505 for (int tile_row = 0; tile_row < frame_header.tile_info.tile_rows;
1506 ++tile_row) {
1507 if (!intra_prediction_buffers[tile_row][plane].Resize(
1508 intra_prediction_buffer_size)) {
1509 LIBGAV1_DLOG(ERROR,
1510 "Failed to allocate intra prediction buffer for tile "
1511 "row %d plane %d.\n",
1512 tile_row, plane);
1513 return kStatusOutOfMemory;
1514 }
1515 }
1516 }
1517 }
1518
1519 PostFilter post_filter(frame_header, sequence_header, frame_scratch_buffer,
1520 current_frame->buffer(), dsp,
1521 settings_.post_filter_mask);
1522 SymbolDecoderContext saved_symbol_decoder_context;
1523 BlockingCounterWithStatus pending_tiles(tile_count);
1524 for (int tile_number = 0; tile_number < tile_count; ++tile_number) {
1525 std::unique_ptr<Tile> tile = Tile::Create(
1526 tile_number, tile_buffers[tile_number].data,
1527 tile_buffers[tile_number].size, sequence_header, frame_header,
1528 current_frame, state, frame_scratch_buffer, wedge_masks_,
1529 quantizer_matrix_, &saved_symbol_decoder_context, prev_segment_ids,
1530 &post_filter, dsp, threading_strategy.row_thread_pool(tile_number),
1531 &pending_tiles, is_frame_parallel_, use_intra_prediction_buffer);
1532 if (tile == nullptr) {
1533 LIBGAV1_DLOG(ERROR, "Failed to create tile.");
1534 return kStatusOutOfMemory;
1535 }
1536 tiles.push_back_unchecked(std::move(tile));
1537 }
1538 assert(tiles.size() == static_cast<size_t>(tile_count));
1539 if (is_frame_parallel_) {
1540 if (frame_scratch_buffer->threading_strategy.thread_pool() == nullptr) {
1541 return DecodeTilesFrameParallel(
1542 sequence_header, frame_header, tiles, saved_symbol_decoder_context,
1543 prev_segment_ids, frame_scratch_buffer, &post_filter, current_frame);
1544 }
1545 return DecodeTilesThreadedFrameParallel(
1546 sequence_header, frame_header, tiles, saved_symbol_decoder_context,
1547 prev_segment_ids, frame_scratch_buffer, &post_filter, current_frame);
1548 }
1549 StatusCode status;
1550 if (settings_.threads == 1) {
1551 status = DecodeTilesNonFrameParallel(sequence_header, frame_header, tiles,
1552 frame_scratch_buffer, &post_filter);
1553 } else {
1554 status = DecodeTilesThreadedNonFrameParallel(tiles, frame_scratch_buffer,
1555 &post_filter, &pending_tiles);
1556 }
1557 if (status != kStatusOk) return status;
1558 if (frame_header.enable_frame_end_update_cdf) {
1559 frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
1560 }
1561 current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
1562 SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
1563 return kStatusOk;
1564 }
1565
ApplyFilmGrain(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const RefCountedBufferPtr & displayable_frame,RefCountedBufferPtr * film_grain_frame,ThreadPool * thread_pool)1566 StatusCode DecoderImpl::ApplyFilmGrain(
1567 const ObuSequenceHeader& sequence_header,
1568 const ObuFrameHeader& frame_header,
1569 const RefCountedBufferPtr& displayable_frame,
1570 RefCountedBufferPtr* film_grain_frame, ThreadPool* thread_pool) {
1571 if (!sequence_header.film_grain_params_present ||
1572 !displayable_frame->film_grain_params().apply_grain ||
1573 (settings_.post_filter_mask & 0x10) == 0) {
1574 *film_grain_frame = displayable_frame;
1575 return kStatusOk;
1576 }
1577 if (!frame_header.show_existing_frame &&
1578 frame_header.refresh_frame_flags == 0) {
1579 // If show_existing_frame is true, then the current frame is a previously
1580 // saved reference frame. If refresh_frame_flags is nonzero, then the
1581 // state_.UpdateReferenceFrames() call above has saved the current frame as
1582 // a reference frame. Therefore, if both of these conditions are false, then
1583 // the current frame is not saved as a reference frame. displayable_frame
1584 // should hold the only reference to the current frame.
1585 assert(displayable_frame.use_count() == 1);
1586 // Add film grain noise in place.
1587 *film_grain_frame = displayable_frame;
1588 } else {
1589 *film_grain_frame = buffer_pool_.GetFreeBuffer();
1590 if (*film_grain_frame == nullptr) {
1591 LIBGAV1_DLOG(ERROR,
1592 "Could not get film_grain_frame from the buffer pool.");
1593 return kStatusResourceExhausted;
1594 }
1595 if (!(*film_grain_frame)
1596 ->Realloc(displayable_frame->buffer()->bitdepth(),
1597 displayable_frame->buffer()->is_monochrome(),
1598 displayable_frame->upscaled_width(),
1599 displayable_frame->frame_height(),
1600 displayable_frame->buffer()->subsampling_x(),
1601 displayable_frame->buffer()->subsampling_y(),
1602 kBorderPixelsFilmGrain, kBorderPixelsFilmGrain,
1603 kBorderPixelsFilmGrain, kBorderPixelsFilmGrain)) {
1604 LIBGAV1_DLOG(ERROR, "film_grain_frame->Realloc() failed.");
1605 return kStatusOutOfMemory;
1606 }
1607 (*film_grain_frame)
1608 ->set_chroma_sample_position(
1609 displayable_frame->chroma_sample_position());
1610 (*film_grain_frame)->set_spatial_id(displayable_frame->spatial_id());
1611 (*film_grain_frame)->set_temporal_id(displayable_frame->temporal_id());
1612 }
1613 const bool color_matrix_is_identity =
1614 sequence_header.color_config.matrix_coefficients ==
1615 kMatrixCoefficientsIdentity;
1616 assert(displayable_frame->buffer()->stride(kPlaneU) ==
1617 displayable_frame->buffer()->stride(kPlaneV));
1618 const int input_stride_uv = displayable_frame->buffer()->stride(kPlaneU);
1619 assert((*film_grain_frame)->buffer()->stride(kPlaneU) ==
1620 (*film_grain_frame)->buffer()->stride(kPlaneV));
1621 const int output_stride_uv = (*film_grain_frame)->buffer()->stride(kPlaneU);
1622 #if LIBGAV1_MAX_BITDEPTH >= 10
1623 if (displayable_frame->buffer()->bitdepth() == 10) {
1624 FilmGrain<10> film_grain(displayable_frame->film_grain_params(),
1625 displayable_frame->buffer()->is_monochrome(),
1626 color_matrix_is_identity,
1627 displayable_frame->buffer()->subsampling_x(),
1628 displayable_frame->buffer()->subsampling_y(),
1629 displayable_frame->upscaled_width(),
1630 displayable_frame->frame_height(), thread_pool);
1631 if (!film_grain.AddNoise(
1632 displayable_frame->buffer()->data(kPlaneY),
1633 displayable_frame->buffer()->stride(kPlaneY),
1634 displayable_frame->buffer()->data(kPlaneU),
1635 displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1636 (*film_grain_frame)->buffer()->data(kPlaneY),
1637 (*film_grain_frame)->buffer()->stride(kPlaneY),
1638 (*film_grain_frame)->buffer()->data(kPlaneU),
1639 (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1640 LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1641 return kStatusOutOfMemory;
1642 }
1643 return kStatusOk;
1644 }
1645 #endif // LIBGAV1_MAX_BITDEPTH >= 10
1646 #if LIBGAV1_MAX_BITDEPTH == 12
1647 if (displayable_frame->buffer()->bitdepth() == 12) {
1648 FilmGrain<12> film_grain(displayable_frame->film_grain_params(),
1649 displayable_frame->buffer()->is_monochrome(),
1650 color_matrix_is_identity,
1651 displayable_frame->buffer()->subsampling_x(),
1652 displayable_frame->buffer()->subsampling_y(),
1653 displayable_frame->upscaled_width(),
1654 displayable_frame->frame_height(), thread_pool);
1655 if (!film_grain.AddNoise(
1656 displayable_frame->buffer()->data(kPlaneY),
1657 displayable_frame->buffer()->stride(kPlaneY),
1658 displayable_frame->buffer()->data(kPlaneU),
1659 displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1660 (*film_grain_frame)->buffer()->data(kPlaneY),
1661 (*film_grain_frame)->buffer()->stride(kPlaneY),
1662 (*film_grain_frame)->buffer()->data(kPlaneU),
1663 (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1664 LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1665 return kStatusOutOfMemory;
1666 }
1667 return kStatusOk;
1668 }
1669 #endif // LIBGAV1_MAX_BITDEPTH == 12
1670 FilmGrain<8> film_grain(displayable_frame->film_grain_params(),
1671 displayable_frame->buffer()->is_monochrome(),
1672 color_matrix_is_identity,
1673 displayable_frame->buffer()->subsampling_x(),
1674 displayable_frame->buffer()->subsampling_y(),
1675 displayable_frame->upscaled_width(),
1676 displayable_frame->frame_height(), thread_pool);
1677 if (!film_grain.AddNoise(
1678 displayable_frame->buffer()->data(kPlaneY),
1679 displayable_frame->buffer()->stride(kPlaneY),
1680 displayable_frame->buffer()->data(kPlaneU),
1681 displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1682 (*film_grain_frame)->buffer()->data(kPlaneY),
1683 (*film_grain_frame)->buffer()->stride(kPlaneY),
1684 (*film_grain_frame)->buffer()->data(kPlaneU),
1685 (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1686 LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1687 return kStatusOutOfMemory;
1688 }
1689 return kStatusOk;
1690 }
1691
IsNewSequenceHeader(const ObuParser & obu)1692 bool DecoderImpl::IsNewSequenceHeader(const ObuParser& obu) {
1693 if (std::find_if(obu.obu_headers().begin(), obu.obu_headers().end(),
1694 [](const ObuHeader& obu_header) {
1695 return obu_header.type == kObuSequenceHeader;
1696 }) == obu.obu_headers().end()) {
1697 return false;
1698 }
1699 const ObuSequenceHeader sequence_header = obu.sequence_header();
1700 const bool sequence_header_changed =
1701 !has_sequence_header_ ||
1702 sequence_header_.color_config.bitdepth !=
1703 sequence_header.color_config.bitdepth ||
1704 sequence_header_.color_config.is_monochrome !=
1705 sequence_header.color_config.is_monochrome ||
1706 sequence_header_.color_config.subsampling_x !=
1707 sequence_header.color_config.subsampling_x ||
1708 sequence_header_.color_config.subsampling_y !=
1709 sequence_header.color_config.subsampling_y ||
1710 sequence_header_.max_frame_width != sequence_header.max_frame_width ||
1711 sequence_header_.max_frame_height != sequence_header.max_frame_height;
1712 sequence_header_ = sequence_header;
1713 has_sequence_header_ = true;
1714 return sequence_header_changed;
1715 }
1716
MaybeInitializeWedgeMasks(FrameType frame_type)1717 bool DecoderImpl::MaybeInitializeWedgeMasks(FrameType frame_type) {
1718 if (IsIntraFrame(frame_type) || wedge_masks_initialized_) {
1719 return true;
1720 }
1721 if (!GenerateWedgeMask(&wedge_masks_)) {
1722 return false;
1723 }
1724 wedge_masks_initialized_ = true;
1725 return true;
1726 }
1727
MaybeInitializeQuantizerMatrix(const ObuFrameHeader & frame_header)1728 bool DecoderImpl::MaybeInitializeQuantizerMatrix(
1729 const ObuFrameHeader& frame_header) {
1730 if (quantizer_matrix_initialized_ || !frame_header.quantizer.use_matrix) {
1731 return true;
1732 }
1733 if (!InitializeQuantizerMatrix(&quantizer_matrix_)) {
1734 return false;
1735 }
1736 quantizer_matrix_initialized_ = true;
1737 return true;
1738 }
1739
1740 } // namespace libgav1
1741