1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/decoder_impl.h"
16
17 #include <algorithm>
18 #include <atomic>
19 #include <cassert>
20 #include <cmath>
21 #include <condition_variable> // NOLINT (unapproved c++11 header)
22 #include <iterator>
23 #include <memory>
24 #include <mutex> // NOLINT (unapproved c++11 header)
25 #include <new>
26 #include <utility>
27 #include <vector>
28
29 #include "src/dsp/common.h"
30 #include "src/dsp/constants.h"
31 #include "src/dsp/dsp.h"
32 #include "src/film_grain.h"
33 #include "src/frame_buffer_utils.h"
34 #include "src/frame_scratch_buffer.h"
35 #include "src/loop_restoration_info.h"
36 #include "src/obu_parser.h"
37 #include "src/post_filter.h"
38 #include "src/prediction_mask.h"
39 #include "src/threading_strategy.h"
40 #include "src/utils/blocking_counter.h"
41 #include "src/utils/common.h"
42 #include "src/utils/constants.h"
43 #include "src/utils/logging.h"
44 #include "src/utils/raw_bit_reader.h"
45 #include "src/utils/segmentation.h"
46 #include "src/utils/threadpool.h"
47 #include "src/yuv_buffer.h"
48
49 namespace libgav1 {
50 namespace {
51
// Upper bounds for a block's width and height, expressed in 4x4 units.
// NOTE(review): 32 units presumably corresponds to a 128-pixel superblock --
// confirm against the users of these constants.
constexpr int kMaxBlockWidth4x4 = 32;
constexpr int kMaxBlockHeight4x4 = 32;
54
55 // Computes the bottom border size in pixels. If CDEF, loop restoration or
56 // SuperRes is enabled, adds extra border pixels to facilitate those steps to
57 // happen nearly in-place (a few extra rows instead of an entire frame buffer).
58 // The logic in this function should match the corresponding logic for
59 // |vertical_shift| in the PostFilter constructor.
GetBottomBorderPixels(const bool do_cdef,const bool do_restoration,const bool do_superres,const int subsampling_y)60 int GetBottomBorderPixels(const bool do_cdef, const bool do_restoration,
61 const bool do_superres, const int subsampling_y) {
62 int extra_border = 0;
63 if (do_cdef) {
64 extra_border += kCdefBorder;
65 } else if (do_restoration) {
66 // If CDEF is enabled, loop restoration is safe without extra border.
67 extra_border += kRestorationVerticalBorder;
68 }
69 if (do_superres) extra_border += kSuperResVerticalBorder;
70 // Double the number of extra bottom border pixels if the bottom border will
71 // be subsampled.
72 extra_border <<= subsampling_y;
73 return Align(kBorderPixels + extra_border, 2); // Must be a multiple of 2.
74 }
75
76 // Sets |frame_scratch_buffer->tile_decoding_failed| to true (while holding on
77 // to |frame_scratch_buffer->superblock_row_mutex|) and notifies the first
78 // |count| condition variables in
79 // |frame_scratch_buffer->superblock_row_progress_condvar|.
SetFailureAndNotifyAll(FrameScratchBuffer * const frame_scratch_buffer,int count)80 void SetFailureAndNotifyAll(FrameScratchBuffer* const frame_scratch_buffer,
81 int count) {
82 {
83 std::lock_guard<std::mutex> lock(
84 frame_scratch_buffer->superblock_row_mutex);
85 frame_scratch_buffer->tile_decoding_failed = true;
86 }
87 std::condition_variable* const condvars =
88 frame_scratch_buffer->superblock_row_progress_condvar.get();
89 for (int i = 0; i < count; ++i) {
90 condvars[i].notify_one();
91 }
92 }
93
94 // Helper class that releases the frame scratch buffer in the destructor.
95 class FrameScratchBufferReleaser {
96 public:
FrameScratchBufferReleaser(FrameScratchBufferPool * frame_scratch_buffer_pool,std::unique_ptr<FrameScratchBuffer> * frame_scratch_buffer)97 FrameScratchBufferReleaser(
98 FrameScratchBufferPool* frame_scratch_buffer_pool,
99 std::unique_ptr<FrameScratchBuffer>* frame_scratch_buffer)
100 : frame_scratch_buffer_pool_(frame_scratch_buffer_pool),
101 frame_scratch_buffer_(frame_scratch_buffer) {}
~FrameScratchBufferReleaser()102 ~FrameScratchBufferReleaser() {
103 frame_scratch_buffer_pool_->Release(std::move(*frame_scratch_buffer_));
104 }
105
106 private:
107 FrameScratchBufferPool* const frame_scratch_buffer_pool_;
108 std::unique_ptr<FrameScratchBuffer>* const frame_scratch_buffer_;
109 };
110
111 // Sets the |frame|'s segmentation map for two cases. The third case is handled
112 // in Tile::DecodeBlock().
SetSegmentationMap(const ObuFrameHeader & frame_header,const SegmentationMap * prev_segment_ids,RefCountedBuffer * const frame)113 void SetSegmentationMap(const ObuFrameHeader& frame_header,
114 const SegmentationMap* prev_segment_ids,
115 RefCountedBuffer* const frame) {
116 if (!frame_header.segmentation.enabled) {
117 // All segment_id's are 0.
118 frame->segmentation_map()->Clear();
119 } else if (!frame_header.segmentation.update_map) {
120 // Copy from prev_segment_ids.
121 if (prev_segment_ids == nullptr) {
122 // Treat a null prev_segment_ids pointer as if it pointed to a
123 // segmentation map containing all 0s.
124 frame->segmentation_map()->Clear();
125 } else {
126 frame->segmentation_map()->CopyFrom(*prev_segment_ids);
127 }
128 }
129 }
130
DecodeTilesNonFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter)131 StatusCode DecodeTilesNonFrameParallel(
132 const ObuSequenceHeader& sequence_header,
133 const ObuFrameHeader& frame_header,
134 const Vector<std::unique_ptr<Tile>>& tiles,
135 FrameScratchBuffer* const frame_scratch_buffer,
136 PostFilter* const post_filter) {
137 // Decode in superblock row order.
138 const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
139 std::unique_ptr<TileScratchBuffer> tile_scratch_buffer =
140 frame_scratch_buffer->tile_scratch_buffer_pool.Get();
141 if (tile_scratch_buffer == nullptr) return kLibgav1StatusOutOfMemory;
142 for (int row4x4 = 0; row4x4 < frame_header.rows4x4;
143 row4x4 += block_width4x4) {
144 for (const auto& tile_ptr : tiles) {
145 if (!tile_ptr->ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
146 row4x4, tile_scratch_buffer.get())) {
147 return kLibgav1StatusUnknownError;
148 }
149 }
150 post_filter->ApplyFilteringForOneSuperBlockRow(
151 row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
152 /*do_deblock=*/true);
153 }
154 frame_scratch_buffer->tile_scratch_buffer_pool.Release(
155 std::move(tile_scratch_buffer));
156 return kStatusOk;
157 }
158
DecodeTilesThreadedNonFrameParallel(const Vector<std::unique_ptr<Tile>> & tiles,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,BlockingCounterWithStatus * const pending_tiles)159 StatusCode DecodeTilesThreadedNonFrameParallel(
160 const Vector<std::unique_ptr<Tile>>& tiles,
161 FrameScratchBuffer* const frame_scratch_buffer,
162 PostFilter* const post_filter,
163 BlockingCounterWithStatus* const pending_tiles) {
164 ThreadingStrategy& threading_strategy =
165 frame_scratch_buffer->threading_strategy;
166 const int num_workers = threading_strategy.tile_thread_count();
167 BlockingCounterWithStatus pending_workers(num_workers);
168 std::atomic<int> tile_counter(0);
169 const int tile_count = static_cast<int>(tiles.size());
170 bool tile_decoding_failed = false;
171 // Submit tile decoding jobs to the thread pool.
172 for (int i = 0; i < num_workers; ++i) {
173 threading_strategy.tile_thread_pool()->Schedule([&tiles, tile_count,
174 &tile_counter,
175 &pending_workers,
176 &pending_tiles]() {
177 bool failed = false;
178 int index;
179 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
180 tile_count) {
181 if (!failed) {
182 const auto& tile_ptr = tiles[index];
183 if (!tile_ptr->ParseAndDecode()) {
184 LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
185 failed = true;
186 }
187 } else {
188 pending_tiles->Decrement(false);
189 }
190 }
191 pending_workers.Decrement(!failed);
192 });
193 }
194 // Have the current thread partake in tile decoding.
195 int index;
196 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
197 tile_count) {
198 if (!tile_decoding_failed) {
199 const auto& tile_ptr = tiles[index];
200 if (!tile_ptr->ParseAndDecode()) {
201 LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
202 tile_decoding_failed = true;
203 }
204 } else {
205 pending_tiles->Decrement(false);
206 }
207 }
208 // Wait until all the workers are done. This ensures that all the tiles have
209 // been parsed.
210 tile_decoding_failed |= !pending_workers.Wait();
211 // Wait until all the tiles have been decoded.
212 tile_decoding_failed |= !pending_tiles->Wait();
213 if (tile_decoding_failed) return kStatusUnknownError;
214 assert(threading_strategy.post_filter_thread_pool() != nullptr);
215 post_filter->ApplyFilteringThreaded();
216 return kStatusOk;
217 }
218
ParseTiles(const Vector<std::unique_ptr<Tile>> & tiles)219 StatusCode ParseTiles(const Vector<std::unique_ptr<Tile>>& tiles) {
220 for (const auto& tile : tiles) {
221 if (!tile->Parse()) {
222 LIBGAV1_DLOG(ERROR, "Failed to parse tile number: %d\n", tile->number());
223 return kStatusUnknownError;
224 }
225 }
226 return kStatusOk;
227 }
228
DecodeTilesFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,const SymbolDecoderContext & saved_symbol_decoder_context,const SegmentationMap * const prev_segment_ids,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,RefCountedBuffer * const current_frame)229 StatusCode DecodeTilesFrameParallel(
230 const ObuSequenceHeader& sequence_header,
231 const ObuFrameHeader& frame_header,
232 const Vector<std::unique_ptr<Tile>>& tiles,
233 const SymbolDecoderContext& saved_symbol_decoder_context,
234 const SegmentationMap* const prev_segment_ids,
235 FrameScratchBuffer* const frame_scratch_buffer,
236 PostFilter* const post_filter, RefCountedBuffer* const current_frame) {
237 // Parse the frame.
238 StatusCode status = ParseTiles(tiles);
239 if (status != kStatusOk) return status;
240 if (frame_header.enable_frame_end_update_cdf) {
241 frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
242 }
243 current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
244 SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
245 // Mark frame as parsed.
246 current_frame->SetFrameState(kFrameStateParsed);
247 std::unique_ptr<TileScratchBuffer> tile_scratch_buffer =
248 frame_scratch_buffer->tile_scratch_buffer_pool.Get();
249 if (tile_scratch_buffer == nullptr) {
250 return kStatusOutOfMemory;
251 }
252 const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
253 // Decode in superblock row order (inter prediction in the Tile class will
254 // block until the required superblocks in the reference frame are decoded).
255 for (int row4x4 = 0; row4x4 < frame_header.rows4x4;
256 row4x4 += block_width4x4) {
257 for (const auto& tile_ptr : tiles) {
258 if (!tile_ptr->ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
259 row4x4, tile_scratch_buffer.get())) {
260 LIBGAV1_DLOG(ERROR, "Failed to decode tile number: %d\n",
261 tile_ptr->number());
262 return kStatusUnknownError;
263 }
264 }
265 const int progress_row = post_filter->ApplyFilteringForOneSuperBlockRow(
266 row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
267 /*do_deblock=*/true);
268 if (progress_row >= 0) {
269 current_frame->SetProgress(progress_row);
270 }
271 }
272 // Mark frame as decoded (we no longer care about row-level progress since the
273 // entire frame has been decoded).
274 current_frame->SetFrameState(kFrameStateDecoded);
275 frame_scratch_buffer->tile_scratch_buffer_pool.Release(
276 std::move(tile_scratch_buffer));
277 return kStatusOk;
278 }
279
280 // Helper function used by DecodeTilesThreadedFrameParallel. Applies the
281 // deblocking filter for tile boundaries for the superblock row at |row4x4|.
ApplyDeblockingFilterForTileBoundaries(PostFilter * const post_filter,const std::unique_ptr<Tile> * tile_row_base,const ObuFrameHeader & frame_header,int row4x4,int block_width4x4,int tile_columns,bool decode_entire_tiles_in_worker_threads)282 void ApplyDeblockingFilterForTileBoundaries(
283 PostFilter* const post_filter, const std::unique_ptr<Tile>* tile_row_base,
284 const ObuFrameHeader& frame_header, int row4x4, int block_width4x4,
285 int tile_columns, bool decode_entire_tiles_in_worker_threads) {
286 // Apply vertical deblock filtering for the first 64 columns of each tile.
287 for (int tile_column = 0; tile_column < tile_columns; ++tile_column) {
288 const Tile& tile = *tile_row_base[tile_column];
289 post_filter->ApplyDeblockFilter(
290 kLoopFilterTypeVertical, row4x4, tile.column4x4_start(),
291 tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
292 }
293 if (decode_entire_tiles_in_worker_threads &&
294 row4x4 == tile_row_base[0]->row4x4_start()) {
295 // This is the first superblock row of a tile row. In this case, apply
296 // horizontal deblock filtering for the entire superblock row.
297 post_filter->ApplyDeblockFilter(kLoopFilterTypeHorizontal, row4x4, 0,
298 frame_header.columns4x4, block_width4x4);
299 } else {
300 // Apply horizontal deblock filtering for the first 64 columns of the
301 // first tile.
302 const Tile& first_tile = *tile_row_base[0];
303 post_filter->ApplyDeblockFilter(
304 kLoopFilterTypeHorizontal, row4x4, first_tile.column4x4_start(),
305 first_tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
306 // Apply horizontal deblock filtering for the last 64 columns of the
307 // previous tile and the first 64 columns of the current tile.
308 for (int tile_column = 1; tile_column < tile_columns; ++tile_column) {
309 const Tile& tile = *tile_row_base[tile_column];
310 // If the previous tile has more than 64 columns, then include those
311 // for the horizontal deblock.
312 const Tile& previous_tile = *tile_row_base[tile_column - 1];
313 const int column4x4_start =
314 tile.column4x4_start() -
315 ((tile.column4x4_start() - kNum4x4InLoopFilterUnit !=
316 previous_tile.column4x4_start())
317 ? kNum4x4InLoopFilterUnit
318 : 0);
319 post_filter->ApplyDeblockFilter(
320 kLoopFilterTypeHorizontal, row4x4, column4x4_start,
321 tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
322 }
323 // Apply horizontal deblock filtering for the last 64 columns of the
324 // last tile.
325 const Tile& last_tile = *tile_row_base[tile_columns - 1];
326 // Identify the last column4x4 value and do horizontal filtering for
327 // that column4x4. The value of last column4x4 is the nearest multiple
328 // of 16 that is before tile.column4x4_end().
329 const int column4x4_start = (last_tile.column4x4_end() - 1) & ~15;
330 // If column4x4_start is the same as tile.column4x4_start() then it
331 // means that the last tile has <= 64 columns. So there is nothing left
332 // to deblock (since it was already deblocked in the loop above).
333 if (column4x4_start != last_tile.column4x4_start()) {
334 post_filter->ApplyDeblockFilter(
335 kLoopFilterTypeHorizontal, row4x4, column4x4_start,
336 last_tile.column4x4_end(), block_width4x4);
337 }
338 }
339 }
340
341 // Helper function used by DecodeTilesThreadedFrameParallel. Decodes the
342 // superblock row starting at |row4x4| for tile at index |tile_index| in the
343 // list of tiles |tiles|. If the decoding is successful, then it does the
344 // following:
345 // * Schedule the next superblock row in the current tile column for decoding
346 // (the next superblock row may be in a different tile than the current
347 // one).
348 // * If an entire superblock row of the frame has been decoded, it notifies
349 // the waiters (if there are any).
DecodeSuperBlockRowInTile(const Vector<std::unique_ptr<Tile>> & tiles,size_t tile_index,int row4x4,const int superblock_size4x4,const int tile_columns,const int superblock_rows,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,BlockingCounter * const pending_jobs)350 void DecodeSuperBlockRowInTile(
351 const Vector<std::unique_ptr<Tile>>& tiles, size_t tile_index, int row4x4,
352 const int superblock_size4x4, const int tile_columns,
353 const int superblock_rows, FrameScratchBuffer* const frame_scratch_buffer,
354 PostFilter* const post_filter, BlockingCounter* const pending_jobs) {
355 std::unique_ptr<TileScratchBuffer> scratch_buffer =
356 frame_scratch_buffer->tile_scratch_buffer_pool.Get();
357 if (scratch_buffer == nullptr) {
358 SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
359 return;
360 }
361 Tile& tile = *tiles[tile_index];
362 const bool ok = tile.ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
363 row4x4, scratch_buffer.get());
364 frame_scratch_buffer->tile_scratch_buffer_pool.Release(
365 std::move(scratch_buffer));
366 if (!ok) {
367 SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
368 return;
369 }
370 if (post_filter->DoDeblock()) {
371 // Apply vertical deblock filtering for all the columns in this tile except
372 // for the first 64 columns.
373 post_filter->ApplyDeblockFilter(
374 kLoopFilterTypeVertical, row4x4,
375 tile.column4x4_start() + kNum4x4InLoopFilterUnit, tile.column4x4_end(),
376 superblock_size4x4);
377 // Apply horizontal deblock filtering for all the columns in this tile
378 // except for the first and the last 64 columns.
379 // Note about the last tile of each row: For the last tile, column4x4_end
380 // may not be a multiple of 16. In that case it is still okay to simply
381 // subtract 16 since ApplyDeblockFilter() will only do the filters in
382 // increments of 64 columns (or 32 columns for chroma with subsampling).
383 post_filter->ApplyDeblockFilter(
384 kLoopFilterTypeHorizontal, row4x4,
385 tile.column4x4_start() + kNum4x4InLoopFilterUnit,
386 tile.column4x4_end() - kNum4x4InLoopFilterUnit, superblock_size4x4);
387 }
388 const int superblock_size4x4_log2 = FloorLog2(superblock_size4x4);
389 const int index = row4x4 >> superblock_size4x4_log2;
390 int* const superblock_row_progress =
391 frame_scratch_buffer->superblock_row_progress.get();
392 std::condition_variable* const superblock_row_progress_condvar =
393 frame_scratch_buffer->superblock_row_progress_condvar.get();
394 bool notify;
395 {
396 std::lock_guard<std::mutex> lock(
397 frame_scratch_buffer->superblock_row_mutex);
398 notify = ++superblock_row_progress[index] == tile_columns;
399 }
400 if (notify) {
401 // We are done decoding this superblock row. Notify the post filtering
402 // thread.
403 superblock_row_progress_condvar[index].notify_one();
404 }
405 // Schedule the next superblock row (if one exists).
406 ThreadPool& thread_pool =
407 *frame_scratch_buffer->threading_strategy.thread_pool();
408 const int next_row4x4 = row4x4 + superblock_size4x4;
409 if (!tile.IsRow4x4Inside(next_row4x4)) {
410 tile_index += tile_columns;
411 }
412 if (tile_index >= tiles.size()) return;
413 pending_jobs->IncrementBy(1);
414 thread_pool.Schedule([&tiles, tile_index, next_row4x4, superblock_size4x4,
415 tile_columns, superblock_rows, frame_scratch_buffer,
416 post_filter, pending_jobs]() {
417 DecodeSuperBlockRowInTile(tiles, tile_index, next_row4x4,
418 superblock_size4x4, tile_columns, superblock_rows,
419 frame_scratch_buffer, post_filter, pending_jobs);
420 pending_jobs->Decrement();
421 });
422 }
423
DecodeTilesThreadedFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,const SymbolDecoderContext & saved_symbol_decoder_context,const SegmentationMap * const prev_segment_ids,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,RefCountedBuffer * const current_frame)424 StatusCode DecodeTilesThreadedFrameParallel(
425 const ObuSequenceHeader& sequence_header,
426 const ObuFrameHeader& frame_header,
427 const Vector<std::unique_ptr<Tile>>& tiles,
428 const SymbolDecoderContext& saved_symbol_decoder_context,
429 const SegmentationMap* const prev_segment_ids,
430 FrameScratchBuffer* const frame_scratch_buffer,
431 PostFilter* const post_filter, RefCountedBuffer* const current_frame) {
432 // Parse the frame.
433 ThreadPool& thread_pool =
434 *frame_scratch_buffer->threading_strategy.thread_pool();
435 std::atomic<int> tile_counter(0);
436 const int tile_count = static_cast<int>(tiles.size());
437 const int num_workers = thread_pool.num_threads();
438 BlockingCounterWithStatus parse_workers(num_workers);
439 // Submit tile parsing jobs to the thread pool.
440 for (int i = 0; i < num_workers; ++i) {
441 thread_pool.Schedule([&tiles, tile_count, &tile_counter, &parse_workers]() {
442 bool failed = false;
443 int index;
444 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
445 tile_count) {
446 if (!failed) {
447 const auto& tile_ptr = tiles[index];
448 if (!tile_ptr->Parse()) {
449 LIBGAV1_DLOG(ERROR, "Error parsing tile #%d", tile_ptr->number());
450 failed = true;
451 }
452 }
453 }
454 parse_workers.Decrement(!failed);
455 });
456 }
457
458 // Have the current thread participate in parsing.
459 bool failed = false;
460 int index;
461 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
462 tile_count) {
463 if (!failed) {
464 const auto& tile_ptr = tiles[index];
465 if (!tile_ptr->Parse()) {
466 LIBGAV1_DLOG(ERROR, "Error parsing tile #%d", tile_ptr->number());
467 failed = true;
468 }
469 }
470 }
471
472 // Wait until all the parse workers are done. This ensures that all the tiles
473 // have been parsed.
474 if (!parse_workers.Wait() || failed) {
475 return kLibgav1StatusUnknownError;
476 }
477 if (frame_header.enable_frame_end_update_cdf) {
478 frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
479 }
480 current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
481 SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
482 current_frame->SetFrameState(kFrameStateParsed);
483
484 // Decode the frame.
485 const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
486 const int block_width4x4_log2 =
487 sequence_header.use_128x128_superblock ? 5 : 4;
488 const int superblock_rows =
489 (frame_header.rows4x4 + block_width4x4 - 1) >> block_width4x4_log2;
490 if (!frame_scratch_buffer->superblock_row_progress.Resize(superblock_rows) ||
491 !frame_scratch_buffer->superblock_row_progress_condvar.Resize(
492 superblock_rows)) {
493 return kLibgav1StatusOutOfMemory;
494 }
495 int* const superblock_row_progress =
496 frame_scratch_buffer->superblock_row_progress.get();
497 memset(superblock_row_progress, 0,
498 superblock_rows * sizeof(superblock_row_progress[0]));
499 frame_scratch_buffer->tile_decoding_failed = false;
500 const int tile_columns = frame_header.tile_info.tile_columns;
501 const bool decode_entire_tiles_in_worker_threads =
502 num_workers >= tile_columns;
503 BlockingCounter pending_jobs(
504 decode_entire_tiles_in_worker_threads ? num_workers : tile_columns);
505 if (decode_entire_tiles_in_worker_threads) {
506 // Submit tile decoding jobs to the thread pool.
507 tile_counter = 0;
508 for (int i = 0; i < num_workers; ++i) {
509 thread_pool.Schedule([&tiles, tile_count, &tile_counter, &pending_jobs,
510 frame_scratch_buffer, superblock_rows]() {
511 bool failed = false;
512 int index;
513 while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
514 tile_count) {
515 if (failed) continue;
516 const auto& tile_ptr = tiles[index];
517 if (!tile_ptr->Decode(
518 &frame_scratch_buffer->superblock_row_mutex,
519 frame_scratch_buffer->superblock_row_progress.get(),
520 frame_scratch_buffer->superblock_row_progress_condvar
521 .get())) {
522 LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
523 failed = true;
524 SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
525 }
526 }
527 pending_jobs.Decrement();
528 });
529 }
530 } else {
531 // Schedule the jobs for first tile row.
532 for (int tile_index = 0; tile_index < tile_columns; ++tile_index) {
533 thread_pool.Schedule([&tiles, tile_index, block_width4x4, tile_columns,
534 superblock_rows, frame_scratch_buffer, post_filter,
535 &pending_jobs]() {
536 DecodeSuperBlockRowInTile(
537 tiles, tile_index, 0, block_width4x4, tile_columns, superblock_rows,
538 frame_scratch_buffer, post_filter, &pending_jobs);
539 pending_jobs.Decrement();
540 });
541 }
542 }
543
544 // Current thread will do the post filters.
545 std::condition_variable* const superblock_row_progress_condvar =
546 frame_scratch_buffer->superblock_row_progress_condvar.get();
547 const std::unique_ptr<Tile>* tile_row_base = &tiles[0];
548 for (int row4x4 = 0, index = 0; row4x4 < frame_header.rows4x4;
549 row4x4 += block_width4x4, ++index) {
550 if (!tile_row_base[0]->IsRow4x4Inside(row4x4)) {
551 tile_row_base += tile_columns;
552 }
553 {
554 std::unique_lock<std::mutex> lock(
555 frame_scratch_buffer->superblock_row_mutex);
556 while (superblock_row_progress[index] != tile_columns &&
557 !frame_scratch_buffer->tile_decoding_failed) {
558 superblock_row_progress_condvar[index].wait(lock);
559 }
560 if (frame_scratch_buffer->tile_decoding_failed) break;
561 }
562 if (post_filter->DoDeblock()) {
563 // Apply deblocking filter for the tile boundaries of this superblock row.
564 // The deblocking filter for the internal blocks will be applied in the
565 // tile worker threads. In this thread, we will only have to apply
566 // deblocking filter for the tile boundaries.
567 ApplyDeblockingFilterForTileBoundaries(
568 post_filter, tile_row_base, frame_header, row4x4, block_width4x4,
569 tile_columns, decode_entire_tiles_in_worker_threads);
570 }
571 // Apply all the post filters other than deblocking.
572 const int progress_row = post_filter->ApplyFilteringForOneSuperBlockRow(
573 row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
574 /*do_deblock=*/false);
575 if (progress_row >= 0) {
576 current_frame->SetProgress(progress_row);
577 }
578 }
579 // Wait until all the pending jobs are done. This ensures that all the tiles
580 // have been decoded and wrapped up.
581 pending_jobs.Wait();
582 {
583 std::lock_guard<std::mutex> lock(
584 frame_scratch_buffer->superblock_row_mutex);
585 if (frame_scratch_buffer->tile_decoding_failed) {
586 return kLibgav1StatusUnknownError;
587 }
588 }
589
590 current_frame->SetFrameState(kFrameStateDecoded);
591 return kStatusOk;
592 }
593
CalcFrameMeanQp(const Vector<std::unique_ptr<Tile>> & tiles)594 int CalcFrameMeanQp(const Vector<std::unique_ptr<Tile>>& tiles) {
595 int cumulative_frame_qp = 0;
596 for (const auto& tile : tiles) {
597 cumulative_frame_qp += tile->GetTileMeanQP();
598 }
599 const int frame_mean_qp = static_cast<int>(
600 std::round(cumulative_frame_qp / static_cast<float>(tiles.size())));
601 if (frame_mean_qp > 255 || frame_mean_qp < 0) {
602 LIBGAV1_DLOG(
603 WARNING,
604 "The mean QP value for the frame is %d, i.e., out of bounds for AV1.",
605 frame_mean_qp);
606 }
607 return frame_mean_qp;
608 }
609
610 } // namespace
611
612 // static
Create(const DecoderSettings * settings,std::unique_ptr<DecoderImpl> * output)613 StatusCode DecoderImpl::Create(const DecoderSettings* settings,
614 std::unique_ptr<DecoderImpl>* output) {
615 if (settings->threads <= 0) {
616 LIBGAV1_DLOG(ERROR, "Invalid settings->threads: %d.", settings->threads);
617 return kStatusInvalidArgument;
618 }
619 if (settings->frame_parallel) {
620 if (settings->release_input_buffer == nullptr) {
621 LIBGAV1_DLOG(ERROR,
622 "release_input_buffer callback must not be null when "
623 "frame_parallel is true.");
624 return kStatusInvalidArgument;
625 }
626 }
627 if (settings->parse_only &&
628 (settings->threads > 1 || settings->frame_parallel)) {
629 LIBGAV1_DLOG(
630 ERROR,
631 "The number of threads cannot be more than 1 (default) and "
632 "the frame_parallel option cannot be used in the parse_only mode.");
633 return kStatusInvalidArgument;
634 }
635 std::unique_ptr<DecoderImpl> impl(new (std::nothrow) DecoderImpl(settings));
636 if (impl == nullptr) {
637 LIBGAV1_DLOG(ERROR, "Failed to allocate DecoderImpl.");
638 return kStatusOutOfMemory;
639 }
640 const StatusCode status = impl->Init();
641 if (status != kStatusOk) return status;
642 *output = std::move(impl);
643 return kStatusOk;
644 }
645
DecoderImpl(const DecoderSettings * settings)646 DecoderImpl::DecoderImpl(const DecoderSettings* settings)
647 : buffer_pool_(settings->on_frame_buffer_size_changed,
648 settings->get_frame_buffer, settings->release_frame_buffer,
649 settings->callback_private_data),
650 settings_(*settings) {
651 dsp::DspInit();
652 }
653
~DecoderImpl()654 DecoderImpl::~DecoderImpl() {
655 // Clean up and wait until all the threads have stopped. We just have to pass
656 // in a dummy status that is not kStatusOk or kStatusTryAgain to trigger the
657 // path that clears all the threads and structs.
658 SignalFailure(kStatusUnknownError);
659 // Release any other frame buffer references that we may be holding on to.
660 ReleaseOutputFrame();
661 output_frame_queue_.Clear();
662 for (auto& reference_frame : state_.reference_frame) {
663 reference_frame = nullptr;
664 }
665 }
666
Init()667 StatusCode DecoderImpl::Init() {
668 if (!output_frame_queue_.Init(kMaxLayers)) {
669 LIBGAV1_DLOG(ERROR, "output_frame_queue_.Init() failed.");
670 return kStatusOutOfMemory;
671 }
672 return kStatusOk;
673 }
674
InitializeFrameThreadPoolAndTemporalUnitQueue(const uint8_t * data,size_t size)675 StatusCode DecoderImpl::InitializeFrameThreadPoolAndTemporalUnitQueue(
676 const uint8_t* data, size_t size) {
677 is_frame_parallel_ = false;
678 if (settings_.frame_parallel) {
679 DecoderState state;
680 std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
681 data, size, settings_.operating_point, &buffer_pool_, &state));
682 if (obu == nullptr) {
683 LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
684 return kStatusOutOfMemory;
685 }
686 RefCountedBufferPtr current_frame;
687 const StatusCode status = obu->ParseOneFrame(¤t_frame);
688 if (status != kStatusOk) {
689 LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
690 return status;
691 }
692 current_frame = nullptr;
693 // We assume that the first frame that was parsed will contain the frame
694 // header. This assumption is usually true in practice. So we will simply
695 // not use frame parallel mode if this is not the case.
696 if (settings_.threads > 1 &&
697 !InitializeThreadPoolsForFrameParallel(
698 settings_.threads, obu->frame_header().tile_info.tile_count,
699 obu->frame_header().tile_info.tile_columns, &frame_thread_pool_,
700 &frame_scratch_buffer_pool_)) {
701 return kStatusOutOfMemory;
702 }
703 }
704 const int max_allowed_frames =
705 (frame_thread_pool_ != nullptr) ? frame_thread_pool_->num_threads() : 1;
706 assert(max_allowed_frames > 0);
707 if (!temporal_units_.Init(max_allowed_frames)) {
708 LIBGAV1_DLOG(ERROR, "temporal_units_.Init() failed.");
709 return kStatusOutOfMemory;
710 }
711 is_frame_parallel_ = frame_thread_pool_ != nullptr;
712 return kStatusOk;
713 }
714
EnqueueFrame(const uint8_t * data,size_t size,int64_t user_private_data,void * buffer_private_data)715 StatusCode DecoderImpl::EnqueueFrame(const uint8_t* data, size_t size,
716 int64_t user_private_data,
717 void* buffer_private_data) {
718 if (data == nullptr || size == 0) return kStatusInvalidArgument;
719 if (HasFailure()) return kStatusUnknownError;
720 if (!seen_first_frame_) {
721 seen_first_frame_ = true;
722 const StatusCode status =
723 InitializeFrameThreadPoolAndTemporalUnitQueue(data, size);
724 if (status != kStatusOk) {
725 return SignalFailure(status);
726 }
727 }
728 if (temporal_units_.Full()) {
729 return kStatusTryAgain;
730 }
731 if (is_frame_parallel_) {
732 return ParseAndSchedule(data, size, user_private_data, buffer_private_data);
733 }
734 TemporalUnit temporal_unit(data, size, user_private_data,
735 buffer_private_data);
736 temporal_units_.Push(std::move(temporal_unit));
737 return kStatusOk;
738 }
739
SignalFailure(StatusCode status)740 StatusCode DecoderImpl::SignalFailure(StatusCode status) {
741 if (status == kStatusOk || status == kStatusTryAgain) return status;
742 // Set the |failure_status_| first so that any pending jobs in
743 // |frame_thread_pool_| will exit right away when the thread pool is being
744 // released below.
745 {
746 std::lock_guard<std::mutex> lock(mutex_);
747 failure_status_ = status;
748 }
749 // Make sure all waiting threads exit.
750 buffer_pool_.Abort();
751 frame_thread_pool_ = nullptr;
752 while (!temporal_units_.Empty()) {
753 if (settings_.release_input_buffer != nullptr) {
754 settings_.release_input_buffer(
755 settings_.callback_private_data,
756 temporal_units_.Front().buffer_private_data);
757 }
758 temporal_units_.Pop();
759 }
760 return status;
761 }
762
763 // DequeueFrame() follows the following policy to avoid holding unnecessary
764 // frame buffer references in output_frame_: output_frame_ must be null when
765 // DequeueFrame() returns false.
DequeueFrame(const DecoderBuffer ** out_ptr)766 StatusCode DecoderImpl::DequeueFrame(const DecoderBuffer** out_ptr) {
767 if (out_ptr == nullptr) {
768 LIBGAV1_DLOG(ERROR, "Invalid argument: out_ptr == nullptr.");
769 return kStatusInvalidArgument;
770 }
771 // We assume a call to DequeueFrame() indicates that the caller is no longer
772 // using the previous output frame, so we can release it.
773 ReleaseOutputFrame();
774 if (temporal_units_.Empty()) {
775 // No input frames to decode.
776 *out_ptr = nullptr;
777 return kStatusNothingToDequeue;
778 }
779 TemporalUnit& temporal_unit = temporal_units_.Front();
780 if (!is_frame_parallel_) {
781 // If |output_frame_queue_| is not empty, then return the first frame from
782 // that queue.
783 if (!output_frame_queue_.Empty()) {
784 RefCountedBufferPtr frame = std::move(output_frame_queue_.Front());
785 output_frame_queue_.Pop();
786 buffer_.user_private_data = temporal_unit.user_private_data;
787 if (output_frame_queue_.Empty()) {
788 temporal_units_.Pop();
789 }
790 const StatusCode status = CopyFrameToOutputBuffer(frame);
791 if (status != kStatusOk) {
792 return status;
793 }
794 *out_ptr = &buffer_;
795 return kStatusOk;
796 }
797 // Decode the next available temporal unit and return.
798 const StatusCode status = DecodeTemporalUnit(temporal_unit, out_ptr);
799 if (status != kStatusOk) {
800 // In case of failure, discard all the output frames that we may be
801 // holding on references to.
802 output_frame_queue_.Clear();
803 }
804 if (settings_.release_input_buffer != nullptr) {
805 settings_.release_input_buffer(settings_.callback_private_data,
806 temporal_unit.buffer_private_data);
807 }
808 if (output_frame_queue_.Empty()) {
809 temporal_units_.Pop();
810 }
811 return status;
812 }
813 {
814 std::unique_lock<std::mutex> lock(mutex_);
815 if (settings_.blocking_dequeue) {
816 while (!temporal_unit.decoded && failure_status_ == kStatusOk) {
817 decoded_condvar_.wait(lock);
818 }
819 } else {
820 if (!temporal_unit.decoded && failure_status_ == kStatusOk) {
821 return kStatusTryAgain;
822 }
823 }
824 if (failure_status_ != kStatusOk) {
825 const StatusCode failure_status = failure_status_;
826 lock.unlock();
827 return SignalFailure(failure_status);
828 }
829 }
830 if (settings_.release_input_buffer != nullptr &&
831 !temporal_unit.released_input_buffer) {
832 temporal_unit.released_input_buffer = true;
833 settings_.release_input_buffer(settings_.callback_private_data,
834 temporal_unit.buffer_private_data);
835 }
836 if (temporal_unit.status != kStatusOk) {
837 temporal_units_.Pop();
838 return SignalFailure(temporal_unit.status);
839 }
840 if (!temporal_unit.has_displayable_frame) {
841 *out_ptr = nullptr;
842 temporal_units_.Pop();
843 return kStatusOk;
844 }
845 assert(temporal_unit.output_layer_count > 0);
846 StatusCode status = CopyFrameToOutputBuffer(
847 temporal_unit.output_layers[temporal_unit.output_layer_count - 1].frame);
848 temporal_unit.output_layers[temporal_unit.output_layer_count - 1].frame =
849 nullptr;
850 if (status != kStatusOk) {
851 temporal_units_.Pop();
852 return SignalFailure(status);
853 }
854 buffer_.user_private_data = temporal_unit.user_private_data;
855 *out_ptr = &buffer_;
856 if (--temporal_unit.output_layer_count == 0) {
857 temporal_units_.Pop();
858 }
859 return kStatusOk;
860 }
861
GetFrameQps()862 std::vector<int> DecoderImpl::GetFrameQps() { return frame_mean_qps_; }
863
ParseAndSchedule(const uint8_t * data,size_t size,int64_t user_private_data,void * buffer_private_data)864 StatusCode DecoderImpl::ParseAndSchedule(const uint8_t* data, size_t size,
865 int64_t user_private_data,
866 void* buffer_private_data) {
867 TemporalUnit temporal_unit(data, size, user_private_data,
868 buffer_private_data);
869 std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
870 temporal_unit.data, temporal_unit.size, settings_.operating_point,
871 &buffer_pool_, &state_));
872 if (obu == nullptr) {
873 LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
874 return kStatusOutOfMemory;
875 }
876 if (has_sequence_header_) {
877 obu->set_sequence_header(sequence_header_);
878 }
879 StatusCode status;
880 int position_in_temporal_unit = 0;
881 while (obu->HasData()) {
882 RefCountedBufferPtr current_frame;
883 status = obu->ParseOneFrame(¤t_frame);
884 if (status != kStatusOk) {
885 LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
886 return status;
887 }
888 if (!MaybeInitializeQuantizerMatrix(obu->frame_header())) {
889 LIBGAV1_DLOG(ERROR, "InitializeQuantizerMatrix() failed.");
890 return kStatusOutOfMemory;
891 }
892 if (!MaybeInitializeWedgeMasks(obu->frame_header().frame_type)) {
893 LIBGAV1_DLOG(ERROR, "InitializeWedgeMasks() failed.");
894 return kStatusOutOfMemory;
895 }
896 if (IsNewSequenceHeader(*obu)) {
897 const ObuSequenceHeader& sequence_header = obu->sequence_header();
898 const Libgav1ImageFormat image_format =
899 ComposeImageFormat(sequence_header.color_config.is_monochrome,
900 sequence_header.color_config.subsampling_x,
901 sequence_header.color_config.subsampling_y);
902 const int max_bottom_border = GetBottomBorderPixels(
903 /*do_cdef=*/true, /*do_restoration=*/true,
904 /*do_superres=*/true, sequence_header.color_config.subsampling_y);
905 // TODO(vigneshv): This may not be the right place to call this callback
906 // for the frame parallel case. Investigate and fix it.
907 if (!buffer_pool_.OnFrameBufferSizeChanged(
908 sequence_header.color_config.bitdepth, image_format,
909 sequence_header.max_frame_width, sequence_header.max_frame_height,
910 kBorderPixels, kBorderPixels, kBorderPixels, max_bottom_border)) {
911 LIBGAV1_DLOG(ERROR, "buffer_pool_.OnFrameBufferSizeChanged failed.");
912 return kStatusUnknownError;
913 }
914 }
915 // This can happen when there are multiple spatial/temporal layers and if
916 // all the layers are outside the current operating point.
917 if (current_frame == nullptr) {
918 continue;
919 }
920 // Note that we cannot set EncodedFrame.temporal_unit here. It will be set
921 // in the code below after |temporal_unit| is std::move'd into the
922 // |temporal_units_| queue.
923 if (!temporal_unit.frames.emplace_back(obu.get(), state_, current_frame,
924 position_in_temporal_unit++)) {
925 LIBGAV1_DLOG(ERROR, "temporal_unit.frames.emplace_back failed.");
926 return kStatusOutOfMemory;
927 }
928 state_.UpdateReferenceFrames(current_frame,
929 obu->frame_header().refresh_frame_flags);
930 }
931 // This function cannot fail after this point. So it is okay to move the
932 // |temporal_unit| into |temporal_units_| queue.
933 temporal_units_.Push(std::move(temporal_unit));
934 if (temporal_units_.Back().frames.empty()) {
935 std::lock_guard<std::mutex> lock(mutex_);
936 temporal_units_.Back().has_displayable_frame = false;
937 temporal_units_.Back().decoded = true;
938 return kStatusOk;
939 }
940 for (auto& frame : temporal_units_.Back().frames) {
941 EncodedFrame* const encoded_frame = &frame;
942 encoded_frame->temporal_unit = &temporal_units_.Back();
943 frame_thread_pool_->Schedule([this, encoded_frame]() {
944 if (HasFailure()) return;
945 const StatusCode status = DecodeFrame(encoded_frame);
946 encoded_frame->state = {};
947 encoded_frame->frame = nullptr;
948 TemporalUnit& temporal_unit = *encoded_frame->temporal_unit;
949 std::lock_guard<std::mutex> lock(mutex_);
950 if (failure_status_ != kStatusOk) return;
951 // temporal_unit's status defaults to kStatusOk. So we need to set it only
952 // on error. If |failure_status_| is not kStatusOk at this point, it means
953 // that there has already been a failure. So we don't care about this
954 // subsequent failure. We will simply return the error code of the first
955 // failure.
956 if (status != kStatusOk) {
957 temporal_unit.status = status;
958 if (failure_status_ == kStatusOk) {
959 failure_status_ = status;
960 }
961 }
962 temporal_unit.decoded =
963 ++temporal_unit.decoded_count == temporal_unit.frames.size();
964 if (temporal_unit.decoded && settings_.output_all_layers &&
965 temporal_unit.output_layer_count > 1) {
966 std::sort(
967 temporal_unit.output_layers,
968 temporal_unit.output_layers + temporal_unit.output_layer_count);
969 }
970 if (temporal_unit.decoded || failure_status_ != kStatusOk) {
971 decoded_condvar_.notify_one();
972 }
973 });
974 }
975 return kStatusOk;
976 }
977
DecodeFrame(EncodedFrame * const encoded_frame)978 StatusCode DecoderImpl::DecodeFrame(EncodedFrame* const encoded_frame) {
979 const ObuSequenceHeader& sequence_header = encoded_frame->sequence_header;
980 const ObuFrameHeader& frame_header = encoded_frame->frame_header;
981 RefCountedBufferPtr current_frame = std::move(encoded_frame->frame);
982
983 std::unique_ptr<FrameScratchBuffer> frame_scratch_buffer =
984 frame_scratch_buffer_pool_.Get();
985 if (frame_scratch_buffer == nullptr) {
986 LIBGAV1_DLOG(ERROR, "Error when getting FrameScratchBuffer.");
987 return kStatusOutOfMemory;
988 }
989 // |frame_scratch_buffer| will be released when this local variable goes out
990 // of scope (i.e.) on any return path in this function.
991 FrameScratchBufferReleaser frame_scratch_buffer_releaser(
992 &frame_scratch_buffer_pool_, &frame_scratch_buffer);
993
994 StatusCode status;
995 if (!frame_header.show_existing_frame) {
996 if (encoded_frame->tile_buffers.empty()) {
997 // This means that the last call to ParseOneFrame() did not actually
998 // have any tile groups. This could happen in rare cases (for example,
999 // if there is a Metadata OBU after the TileGroup OBU). We currently do
1000 // not have a reason to handle those cases, so we simply continue.
1001 return kStatusOk;
1002 }
1003 status = DecodeTiles(sequence_header, frame_header,
1004 encoded_frame->tile_buffers, encoded_frame->state,
1005 frame_scratch_buffer.get(), current_frame.get());
1006 if (status != kStatusOk) {
1007 return status;
1008 }
1009 } else {
1010 if (!current_frame->WaitUntilDecoded()) {
1011 return kStatusUnknownError;
1012 }
1013 }
1014 if (!frame_header.show_frame && !frame_header.show_existing_frame) {
1015 // This frame is not displayable. Not an error.
1016 return kStatusOk;
1017 }
1018 RefCountedBufferPtr film_grain_frame;
1019 status = ApplyFilmGrain(
1020 sequence_header, frame_header, current_frame, &film_grain_frame,
1021 frame_scratch_buffer->threading_strategy.thread_pool());
1022 if (status != kStatusOk) {
1023 return status;
1024 }
1025
1026 TemporalUnit& temporal_unit = *encoded_frame->temporal_unit;
1027 std::lock_guard<std::mutex> lock(mutex_);
1028 if (temporal_unit.has_displayable_frame && !settings_.output_all_layers) {
1029 assert(temporal_unit.output_frame_position >= 0);
1030 // A displayable frame was already found in this temporal unit. This can
1031 // happen if there are multiple spatial/temporal layers. Since
1032 // |settings_.output_all_layers| is false, we will output only the last
1033 // displayable frame.
1034 if (temporal_unit.output_frame_position >
1035 encoded_frame->position_in_temporal_unit) {
1036 return kStatusOk;
1037 }
1038 // Replace any output frame that we may have seen before with the current
1039 // frame.
1040 assert(temporal_unit.output_layer_count == 1);
1041 --temporal_unit.output_layer_count;
1042 }
1043 temporal_unit.has_displayable_frame = true;
1044 temporal_unit.output_layers[temporal_unit.output_layer_count].frame =
1045 std::move(film_grain_frame);
1046 temporal_unit.output_layers[temporal_unit.output_layer_count]
1047 .position_in_temporal_unit = encoded_frame->position_in_temporal_unit;
1048 ++temporal_unit.output_layer_count;
1049 temporal_unit.output_frame_position =
1050 encoded_frame->position_in_temporal_unit;
1051 return kStatusOk;
1052 }
1053
DecodeTemporalUnit(const TemporalUnit & temporal_unit,const DecoderBuffer ** out_ptr)1054 StatusCode DecoderImpl::DecodeTemporalUnit(const TemporalUnit& temporal_unit,
1055 const DecoderBuffer** out_ptr) {
1056 std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
1057 temporal_unit.data, temporal_unit.size, settings_.operating_point,
1058 &buffer_pool_, &state_));
1059 if (obu == nullptr) {
1060 LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
1061 return kStatusOutOfMemory;
1062 }
1063 frame_mean_qps_.clear();
1064 if (has_sequence_header_) {
1065 obu->set_sequence_header(sequence_header_);
1066 }
1067 StatusCode status;
1068 std::unique_ptr<FrameScratchBuffer> frame_scratch_buffer =
1069 frame_scratch_buffer_pool_.Get();
1070 if (frame_scratch_buffer == nullptr) {
1071 LIBGAV1_DLOG(ERROR, "Error when getting FrameScratchBuffer.");
1072 return kStatusOutOfMemory;
1073 }
1074 // |frame_scratch_buffer| will be released when this local variable goes out
1075 // of scope (i.e.) on any return path in this function.
1076 FrameScratchBufferReleaser frame_scratch_buffer_releaser(
1077 &frame_scratch_buffer_pool_, &frame_scratch_buffer);
1078
1079 while (obu->HasData()) {
1080 RefCountedBufferPtr current_frame;
1081 status = obu->ParseOneFrame(¤t_frame);
1082 if (status != kStatusOk) {
1083 LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
1084 return status;
1085 }
1086 if (!MaybeInitializeQuantizerMatrix(obu->frame_header())) {
1087 LIBGAV1_DLOG(ERROR, "InitializeQuantizerMatrix() failed.");
1088 return kStatusOutOfMemory;
1089 }
1090 if (!MaybeInitializeWedgeMasks(obu->frame_header().frame_type)) {
1091 LIBGAV1_DLOG(ERROR, "InitializeWedgeMasks() failed.");
1092 return kStatusOutOfMemory;
1093 }
1094 if (IsNewSequenceHeader(*obu)) {
1095 const ObuSequenceHeader& sequence_header = obu->sequence_header();
1096 const Libgav1ImageFormat image_format =
1097 ComposeImageFormat(sequence_header.color_config.is_monochrome,
1098 sequence_header.color_config.subsampling_x,
1099 sequence_header.color_config.subsampling_y);
1100 const int max_bottom_border = GetBottomBorderPixels(
1101 /*do_cdef=*/true, /*do_restoration=*/true,
1102 /*do_superres=*/true, sequence_header.color_config.subsampling_y);
1103 if (!buffer_pool_.OnFrameBufferSizeChanged(
1104 sequence_header.color_config.bitdepth, image_format,
1105 sequence_header.max_frame_width, sequence_header.max_frame_height,
1106 kBorderPixels, kBorderPixels, kBorderPixels, max_bottom_border)) {
1107 LIBGAV1_DLOG(ERROR, "buffer_pool_.OnFrameBufferSizeChanged failed.");
1108 return kStatusUnknownError;
1109 }
1110 }
1111 if (!obu->frame_header().show_existing_frame) {
1112 if (obu->tile_buffers().empty()) {
1113 // This means that the last call to ParseOneFrame() did not actually
1114 // have any tile groups. This could happen in rare cases (for example,
1115 // if there is a Metadata OBU after the TileGroup OBU). We currently do
1116 // not have a reason to handle those cases, so we simply continue.
1117 continue;
1118 }
1119 status = DecodeTiles(obu->sequence_header(), obu->frame_header(),
1120 obu->tile_buffers(), state_,
1121 frame_scratch_buffer.get(), current_frame.get());
1122 if (settings_.parse_only) {
1123 frame_mean_qps_.push_back(frame_mean_qp_);
1124 }
1125 if (status != kStatusOk) {
1126 return status;
1127 }
1128 }
1129 state_.UpdateReferenceFrames(current_frame,
1130 obu->frame_header().refresh_frame_flags);
1131 if (obu->frame_header().show_frame ||
1132 obu->frame_header().show_existing_frame) {
1133 if (!output_frame_queue_.Empty() && !settings_.output_all_layers) {
1134 // There is more than one displayable frame in the current operating
1135 // point and |settings_.output_all_layers| is false. In this case, we
1136 // simply return the last displayable frame as the output frame and
1137 // ignore the rest.
1138 assert(output_frame_queue_.Size() == 1);
1139 output_frame_queue_.Pop();
1140 }
1141 if (!settings_.parse_only) {
1142 RefCountedBufferPtr film_grain_frame;
1143 status = ApplyFilmGrain(
1144 obu->sequence_header(), obu->frame_header(), current_frame,
1145 &film_grain_frame,
1146 frame_scratch_buffer->threading_strategy.film_grain_thread_pool());
1147 if (status != kStatusOk) return status;
1148 output_frame_queue_.Push(std::move(film_grain_frame));
1149 }
1150 }
1151 }
1152 if (output_frame_queue_.Empty()) {
1153 // No displayable frame in the temporal unit. Not an error.
1154 *out_ptr = nullptr;
1155 return kStatusOk;
1156 }
1157 status = CopyFrameToOutputBuffer(output_frame_queue_.Front());
1158 output_frame_queue_.Pop();
1159 if (status != kStatusOk) {
1160 return status;
1161 }
1162 buffer_.user_private_data = temporal_unit.user_private_data;
1163 *out_ptr = &buffer_;
1164 return kStatusOk;
1165 }
1166
CopyFrameToOutputBuffer(const RefCountedBufferPtr & frame)1167 StatusCode DecoderImpl::CopyFrameToOutputBuffer(
1168 const RefCountedBufferPtr& frame) {
1169 YuvBuffer* yuv_buffer = frame->buffer();
1170
1171 buffer_.chroma_sample_position = frame->chroma_sample_position();
1172
1173 if (yuv_buffer->is_monochrome()) {
1174 buffer_.image_format = kImageFormatMonochrome400;
1175 } else {
1176 if (yuv_buffer->subsampling_x() == 0 && yuv_buffer->subsampling_y() == 0) {
1177 buffer_.image_format = kImageFormatYuv444;
1178 } else if (yuv_buffer->subsampling_x() == 1 &&
1179 yuv_buffer->subsampling_y() == 0) {
1180 buffer_.image_format = kImageFormatYuv422;
1181 } else if (yuv_buffer->subsampling_x() == 1 &&
1182 yuv_buffer->subsampling_y() == 1) {
1183 buffer_.image_format = kImageFormatYuv420;
1184 } else {
1185 LIBGAV1_DLOG(ERROR,
1186 "Invalid chroma subsampling values: cannot determine buffer "
1187 "image format.");
1188 return kStatusInvalidArgument;
1189 }
1190 }
1191 buffer_.color_range = sequence_header_.color_config.color_range;
1192 buffer_.color_primary = sequence_header_.color_config.color_primary;
1193 buffer_.transfer_characteristics =
1194 sequence_header_.color_config.transfer_characteristics;
1195 buffer_.matrix_coefficients =
1196 sequence_header_.color_config.matrix_coefficients;
1197
1198 buffer_.bitdepth = yuv_buffer->bitdepth();
1199 const int num_planes =
1200 yuv_buffer->is_monochrome() ? kMaxPlanesMonochrome : kMaxPlanes;
1201 int plane = kPlaneY;
1202 for (; plane < num_planes; ++plane) {
1203 buffer_.stride[plane] = yuv_buffer->stride(plane);
1204 buffer_.plane[plane] = yuv_buffer->data(plane);
1205 buffer_.displayed_width[plane] = yuv_buffer->width(plane);
1206 buffer_.displayed_height[plane] = yuv_buffer->height(plane);
1207 }
1208 for (; plane < kMaxPlanes; ++plane) {
1209 buffer_.stride[plane] = 0;
1210 buffer_.plane[plane] = nullptr;
1211 buffer_.displayed_width[plane] = 0;
1212 buffer_.displayed_height[plane] = 0;
1213 }
1214 buffer_.spatial_id = frame->spatial_id();
1215 buffer_.temporal_id = frame->temporal_id();
1216 buffer_.buffer_private_data = frame->buffer_private_data();
1217 if (frame->hdr_cll_set()) {
1218 buffer_.has_hdr_cll = 1;
1219 buffer_.hdr_cll = frame->hdr_cll();
1220 } else {
1221 buffer_.has_hdr_cll = 0;
1222 }
1223 if (frame->hdr_mdcv_set()) {
1224 buffer_.has_hdr_mdcv = 1;
1225 buffer_.hdr_mdcv = frame->hdr_mdcv();
1226 } else {
1227 buffer_.has_hdr_mdcv = 0;
1228 }
1229 if (frame->itut_t35_set()) {
1230 buffer_.has_itut_t35 = 1;
1231 buffer_.itut_t35 = frame->itut_t35();
1232 } else {
1233 buffer_.has_itut_t35 = 0;
1234 }
1235 output_frame_ = frame;
1236 return kStatusOk;
1237 }
1238
ReleaseOutputFrame()1239 void DecoderImpl::ReleaseOutputFrame() {
1240 for (auto& plane : buffer_.plane) {
1241 plane = nullptr;
1242 }
1243 output_frame_ = nullptr;
1244 }
1245
DecodeTiles(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<TileBuffer> & tile_buffers,const DecoderState & state,FrameScratchBuffer * const frame_scratch_buffer,RefCountedBuffer * const current_frame)1246 StatusCode DecoderImpl::DecodeTiles(
1247 const ObuSequenceHeader& sequence_header,
1248 const ObuFrameHeader& frame_header, const Vector<TileBuffer>& tile_buffers,
1249 const DecoderState& state, FrameScratchBuffer* const frame_scratch_buffer,
1250 RefCountedBuffer* const current_frame) {
1251 frame_scratch_buffer->tile_scratch_buffer_pool.Reset(
1252 sequence_header.color_config.bitdepth);
1253 if (!frame_scratch_buffer->loop_restoration_info.Reset(
1254 &frame_header.loop_restoration, frame_header.upscaled_width,
1255 frame_header.height, sequence_header.color_config.subsampling_x,
1256 sequence_header.color_config.subsampling_y,
1257 sequence_header.color_config.is_monochrome)) {
1258 LIBGAV1_DLOG(ERROR,
1259 "Failed to allocate memory for loop restoration info units.");
1260 return kStatusOutOfMemory;
1261 }
1262 ThreadingStrategy& threading_strategy =
1263 frame_scratch_buffer->threading_strategy;
1264 if (!is_frame_parallel_ &&
1265 !threading_strategy.Reset(frame_header, settings_.threads)) {
1266 return kStatusOutOfMemory;
1267 }
1268 const bool do_cdef =
1269 PostFilter::DoCdef(frame_header, settings_.post_filter_mask);
1270 const int num_planes = sequence_header.color_config.is_monochrome
1271 ? kMaxPlanesMonochrome
1272 : kMaxPlanes;
1273 const bool do_restoration = PostFilter::DoRestoration(
1274 frame_header.loop_restoration, settings_.post_filter_mask, num_planes);
1275 const bool do_superres =
1276 PostFilter::DoSuperRes(frame_header, settings_.post_filter_mask);
1277 // Use kBorderPixels for the left, right, and top borders. Only the bottom
1278 // border may need to be bigger. Cdef border is needed only if we apply Cdef
1279 // without multithreading.
1280 const int bottom_border = GetBottomBorderPixels(
1281 do_cdef && threading_strategy.post_filter_thread_pool() == nullptr,
1282 do_restoration, do_superres, sequence_header.color_config.subsampling_y);
1283 current_frame->set_chroma_sample_position(
1284 sequence_header.color_config.chroma_sample_position);
1285 if (!current_frame->Realloc(sequence_header.color_config.bitdepth,
1286 sequence_header.color_config.is_monochrome,
1287 frame_header.upscaled_width, frame_header.height,
1288 sequence_header.color_config.subsampling_x,
1289 sequence_header.color_config.subsampling_y,
1290 /*left_border=*/kBorderPixels,
1291 /*right_border=*/kBorderPixels,
1292 /*top_border=*/kBorderPixels, bottom_border)) {
1293 LIBGAV1_DLOG(ERROR, "Failed to allocate memory for the decoder buffer.");
1294 return kStatusOutOfMemory;
1295 }
1296 if (frame_header.cdef.bits > 0) {
1297 if (!frame_scratch_buffer->cdef_index.Reset(
1298 DivideBy16(frame_header.rows4x4 + kMaxBlockHeight4x4),
1299 DivideBy16(frame_header.columns4x4 + kMaxBlockWidth4x4),
1300 /*zero_initialize=*/false)) {
1301 LIBGAV1_DLOG(ERROR, "Failed to allocate memory for cdef index.");
1302 return kStatusOutOfMemory;
1303 }
1304 }
1305 if (do_cdef) {
1306 if (!frame_scratch_buffer->cdef_skip.Reset(
1307 DivideBy2(frame_header.rows4x4 + kMaxBlockHeight4x4),
1308 DivideBy16(frame_header.columns4x4 + kMaxBlockWidth4x4),
1309 /*zero_initialize=*/true)) {
1310 LIBGAV1_DLOG(ERROR, "Failed to allocate memory for cdef skip.");
1311 return kStatusOutOfMemory;
1312 }
1313 }
1314 if (!frame_scratch_buffer->inter_transform_sizes.Reset(
1315 frame_header.rows4x4 + kMaxBlockHeight4x4,
1316 frame_header.columns4x4 + kMaxBlockWidth4x4,
1317 /*zero_initialize=*/false)) {
1318 LIBGAV1_DLOG(ERROR, "Failed to allocate memory for inter_transform_sizes.");
1319 return kStatusOutOfMemory;
1320 }
1321 if (frame_header.use_ref_frame_mvs) {
1322 if (!frame_scratch_buffer->motion_field.mv.Reset(
1323 DivideBy2(frame_header.rows4x4), DivideBy2(frame_header.columns4x4),
1324 /*zero_initialize=*/false) ||
1325 !frame_scratch_buffer->motion_field.reference_offset.Reset(
1326 DivideBy2(frame_header.rows4x4), DivideBy2(frame_header.columns4x4),
1327 /*zero_initialize=*/false)) {
1328 LIBGAV1_DLOG(ERROR,
1329 "Failed to allocate memory for temporal motion vectors.");
1330 return kStatusOutOfMemory;
1331 }
1332
1333 // For each motion vector, only mv[0] needs to be initialized to
1334 // kInvalidMvValue, mv[1] is not necessary to be initialized and can be
1335 // set to an arbitrary value. For simplicity, mv[1] is set to 0.
1336 // The following memory initialization of contiguous memory is very fast. It
1337 // is not recommended to make the initialization multi-threaded, unless the
1338 // memory which needs to be initialized in each thread is still contiguous.
1339 MotionVector invalid_mv;
1340 invalid_mv.mv[0] = kInvalidMvValue;
1341 invalid_mv.mv[1] = 0;
1342 MotionVector* const motion_field_mv =
1343 &frame_scratch_buffer->motion_field.mv[0][0];
1344 std::fill(motion_field_mv,
1345 motion_field_mv + frame_scratch_buffer->motion_field.mv.size(),
1346 invalid_mv);
1347 }
1348
1349 // The addition of kMaxBlockHeight4x4 and kMaxBlockWidth4x4 is necessary so
1350 // that the block parameters cache can be filled in for the last row/column
1351 // without having to check for boundary conditions.
1352 if (!frame_scratch_buffer->block_parameters_holder.Reset(
1353 frame_header.rows4x4 + kMaxBlockHeight4x4,
1354 frame_header.columns4x4 + kMaxBlockWidth4x4)) {
1355 return kStatusOutOfMemory;
1356 }
1357 const dsp::Dsp* const dsp =
1358 dsp::GetDspTable(sequence_header.color_config.bitdepth);
1359 if (dsp == nullptr) {
1360 LIBGAV1_DLOG(ERROR, "Failed to get the dsp table for bitdepth %d.",
1361 sequence_header.color_config.bitdepth);
1362 return kStatusInternalError;
1363 }
1364
1365 const int tile_count = frame_header.tile_info.tile_count;
1366 assert(tile_count >= 1);
1367 Vector<std::unique_ptr<Tile>> tiles;
1368 if (!tiles.reserve(tile_count)) {
1369 LIBGAV1_DLOG(ERROR, "tiles.reserve(%d) failed.\n", tile_count);
1370 return kStatusOutOfMemory;
1371 }
1372
1373 if (threading_strategy.row_thread_pool(0) != nullptr || is_frame_parallel_ ||
1374 settings_.parse_only) {
1375 if (frame_scratch_buffer->residual_buffer_pool == nullptr) {
1376 frame_scratch_buffer->residual_buffer_pool.reset(
1377 new (std::nothrow) ResidualBufferPool(
1378 sequence_header.use_128x128_superblock,
1379 sequence_header.color_config.subsampling_x,
1380 sequence_header.color_config.subsampling_y,
1381 sequence_header.color_config.bitdepth == 8 ? sizeof(int16_t)
1382 : sizeof(int32_t)));
1383 if (frame_scratch_buffer->residual_buffer_pool == nullptr) {
1384 LIBGAV1_DLOG(ERROR, "Failed to allocate residual buffer.\n");
1385 return kStatusOutOfMemory;
1386 }
1387 } else {
1388 frame_scratch_buffer->residual_buffer_pool->Reset(
1389 sequence_header.use_128x128_superblock,
1390 sequence_header.color_config.subsampling_x,
1391 sequence_header.color_config.subsampling_y,
1392 sequence_header.color_config.bitdepth == 8 ? sizeof(int16_t)
1393 : sizeof(int32_t));
1394 }
1395 }
1396
1397 if (threading_strategy.post_filter_thread_pool() != nullptr && do_cdef) {
1398 // We need to store 4 rows per 64x64 unit.
1399 const int num_units =
1400 MultiplyBy4(RightShiftWithCeiling(frame_header.rows4x4, 4));
1401 // subsampling_y is set to zero irrespective of the actual frame's
1402 // subsampling since we need to store exactly |num_units| rows of the loop
1403 // restoration border pixels.
1404 if (!frame_scratch_buffer->cdef_border.Realloc(
1405 sequence_header.color_config.bitdepth,
1406 sequence_header.color_config.is_monochrome,
1407 MultiplyBy4(frame_header.columns4x4), num_units,
1408 sequence_header.color_config.subsampling_x,
1409 /*subsampling_y=*/0, kBorderPixels, kBorderPixels, kBorderPixels,
1410 kBorderPixels, nullptr, nullptr, nullptr)) {
1411 return kStatusOutOfMemory;
1412 }
1413 }
1414
1415 if (do_restoration &&
1416 (do_cdef || threading_strategy.post_filter_thread_pool() != nullptr)) {
1417 // We need to store 4 rows per 64x64 unit.
1418 const int num_units =
1419 MultiplyBy4(RightShiftWithCeiling(frame_header.rows4x4, 4));
1420 // subsampling_y is set to zero irrespective of the actual frame's
1421 // subsampling since we need to store exactly |num_units| rows of the loop
1422 // restoration border pixels.
1423 if (!frame_scratch_buffer->loop_restoration_border.Realloc(
1424 sequence_header.color_config.bitdepth,
1425 sequence_header.color_config.is_monochrome,
1426 frame_header.upscaled_width, num_units,
1427 sequence_header.color_config.subsampling_x,
1428 /*subsampling_y=*/0, kBorderPixels, kBorderPixels, kBorderPixels,
1429 kBorderPixels, nullptr, nullptr, nullptr)) {
1430 return kStatusOutOfMemory;
1431 }
1432 }
1433
1434 if (do_superres) {
1435 const int pixel_size = sequence_header.color_config.bitdepth == 8
1436 ? sizeof(uint8_t)
1437 : sizeof(uint16_t);
1438 const int coefficients_size = kSuperResFilterTaps *
1439 Align(frame_header.upscaled_width, 16) *
1440 pixel_size;
1441 if (!frame_scratch_buffer->superres_coefficients[kPlaneTypeY].Resize(
1442 coefficients_size)) {
1443 LIBGAV1_DLOG(ERROR,
1444 "Failed to Resize superres_coefficients[kPlaneTypeY].");
1445 return kStatusOutOfMemory;
1446 }
1447 #if LIBGAV1_MSAN
1448 // Quiet SuperRes_NEON() msan warnings.
1449 memset(frame_scratch_buffer->superres_coefficients[kPlaneTypeY].get(), 0,
1450 coefficients_size);
1451 #endif
1452 const int uv_coefficients_size =
1453 kSuperResFilterTaps *
1454 Align(SubsampledValue(frame_header.upscaled_width, 1), 16) * pixel_size;
1455 if (!sequence_header.color_config.is_monochrome &&
1456 sequence_header.color_config.subsampling_x != 0 &&
1457 !frame_scratch_buffer->superres_coefficients[kPlaneTypeUV].Resize(
1458 uv_coefficients_size)) {
1459 LIBGAV1_DLOG(ERROR,
1460 "Failed to Resize superres_coefficients[kPlaneTypeUV].");
1461 return kStatusOutOfMemory;
1462 }
1463 #if LIBGAV1_MSAN
1464 if (!sequence_header.color_config.is_monochrome &&
1465 sequence_header.color_config.subsampling_x != 0) {
1466 // Quiet SuperRes_NEON() msan warnings.
1467 memset(frame_scratch_buffer->superres_coefficients[kPlaneTypeUV].get(), 0,
1468 uv_coefficients_size);
1469 }
1470 #endif
1471 }
1472
1473 if (do_superres && threading_strategy.post_filter_thread_pool() != nullptr) {
1474 const int num_threads =
1475 threading_strategy.post_filter_thread_pool()->num_threads() + 1;
1476 // subsampling_y is set to zero irrespective of the actual frame's
1477 // subsampling since we need to store exactly |num_threads| rows of the
1478 // down-scaled pixels.
1479 // Left and right borders are for line extension. They are doubled for the Y
1480 // plane to make sure the U and V planes have enough space after possible
1481 // subsampling.
1482 if (!frame_scratch_buffer->superres_line_buffer.Realloc(
1483 sequence_header.color_config.bitdepth,
1484 sequence_header.color_config.is_monochrome,
1485 MultiplyBy4(frame_header.columns4x4), num_threads,
1486 sequence_header.color_config.subsampling_x,
1487 /*subsampling_y=*/0, 2 * kSuperResHorizontalBorder,
1488 2 * (kSuperResHorizontalBorder + kSuperResHorizontalPadding), 0, 0,
1489 nullptr, nullptr, nullptr)) {
1490 LIBGAV1_DLOG(ERROR, "Failed to resize superres line buffer.\n");
1491 return kStatusOutOfMemory;
1492 }
1493 }
1494
1495 if (is_frame_parallel_ && !IsIntraFrame(frame_header.frame_type)) {
1496 // We can parse the current frame if all the reference frames have been
1497 // parsed.
1498 for (const int index : frame_header.reference_frame_index) {
1499 if (!state.reference_frame[index]->WaitUntilParsed()) {
1500 return kStatusUnknownError;
1501 }
1502 }
1503 }
1504
1505 // If prev_segment_ids is a null pointer, it is treated as if it pointed to
1506 // a segmentation map containing all 0s.
1507 const SegmentationMap* prev_segment_ids = nullptr;
1508 if (frame_header.primary_reference_frame == kPrimaryReferenceNone) {
1509 frame_scratch_buffer->symbol_decoder_context.Initialize(
1510 frame_header.quantizer.base_index);
1511 } else {
1512 const int index =
1513 frame_header
1514 .reference_frame_index[frame_header.primary_reference_frame];
1515 assert(index != -1);
1516 const RefCountedBuffer* prev_frame = state.reference_frame[index].get();
1517 frame_scratch_buffer->symbol_decoder_context = prev_frame->FrameContext();
1518 if (frame_header.segmentation.enabled &&
1519 prev_frame->columns4x4() == frame_header.columns4x4 &&
1520 prev_frame->rows4x4() == frame_header.rows4x4) {
1521 prev_segment_ids = prev_frame->segmentation_map();
1522 }
1523 }
1524
1525 // The Tile class must make use of a separate buffer to store the unfiltered
1526 // pixels for the intra prediction of the next superblock row. This is done
1527 // only when one of the following conditions are true:
1528 // * is_frame_parallel_ is true.
1529 // * settings_.threads == 1.
1530 // In the non-frame-parallel multi-threaded case, we do not run the post
1531 // filters in the decode loop. So this buffer need not be used.
1532 const bool use_intra_prediction_buffer =
1533 is_frame_parallel_ || settings_.threads == 1;
1534 if (use_intra_prediction_buffer) {
1535 if (!frame_scratch_buffer->intra_prediction_buffers.Resize(
1536 frame_header.tile_info.tile_rows)) {
1537 LIBGAV1_DLOG(ERROR, "Failed to Resize intra_prediction_buffers.");
1538 return kStatusOutOfMemory;
1539 }
1540 IntraPredictionBuffer* const intra_prediction_buffers =
1541 frame_scratch_buffer->intra_prediction_buffers.get();
1542 for (int plane = kPlaneY; plane < num_planes; ++plane) {
1543 const int subsampling =
1544 (plane == kPlaneY) ? 0 : sequence_header.color_config.subsampling_x;
1545 const size_t intra_prediction_buffer_size =
1546 ((MultiplyBy4(frame_header.columns4x4) >> subsampling) *
1547 (sequence_header.color_config.bitdepth == 8 ? sizeof(uint8_t)
1548 : sizeof(uint16_t)));
1549 for (int tile_row = 0; tile_row < frame_header.tile_info.tile_rows;
1550 ++tile_row) {
1551 if (!intra_prediction_buffers[tile_row][plane].Resize(
1552 intra_prediction_buffer_size)) {
1553 LIBGAV1_DLOG(ERROR,
1554 "Failed to allocate intra prediction buffer for tile "
1555 "row %d plane %d.\n",
1556 tile_row, plane);
1557 return kStatusOutOfMemory;
1558 }
1559 }
1560 }
1561 }
1562
1563 PostFilter post_filter(frame_header, sequence_header, frame_scratch_buffer,
1564 current_frame->buffer(), dsp,
1565 settings_.post_filter_mask);
1566 SymbolDecoderContext saved_symbol_decoder_context;
1567 BlockingCounterWithStatus pending_tiles(tile_count);
1568 for (int tile_number = 0; tile_number < tile_count; ++tile_number) {
1569 std::unique_ptr<Tile> tile = Tile::Create(
1570 tile_number, tile_buffers[tile_number].data,
1571 tile_buffers[tile_number].size, sequence_header, frame_header,
1572 current_frame, state, frame_scratch_buffer, wedge_masks_,
1573 quantizer_matrix_, &saved_symbol_decoder_context, prev_segment_ids,
1574 &post_filter, dsp, threading_strategy.row_thread_pool(tile_number),
1575 &pending_tiles, is_frame_parallel_, use_intra_prediction_buffer,
1576 settings_.parse_only);
1577 if (tile == nullptr) {
1578 LIBGAV1_DLOG(ERROR, "Failed to create tile.");
1579 return kStatusOutOfMemory;
1580 }
1581 tiles.push_back_unchecked(std::move(tile));
1582 }
1583 assert(tiles.size() == static_cast<size_t>(tile_count));
1584 if (settings_.parse_only) { // Parse only.
1585 if (ParseTiles(tiles) != kStatusOk) {
1586 return kStatusUnknownError;
1587 }
1588 frame_mean_qp_ = CalcFrameMeanQp(tiles);
1589 } else { // Decode.
1590 if (is_frame_parallel_) {
1591 if (frame_scratch_buffer->threading_strategy.thread_pool() == nullptr) {
1592 return DecodeTilesFrameParallel(sequence_header, frame_header, tiles,
1593 saved_symbol_decoder_context,
1594 prev_segment_ids, frame_scratch_buffer,
1595 &post_filter, current_frame);
1596 }
1597 return DecodeTilesThreadedFrameParallel(
1598 sequence_header, frame_header, tiles, saved_symbol_decoder_context,
1599 prev_segment_ids, frame_scratch_buffer, &post_filter, current_frame);
1600 }
1601 StatusCode status;
1602 if (settings_.threads == 1) {
1603 status = DecodeTilesNonFrameParallel(sequence_header, frame_header, tiles,
1604 frame_scratch_buffer, &post_filter);
1605 } else {
1606 status = DecodeTilesThreadedNonFrameParallel(
1607 tiles, frame_scratch_buffer, &post_filter, &pending_tiles);
1608 }
1609 if (status != kStatusOk) return status;
1610 }
1611
1612 if (frame_header.enable_frame_end_update_cdf) {
1613 frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
1614 }
1615 current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
1616 SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
1617 return kStatusOk;
1618 }
1619
ApplyFilmGrain(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const RefCountedBufferPtr & displayable_frame,RefCountedBufferPtr * film_grain_frame,ThreadPool * thread_pool)1620 StatusCode DecoderImpl::ApplyFilmGrain(
1621 const ObuSequenceHeader& sequence_header,
1622 const ObuFrameHeader& frame_header,
1623 const RefCountedBufferPtr& displayable_frame,
1624 RefCountedBufferPtr* film_grain_frame, ThreadPool* thread_pool) {
1625 if (!sequence_header.film_grain_params_present ||
1626 !displayable_frame->film_grain_params().apply_grain ||
1627 (settings_.post_filter_mask & 0x10) == 0) {
1628 *film_grain_frame = displayable_frame;
1629 return kStatusOk;
1630 }
1631 if (!frame_header.show_existing_frame &&
1632 frame_header.refresh_frame_flags == 0) {
1633 // If show_existing_frame is true, then the current frame is a previously
1634 // saved reference frame. If refresh_frame_flags is nonzero, then the
1635 // state_.UpdateReferenceFrames() call above has saved the current frame as
1636 // a reference frame. Therefore, if both of these conditions are false, then
1637 // the current frame is not saved as a reference frame. displayable_frame
1638 // should hold the only reference to the current frame.
1639 assert(displayable_frame.use_count() == 1);
1640 // Add film grain noise in place.
1641 *film_grain_frame = displayable_frame;
1642 } else {
1643 *film_grain_frame = buffer_pool_.GetFreeBuffer();
1644 if (*film_grain_frame == nullptr) {
1645 LIBGAV1_DLOG(ERROR,
1646 "Could not get film_grain_frame from the buffer pool.");
1647 return kStatusResourceExhausted;
1648 }
1649 if (!(*film_grain_frame)
1650 ->Realloc(displayable_frame->buffer()->bitdepth(),
1651 displayable_frame->buffer()->is_monochrome(),
1652 displayable_frame->upscaled_width(),
1653 displayable_frame->frame_height(),
1654 displayable_frame->buffer()->subsampling_x(),
1655 displayable_frame->buffer()->subsampling_y(),
1656 kBorderPixelsFilmGrain, kBorderPixelsFilmGrain,
1657 kBorderPixelsFilmGrain, kBorderPixelsFilmGrain)) {
1658 LIBGAV1_DLOG(ERROR, "film_grain_frame->Realloc() failed.");
1659 return kStatusOutOfMemory;
1660 }
1661 (*film_grain_frame)
1662 ->set_chroma_sample_position(
1663 displayable_frame->chroma_sample_position());
1664 (*film_grain_frame)->set_spatial_id(displayable_frame->spatial_id());
1665 (*film_grain_frame)->set_temporal_id(displayable_frame->temporal_id());
1666 }
1667 const bool color_matrix_is_identity =
1668 sequence_header.color_config.matrix_coefficients ==
1669 kMatrixCoefficientsIdentity;
1670 assert(displayable_frame->buffer()->stride(kPlaneU) ==
1671 displayable_frame->buffer()->stride(kPlaneV));
1672 const int input_stride_uv = displayable_frame->buffer()->stride(kPlaneU);
1673 assert((*film_grain_frame)->buffer()->stride(kPlaneU) ==
1674 (*film_grain_frame)->buffer()->stride(kPlaneV));
1675 const int output_stride_uv = (*film_grain_frame)->buffer()->stride(kPlaneU);
1676 #if LIBGAV1_MAX_BITDEPTH >= 10
1677 if (displayable_frame->buffer()->bitdepth() == 10) {
1678 FilmGrain<10> film_grain(displayable_frame->film_grain_params(),
1679 displayable_frame->buffer()->is_monochrome(),
1680 color_matrix_is_identity,
1681 displayable_frame->buffer()->subsampling_x(),
1682 displayable_frame->buffer()->subsampling_y(),
1683 displayable_frame->upscaled_width(),
1684 displayable_frame->frame_height(), thread_pool);
1685 if (!film_grain.AddNoise(
1686 displayable_frame->buffer()->data(kPlaneY),
1687 displayable_frame->buffer()->stride(kPlaneY),
1688 displayable_frame->buffer()->data(kPlaneU),
1689 displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1690 (*film_grain_frame)->buffer()->data(kPlaneY),
1691 (*film_grain_frame)->buffer()->stride(kPlaneY),
1692 (*film_grain_frame)->buffer()->data(kPlaneU),
1693 (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1694 LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1695 return kStatusOutOfMemory;
1696 }
1697 return kStatusOk;
1698 }
1699 #endif // LIBGAV1_MAX_BITDEPTH >= 10
1700 #if LIBGAV1_MAX_BITDEPTH == 12
1701 if (displayable_frame->buffer()->bitdepth() == 12) {
1702 FilmGrain<12> film_grain(displayable_frame->film_grain_params(),
1703 displayable_frame->buffer()->is_monochrome(),
1704 color_matrix_is_identity,
1705 displayable_frame->buffer()->subsampling_x(),
1706 displayable_frame->buffer()->subsampling_y(),
1707 displayable_frame->upscaled_width(),
1708 displayable_frame->frame_height(), thread_pool);
1709 if (!film_grain.AddNoise(
1710 displayable_frame->buffer()->data(kPlaneY),
1711 displayable_frame->buffer()->stride(kPlaneY),
1712 displayable_frame->buffer()->data(kPlaneU),
1713 displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1714 (*film_grain_frame)->buffer()->data(kPlaneY),
1715 (*film_grain_frame)->buffer()->stride(kPlaneY),
1716 (*film_grain_frame)->buffer()->data(kPlaneU),
1717 (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1718 LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1719 return kStatusOutOfMemory;
1720 }
1721 return kStatusOk;
1722 }
1723 #endif // LIBGAV1_MAX_BITDEPTH == 12
1724 FilmGrain<8> film_grain(displayable_frame->film_grain_params(),
1725 displayable_frame->buffer()->is_monochrome(),
1726 color_matrix_is_identity,
1727 displayable_frame->buffer()->subsampling_x(),
1728 displayable_frame->buffer()->subsampling_y(),
1729 displayable_frame->upscaled_width(),
1730 displayable_frame->frame_height(), thread_pool);
1731 if (!film_grain.AddNoise(
1732 displayable_frame->buffer()->data(kPlaneY),
1733 displayable_frame->buffer()->stride(kPlaneY),
1734 displayable_frame->buffer()->data(kPlaneU),
1735 displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1736 (*film_grain_frame)->buffer()->data(kPlaneY),
1737 (*film_grain_frame)->buffer()->stride(kPlaneY),
1738 (*film_grain_frame)->buffer()->data(kPlaneU),
1739 (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1740 LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1741 return kStatusOutOfMemory;
1742 }
1743 return kStatusOk;
1744 }
1745
IsNewSequenceHeader(const ObuParser & obu)1746 bool DecoderImpl::IsNewSequenceHeader(const ObuParser& obu) {
1747 if (std::find_if(obu.obu_headers().begin(), obu.obu_headers().end(),
1748 [](const ObuHeader& obu_header) {
1749 return obu_header.type == kObuSequenceHeader;
1750 }) == obu.obu_headers().end()) {
1751 return false;
1752 }
1753 const ObuSequenceHeader sequence_header = obu.sequence_header();
1754 const bool sequence_header_changed =
1755 !has_sequence_header_ ||
1756 sequence_header_.color_config.bitdepth !=
1757 sequence_header.color_config.bitdepth ||
1758 sequence_header_.color_config.is_monochrome !=
1759 sequence_header.color_config.is_monochrome ||
1760 sequence_header_.color_config.subsampling_x !=
1761 sequence_header.color_config.subsampling_x ||
1762 sequence_header_.color_config.subsampling_y !=
1763 sequence_header.color_config.subsampling_y ||
1764 sequence_header_.max_frame_width != sequence_header.max_frame_width ||
1765 sequence_header_.max_frame_height != sequence_header.max_frame_height;
1766 sequence_header_ = sequence_header;
1767 has_sequence_header_ = true;
1768 return sequence_header_changed;
1769 }
1770
MaybeInitializeWedgeMasks(FrameType frame_type)1771 bool DecoderImpl::MaybeInitializeWedgeMasks(FrameType frame_type) {
1772 if (IsIntraFrame(frame_type) || wedge_masks_initialized_) {
1773 return true;
1774 }
1775 if (!GenerateWedgeMask(&wedge_masks_)) {
1776 return false;
1777 }
1778 wedge_masks_initialized_ = true;
1779 return true;
1780 }
1781
MaybeInitializeQuantizerMatrix(const ObuFrameHeader & frame_header)1782 bool DecoderImpl::MaybeInitializeQuantizerMatrix(
1783 const ObuFrameHeader& frame_header) {
1784 if (quantizer_matrix_initialized_ || !frame_header.quantizer.use_matrix) {
1785 return true;
1786 }
1787 if (!InitializeQuantizerMatrix(&quantizer_matrix_)) {
1788 return false;
1789 }
1790 quantizer_matrix_initialized_ = true;
1791 return true;
1792 }
1793
1794 } // namespace libgav1
1795