• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/decoder_impl.h"
16 
17 #include <algorithm>
18 #include <atomic>
19 #include <cassert>
20 #include <cmath>
21 #include <condition_variable>  // NOLINT (unapproved c++11 header)
22 #include <iterator>
23 #include <memory>
24 #include <mutex>  // NOLINT (unapproved c++11 header)
25 #include <new>
26 #include <utility>
27 #include <vector>
28 
29 #include "src/dsp/common.h"
30 #include "src/dsp/constants.h"
31 #include "src/dsp/dsp.h"
32 #include "src/film_grain.h"
33 #include "src/frame_buffer_utils.h"
34 #include "src/frame_scratch_buffer.h"
35 #include "src/loop_restoration_info.h"
36 #include "src/obu_parser.h"
37 #include "src/post_filter.h"
38 #include "src/prediction_mask.h"
39 #include "src/threading_strategy.h"
40 #include "src/utils/blocking_counter.h"
41 #include "src/utils/common.h"
42 #include "src/utils/constants.h"
43 #include "src/utils/logging.h"
44 #include "src/utils/raw_bit_reader.h"
45 #include "src/utils/segmentation.h"
46 #include "src/utils/threadpool.h"
47 #include "src/yuv_buffer.h"
48 
49 namespace libgav1 {
50 namespace {
51 
// NOTE(review): judging by the names, these are the largest block dimensions
// expressed in units of 4x4 samples (32 * 4 = 128) -- TODO confirm. Neither
// constant is referenced in this portion of the file.
52 constexpr int kMaxBlockWidth4x4 = 32;
53 constexpr int kMaxBlockHeight4x4 = 32;
54 
55 // Computes the bottom border size in pixels. If CDEF, loop restoration or
56 // SuperRes is enabled, adds extra border pixels to facilitate those steps to
57 // happen nearly in-place (a few extra rows instead of an entire frame buffer).
58 // The logic in this function should match the corresponding logic for
59 // |vertical_shift| in the PostFilter constructor.
GetBottomBorderPixels(const bool do_cdef,const bool do_restoration,const bool do_superres,const int subsampling_y)60 int GetBottomBorderPixels(const bool do_cdef, const bool do_restoration,
61                           const bool do_superres, const int subsampling_y) {
62   int extra_border = 0;
63   if (do_cdef) {
64     extra_border += kCdefBorder;
65   } else if (do_restoration) {
66     // If CDEF is enabled, loop restoration is safe without extra border.
67     extra_border += kRestorationVerticalBorder;
68   }
69   if (do_superres) extra_border += kSuperResVerticalBorder;
70   // Double the number of extra bottom border pixels if the bottom border will
71   // be subsampled.
72   extra_border <<= subsampling_y;
73   return Align(kBorderPixels + extra_border, 2);  // Must be a multiple of 2.
74 }
75 
76 // Sets |frame_scratch_buffer->tile_decoding_failed| to true (while holding on
77 // to |frame_scratch_buffer->superblock_row_mutex|) and notifies the first
78 // |count| condition variables in
79 // |frame_scratch_buffer->superblock_row_progress_condvar|.
SetFailureAndNotifyAll(FrameScratchBuffer * const frame_scratch_buffer,int count)80 void SetFailureAndNotifyAll(FrameScratchBuffer* const frame_scratch_buffer,
81                             int count) {
82   {
83     std::lock_guard<std::mutex> lock(
84         frame_scratch_buffer->superblock_row_mutex);
85     frame_scratch_buffer->tile_decoding_failed = true;
86   }
87   std::condition_variable* const condvars =
88       frame_scratch_buffer->superblock_row_progress_condvar.get();
89   for (int i = 0; i < count; ++i) {
90     condvars[i].notify_one();
91   }
92 }
93 
94 // Helper class that releases the frame scratch buffer in the destructor.
95 class FrameScratchBufferReleaser {
96  public:
FrameScratchBufferReleaser(FrameScratchBufferPool * frame_scratch_buffer_pool,std::unique_ptr<FrameScratchBuffer> * frame_scratch_buffer)97   FrameScratchBufferReleaser(
98       FrameScratchBufferPool* frame_scratch_buffer_pool,
99       std::unique_ptr<FrameScratchBuffer>* frame_scratch_buffer)
100       : frame_scratch_buffer_pool_(frame_scratch_buffer_pool),
101         frame_scratch_buffer_(frame_scratch_buffer) {}
~FrameScratchBufferReleaser()102   ~FrameScratchBufferReleaser() {
103     frame_scratch_buffer_pool_->Release(std::move(*frame_scratch_buffer_));
104   }
105 
106  private:
107   FrameScratchBufferPool* const frame_scratch_buffer_pool_;
108   std::unique_ptr<FrameScratchBuffer>* const frame_scratch_buffer_;
109 };
110 
111 // Sets the |frame|'s segmentation map for two cases. The third case is handled
112 // in Tile::DecodeBlock().
SetSegmentationMap(const ObuFrameHeader & frame_header,const SegmentationMap * prev_segment_ids,RefCountedBuffer * const frame)113 void SetSegmentationMap(const ObuFrameHeader& frame_header,
114                         const SegmentationMap* prev_segment_ids,
115                         RefCountedBuffer* const frame) {
116   if (!frame_header.segmentation.enabled) {
117     // All segment_id's are 0.
118     frame->segmentation_map()->Clear();
119   } else if (!frame_header.segmentation.update_map) {
120     // Copy from prev_segment_ids.
121     if (prev_segment_ids == nullptr) {
122       // Treat a null prev_segment_ids pointer as if it pointed to a
123       // segmentation map containing all 0s.
124       frame->segmentation_map()->Clear();
125     } else {
126       frame->segmentation_map()->CopyFrom(*prev_segment_ids);
127     }
128   }
129 }
130 
DecodeTilesNonFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter)131 StatusCode DecodeTilesNonFrameParallel(
132     const ObuSequenceHeader& sequence_header,
133     const ObuFrameHeader& frame_header,
134     const Vector<std::unique_ptr<Tile>>& tiles,
135     FrameScratchBuffer* const frame_scratch_buffer,
136     PostFilter* const post_filter) {
137   // Decode in superblock row order.
138   const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
139   std::unique_ptr<TileScratchBuffer> tile_scratch_buffer =
140       frame_scratch_buffer->tile_scratch_buffer_pool.Get();
141   if (tile_scratch_buffer == nullptr) return kLibgav1StatusOutOfMemory;
142   for (int row4x4 = 0; row4x4 < frame_header.rows4x4;
143        row4x4 += block_width4x4) {
144     for (const auto& tile_ptr : tiles) {
145       if (!tile_ptr->ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
146               row4x4, tile_scratch_buffer.get())) {
147         return kLibgav1StatusUnknownError;
148       }
149     }
150     post_filter->ApplyFilteringForOneSuperBlockRow(
151         row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
152         /*do_deblock=*/true);
153   }
154   frame_scratch_buffer->tile_scratch_buffer_pool.Release(
155       std::move(tile_scratch_buffer));
156   return kStatusOk;
157 }
158 
DecodeTilesThreadedNonFrameParallel(const Vector<std::unique_ptr<Tile>> & tiles,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,BlockingCounterWithStatus * const pending_tiles)159 StatusCode DecodeTilesThreadedNonFrameParallel(
160     const Vector<std::unique_ptr<Tile>>& tiles,
161     FrameScratchBuffer* const frame_scratch_buffer,
162     PostFilter* const post_filter,
163     BlockingCounterWithStatus* const pending_tiles) {
164   ThreadingStrategy& threading_strategy =
165       frame_scratch_buffer->threading_strategy;
166   const int num_workers = threading_strategy.tile_thread_count();
167   BlockingCounterWithStatus pending_workers(num_workers);
168   std::atomic<int> tile_counter(0);
169   const int tile_count = static_cast<int>(tiles.size());
170   bool tile_decoding_failed = false;
171   // Submit tile decoding jobs to the thread pool.
172   for (int i = 0; i < num_workers; ++i) {
173     threading_strategy.tile_thread_pool()->Schedule([&tiles, tile_count,
174                                                      &tile_counter,
175                                                      &pending_workers,
176                                                      &pending_tiles]() {
177       bool failed = false;
178       int index;
179       while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
180              tile_count) {
181         if (!failed) {
182           const auto& tile_ptr = tiles[index];
183           if (!tile_ptr->ParseAndDecode()) {
184             LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
185             failed = true;
186           }
187         } else {
188           pending_tiles->Decrement(false);
189         }
190       }
191       pending_workers.Decrement(!failed);
192     });
193   }
194   // Have the current thread partake in tile decoding.
195   int index;
196   while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
197          tile_count) {
198     if (!tile_decoding_failed) {
199       const auto& tile_ptr = tiles[index];
200       if (!tile_ptr->ParseAndDecode()) {
201         LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
202         tile_decoding_failed = true;
203       }
204     } else {
205       pending_tiles->Decrement(false);
206     }
207   }
208   // Wait until all the workers are done. This ensures that all the tiles have
209   // been parsed.
210   tile_decoding_failed |= !pending_workers.Wait();
211   // Wait until all the tiles have been decoded.
212   tile_decoding_failed |= !pending_tiles->Wait();
213   if (tile_decoding_failed) return kStatusUnknownError;
214   assert(threading_strategy.post_filter_thread_pool() != nullptr);
215   post_filter->ApplyFilteringThreaded();
216   return kStatusOk;
217 }
218 
ParseTiles(const Vector<std::unique_ptr<Tile>> & tiles)219 StatusCode ParseTiles(const Vector<std::unique_ptr<Tile>>& tiles) {
220   for (const auto& tile : tiles) {
221     if (!tile->Parse()) {
222       LIBGAV1_DLOG(ERROR, "Failed to parse tile number: %d\n", tile->number());
223       return kStatusUnknownError;
224     }
225   }
226   return kStatusOk;
227 }
228 
DecodeTilesFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,const SymbolDecoderContext & saved_symbol_decoder_context,const SegmentationMap * const prev_segment_ids,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,RefCountedBuffer * const current_frame)229 StatusCode DecodeTilesFrameParallel(
230     const ObuSequenceHeader& sequence_header,
231     const ObuFrameHeader& frame_header,
232     const Vector<std::unique_ptr<Tile>>& tiles,
233     const SymbolDecoderContext& saved_symbol_decoder_context,
234     const SegmentationMap* const prev_segment_ids,
235     FrameScratchBuffer* const frame_scratch_buffer,
236     PostFilter* const post_filter, RefCountedBuffer* const current_frame) {
237   // Parse the frame.
238   StatusCode status = ParseTiles(tiles);
239   if (status != kStatusOk) return status;
240   if (frame_header.enable_frame_end_update_cdf) {
241     frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
242   }
243   current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
244   SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
245   // Mark frame as parsed.
246   current_frame->SetFrameState(kFrameStateParsed);
247   std::unique_ptr<TileScratchBuffer> tile_scratch_buffer =
248       frame_scratch_buffer->tile_scratch_buffer_pool.Get();
249   if (tile_scratch_buffer == nullptr) {
250     return kStatusOutOfMemory;
251   }
252   const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
253   // Decode in superblock row order (inter prediction in the Tile class will
254   // block until the required superblocks in the reference frame are decoded).
255   for (int row4x4 = 0; row4x4 < frame_header.rows4x4;
256        row4x4 += block_width4x4) {
257     for (const auto& tile_ptr : tiles) {
258       if (!tile_ptr->ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
259               row4x4, tile_scratch_buffer.get())) {
260         LIBGAV1_DLOG(ERROR, "Failed to decode tile number: %d\n",
261                      tile_ptr->number());
262         return kStatusUnknownError;
263       }
264     }
265     const int progress_row = post_filter->ApplyFilteringForOneSuperBlockRow(
266         row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
267         /*do_deblock=*/true);
268     if (progress_row >= 0) {
269       current_frame->SetProgress(progress_row);
270     }
271   }
272   // Mark frame as decoded (we no longer care about row-level progress since the
273   // entire frame has been decoded).
274   current_frame->SetFrameState(kFrameStateDecoded);
275   frame_scratch_buffer->tile_scratch_buffer_pool.Release(
276       std::move(tile_scratch_buffer));
277   return kStatusOk;
278 }
279 
280 // Helper function used by DecodeTilesThreadedFrameParallel. Applies the
281 // deblocking filter for tile boundaries for the superblock row at |row4x4|.
ApplyDeblockingFilterForTileBoundaries(PostFilter * const post_filter,const std::unique_ptr<Tile> * tile_row_base,const ObuFrameHeader & frame_header,int row4x4,int block_width4x4,int tile_columns,bool decode_entire_tiles_in_worker_threads)282 void ApplyDeblockingFilterForTileBoundaries(
283     PostFilter* const post_filter, const std::unique_ptr<Tile>* tile_row_base,
284     const ObuFrameHeader& frame_header, int row4x4, int block_width4x4,
285     int tile_columns, bool decode_entire_tiles_in_worker_threads) {
286   // Apply vertical deblock filtering for the first 64 columns of each tile.
287   for (int tile_column = 0; tile_column < tile_columns; ++tile_column) {
288     const Tile& tile = *tile_row_base[tile_column];
289     post_filter->ApplyDeblockFilter(
290         kLoopFilterTypeVertical, row4x4, tile.column4x4_start(),
291         tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
292   }
293   if (decode_entire_tiles_in_worker_threads &&
294       row4x4 == tile_row_base[0]->row4x4_start()) {
295     // This is the first superblock row of a tile row. In this case, apply
296     // horizontal deblock filtering for the entire superblock row.
297     post_filter->ApplyDeblockFilter(kLoopFilterTypeHorizontal, row4x4, 0,
298                                     frame_header.columns4x4, block_width4x4);
299   } else {
300     // Apply horizontal deblock filtering for the first 64 columns of the
301     // first tile.
302     const Tile& first_tile = *tile_row_base[0];
303     post_filter->ApplyDeblockFilter(
304         kLoopFilterTypeHorizontal, row4x4, first_tile.column4x4_start(),
305         first_tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
306     // Apply horizontal deblock filtering for the last 64 columns of the
307     // previous tile and the first 64 columns of the current tile.
308     for (int tile_column = 1; tile_column < tile_columns; ++tile_column) {
309       const Tile& tile = *tile_row_base[tile_column];
310       // If the previous tile has more than 64 columns, then include those
311       // for the horizontal deblock.
312       const Tile& previous_tile = *tile_row_base[tile_column - 1];
313       const int column4x4_start =
314           tile.column4x4_start() -
315           ((tile.column4x4_start() - kNum4x4InLoopFilterUnit !=
316             previous_tile.column4x4_start())
317                ? kNum4x4InLoopFilterUnit
318                : 0);
319       post_filter->ApplyDeblockFilter(
320           kLoopFilterTypeHorizontal, row4x4, column4x4_start,
321           tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
322     }
323     // Apply horizontal deblock filtering for the last 64 columns of the
324     // last tile.
325     const Tile& last_tile = *tile_row_base[tile_columns - 1];
326     // Identify the last column4x4 value and do horizontal filtering for
327     // that column4x4. The value of last column4x4 is the nearest multiple
328     // of 16 that is before tile.column4x4_end().
329     const int column4x4_start = (last_tile.column4x4_end() - 1) & ~15;
330     // If column4x4_start is the same as tile.column4x4_start() then it
331     // means that the last tile has <= 64 columns. So there is nothing left
332     // to deblock (since it was already deblocked in the loop above).
333     if (column4x4_start != last_tile.column4x4_start()) {
334       post_filter->ApplyDeblockFilter(
335           kLoopFilterTypeHorizontal, row4x4, column4x4_start,
336           last_tile.column4x4_end(), block_width4x4);
337     }
338   }
339 }
340 
341 // Helper function used by DecodeTilesThreadedFrameParallel. Decodes the
342 // superblock row starting at |row4x4| for tile at index |tile_index| in the
343 // list of tiles |tiles|. If the decoding is successful, then it does the
344 // following:
345 //   * Schedule the next superblock row in the current tile column for decoding
346 //     (the next superblock row may be in a different tile than the current
347 //     one).
348 //   * If an entire superblock row of the frame has been decoded, it notifies
349 //     the waiters (if there are any).
DecodeSuperBlockRowInTile(const Vector<std::unique_ptr<Tile>> & tiles,size_t tile_index,int row4x4,const int superblock_size4x4,const int tile_columns,const int superblock_rows,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,BlockingCounter * const pending_jobs)350 void DecodeSuperBlockRowInTile(
351     const Vector<std::unique_ptr<Tile>>& tiles, size_t tile_index, int row4x4,
352     const int superblock_size4x4, const int tile_columns,
353     const int superblock_rows, FrameScratchBuffer* const frame_scratch_buffer,
354     PostFilter* const post_filter, BlockingCounter* const pending_jobs) {
355   std::unique_ptr<TileScratchBuffer> scratch_buffer =
356       frame_scratch_buffer->tile_scratch_buffer_pool.Get();
357   if (scratch_buffer == nullptr) {
358     SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
359     return;
360   }
361   Tile& tile = *tiles[tile_index];
362   const bool ok = tile.ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
363       row4x4, scratch_buffer.get());
364   frame_scratch_buffer->tile_scratch_buffer_pool.Release(
365       std::move(scratch_buffer));
366   if (!ok) {
367     SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
368     return;
369   }
370   if (post_filter->DoDeblock()) {
371     // Apply vertical deblock filtering for all the columns in this tile except
372     // for the first 64 columns.
373     post_filter->ApplyDeblockFilter(
374         kLoopFilterTypeVertical, row4x4,
375         tile.column4x4_start() + kNum4x4InLoopFilterUnit, tile.column4x4_end(),
376         superblock_size4x4);
377     // Apply horizontal deblock filtering for all the columns in this tile
378     // except for the first and the last 64 columns.
379     // Note about the last tile of each row: For the last tile, column4x4_end
380     // may not be a multiple of 16. In that case it is still okay to simply
381     // subtract 16 since ApplyDeblockFilter() will only do the filters in
382     // increments of 64 columns (or 32 columns for chroma with subsampling).
383     post_filter->ApplyDeblockFilter(
384         kLoopFilterTypeHorizontal, row4x4,
385         tile.column4x4_start() + kNum4x4InLoopFilterUnit,
386         tile.column4x4_end() - kNum4x4InLoopFilterUnit, superblock_size4x4);
387   }
388   const int superblock_size4x4_log2 = FloorLog2(superblock_size4x4);
389   const int index = row4x4 >> superblock_size4x4_log2;
390   int* const superblock_row_progress =
391       frame_scratch_buffer->superblock_row_progress.get();
392   std::condition_variable* const superblock_row_progress_condvar =
393       frame_scratch_buffer->superblock_row_progress_condvar.get();
394   bool notify;
395   {
396     std::lock_guard<std::mutex> lock(
397         frame_scratch_buffer->superblock_row_mutex);
398     notify = ++superblock_row_progress[index] == tile_columns;
399   }
400   if (notify) {
401     // We are done decoding this superblock row. Notify the post filtering
402     // thread.
403     superblock_row_progress_condvar[index].notify_one();
404   }
405   // Schedule the next superblock row (if one exists).
406   ThreadPool& thread_pool =
407       *frame_scratch_buffer->threading_strategy.thread_pool();
408   const int next_row4x4 = row4x4 + superblock_size4x4;
409   if (!tile.IsRow4x4Inside(next_row4x4)) {
410     tile_index += tile_columns;
411   }
412   if (tile_index >= tiles.size()) return;
413   pending_jobs->IncrementBy(1);
414   thread_pool.Schedule([&tiles, tile_index, next_row4x4, superblock_size4x4,
415                         tile_columns, superblock_rows, frame_scratch_buffer,
416                         post_filter, pending_jobs]() {
417     DecodeSuperBlockRowInTile(tiles, tile_index, next_row4x4,
418                               superblock_size4x4, tile_columns, superblock_rows,
419                               frame_scratch_buffer, post_filter, pending_jobs);
420     pending_jobs->Decrement();
421   });
422 }
423 
DecodeTilesThreadedFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,const SymbolDecoderContext & saved_symbol_decoder_context,const SegmentationMap * const prev_segment_ids,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,RefCountedBuffer * const current_frame)424 StatusCode DecodeTilesThreadedFrameParallel(
425     const ObuSequenceHeader& sequence_header,
426     const ObuFrameHeader& frame_header,
427     const Vector<std::unique_ptr<Tile>>& tiles,
428     const SymbolDecoderContext& saved_symbol_decoder_context,
429     const SegmentationMap* const prev_segment_ids,
430     FrameScratchBuffer* const frame_scratch_buffer,
431     PostFilter* const post_filter, RefCountedBuffer* const current_frame) {
432   // Parse the frame.
433   ThreadPool& thread_pool =
434       *frame_scratch_buffer->threading_strategy.thread_pool();
435   std::atomic<int> tile_counter(0);
436   const int tile_count = static_cast<int>(tiles.size());
437   const int num_workers = thread_pool.num_threads();
438   BlockingCounterWithStatus parse_workers(num_workers);
439   // Submit tile parsing jobs to the thread pool.
440   for (int i = 0; i < num_workers; ++i) {
441     thread_pool.Schedule([&tiles, tile_count, &tile_counter, &parse_workers]() {
442       bool failed = false;
443       int index;
444       while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
445              tile_count) {
446         if (!failed) {
447           const auto& tile_ptr = tiles[index];
448           if (!tile_ptr->Parse()) {
449             LIBGAV1_DLOG(ERROR, "Error parsing tile #%d", tile_ptr->number());
450             failed = true;
451           }
452         }
453       }
454       parse_workers.Decrement(!failed);
455     });
456   }
457 
458   // Have the current thread participate in parsing.
459   bool failed = false;
460   int index;
461   while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
462          tile_count) {
463     if (!failed) {
464       const auto& tile_ptr = tiles[index];
465       if (!tile_ptr->Parse()) {
466         LIBGAV1_DLOG(ERROR, "Error parsing tile #%d", tile_ptr->number());
467         failed = true;
468       }
469     }
470   }
471 
472   // Wait until all the parse workers are done. This ensures that all the tiles
473   // have been parsed.
474   if (!parse_workers.Wait() || failed) {
475     return kLibgav1StatusUnknownError;
476   }
477   if (frame_header.enable_frame_end_update_cdf) {
478     frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
479   }
480   current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
481   SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
482   current_frame->SetFrameState(kFrameStateParsed);
483 
484   // Decode the frame.
485   const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
486   const int block_width4x4_log2 =
487       sequence_header.use_128x128_superblock ? 5 : 4;
488   const int superblock_rows =
489       (frame_header.rows4x4 + block_width4x4 - 1) >> block_width4x4_log2;
490   if (!frame_scratch_buffer->superblock_row_progress.Resize(superblock_rows) ||
491       !frame_scratch_buffer->superblock_row_progress_condvar.Resize(
492           superblock_rows)) {
493     return kLibgav1StatusOutOfMemory;
494   }
495   int* const superblock_row_progress =
496       frame_scratch_buffer->superblock_row_progress.get();
497   memset(superblock_row_progress, 0,
498          superblock_rows * sizeof(superblock_row_progress[0]));
499   frame_scratch_buffer->tile_decoding_failed = false;
500   const int tile_columns = frame_header.tile_info.tile_columns;
501   const bool decode_entire_tiles_in_worker_threads =
502       num_workers >= tile_columns;
503   BlockingCounter pending_jobs(
504       decode_entire_tiles_in_worker_threads ? num_workers : tile_columns);
505   if (decode_entire_tiles_in_worker_threads) {
506     // Submit tile decoding jobs to the thread pool.
507     tile_counter = 0;
508     for (int i = 0; i < num_workers; ++i) {
509       thread_pool.Schedule([&tiles, tile_count, &tile_counter, &pending_jobs,
510                             frame_scratch_buffer, superblock_rows]() {
511         bool failed = false;
512         int index;
513         while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
514                tile_count) {
515           if (failed) continue;
516           const auto& tile_ptr = tiles[index];
517           if (!tile_ptr->Decode(
518                   &frame_scratch_buffer->superblock_row_mutex,
519                   frame_scratch_buffer->superblock_row_progress.get(),
520                   frame_scratch_buffer->superblock_row_progress_condvar
521                       .get())) {
522             LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
523             failed = true;
524             SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
525           }
526         }
527         pending_jobs.Decrement();
528       });
529     }
530   } else {
531     // Schedule the jobs for first tile row.
532     for (int tile_index = 0; tile_index < tile_columns; ++tile_index) {
533       thread_pool.Schedule([&tiles, tile_index, block_width4x4, tile_columns,
534                             superblock_rows, frame_scratch_buffer, post_filter,
535                             &pending_jobs]() {
536         DecodeSuperBlockRowInTile(
537             tiles, tile_index, 0, block_width4x4, tile_columns, superblock_rows,
538             frame_scratch_buffer, post_filter, &pending_jobs);
539         pending_jobs.Decrement();
540       });
541     }
542   }
543 
544   // Current thread will do the post filters.
545   std::condition_variable* const superblock_row_progress_condvar =
546       frame_scratch_buffer->superblock_row_progress_condvar.get();
547   const std::unique_ptr<Tile>* tile_row_base = &tiles[0];
548   for (int row4x4 = 0, index = 0; row4x4 < frame_header.rows4x4;
549        row4x4 += block_width4x4, ++index) {
550     if (!tile_row_base[0]->IsRow4x4Inside(row4x4)) {
551       tile_row_base += tile_columns;
552     }
553     {
554       std::unique_lock<std::mutex> lock(
555           frame_scratch_buffer->superblock_row_mutex);
556       while (superblock_row_progress[index] != tile_columns &&
557              !frame_scratch_buffer->tile_decoding_failed) {
558         superblock_row_progress_condvar[index].wait(lock);
559       }
560       if (frame_scratch_buffer->tile_decoding_failed) break;
561     }
562     if (post_filter->DoDeblock()) {
563       // Apply deblocking filter for the tile boundaries of this superblock row.
564       // The deblocking filter for the internal blocks will be applied in the
565       // tile worker threads. In this thread, we will only have to apply
566       // deblocking filter for the tile boundaries.
567       ApplyDeblockingFilterForTileBoundaries(
568           post_filter, tile_row_base, frame_header, row4x4, block_width4x4,
569           tile_columns, decode_entire_tiles_in_worker_threads);
570     }
571     // Apply all the post filters other than deblocking.
572     const int progress_row = post_filter->ApplyFilteringForOneSuperBlockRow(
573         row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
574         /*do_deblock=*/false);
575     if (progress_row >= 0) {
576       current_frame->SetProgress(progress_row);
577     }
578   }
579   // Wait until all the pending jobs are done. This ensures that all the tiles
580   // have been decoded and wrapped up.
581   pending_jobs.Wait();
582   {
583     std::lock_guard<std::mutex> lock(
584         frame_scratch_buffer->superblock_row_mutex);
585     if (frame_scratch_buffer->tile_decoding_failed) {
586       return kLibgav1StatusUnknownError;
587     }
588   }
589 
590   current_frame->SetFrameState(kFrameStateDecoded);
591   return kStatusOk;
592 }
593 
CalcFrameMeanQp(const Vector<std::unique_ptr<Tile>> & tiles)594 int CalcFrameMeanQp(const Vector<std::unique_ptr<Tile>>& tiles) {
595   int cumulative_frame_qp = 0;
596   for (const auto& tile : tiles) {
597     cumulative_frame_qp += tile->GetTileMeanQP();
598   }
599   const int frame_mean_qp = static_cast<int>(
600       std::round(cumulative_frame_qp / static_cast<float>(tiles.size())));
601   if (frame_mean_qp > 255 || frame_mean_qp < 0) {
602     LIBGAV1_DLOG(
603         WARNING,
604         "The mean QP value for the frame is %d, i.e., out of bounds for AV1.",
605         frame_mean_qp);
606   }
607   return frame_mean_qp;
608 }
609 
610 }  // namespace
611 
612 // static
Create(const DecoderSettings * settings,std::unique_ptr<DecoderImpl> * output)613 StatusCode DecoderImpl::Create(const DecoderSettings* settings,
614                                std::unique_ptr<DecoderImpl>* output) {
615   if (settings->threads <= 0) {
616     LIBGAV1_DLOG(ERROR, "Invalid settings->threads: %d.", settings->threads);
617     return kStatusInvalidArgument;
618   }
619   if (settings->frame_parallel) {
620     if (settings->release_input_buffer == nullptr) {
621       LIBGAV1_DLOG(ERROR,
622                    "release_input_buffer callback must not be null when "
623                    "frame_parallel is true.");
624       return kStatusInvalidArgument;
625     }
626   }
627   if (settings->parse_only &&
628       (settings->threads > 1 || settings->frame_parallel)) {
629     LIBGAV1_DLOG(
630         ERROR,
631         "The number of threads cannot be more than 1 (default) and "
632         "the frame_parallel option cannot be used in the parse_only mode.");
633     return kStatusInvalidArgument;
634   }
635   std::unique_ptr<DecoderImpl> impl(new (std::nothrow) DecoderImpl(settings));
636   if (impl == nullptr) {
637     LIBGAV1_DLOG(ERROR, "Failed to allocate DecoderImpl.");
638     return kStatusOutOfMemory;
639   }
640   const StatusCode status = impl->Init();
641   if (status != kStatusOk) return status;
642   *output = std::move(impl);
643   return kStatusOk;
644 }
645 
DecoderImpl(const DecoderSettings * settings)646 DecoderImpl::DecoderImpl(const DecoderSettings* settings)
647     : buffer_pool_(settings->on_frame_buffer_size_changed,
648                    settings->get_frame_buffer, settings->release_frame_buffer,
649                    settings->callback_private_data),
650       settings_(*settings) {
651   dsp::DspInit();
652 }
653 
~DecoderImpl()654 DecoderImpl::~DecoderImpl() {
655   // Clean up and wait until all the threads have stopped. We just have to pass
656   // in a dummy status that is not kStatusOk or kStatusTryAgain to trigger the
657   // path that clears all the threads and structs.
658   SignalFailure(kStatusUnknownError);
659   // Release any other frame buffer references that we may be holding on to.
660   ReleaseOutputFrame();
661   output_frame_queue_.Clear();
662   for (auto& reference_frame : state_.reference_frame) {
663     reference_frame = nullptr;
664   }
665 }
666 
Init()667 StatusCode DecoderImpl::Init() {
668   if (!output_frame_queue_.Init(kMaxLayers)) {
669     LIBGAV1_DLOG(ERROR, "output_frame_queue_.Init() failed.");
670     return kStatusOutOfMemory;
671   }
672   return kStatusOk;
673 }
674 
InitializeFrameThreadPoolAndTemporalUnitQueue(const uint8_t * data,size_t size)675 StatusCode DecoderImpl::InitializeFrameThreadPoolAndTemporalUnitQueue(
676     const uint8_t* data, size_t size) {
677   is_frame_parallel_ = false;
678   if (settings_.frame_parallel) {
679     DecoderState state;
680     std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
681         data, size, settings_.operating_point, &buffer_pool_, &state));
682     if (obu == nullptr) {
683       LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
684       return kStatusOutOfMemory;
685     }
686     RefCountedBufferPtr current_frame;
687     const StatusCode status = obu->ParseOneFrame(&current_frame);
688     if (status != kStatusOk) {
689       LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
690       return status;
691     }
692     current_frame = nullptr;
693     // We assume that the first frame that was parsed will contain the frame
694     // header. This assumption is usually true in practice. So we will simply
695     // not use frame parallel mode if this is not the case.
696     if (settings_.threads > 1 &&
697         !InitializeThreadPoolsForFrameParallel(
698             settings_.threads, obu->frame_header().tile_info.tile_count,
699             obu->frame_header().tile_info.tile_columns, &frame_thread_pool_,
700             &frame_scratch_buffer_pool_)) {
701       return kStatusOutOfMemory;
702     }
703   }
704   const int max_allowed_frames =
705       (frame_thread_pool_ != nullptr) ? frame_thread_pool_->num_threads() : 1;
706   assert(max_allowed_frames > 0);
707   if (!temporal_units_.Init(max_allowed_frames)) {
708     LIBGAV1_DLOG(ERROR, "temporal_units_.Init() failed.");
709     return kStatusOutOfMemory;
710   }
711   is_frame_parallel_ = frame_thread_pool_ != nullptr;
712   return kStatusOk;
713 }
714 
EnqueueFrame(const uint8_t * data,size_t size,int64_t user_private_data,void * buffer_private_data)715 StatusCode DecoderImpl::EnqueueFrame(const uint8_t* data, size_t size,
716                                      int64_t user_private_data,
717                                      void* buffer_private_data) {
718   if (data == nullptr || size == 0) return kStatusInvalidArgument;
719   if (HasFailure()) return kStatusUnknownError;
720   if (!seen_first_frame_) {
721     seen_first_frame_ = true;
722     const StatusCode status =
723         InitializeFrameThreadPoolAndTemporalUnitQueue(data, size);
724     if (status != kStatusOk) {
725       return SignalFailure(status);
726     }
727   }
728   if (temporal_units_.Full()) {
729     return kStatusTryAgain;
730   }
731   if (is_frame_parallel_) {
732     return ParseAndSchedule(data, size, user_private_data, buffer_private_data);
733   }
734   TemporalUnit temporal_unit(data, size, user_private_data,
735                              buffer_private_data);
736   temporal_units_.Push(std::move(temporal_unit));
737   return kStatusOk;
738 }
739 
SignalFailure(StatusCode status)740 StatusCode DecoderImpl::SignalFailure(StatusCode status) {
741   if (status == kStatusOk || status == kStatusTryAgain) return status;
742   // Set the |failure_status_| first so that any pending jobs in
743   // |frame_thread_pool_| will exit right away when the thread pool is being
744   // released below.
745   {
746     std::lock_guard<std::mutex> lock(mutex_);
747     failure_status_ = status;
748   }
749   // Make sure all waiting threads exit.
750   buffer_pool_.Abort();
751   frame_thread_pool_ = nullptr;
752   while (!temporal_units_.Empty()) {
753     if (settings_.release_input_buffer != nullptr) {
754       settings_.release_input_buffer(
755           settings_.callback_private_data,
756           temporal_units_.Front().buffer_private_data);
757     }
758     temporal_units_.Pop();
759   }
760   return status;
761 }
762 
763 // DequeueFrame() follows the following policy to avoid holding unnecessary
764 // frame buffer references in output_frame_: output_frame_ must be null when
765 // DequeueFrame() returns false.
DequeueFrame(const DecoderBuffer ** out_ptr)766 StatusCode DecoderImpl::DequeueFrame(const DecoderBuffer** out_ptr) {
767   if (out_ptr == nullptr) {
768     LIBGAV1_DLOG(ERROR, "Invalid argument: out_ptr == nullptr.");
769     return kStatusInvalidArgument;
770   }
771   // We assume a call to DequeueFrame() indicates that the caller is no longer
772   // using the previous output frame, so we can release it.
773   ReleaseOutputFrame();
774   if (temporal_units_.Empty()) {
775     // No input frames to decode.
776     *out_ptr = nullptr;
777     return kStatusNothingToDequeue;
778   }
779   TemporalUnit& temporal_unit = temporal_units_.Front();
780   if (!is_frame_parallel_) {
781     // If |output_frame_queue_| is not empty, then return the first frame from
782     // that queue.
783     if (!output_frame_queue_.Empty()) {
784       RefCountedBufferPtr frame = std::move(output_frame_queue_.Front());
785       output_frame_queue_.Pop();
786       buffer_.user_private_data = temporal_unit.user_private_data;
787       if (output_frame_queue_.Empty()) {
788         temporal_units_.Pop();
789       }
790       const StatusCode status = CopyFrameToOutputBuffer(frame);
791       if (status != kStatusOk) {
792         return status;
793       }
794       *out_ptr = &buffer_;
795       return kStatusOk;
796     }
797     // Decode the next available temporal unit and return.
798     const StatusCode status = DecodeTemporalUnit(temporal_unit, out_ptr);
799     if (status != kStatusOk) {
800       // In case of failure, discard all the output frames that we may be
801       // holding on references to.
802       output_frame_queue_.Clear();
803     }
804     if (settings_.release_input_buffer != nullptr) {
805       settings_.release_input_buffer(settings_.callback_private_data,
806                                      temporal_unit.buffer_private_data);
807     }
808     if (output_frame_queue_.Empty()) {
809       temporal_units_.Pop();
810     }
811     return status;
812   }
813   {
814     std::unique_lock<std::mutex> lock(mutex_);
815     if (settings_.blocking_dequeue) {
816       while (!temporal_unit.decoded && failure_status_ == kStatusOk) {
817         decoded_condvar_.wait(lock);
818       }
819     } else {
820       if (!temporal_unit.decoded && failure_status_ == kStatusOk) {
821         return kStatusTryAgain;
822       }
823     }
824     if (failure_status_ != kStatusOk) {
825       const StatusCode failure_status = failure_status_;
826       lock.unlock();
827       return SignalFailure(failure_status);
828     }
829   }
830   if (settings_.release_input_buffer != nullptr &&
831       !temporal_unit.released_input_buffer) {
832     temporal_unit.released_input_buffer = true;
833     settings_.release_input_buffer(settings_.callback_private_data,
834                                    temporal_unit.buffer_private_data);
835   }
836   if (temporal_unit.status != kStatusOk) {
837     temporal_units_.Pop();
838     return SignalFailure(temporal_unit.status);
839   }
840   if (!temporal_unit.has_displayable_frame) {
841     *out_ptr = nullptr;
842     temporal_units_.Pop();
843     return kStatusOk;
844   }
845   assert(temporal_unit.output_layer_count > 0);
846   StatusCode status = CopyFrameToOutputBuffer(
847       temporal_unit.output_layers[temporal_unit.output_layer_count - 1].frame);
848   temporal_unit.output_layers[temporal_unit.output_layer_count - 1].frame =
849       nullptr;
850   if (status != kStatusOk) {
851     temporal_units_.Pop();
852     return SignalFailure(status);
853   }
854   buffer_.user_private_data = temporal_unit.user_private_data;
855   *out_ptr = &buffer_;
856   if (--temporal_unit.output_layer_count == 0) {
857     temporal_units_.Pop();
858   }
859   return kStatusOk;
860 }
861 
GetFrameQps()862 std::vector<int> DecoderImpl::GetFrameQps() { return frame_mean_qps_; }
863 
ParseAndSchedule(const uint8_t * data,size_t size,int64_t user_private_data,void * buffer_private_data)864 StatusCode DecoderImpl::ParseAndSchedule(const uint8_t* data, size_t size,
865                                          int64_t user_private_data,
866                                          void* buffer_private_data) {
867   TemporalUnit temporal_unit(data, size, user_private_data,
868                              buffer_private_data);
869   std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
870       temporal_unit.data, temporal_unit.size, settings_.operating_point,
871       &buffer_pool_, &state_));
872   if (obu == nullptr) {
873     LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
874     return kStatusOutOfMemory;
875   }
876   if (has_sequence_header_) {
877     obu->set_sequence_header(sequence_header_);
878   }
879   StatusCode status;
880   int position_in_temporal_unit = 0;
881   while (obu->HasData()) {
882     RefCountedBufferPtr current_frame;
883     status = obu->ParseOneFrame(&current_frame);
884     if (status != kStatusOk) {
885       LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
886       return status;
887     }
888     if (!MaybeInitializeQuantizerMatrix(obu->frame_header())) {
889       LIBGAV1_DLOG(ERROR, "InitializeQuantizerMatrix() failed.");
890       return kStatusOutOfMemory;
891     }
892     if (!MaybeInitializeWedgeMasks(obu->frame_header().frame_type)) {
893       LIBGAV1_DLOG(ERROR, "InitializeWedgeMasks() failed.");
894       return kStatusOutOfMemory;
895     }
896     if (IsNewSequenceHeader(*obu)) {
897       const ObuSequenceHeader& sequence_header = obu->sequence_header();
898       const Libgav1ImageFormat image_format =
899           ComposeImageFormat(sequence_header.color_config.is_monochrome,
900                              sequence_header.color_config.subsampling_x,
901                              sequence_header.color_config.subsampling_y);
902       const int max_bottom_border = GetBottomBorderPixels(
903           /*do_cdef=*/true, /*do_restoration=*/true,
904           /*do_superres=*/true, sequence_header.color_config.subsampling_y);
905       // TODO(vigneshv): This may not be the right place to call this callback
906       // for the frame parallel case. Investigate and fix it.
907       if (!buffer_pool_.OnFrameBufferSizeChanged(
908               sequence_header.color_config.bitdepth, image_format,
909               sequence_header.max_frame_width, sequence_header.max_frame_height,
910               kBorderPixels, kBorderPixels, kBorderPixels, max_bottom_border)) {
911         LIBGAV1_DLOG(ERROR, "buffer_pool_.OnFrameBufferSizeChanged failed.");
912         return kStatusUnknownError;
913       }
914     }
915     // This can happen when there are multiple spatial/temporal layers and if
916     // all the layers are outside the current operating point.
917     if (current_frame == nullptr) {
918       continue;
919     }
920     // Note that we cannot set EncodedFrame.temporal_unit here. It will be set
921     // in the code below after |temporal_unit| is std::move'd into the
922     // |temporal_units_| queue.
923     if (!temporal_unit.frames.emplace_back(obu.get(), state_, current_frame,
924                                            position_in_temporal_unit++)) {
925       LIBGAV1_DLOG(ERROR, "temporal_unit.frames.emplace_back failed.");
926       return kStatusOutOfMemory;
927     }
928     state_.UpdateReferenceFrames(current_frame,
929                                  obu->frame_header().refresh_frame_flags);
930   }
931   // This function cannot fail after this point. So it is okay to move the
932   // |temporal_unit| into |temporal_units_| queue.
933   temporal_units_.Push(std::move(temporal_unit));
934   if (temporal_units_.Back().frames.empty()) {
935     std::lock_guard<std::mutex> lock(mutex_);
936     temporal_units_.Back().has_displayable_frame = false;
937     temporal_units_.Back().decoded = true;
938     return kStatusOk;
939   }
940   for (auto& frame : temporal_units_.Back().frames) {
941     EncodedFrame* const encoded_frame = &frame;
942     encoded_frame->temporal_unit = &temporal_units_.Back();
943     frame_thread_pool_->Schedule([this, encoded_frame]() {
944       if (HasFailure()) return;
945       const StatusCode status = DecodeFrame(encoded_frame);
946       encoded_frame->state = {};
947       encoded_frame->frame = nullptr;
948       TemporalUnit& temporal_unit = *encoded_frame->temporal_unit;
949       std::lock_guard<std::mutex> lock(mutex_);
950       if (failure_status_ != kStatusOk) return;
951       // temporal_unit's status defaults to kStatusOk. So we need to set it only
952       // on error. If |failure_status_| is not kStatusOk at this point, it means
953       // that there has already been a failure. So we don't care about this
954       // subsequent failure.  We will simply return the error code of the first
955       // failure.
956       if (status != kStatusOk) {
957         temporal_unit.status = status;
958         if (failure_status_ == kStatusOk) {
959           failure_status_ = status;
960         }
961       }
962       temporal_unit.decoded =
963           ++temporal_unit.decoded_count == temporal_unit.frames.size();
964       if (temporal_unit.decoded && settings_.output_all_layers &&
965           temporal_unit.output_layer_count > 1) {
966         std::sort(
967             temporal_unit.output_layers,
968             temporal_unit.output_layers + temporal_unit.output_layer_count);
969       }
970       if (temporal_unit.decoded || failure_status_ != kStatusOk) {
971         decoded_condvar_.notify_one();
972       }
973     });
974   }
975   return kStatusOk;
976 }
977 
DecodeFrame(EncodedFrame * const encoded_frame)978 StatusCode DecoderImpl::DecodeFrame(EncodedFrame* const encoded_frame) {
979   const ObuSequenceHeader& sequence_header = encoded_frame->sequence_header;
980   const ObuFrameHeader& frame_header = encoded_frame->frame_header;
981   RefCountedBufferPtr current_frame = std::move(encoded_frame->frame);
982 
983   std::unique_ptr<FrameScratchBuffer> frame_scratch_buffer =
984       frame_scratch_buffer_pool_.Get();
985   if (frame_scratch_buffer == nullptr) {
986     LIBGAV1_DLOG(ERROR, "Error when getting FrameScratchBuffer.");
987     return kStatusOutOfMemory;
988   }
989   // |frame_scratch_buffer| will be released when this local variable goes out
990   // of scope (i.e.) on any return path in this function.
991   FrameScratchBufferReleaser frame_scratch_buffer_releaser(
992       &frame_scratch_buffer_pool_, &frame_scratch_buffer);
993 
994   StatusCode status;
995   if (!frame_header.show_existing_frame) {
996     if (encoded_frame->tile_buffers.empty()) {
997       // This means that the last call to ParseOneFrame() did not actually
998       // have any tile groups. This could happen in rare cases (for example,
999       // if there is a Metadata OBU after the TileGroup OBU). We currently do
1000       // not have a reason to handle those cases, so we simply continue.
1001       return kStatusOk;
1002     }
1003     status = DecodeTiles(sequence_header, frame_header,
1004                          encoded_frame->tile_buffers, encoded_frame->state,
1005                          frame_scratch_buffer.get(), current_frame.get());
1006     if (status != kStatusOk) {
1007       return status;
1008     }
1009   } else {
1010     if (!current_frame->WaitUntilDecoded()) {
1011       return kStatusUnknownError;
1012     }
1013   }
1014   if (!frame_header.show_frame && !frame_header.show_existing_frame) {
1015     // This frame is not displayable. Not an error.
1016     return kStatusOk;
1017   }
1018   RefCountedBufferPtr film_grain_frame;
1019   status = ApplyFilmGrain(
1020       sequence_header, frame_header, current_frame, &film_grain_frame,
1021       frame_scratch_buffer->threading_strategy.thread_pool());
1022   if (status != kStatusOk) {
1023     return status;
1024   }
1025 
1026   TemporalUnit& temporal_unit = *encoded_frame->temporal_unit;
1027   std::lock_guard<std::mutex> lock(mutex_);
1028   if (temporal_unit.has_displayable_frame && !settings_.output_all_layers) {
1029     assert(temporal_unit.output_frame_position >= 0);
1030     // A displayable frame was already found in this temporal unit. This can
1031     // happen if there are multiple spatial/temporal layers. Since
1032     // |settings_.output_all_layers| is false, we will output only the last
1033     // displayable frame.
1034     if (temporal_unit.output_frame_position >
1035         encoded_frame->position_in_temporal_unit) {
1036       return kStatusOk;
1037     }
1038     // Replace any output frame that we may have seen before with the current
1039     // frame.
1040     assert(temporal_unit.output_layer_count == 1);
1041     --temporal_unit.output_layer_count;
1042   }
1043   temporal_unit.has_displayable_frame = true;
1044   temporal_unit.output_layers[temporal_unit.output_layer_count].frame =
1045       std::move(film_grain_frame);
1046   temporal_unit.output_layers[temporal_unit.output_layer_count]
1047       .position_in_temporal_unit = encoded_frame->position_in_temporal_unit;
1048   ++temporal_unit.output_layer_count;
1049   temporal_unit.output_frame_position =
1050       encoded_frame->position_in_temporal_unit;
1051   return kStatusOk;
1052 }
1053 
DecodeTemporalUnit(const TemporalUnit & temporal_unit,const DecoderBuffer ** out_ptr)1054 StatusCode DecoderImpl::DecodeTemporalUnit(const TemporalUnit& temporal_unit,
1055                                            const DecoderBuffer** out_ptr) {
1056   std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
1057       temporal_unit.data, temporal_unit.size, settings_.operating_point,
1058       &buffer_pool_, &state_));
1059   if (obu == nullptr) {
1060     LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
1061     return kStatusOutOfMemory;
1062   }
1063   frame_mean_qps_.clear();
1064   if (has_sequence_header_) {
1065     obu->set_sequence_header(sequence_header_);
1066   }
1067   StatusCode status;
1068   std::unique_ptr<FrameScratchBuffer> frame_scratch_buffer =
1069       frame_scratch_buffer_pool_.Get();
1070   if (frame_scratch_buffer == nullptr) {
1071     LIBGAV1_DLOG(ERROR, "Error when getting FrameScratchBuffer.");
1072     return kStatusOutOfMemory;
1073   }
1074   // |frame_scratch_buffer| will be released when this local variable goes out
1075   // of scope (i.e.) on any return path in this function.
1076   FrameScratchBufferReleaser frame_scratch_buffer_releaser(
1077       &frame_scratch_buffer_pool_, &frame_scratch_buffer);
1078 
1079   while (obu->HasData()) {
1080     RefCountedBufferPtr current_frame;
1081     status = obu->ParseOneFrame(&current_frame);
1082     if (status != kStatusOk) {
1083       LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
1084       return status;
1085     }
1086     if (!MaybeInitializeQuantizerMatrix(obu->frame_header())) {
1087       LIBGAV1_DLOG(ERROR, "InitializeQuantizerMatrix() failed.");
1088       return kStatusOutOfMemory;
1089     }
1090     if (!MaybeInitializeWedgeMasks(obu->frame_header().frame_type)) {
1091       LIBGAV1_DLOG(ERROR, "InitializeWedgeMasks() failed.");
1092       return kStatusOutOfMemory;
1093     }
1094     if (IsNewSequenceHeader(*obu)) {
1095       const ObuSequenceHeader& sequence_header = obu->sequence_header();
1096       const Libgav1ImageFormat image_format =
1097           ComposeImageFormat(sequence_header.color_config.is_monochrome,
1098                              sequence_header.color_config.subsampling_x,
1099                              sequence_header.color_config.subsampling_y);
1100       const int max_bottom_border = GetBottomBorderPixels(
1101           /*do_cdef=*/true, /*do_restoration=*/true,
1102           /*do_superres=*/true, sequence_header.color_config.subsampling_y);
1103       if (!buffer_pool_.OnFrameBufferSizeChanged(
1104               sequence_header.color_config.bitdepth, image_format,
1105               sequence_header.max_frame_width, sequence_header.max_frame_height,
1106               kBorderPixels, kBorderPixels, kBorderPixels, max_bottom_border)) {
1107         LIBGAV1_DLOG(ERROR, "buffer_pool_.OnFrameBufferSizeChanged failed.");
1108         return kStatusUnknownError;
1109       }
1110     }
1111     if (!obu->frame_header().show_existing_frame) {
1112       if (obu->tile_buffers().empty()) {
1113         // This means that the last call to ParseOneFrame() did not actually
1114         // have any tile groups. This could happen in rare cases (for example,
1115         // if there is a Metadata OBU after the TileGroup OBU). We currently do
1116         // not have a reason to handle those cases, so we simply continue.
1117         continue;
1118       }
1119       status = DecodeTiles(obu->sequence_header(), obu->frame_header(),
1120                            obu->tile_buffers(), state_,
1121                            frame_scratch_buffer.get(), current_frame.get());
1122       if (settings_.parse_only) {
1123         frame_mean_qps_.push_back(frame_mean_qp_);
1124       }
1125       if (status != kStatusOk) {
1126         return status;
1127       }
1128     }
1129     state_.UpdateReferenceFrames(current_frame,
1130                                  obu->frame_header().refresh_frame_flags);
1131     if (obu->frame_header().show_frame ||
1132         obu->frame_header().show_existing_frame) {
1133       if (!output_frame_queue_.Empty() && !settings_.output_all_layers) {
1134         // There is more than one displayable frame in the current operating
1135         // point and |settings_.output_all_layers| is false. In this case, we
1136         // simply return the last displayable frame as the output frame and
1137         // ignore the rest.
1138         assert(output_frame_queue_.Size() == 1);
1139         output_frame_queue_.Pop();
1140       }
1141       if (!settings_.parse_only) {
1142         RefCountedBufferPtr film_grain_frame;
1143         status = ApplyFilmGrain(
1144             obu->sequence_header(), obu->frame_header(), current_frame,
1145             &film_grain_frame,
1146             frame_scratch_buffer->threading_strategy.film_grain_thread_pool());
1147         if (status != kStatusOk) return status;
1148         output_frame_queue_.Push(std::move(film_grain_frame));
1149       }
1150     }
1151   }
1152   if (output_frame_queue_.Empty()) {
1153     // No displayable frame in the temporal unit. Not an error.
1154     *out_ptr = nullptr;
1155     return kStatusOk;
1156   }
1157   status = CopyFrameToOutputBuffer(output_frame_queue_.Front());
1158   output_frame_queue_.Pop();
1159   if (status != kStatusOk) {
1160     return status;
1161   }
1162   buffer_.user_private_data = temporal_unit.user_private_data;
1163   *out_ptr = &buffer_;
1164   return kStatusOk;
1165 }
1166 
CopyFrameToOutputBuffer(const RefCountedBufferPtr & frame)1167 StatusCode DecoderImpl::CopyFrameToOutputBuffer(
1168     const RefCountedBufferPtr& frame) {
1169   YuvBuffer* yuv_buffer = frame->buffer();
1170 
1171   buffer_.chroma_sample_position = frame->chroma_sample_position();
1172 
1173   if (yuv_buffer->is_monochrome()) {
1174     buffer_.image_format = kImageFormatMonochrome400;
1175   } else {
1176     if (yuv_buffer->subsampling_x() == 0 && yuv_buffer->subsampling_y() == 0) {
1177       buffer_.image_format = kImageFormatYuv444;
1178     } else if (yuv_buffer->subsampling_x() == 1 &&
1179                yuv_buffer->subsampling_y() == 0) {
1180       buffer_.image_format = kImageFormatYuv422;
1181     } else if (yuv_buffer->subsampling_x() == 1 &&
1182                yuv_buffer->subsampling_y() == 1) {
1183       buffer_.image_format = kImageFormatYuv420;
1184     } else {
1185       LIBGAV1_DLOG(ERROR,
1186                    "Invalid chroma subsampling values: cannot determine buffer "
1187                    "image format.");
1188       return kStatusInvalidArgument;
1189     }
1190   }
1191   buffer_.color_range = sequence_header_.color_config.color_range;
1192   buffer_.color_primary = sequence_header_.color_config.color_primary;
1193   buffer_.transfer_characteristics =
1194       sequence_header_.color_config.transfer_characteristics;
1195   buffer_.matrix_coefficients =
1196       sequence_header_.color_config.matrix_coefficients;
1197 
1198   buffer_.bitdepth = yuv_buffer->bitdepth();
1199   const int num_planes =
1200       yuv_buffer->is_monochrome() ? kMaxPlanesMonochrome : kMaxPlanes;
1201   int plane = kPlaneY;
1202   for (; plane < num_planes; ++plane) {
1203     buffer_.stride[plane] = yuv_buffer->stride(plane);
1204     buffer_.plane[plane] = yuv_buffer->data(plane);
1205     buffer_.displayed_width[plane] = yuv_buffer->width(plane);
1206     buffer_.displayed_height[plane] = yuv_buffer->height(plane);
1207   }
1208   for (; plane < kMaxPlanes; ++plane) {
1209     buffer_.stride[plane] = 0;
1210     buffer_.plane[plane] = nullptr;
1211     buffer_.displayed_width[plane] = 0;
1212     buffer_.displayed_height[plane] = 0;
1213   }
1214   buffer_.spatial_id = frame->spatial_id();
1215   buffer_.temporal_id = frame->temporal_id();
1216   buffer_.buffer_private_data = frame->buffer_private_data();
1217   if (frame->hdr_cll_set()) {
1218     buffer_.has_hdr_cll = 1;
1219     buffer_.hdr_cll = frame->hdr_cll();
1220   } else {
1221     buffer_.has_hdr_cll = 0;
1222   }
1223   if (frame->hdr_mdcv_set()) {
1224     buffer_.has_hdr_mdcv = 1;
1225     buffer_.hdr_mdcv = frame->hdr_mdcv();
1226   } else {
1227     buffer_.has_hdr_mdcv = 0;
1228   }
1229   if (frame->itut_t35_set()) {
1230     buffer_.has_itut_t35 = 1;
1231     buffer_.itut_t35 = frame->itut_t35();
1232   } else {
1233     buffer_.has_itut_t35 = 0;
1234   }
1235   output_frame_ = frame;
1236   return kStatusOk;
1237 }
1238 
ReleaseOutputFrame()1239 void DecoderImpl::ReleaseOutputFrame() {
1240   for (auto& plane : buffer_.plane) {
1241     plane = nullptr;
1242   }
1243   output_frame_ = nullptr;
1244 }
1245 
DecodeTiles(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<TileBuffer> & tile_buffers,const DecoderState & state,FrameScratchBuffer * const frame_scratch_buffer,RefCountedBuffer * const current_frame)1246 StatusCode DecoderImpl::DecodeTiles(
1247     const ObuSequenceHeader& sequence_header,
1248     const ObuFrameHeader& frame_header, const Vector<TileBuffer>& tile_buffers,
1249     const DecoderState& state, FrameScratchBuffer* const frame_scratch_buffer,
1250     RefCountedBuffer* const current_frame) {
1251   frame_scratch_buffer->tile_scratch_buffer_pool.Reset(
1252       sequence_header.color_config.bitdepth);
1253   if (!frame_scratch_buffer->loop_restoration_info.Reset(
1254           &frame_header.loop_restoration, frame_header.upscaled_width,
1255           frame_header.height, sequence_header.color_config.subsampling_x,
1256           sequence_header.color_config.subsampling_y,
1257           sequence_header.color_config.is_monochrome)) {
1258     LIBGAV1_DLOG(ERROR,
1259                  "Failed to allocate memory for loop restoration info units.");
1260     return kStatusOutOfMemory;
1261   }
1262   ThreadingStrategy& threading_strategy =
1263       frame_scratch_buffer->threading_strategy;
1264   if (!is_frame_parallel_ &&
1265       !threading_strategy.Reset(frame_header, settings_.threads)) {
1266     return kStatusOutOfMemory;
1267   }
1268   const bool do_cdef =
1269       PostFilter::DoCdef(frame_header, settings_.post_filter_mask);
1270   const int num_planes = sequence_header.color_config.is_monochrome
1271                              ? kMaxPlanesMonochrome
1272                              : kMaxPlanes;
1273   const bool do_restoration = PostFilter::DoRestoration(
1274       frame_header.loop_restoration, settings_.post_filter_mask, num_planes);
1275   const bool do_superres =
1276       PostFilter::DoSuperRes(frame_header, settings_.post_filter_mask);
1277   // Use kBorderPixels for the left, right, and top borders. Only the bottom
1278   // border may need to be bigger. Cdef border is needed only if we apply Cdef
1279   // without multithreading.
1280   const int bottom_border = GetBottomBorderPixels(
1281       do_cdef && threading_strategy.post_filter_thread_pool() == nullptr,
1282       do_restoration, do_superres, sequence_header.color_config.subsampling_y);
1283   current_frame->set_chroma_sample_position(
1284       sequence_header.color_config.chroma_sample_position);
1285   if (!current_frame->Realloc(sequence_header.color_config.bitdepth,
1286                               sequence_header.color_config.is_monochrome,
1287                               frame_header.upscaled_width, frame_header.height,
1288                               sequence_header.color_config.subsampling_x,
1289                               sequence_header.color_config.subsampling_y,
1290                               /*left_border=*/kBorderPixels,
1291                               /*right_border=*/kBorderPixels,
1292                               /*top_border=*/kBorderPixels, bottom_border)) {
1293     LIBGAV1_DLOG(ERROR, "Failed to allocate memory for the decoder buffer.");
1294     return kStatusOutOfMemory;
1295   }
1296   if (frame_header.cdef.bits > 0) {
1297     if (!frame_scratch_buffer->cdef_index.Reset(
1298             DivideBy16(frame_header.rows4x4 + kMaxBlockHeight4x4),
1299             DivideBy16(frame_header.columns4x4 + kMaxBlockWidth4x4),
1300             /*zero_initialize=*/false)) {
1301       LIBGAV1_DLOG(ERROR, "Failed to allocate memory for cdef index.");
1302       return kStatusOutOfMemory;
1303     }
1304   }
1305   if (do_cdef) {
1306     if (!frame_scratch_buffer->cdef_skip.Reset(
1307             DivideBy2(frame_header.rows4x4 + kMaxBlockHeight4x4),
1308             DivideBy16(frame_header.columns4x4 + kMaxBlockWidth4x4),
1309             /*zero_initialize=*/true)) {
1310       LIBGAV1_DLOG(ERROR, "Failed to allocate memory for cdef skip.");
1311       return kStatusOutOfMemory;
1312     }
1313   }
1314   if (!frame_scratch_buffer->inter_transform_sizes.Reset(
1315           frame_header.rows4x4 + kMaxBlockHeight4x4,
1316           frame_header.columns4x4 + kMaxBlockWidth4x4,
1317           /*zero_initialize=*/false)) {
1318     LIBGAV1_DLOG(ERROR, "Failed to allocate memory for inter_transform_sizes.");
1319     return kStatusOutOfMemory;
1320   }
1321   if (frame_header.use_ref_frame_mvs) {
1322     if (!frame_scratch_buffer->motion_field.mv.Reset(
1323             DivideBy2(frame_header.rows4x4), DivideBy2(frame_header.columns4x4),
1324             /*zero_initialize=*/false) ||
1325         !frame_scratch_buffer->motion_field.reference_offset.Reset(
1326             DivideBy2(frame_header.rows4x4), DivideBy2(frame_header.columns4x4),
1327             /*zero_initialize=*/false)) {
1328       LIBGAV1_DLOG(ERROR,
1329                    "Failed to allocate memory for temporal motion vectors.");
1330       return kStatusOutOfMemory;
1331     }
1332 
1333     // For each motion vector, only mv[0] needs to be initialized to
1334     // kInvalidMvValue, mv[1] is not necessary to be initialized and can be
1335     // set to an arbitrary value. For simplicity, mv[1] is set to 0.
1336     // The following memory initialization of contiguous memory is very fast. It
1337     // is not recommended to make the initialization multi-threaded, unless the
1338     // memory which needs to be initialized in each thread is still contiguous.
1339     MotionVector invalid_mv;
1340     invalid_mv.mv[0] = kInvalidMvValue;
1341     invalid_mv.mv[1] = 0;
1342     MotionVector* const motion_field_mv =
1343         &frame_scratch_buffer->motion_field.mv[0][0];
1344     std::fill(motion_field_mv,
1345               motion_field_mv + frame_scratch_buffer->motion_field.mv.size(),
1346               invalid_mv);
1347   }
1348 
1349   // The addition of kMaxBlockHeight4x4 and kMaxBlockWidth4x4 is necessary so
1350   // that the block parameters cache can be filled in for the last row/column
1351   // without having to check for boundary conditions.
1352   if (!frame_scratch_buffer->block_parameters_holder.Reset(
1353           frame_header.rows4x4 + kMaxBlockHeight4x4,
1354           frame_header.columns4x4 + kMaxBlockWidth4x4)) {
1355     return kStatusOutOfMemory;
1356   }
1357   const dsp::Dsp* const dsp =
1358       dsp::GetDspTable(sequence_header.color_config.bitdepth);
1359   if (dsp == nullptr) {
1360     LIBGAV1_DLOG(ERROR, "Failed to get the dsp table for bitdepth %d.",
1361                  sequence_header.color_config.bitdepth);
1362     return kStatusInternalError;
1363   }
1364 
1365   const int tile_count = frame_header.tile_info.tile_count;
1366   assert(tile_count >= 1);
1367   Vector<std::unique_ptr<Tile>> tiles;
1368   if (!tiles.reserve(tile_count)) {
1369     LIBGAV1_DLOG(ERROR, "tiles.reserve(%d) failed.\n", tile_count);
1370     return kStatusOutOfMemory;
1371   }
1372 
1373   if (threading_strategy.row_thread_pool(0) != nullptr || is_frame_parallel_ ||
1374       settings_.parse_only) {
1375     if (frame_scratch_buffer->residual_buffer_pool == nullptr) {
1376       frame_scratch_buffer->residual_buffer_pool.reset(
1377           new (std::nothrow) ResidualBufferPool(
1378               sequence_header.use_128x128_superblock,
1379               sequence_header.color_config.subsampling_x,
1380               sequence_header.color_config.subsampling_y,
1381               sequence_header.color_config.bitdepth == 8 ? sizeof(int16_t)
1382                                                          : sizeof(int32_t)));
1383       if (frame_scratch_buffer->residual_buffer_pool == nullptr) {
1384         LIBGAV1_DLOG(ERROR, "Failed to allocate residual buffer.\n");
1385         return kStatusOutOfMemory;
1386       }
1387     } else {
1388       frame_scratch_buffer->residual_buffer_pool->Reset(
1389           sequence_header.use_128x128_superblock,
1390           sequence_header.color_config.subsampling_x,
1391           sequence_header.color_config.subsampling_y,
1392           sequence_header.color_config.bitdepth == 8 ? sizeof(int16_t)
1393                                                      : sizeof(int32_t));
1394     }
1395   }
1396 
1397   if (threading_strategy.post_filter_thread_pool() != nullptr && do_cdef) {
1398     // We need to store 4 rows per 64x64 unit.
1399     const int num_units =
1400         MultiplyBy4(RightShiftWithCeiling(frame_header.rows4x4, 4));
1401     // subsampling_y is set to zero irrespective of the actual frame's
1402     // subsampling since we need to store exactly |num_units| rows of the loop
1403     // restoration border pixels.
1404     if (!frame_scratch_buffer->cdef_border.Realloc(
1405             sequence_header.color_config.bitdepth,
1406             sequence_header.color_config.is_monochrome,
1407             MultiplyBy4(frame_header.columns4x4), num_units,
1408             sequence_header.color_config.subsampling_x,
1409             /*subsampling_y=*/0, kBorderPixels, kBorderPixels, kBorderPixels,
1410             kBorderPixels, nullptr, nullptr, nullptr)) {
1411       return kStatusOutOfMemory;
1412     }
1413   }
1414 
1415   if (do_restoration &&
1416       (do_cdef || threading_strategy.post_filter_thread_pool() != nullptr)) {
1417     // We need to store 4 rows per 64x64 unit.
1418     const int num_units =
1419         MultiplyBy4(RightShiftWithCeiling(frame_header.rows4x4, 4));
1420     // subsampling_y is set to zero irrespective of the actual frame's
1421     // subsampling since we need to store exactly |num_units| rows of the loop
1422     // restoration border pixels.
1423     if (!frame_scratch_buffer->loop_restoration_border.Realloc(
1424             sequence_header.color_config.bitdepth,
1425             sequence_header.color_config.is_monochrome,
1426             frame_header.upscaled_width, num_units,
1427             sequence_header.color_config.subsampling_x,
1428             /*subsampling_y=*/0, kBorderPixels, kBorderPixels, kBorderPixels,
1429             kBorderPixels, nullptr, nullptr, nullptr)) {
1430       return kStatusOutOfMemory;
1431     }
1432   }
1433 
1434   if (do_superres) {
1435     const int pixel_size = sequence_header.color_config.bitdepth == 8
1436                                ? sizeof(uint8_t)
1437                                : sizeof(uint16_t);
1438     const int coefficients_size = kSuperResFilterTaps *
1439                                   Align(frame_header.upscaled_width, 16) *
1440                                   pixel_size;
1441     if (!frame_scratch_buffer->superres_coefficients[kPlaneTypeY].Resize(
1442             coefficients_size)) {
1443       LIBGAV1_DLOG(ERROR,
1444                    "Failed to Resize superres_coefficients[kPlaneTypeY].");
1445       return kStatusOutOfMemory;
1446     }
1447 #if LIBGAV1_MSAN
1448     // Quiet SuperRes_NEON() msan warnings.
1449     memset(frame_scratch_buffer->superres_coefficients[kPlaneTypeY].get(), 0,
1450            coefficients_size);
1451 #endif
1452     const int uv_coefficients_size =
1453         kSuperResFilterTaps *
1454         Align(SubsampledValue(frame_header.upscaled_width, 1), 16) * pixel_size;
1455     if (!sequence_header.color_config.is_monochrome &&
1456         sequence_header.color_config.subsampling_x != 0 &&
1457         !frame_scratch_buffer->superres_coefficients[kPlaneTypeUV].Resize(
1458             uv_coefficients_size)) {
1459       LIBGAV1_DLOG(ERROR,
1460                    "Failed to Resize superres_coefficients[kPlaneTypeUV].");
1461       return kStatusOutOfMemory;
1462     }
1463 #if LIBGAV1_MSAN
1464     if (!sequence_header.color_config.is_monochrome &&
1465         sequence_header.color_config.subsampling_x != 0) {
1466       // Quiet SuperRes_NEON() msan warnings.
1467       memset(frame_scratch_buffer->superres_coefficients[kPlaneTypeUV].get(), 0,
1468              uv_coefficients_size);
1469     }
1470 #endif
1471   }
1472 
1473   if (do_superres && threading_strategy.post_filter_thread_pool() != nullptr) {
1474     const int num_threads =
1475         threading_strategy.post_filter_thread_pool()->num_threads() + 1;
1476     // subsampling_y is set to zero irrespective of the actual frame's
1477     // subsampling since we need to store exactly |num_threads| rows of the
1478     // down-scaled pixels.
1479     // Left and right borders are for line extension. They are doubled for the Y
1480     // plane to make sure the U and V planes have enough space after possible
1481     // subsampling.
1482     if (!frame_scratch_buffer->superres_line_buffer.Realloc(
1483             sequence_header.color_config.bitdepth,
1484             sequence_header.color_config.is_monochrome,
1485             MultiplyBy4(frame_header.columns4x4), num_threads,
1486             sequence_header.color_config.subsampling_x,
1487             /*subsampling_y=*/0, 2 * kSuperResHorizontalBorder,
1488             2 * (kSuperResHorizontalBorder + kSuperResHorizontalPadding), 0, 0,
1489             nullptr, nullptr, nullptr)) {
1490       LIBGAV1_DLOG(ERROR, "Failed to resize superres line buffer.\n");
1491       return kStatusOutOfMemory;
1492     }
1493   }
1494 
1495   if (is_frame_parallel_ && !IsIntraFrame(frame_header.frame_type)) {
1496     // We can parse the current frame if all the reference frames have been
1497     // parsed.
1498     for (const int index : frame_header.reference_frame_index) {
1499       if (!state.reference_frame[index]->WaitUntilParsed()) {
1500         return kStatusUnknownError;
1501       }
1502     }
1503   }
1504 
1505   // If prev_segment_ids is a null pointer, it is treated as if it pointed to
1506   // a segmentation map containing all 0s.
1507   const SegmentationMap* prev_segment_ids = nullptr;
1508   if (frame_header.primary_reference_frame == kPrimaryReferenceNone) {
1509     frame_scratch_buffer->symbol_decoder_context.Initialize(
1510         frame_header.quantizer.base_index);
1511   } else {
1512     const int index =
1513         frame_header
1514             .reference_frame_index[frame_header.primary_reference_frame];
1515     assert(index != -1);
1516     const RefCountedBuffer* prev_frame = state.reference_frame[index].get();
1517     frame_scratch_buffer->symbol_decoder_context = prev_frame->FrameContext();
1518     if (frame_header.segmentation.enabled &&
1519         prev_frame->columns4x4() == frame_header.columns4x4 &&
1520         prev_frame->rows4x4() == frame_header.rows4x4) {
1521       prev_segment_ids = prev_frame->segmentation_map();
1522     }
1523   }
1524 
1525   // The Tile class must make use of a separate buffer to store the unfiltered
1526   // pixels for the intra prediction of the next superblock row. This is done
1527   // only when one of the following conditions are true:
1528   //   * is_frame_parallel_ is true.
1529   //   * settings_.threads == 1.
1530   // In the non-frame-parallel multi-threaded case, we do not run the post
1531   // filters in the decode loop. So this buffer need not be used.
1532   const bool use_intra_prediction_buffer =
1533       is_frame_parallel_ || settings_.threads == 1;
1534   if (use_intra_prediction_buffer) {
1535     if (!frame_scratch_buffer->intra_prediction_buffers.Resize(
1536             frame_header.tile_info.tile_rows)) {
1537       LIBGAV1_DLOG(ERROR, "Failed to Resize intra_prediction_buffers.");
1538       return kStatusOutOfMemory;
1539     }
1540     IntraPredictionBuffer* const intra_prediction_buffers =
1541         frame_scratch_buffer->intra_prediction_buffers.get();
1542     for (int plane = kPlaneY; plane < num_planes; ++plane) {
1543       const int subsampling =
1544           (plane == kPlaneY) ? 0 : sequence_header.color_config.subsampling_x;
1545       const size_t intra_prediction_buffer_size =
1546           ((MultiplyBy4(frame_header.columns4x4) >> subsampling) *
1547            (sequence_header.color_config.bitdepth == 8 ? sizeof(uint8_t)
1548                                                        : sizeof(uint16_t)));
1549       for (int tile_row = 0; tile_row < frame_header.tile_info.tile_rows;
1550            ++tile_row) {
1551         if (!intra_prediction_buffers[tile_row][plane].Resize(
1552                 intra_prediction_buffer_size)) {
1553           LIBGAV1_DLOG(ERROR,
1554                        "Failed to allocate intra prediction buffer for tile "
1555                        "row %d plane %d.\n",
1556                        tile_row, plane);
1557           return kStatusOutOfMemory;
1558         }
1559       }
1560     }
1561   }
1562 
1563   PostFilter post_filter(frame_header, sequence_header, frame_scratch_buffer,
1564                          current_frame->buffer(), dsp,
1565                          settings_.post_filter_mask);
1566   SymbolDecoderContext saved_symbol_decoder_context;
1567   BlockingCounterWithStatus pending_tiles(tile_count);
1568   for (int tile_number = 0; tile_number < tile_count; ++tile_number) {
1569     std::unique_ptr<Tile> tile = Tile::Create(
1570         tile_number, tile_buffers[tile_number].data,
1571         tile_buffers[tile_number].size, sequence_header, frame_header,
1572         current_frame, state, frame_scratch_buffer, wedge_masks_,
1573         quantizer_matrix_, &saved_symbol_decoder_context, prev_segment_ids,
1574         &post_filter, dsp, threading_strategy.row_thread_pool(tile_number),
1575         &pending_tiles, is_frame_parallel_, use_intra_prediction_buffer,
1576         settings_.parse_only);
1577     if (tile == nullptr) {
1578       LIBGAV1_DLOG(ERROR, "Failed to create tile.");
1579       return kStatusOutOfMemory;
1580     }
1581     tiles.push_back_unchecked(std::move(tile));
1582   }
1583   assert(tiles.size() == static_cast<size_t>(tile_count));
1584   if (settings_.parse_only) {  // Parse only.
1585     if (ParseTiles(tiles) != kStatusOk) {
1586       return kStatusUnknownError;
1587     }
1588     frame_mean_qp_ = CalcFrameMeanQp(tiles);
1589   } else {  // Decode.
1590     if (is_frame_parallel_) {
1591       if (frame_scratch_buffer->threading_strategy.thread_pool() == nullptr) {
1592         return DecodeTilesFrameParallel(sequence_header, frame_header, tiles,
1593                                         saved_symbol_decoder_context,
1594                                         prev_segment_ids, frame_scratch_buffer,
1595                                         &post_filter, current_frame);
1596       }
1597       return DecodeTilesThreadedFrameParallel(
1598           sequence_header, frame_header, tiles, saved_symbol_decoder_context,
1599           prev_segment_ids, frame_scratch_buffer, &post_filter, current_frame);
1600     }
1601     StatusCode status;
1602     if (settings_.threads == 1) {
1603       status = DecodeTilesNonFrameParallel(sequence_header, frame_header, tiles,
1604                                            frame_scratch_buffer, &post_filter);
1605     } else {
1606       status = DecodeTilesThreadedNonFrameParallel(
1607           tiles, frame_scratch_buffer, &post_filter, &pending_tiles);
1608     }
1609     if (status != kStatusOk) return status;
1610   }
1611 
1612   if (frame_header.enable_frame_end_update_cdf) {
1613     frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
1614   }
1615   current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
1616   SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
1617   return kStatusOk;
1618 }
1619 
ApplyFilmGrain(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const RefCountedBufferPtr & displayable_frame,RefCountedBufferPtr * film_grain_frame,ThreadPool * thread_pool)1620 StatusCode DecoderImpl::ApplyFilmGrain(
1621     const ObuSequenceHeader& sequence_header,
1622     const ObuFrameHeader& frame_header,
1623     const RefCountedBufferPtr& displayable_frame,
1624     RefCountedBufferPtr* film_grain_frame, ThreadPool* thread_pool) {
1625   if (!sequence_header.film_grain_params_present ||
1626       !displayable_frame->film_grain_params().apply_grain ||
1627       (settings_.post_filter_mask & 0x10) == 0) {
1628     *film_grain_frame = displayable_frame;
1629     return kStatusOk;
1630   }
1631   if (!frame_header.show_existing_frame &&
1632       frame_header.refresh_frame_flags == 0) {
1633     // If show_existing_frame is true, then the current frame is a previously
1634     // saved reference frame. If refresh_frame_flags is nonzero, then the
1635     // state_.UpdateReferenceFrames() call above has saved the current frame as
1636     // a reference frame. Therefore, if both of these conditions are false, then
1637     // the current frame is not saved as a reference frame. displayable_frame
1638     // should hold the only reference to the current frame.
1639     assert(displayable_frame.use_count() == 1);
1640     // Add film grain noise in place.
1641     *film_grain_frame = displayable_frame;
1642   } else {
1643     *film_grain_frame = buffer_pool_.GetFreeBuffer();
1644     if (*film_grain_frame == nullptr) {
1645       LIBGAV1_DLOG(ERROR,
1646                    "Could not get film_grain_frame from the buffer pool.");
1647       return kStatusResourceExhausted;
1648     }
1649     if (!(*film_grain_frame)
1650              ->Realloc(displayable_frame->buffer()->bitdepth(),
1651                        displayable_frame->buffer()->is_monochrome(),
1652                        displayable_frame->upscaled_width(),
1653                        displayable_frame->frame_height(),
1654                        displayable_frame->buffer()->subsampling_x(),
1655                        displayable_frame->buffer()->subsampling_y(),
1656                        kBorderPixelsFilmGrain, kBorderPixelsFilmGrain,
1657                        kBorderPixelsFilmGrain, kBorderPixelsFilmGrain)) {
1658       LIBGAV1_DLOG(ERROR, "film_grain_frame->Realloc() failed.");
1659       return kStatusOutOfMemory;
1660     }
1661     (*film_grain_frame)
1662         ->set_chroma_sample_position(
1663             displayable_frame->chroma_sample_position());
1664     (*film_grain_frame)->set_spatial_id(displayable_frame->spatial_id());
1665     (*film_grain_frame)->set_temporal_id(displayable_frame->temporal_id());
1666   }
1667   const bool color_matrix_is_identity =
1668       sequence_header.color_config.matrix_coefficients ==
1669       kMatrixCoefficientsIdentity;
1670   assert(displayable_frame->buffer()->stride(kPlaneU) ==
1671          displayable_frame->buffer()->stride(kPlaneV));
1672   const int input_stride_uv = displayable_frame->buffer()->stride(kPlaneU);
1673   assert((*film_grain_frame)->buffer()->stride(kPlaneU) ==
1674          (*film_grain_frame)->buffer()->stride(kPlaneV));
1675   const int output_stride_uv = (*film_grain_frame)->buffer()->stride(kPlaneU);
1676 #if LIBGAV1_MAX_BITDEPTH >= 10
1677   if (displayable_frame->buffer()->bitdepth() == 10) {
1678     FilmGrain<10> film_grain(displayable_frame->film_grain_params(),
1679                              displayable_frame->buffer()->is_monochrome(),
1680                              color_matrix_is_identity,
1681                              displayable_frame->buffer()->subsampling_x(),
1682                              displayable_frame->buffer()->subsampling_y(),
1683                              displayable_frame->upscaled_width(),
1684                              displayable_frame->frame_height(), thread_pool);
1685     if (!film_grain.AddNoise(
1686             displayable_frame->buffer()->data(kPlaneY),
1687             displayable_frame->buffer()->stride(kPlaneY),
1688             displayable_frame->buffer()->data(kPlaneU),
1689             displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1690             (*film_grain_frame)->buffer()->data(kPlaneY),
1691             (*film_grain_frame)->buffer()->stride(kPlaneY),
1692             (*film_grain_frame)->buffer()->data(kPlaneU),
1693             (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1694       LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1695       return kStatusOutOfMemory;
1696     }
1697     return kStatusOk;
1698   }
1699 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
1700 #if LIBGAV1_MAX_BITDEPTH == 12
1701   if (displayable_frame->buffer()->bitdepth() == 12) {
1702     FilmGrain<12> film_grain(displayable_frame->film_grain_params(),
1703                              displayable_frame->buffer()->is_monochrome(),
1704                              color_matrix_is_identity,
1705                              displayable_frame->buffer()->subsampling_x(),
1706                              displayable_frame->buffer()->subsampling_y(),
1707                              displayable_frame->upscaled_width(),
1708                              displayable_frame->frame_height(), thread_pool);
1709     if (!film_grain.AddNoise(
1710             displayable_frame->buffer()->data(kPlaneY),
1711             displayable_frame->buffer()->stride(kPlaneY),
1712             displayable_frame->buffer()->data(kPlaneU),
1713             displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1714             (*film_grain_frame)->buffer()->data(kPlaneY),
1715             (*film_grain_frame)->buffer()->stride(kPlaneY),
1716             (*film_grain_frame)->buffer()->data(kPlaneU),
1717             (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1718       LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1719       return kStatusOutOfMemory;
1720     }
1721     return kStatusOk;
1722   }
1723 #endif  // LIBGAV1_MAX_BITDEPTH == 12
1724   FilmGrain<8> film_grain(displayable_frame->film_grain_params(),
1725                           displayable_frame->buffer()->is_monochrome(),
1726                           color_matrix_is_identity,
1727                           displayable_frame->buffer()->subsampling_x(),
1728                           displayable_frame->buffer()->subsampling_y(),
1729                           displayable_frame->upscaled_width(),
1730                           displayable_frame->frame_height(), thread_pool);
1731   if (!film_grain.AddNoise(
1732           displayable_frame->buffer()->data(kPlaneY),
1733           displayable_frame->buffer()->stride(kPlaneY),
1734           displayable_frame->buffer()->data(kPlaneU),
1735           displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1736           (*film_grain_frame)->buffer()->data(kPlaneY),
1737           (*film_grain_frame)->buffer()->stride(kPlaneY),
1738           (*film_grain_frame)->buffer()->data(kPlaneU),
1739           (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1740     LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1741     return kStatusOutOfMemory;
1742   }
1743   return kStatusOk;
1744 }
1745 
IsNewSequenceHeader(const ObuParser & obu)1746 bool DecoderImpl::IsNewSequenceHeader(const ObuParser& obu) {
1747   if (std::find_if(obu.obu_headers().begin(), obu.obu_headers().end(),
1748                    [](const ObuHeader& obu_header) {
1749                      return obu_header.type == kObuSequenceHeader;
1750                    }) == obu.obu_headers().end()) {
1751     return false;
1752   }
1753   const ObuSequenceHeader sequence_header = obu.sequence_header();
1754   const bool sequence_header_changed =
1755       !has_sequence_header_ ||
1756       sequence_header_.color_config.bitdepth !=
1757           sequence_header.color_config.bitdepth ||
1758       sequence_header_.color_config.is_monochrome !=
1759           sequence_header.color_config.is_monochrome ||
1760       sequence_header_.color_config.subsampling_x !=
1761           sequence_header.color_config.subsampling_x ||
1762       sequence_header_.color_config.subsampling_y !=
1763           sequence_header.color_config.subsampling_y ||
1764       sequence_header_.max_frame_width != sequence_header.max_frame_width ||
1765       sequence_header_.max_frame_height != sequence_header.max_frame_height;
1766   sequence_header_ = sequence_header;
1767   has_sequence_header_ = true;
1768   return sequence_header_changed;
1769 }
1770 
MaybeInitializeWedgeMasks(FrameType frame_type)1771 bool DecoderImpl::MaybeInitializeWedgeMasks(FrameType frame_type) {
1772   if (IsIntraFrame(frame_type) || wedge_masks_initialized_) {
1773     return true;
1774   }
1775   if (!GenerateWedgeMask(&wedge_masks_)) {
1776     return false;
1777   }
1778   wedge_masks_initialized_ = true;
1779   return true;
1780 }
1781 
MaybeInitializeQuantizerMatrix(const ObuFrameHeader & frame_header)1782 bool DecoderImpl::MaybeInitializeQuantizerMatrix(
1783     const ObuFrameHeader& frame_header) {
1784   if (quantizer_matrix_initialized_ || !frame_header.quantizer.use_matrix) {
1785     return true;
1786   }
1787   if (!InitializeQuantizerMatrix(&quantizer_matrix_)) {
1788     return false;
1789   }
1790   quantizer_matrix_initialized_ = true;
1791   return true;
1792 }
1793 
1794 }  // namespace libgav1
1795