• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/decoder_impl.h"
16 
17 #include <algorithm>
18 #include <atomic>
19 #include <cassert>
20 #include <iterator>
21 #include <new>
22 #include <utility>
23 
24 #include "src/dsp/common.h"
25 #include "src/dsp/constants.h"
26 #include "src/dsp/dsp.h"
27 #include "src/film_grain.h"
28 #include "src/frame_buffer_utils.h"
29 #include "src/frame_scratch_buffer.h"
30 #include "src/loop_restoration_info.h"
31 #include "src/obu_parser.h"
32 #include "src/post_filter.h"
33 #include "src/prediction_mask.h"
34 #include "src/threading_strategy.h"
35 #include "src/utils/blocking_counter.h"
36 #include "src/utils/common.h"
37 #include "src/utils/constants.h"
38 #include "src/utils/logging.h"
39 #include "src/utils/raw_bit_reader.h"
40 #include "src/utils/segmentation.h"
41 #include "src/utils/threadpool.h"
42 #include "src/yuv_buffer.h"
43 
44 namespace libgav1 {
45 namespace {
46 
// NOTE(review): presumably the largest block width/height expressed in 4x4
// units (32 * 4 = 128 samples). Neither constant is referenced in the visible
// part of this file -- confirm against the rest of the file before relying on
// this reading.
47 constexpr int kMaxBlockWidth4x4 = 32;
48 constexpr int kMaxBlockHeight4x4 = 32;
49 
50 // Computes the bottom border size in pixels. If CDEF, loop restoration or
51 // SuperRes is enabled, adds extra border pixels to facilitate those steps to
52 // happen nearly in-place (a few extra rows instead of an entire frame buffer).
53 // The logic in this function should match the corresponding logic for
54 // |vertical_shift| in the PostFilter constructor.
GetBottomBorderPixels(const bool do_cdef,const bool do_restoration,const bool do_superres,const int subsampling_y)55 int GetBottomBorderPixels(const bool do_cdef, const bool do_restoration,
56                           const bool do_superres, const int subsampling_y) {
57   int extra_border = 0;
58   if (do_cdef) {
59     extra_border += kCdefBorder;
60   } else if (do_restoration) {
61     // If CDEF is enabled, loop restoration is safe without extra border.
62     extra_border += kRestorationVerticalBorder;
63   }
64   if (do_superres) extra_border += kSuperResVerticalBorder;
65   // Double the number of extra bottom border pixels if the bottom border will
66   // be subsampled.
67   extra_border <<= subsampling_y;
68   return Align(kBorderPixels + extra_border, 2);  // Must be a multiple of 2.
69 }
70 
71 // Sets |frame_scratch_buffer->tile_decoding_failed| to true (while holding on
72 // to |frame_scratch_buffer->superblock_row_mutex|) and notifies the first
73 // |count| condition variables in
74 // |frame_scratch_buffer->superblock_row_progress_condvar|.
SetFailureAndNotifyAll(FrameScratchBuffer * const frame_scratch_buffer,int count)75 void SetFailureAndNotifyAll(FrameScratchBuffer* const frame_scratch_buffer,
76                             int count) {
77   {
78     std::lock_guard<std::mutex> lock(
79         frame_scratch_buffer->superblock_row_mutex);
80     frame_scratch_buffer->tile_decoding_failed = true;
81   }
82   std::condition_variable* const condvars =
83       frame_scratch_buffer->superblock_row_progress_condvar.get();
84   for (int i = 0; i < count; ++i) {
85     condvars[i].notify_one();
86   }
87 }
88 
89 // Helper class that releases the frame scratch buffer in the destructor.
90 class FrameScratchBufferReleaser {
91  public:
FrameScratchBufferReleaser(FrameScratchBufferPool * frame_scratch_buffer_pool,std::unique_ptr<FrameScratchBuffer> * frame_scratch_buffer)92   FrameScratchBufferReleaser(
93       FrameScratchBufferPool* frame_scratch_buffer_pool,
94       std::unique_ptr<FrameScratchBuffer>* frame_scratch_buffer)
95       : frame_scratch_buffer_pool_(frame_scratch_buffer_pool),
96         frame_scratch_buffer_(frame_scratch_buffer) {}
~FrameScratchBufferReleaser()97   ~FrameScratchBufferReleaser() {
98     frame_scratch_buffer_pool_->Release(std::move(*frame_scratch_buffer_));
99   }
100 
101  private:
102   FrameScratchBufferPool* const frame_scratch_buffer_pool_;
103   std::unique_ptr<FrameScratchBuffer>* const frame_scratch_buffer_;
104 };
105 
106 // Sets the |frame|'s segmentation map for two cases. The third case is handled
107 // in Tile::DecodeBlock().
SetSegmentationMap(const ObuFrameHeader & frame_header,const SegmentationMap * prev_segment_ids,RefCountedBuffer * const frame)108 void SetSegmentationMap(const ObuFrameHeader& frame_header,
109                         const SegmentationMap* prev_segment_ids,
110                         RefCountedBuffer* const frame) {
111   if (!frame_header.segmentation.enabled) {
112     // All segment_id's are 0.
113     frame->segmentation_map()->Clear();
114   } else if (!frame_header.segmentation.update_map) {
115     // Copy from prev_segment_ids.
116     if (prev_segment_ids == nullptr) {
117       // Treat a null prev_segment_ids pointer as if it pointed to a
118       // segmentation map containing all 0s.
119       frame->segmentation_map()->Clear();
120     } else {
121       frame->segmentation_map()->CopyFrom(*prev_segment_ids);
122     }
123   }
124 }
125 
DecodeTilesNonFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter)126 StatusCode DecodeTilesNonFrameParallel(
127     const ObuSequenceHeader& sequence_header,
128     const ObuFrameHeader& frame_header,
129     const Vector<std::unique_ptr<Tile>>& tiles,
130     FrameScratchBuffer* const frame_scratch_buffer,
131     PostFilter* const post_filter) {
132   // Decode in superblock row order.
133   const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
134   std::unique_ptr<TileScratchBuffer> tile_scratch_buffer =
135       frame_scratch_buffer->tile_scratch_buffer_pool.Get();
136   if (tile_scratch_buffer == nullptr) return kLibgav1StatusOutOfMemory;
137   for (int row4x4 = 0; row4x4 < frame_header.rows4x4;
138        row4x4 += block_width4x4) {
139     for (const auto& tile_ptr : tiles) {
140       if (!tile_ptr->ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
141               row4x4, tile_scratch_buffer.get())) {
142         return kLibgav1StatusUnknownError;
143       }
144     }
145     post_filter->ApplyFilteringForOneSuperBlockRow(
146         row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
147         /*do_deblock=*/true);
148   }
149   frame_scratch_buffer->tile_scratch_buffer_pool.Release(
150       std::move(tile_scratch_buffer));
151   return kStatusOk;
152 }
153 
DecodeTilesThreadedNonFrameParallel(const Vector<std::unique_ptr<Tile>> & tiles,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,BlockingCounterWithStatus * const pending_tiles)154 StatusCode DecodeTilesThreadedNonFrameParallel(
155     const Vector<std::unique_ptr<Tile>>& tiles,
156     FrameScratchBuffer* const frame_scratch_buffer,
157     PostFilter* const post_filter,
158     BlockingCounterWithStatus* const pending_tiles) {
159   ThreadingStrategy& threading_strategy =
160       frame_scratch_buffer->threading_strategy;
161   const int num_workers = threading_strategy.tile_thread_count();
162   BlockingCounterWithStatus pending_workers(num_workers);
163   std::atomic<int> tile_counter(0);
164   const int tile_count = static_cast<int>(tiles.size());
165   bool tile_decoding_failed = false;
166   // Submit tile decoding jobs to the thread pool.
167   for (int i = 0; i < num_workers; ++i) {
168     threading_strategy.tile_thread_pool()->Schedule([&tiles, tile_count,
169                                                      &tile_counter,
170                                                      &pending_workers,
171                                                      &pending_tiles]() {
172       bool failed = false;
173       int index;
174       while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
175              tile_count) {
176         if (!failed) {
177           const auto& tile_ptr = tiles[index];
178           if (!tile_ptr->ParseAndDecode()) {
179             LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
180             failed = true;
181           }
182         } else {
183           pending_tiles->Decrement(false);
184         }
185       }
186       pending_workers.Decrement(!failed);
187     });
188   }
189   // Have the current thread partake in tile decoding.
190   int index;
191   while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
192          tile_count) {
193     if (!tile_decoding_failed) {
194       const auto& tile_ptr = tiles[index];
195       if (!tile_ptr->ParseAndDecode()) {
196         LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
197         tile_decoding_failed = true;
198       }
199     } else {
200       pending_tiles->Decrement(false);
201     }
202   }
203   // Wait until all the workers are done. This ensures that all the tiles have
204   // been parsed.
205   tile_decoding_failed |= !pending_workers.Wait();
206   // Wait until all the tiles have been decoded.
207   tile_decoding_failed |= !pending_tiles->Wait();
208   if (tile_decoding_failed) return kStatusUnknownError;
209   assert(threading_strategy.post_filter_thread_pool() != nullptr);
210   post_filter->ApplyFilteringThreaded();
211   return kStatusOk;
212 }
213 
DecodeTilesFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,const SymbolDecoderContext & saved_symbol_decoder_context,const SegmentationMap * const prev_segment_ids,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,RefCountedBuffer * const current_frame)214 StatusCode DecodeTilesFrameParallel(
215     const ObuSequenceHeader& sequence_header,
216     const ObuFrameHeader& frame_header,
217     const Vector<std::unique_ptr<Tile>>& tiles,
218     const SymbolDecoderContext& saved_symbol_decoder_context,
219     const SegmentationMap* const prev_segment_ids,
220     FrameScratchBuffer* const frame_scratch_buffer,
221     PostFilter* const post_filter, RefCountedBuffer* const current_frame) {
222   // Parse the frame.
223   for (const auto& tile : tiles) {
224     if (!tile->Parse()) {
225       LIBGAV1_DLOG(ERROR, "Failed to parse tile number: %d\n", tile->number());
226       return kStatusUnknownError;
227     }
228   }
229   if (frame_header.enable_frame_end_update_cdf) {
230     frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
231   }
232   current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
233   SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
234   // Mark frame as parsed.
235   current_frame->SetFrameState(kFrameStateParsed);
236   std::unique_ptr<TileScratchBuffer> tile_scratch_buffer =
237       frame_scratch_buffer->tile_scratch_buffer_pool.Get();
238   if (tile_scratch_buffer == nullptr) {
239     return kStatusOutOfMemory;
240   }
241   const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
242   // Decode in superblock row order (inter prediction in the Tile class will
243   // block until the required superblocks in the reference frame are decoded).
244   for (int row4x4 = 0; row4x4 < frame_header.rows4x4;
245        row4x4 += block_width4x4) {
246     for (const auto& tile_ptr : tiles) {
247       if (!tile_ptr->ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
248               row4x4, tile_scratch_buffer.get())) {
249         LIBGAV1_DLOG(ERROR, "Failed to decode tile number: %d\n",
250                      tile_ptr->number());
251         return kStatusUnknownError;
252       }
253     }
254     const int progress_row = post_filter->ApplyFilteringForOneSuperBlockRow(
255         row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
256         /*do_deblock=*/true);
257     if (progress_row >= 0) {
258       current_frame->SetProgress(progress_row);
259     }
260   }
261   // Mark frame as decoded (we no longer care about row-level progress since the
262   // entire frame has been decoded).
263   current_frame->SetFrameState(kFrameStateDecoded);
264   frame_scratch_buffer->tile_scratch_buffer_pool.Release(
265       std::move(tile_scratch_buffer));
266   return kStatusOk;
267 }
268 
269 // Helper function used by DecodeTilesThreadedFrameParallel. Applies the
270 // deblocking filter for tile boundaries for the superblock row at |row4x4|.
ApplyDeblockingFilterForTileBoundaries(PostFilter * const post_filter,const std::unique_ptr<Tile> * tile_row_base,const ObuFrameHeader & frame_header,int row4x4,int block_width4x4,int tile_columns,bool decode_entire_tiles_in_worker_threads)271 void ApplyDeblockingFilterForTileBoundaries(
272     PostFilter* const post_filter, const std::unique_ptr<Tile>* tile_row_base,
273     const ObuFrameHeader& frame_header, int row4x4, int block_width4x4,
274     int tile_columns, bool decode_entire_tiles_in_worker_threads) {
275   // Apply vertical deblock filtering for the first 64 columns of each tile.
276   for (int tile_column = 0; tile_column < tile_columns; ++tile_column) {
277     const Tile& tile = *tile_row_base[tile_column];
278     post_filter->ApplyDeblockFilter(
279         kLoopFilterTypeVertical, row4x4, tile.column4x4_start(),
280         tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
281   }
282   if (decode_entire_tiles_in_worker_threads &&
283       row4x4 == tile_row_base[0]->row4x4_start()) {
284     // This is the first superblock row of a tile row. In this case, apply
285     // horizontal deblock filtering for the entire superblock row.
286     post_filter->ApplyDeblockFilter(kLoopFilterTypeHorizontal, row4x4, 0,
287                                     frame_header.columns4x4, block_width4x4);
288   } else {
289     // Apply horizontal deblock filtering for the first 64 columns of the
290     // first tile.
291     const Tile& first_tile = *tile_row_base[0];
292     post_filter->ApplyDeblockFilter(
293         kLoopFilterTypeHorizontal, row4x4, first_tile.column4x4_start(),
294         first_tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
295     // Apply horizontal deblock filtering for the last 64 columns of the
296     // previous tile and the first 64 columns of the current tile.
297     for (int tile_column = 1; tile_column < tile_columns; ++tile_column) {
298       const Tile& tile = *tile_row_base[tile_column];
299       // If the previous tile has more than 64 columns, then include those
300       // for the horizontal deblock.
301       const Tile& previous_tile = *tile_row_base[tile_column - 1];
302       const int column4x4_start =
303           tile.column4x4_start() -
304           ((tile.column4x4_start() - kNum4x4InLoopFilterUnit !=
305             previous_tile.column4x4_start())
306                ? kNum4x4InLoopFilterUnit
307                : 0);
308       post_filter->ApplyDeblockFilter(
309           kLoopFilterTypeHorizontal, row4x4, column4x4_start,
310           tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
311     }
312     // Apply horizontal deblock filtering for the last 64 columns of the
313     // last tile.
314     const Tile& last_tile = *tile_row_base[tile_columns - 1];
315     // Identify the last column4x4 value and do horizontal filtering for
316     // that column4x4. The value of last column4x4 is the nearest multiple
317     // of 16 that is before tile.column4x4_end().
318     const int column4x4_start = (last_tile.column4x4_end() - 1) & ~15;
319     // If column4x4_start is the same as tile.column4x4_start() then it
320     // means that the last tile has <= 64 columns. So there is nothing left
321     // to deblock (since it was already deblocked in the loop above).
322     if (column4x4_start != last_tile.column4x4_start()) {
323       post_filter->ApplyDeblockFilter(
324           kLoopFilterTypeHorizontal, row4x4, column4x4_start,
325           last_tile.column4x4_end(), block_width4x4);
326     }
327   }
328 }
329 
330 // Helper function used by DecodeTilesThreadedFrameParallel. Decodes the
331 // superblock row starting at |row4x4| for tile at index |tile_index| in the
332 // list of tiles |tiles|. If the decoding is successful, then it does the
333 // following:
334 //   * Schedule the next superblock row in the current tile column for decoding
335 //     (the next superblock row may be in a different tile than the current
336 //     one).
337 //   * If an entire superblock row of the frame has been decoded, it notifies
338 //     the waiters (if there are any).
DecodeSuperBlockRowInTile(const Vector<std::unique_ptr<Tile>> & tiles,size_t tile_index,int row4x4,const int superblock_size4x4,const int tile_columns,const int superblock_rows,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,BlockingCounter * const pending_jobs)339 void DecodeSuperBlockRowInTile(
340     const Vector<std::unique_ptr<Tile>>& tiles, size_t tile_index, int row4x4,
341     const int superblock_size4x4, const int tile_columns,
342     const int superblock_rows, FrameScratchBuffer* const frame_scratch_buffer,
343     PostFilter* const post_filter, BlockingCounter* const pending_jobs) {
344   std::unique_ptr<TileScratchBuffer> scratch_buffer =
345       frame_scratch_buffer->tile_scratch_buffer_pool.Get();
346   if (scratch_buffer == nullptr) {
347     SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
348     return;
349   }
350   Tile& tile = *tiles[tile_index];
351   const bool ok = tile.ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
352       row4x4, scratch_buffer.get());
353   frame_scratch_buffer->tile_scratch_buffer_pool.Release(
354       std::move(scratch_buffer));
355   if (!ok) {
356     SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
357     return;
358   }
359   if (post_filter->DoDeblock()) {
360     // Apply vertical deblock filtering for all the columns in this tile except
361     // for the first 64 columns.
362     post_filter->ApplyDeblockFilter(
363         kLoopFilterTypeVertical, row4x4,
364         tile.column4x4_start() + kNum4x4InLoopFilterUnit, tile.column4x4_end(),
365         superblock_size4x4);
366     // Apply horizontal deblock filtering for all the columns in this tile
367     // except for the first and the last 64 columns.
368     // Note about the last tile of each row: For the last tile, column4x4_end
369     // may not be a multiple of 16. In that case it is still okay to simply
370     // subtract 16 since ApplyDeblockFilter() will only do the filters in
371     // increments of 64 columns (or 32 columns for chroma with subsampling).
372     post_filter->ApplyDeblockFilter(
373         kLoopFilterTypeHorizontal, row4x4,
374         tile.column4x4_start() + kNum4x4InLoopFilterUnit,
375         tile.column4x4_end() - kNum4x4InLoopFilterUnit, superblock_size4x4);
376   }
377   const int superblock_size4x4_log2 = FloorLog2(superblock_size4x4);
378   const int index = row4x4 >> superblock_size4x4_log2;
379   int* const superblock_row_progress =
380       frame_scratch_buffer->superblock_row_progress.get();
381   std::condition_variable* const superblock_row_progress_condvar =
382       frame_scratch_buffer->superblock_row_progress_condvar.get();
383   bool notify;
384   {
385     std::lock_guard<std::mutex> lock(
386         frame_scratch_buffer->superblock_row_mutex);
387     notify = ++superblock_row_progress[index] == tile_columns;
388   }
389   if (notify) {
390     // We are done decoding this superblock row. Notify the post filtering
391     // thread.
392     superblock_row_progress_condvar[index].notify_one();
393   }
394   // Schedule the next superblock row (if one exists).
395   ThreadPool& thread_pool =
396       *frame_scratch_buffer->threading_strategy.thread_pool();
397   const int next_row4x4 = row4x4 + superblock_size4x4;
398   if (!tile.IsRow4x4Inside(next_row4x4)) {
399     tile_index += tile_columns;
400   }
401   if (tile_index >= tiles.size()) return;
402   pending_jobs->IncrementBy(1);
403   thread_pool.Schedule([&tiles, tile_index, next_row4x4, superblock_size4x4,
404                         tile_columns, superblock_rows, frame_scratch_buffer,
405                         post_filter, pending_jobs]() {
406     DecodeSuperBlockRowInTile(tiles, tile_index, next_row4x4,
407                               superblock_size4x4, tile_columns, superblock_rows,
408                               frame_scratch_buffer, post_filter, pending_jobs);
409     pending_jobs->Decrement();
410   });
411 }
412 
DecodeTilesThreadedFrameParallel(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<std::unique_ptr<Tile>> & tiles,const SymbolDecoderContext & saved_symbol_decoder_context,const SegmentationMap * const prev_segment_ids,FrameScratchBuffer * const frame_scratch_buffer,PostFilter * const post_filter,RefCountedBuffer * const current_frame)413 StatusCode DecodeTilesThreadedFrameParallel(
414     const ObuSequenceHeader& sequence_header,
415     const ObuFrameHeader& frame_header,
416     const Vector<std::unique_ptr<Tile>>& tiles,
417     const SymbolDecoderContext& saved_symbol_decoder_context,
418     const SegmentationMap* const prev_segment_ids,
419     FrameScratchBuffer* const frame_scratch_buffer,
420     PostFilter* const post_filter, RefCountedBuffer* const current_frame) {
421   // Parse the frame.
422   ThreadPool& thread_pool =
423       *frame_scratch_buffer->threading_strategy.thread_pool();
424   std::atomic<int> tile_counter(0);
425   const int tile_count = static_cast<int>(tiles.size());
426   const int num_workers = thread_pool.num_threads();
427   BlockingCounterWithStatus parse_workers(num_workers);
428   // Submit tile parsing jobs to the thread pool.
429   for (int i = 0; i < num_workers; ++i) {
430     thread_pool.Schedule([&tiles, tile_count, &tile_counter, &parse_workers]() {
431       bool failed = false;
432       int index;
433       while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
434              tile_count) {
435         if (!failed) {
436           const auto& tile_ptr = tiles[index];
437           if (!tile_ptr->Parse()) {
438             LIBGAV1_DLOG(ERROR, "Error parsing tile #%d", tile_ptr->number());
439             failed = true;
440           }
441         }
442       }
443       parse_workers.Decrement(!failed);
444     });
445   }
446 
447   // Have the current thread participate in parsing.
448   bool failed = false;
449   int index;
450   while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
451          tile_count) {
452     if (!failed) {
453       const auto& tile_ptr = tiles[index];
454       if (!tile_ptr->Parse()) {
455         LIBGAV1_DLOG(ERROR, "Error parsing tile #%d", tile_ptr->number());
456         failed = true;
457       }
458     }
459   }
460 
461   // Wait until all the parse workers are done. This ensures that all the tiles
462   // have been parsed.
463   if (!parse_workers.Wait() || failed) {
464     return kLibgav1StatusUnknownError;
465   }
466   if (frame_header.enable_frame_end_update_cdf) {
467     frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
468   }
469   current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
470   SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
471   current_frame->SetFrameState(kFrameStateParsed);
472 
473   // Decode the frame.
474   const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
475   const int block_width4x4_log2 =
476       sequence_header.use_128x128_superblock ? 5 : 4;
477   const int superblock_rows =
478       (frame_header.rows4x4 + block_width4x4 - 1) >> block_width4x4_log2;
479   if (!frame_scratch_buffer->superblock_row_progress.Resize(superblock_rows) ||
480       !frame_scratch_buffer->superblock_row_progress_condvar.Resize(
481           superblock_rows)) {
482     return kLibgav1StatusOutOfMemory;
483   }
484   int* const superblock_row_progress =
485       frame_scratch_buffer->superblock_row_progress.get();
486   memset(superblock_row_progress, 0,
487          superblock_rows * sizeof(superblock_row_progress[0]));
488   frame_scratch_buffer->tile_decoding_failed = false;
489   const int tile_columns = frame_header.tile_info.tile_columns;
490   const bool decode_entire_tiles_in_worker_threads =
491       num_workers >= tile_columns;
492   BlockingCounter pending_jobs(
493       decode_entire_tiles_in_worker_threads ? num_workers : tile_columns);
494   if (decode_entire_tiles_in_worker_threads) {
495     // Submit tile decoding jobs to the thread pool.
496     tile_counter = 0;
497     for (int i = 0; i < num_workers; ++i) {
498       thread_pool.Schedule([&tiles, tile_count, &tile_counter, &pending_jobs,
499                             frame_scratch_buffer, superblock_rows]() {
500         bool failed = false;
501         int index;
502         while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
503                tile_count) {
504           if (failed) continue;
505           const auto& tile_ptr = tiles[index];
506           if (!tile_ptr->Decode(
507                   &frame_scratch_buffer->superblock_row_mutex,
508                   frame_scratch_buffer->superblock_row_progress.get(),
509                   frame_scratch_buffer->superblock_row_progress_condvar
510                       .get())) {
511             LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
512             failed = true;
513             SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
514           }
515         }
516         pending_jobs.Decrement();
517       });
518     }
519   } else {
520     // Schedule the jobs for first tile row.
521     for (int tile_index = 0; tile_index < tile_columns; ++tile_index) {
522       thread_pool.Schedule([&tiles, tile_index, block_width4x4, tile_columns,
523                             superblock_rows, frame_scratch_buffer, post_filter,
524                             &pending_jobs]() {
525         DecodeSuperBlockRowInTile(
526             tiles, tile_index, 0, block_width4x4, tile_columns, superblock_rows,
527             frame_scratch_buffer, post_filter, &pending_jobs);
528         pending_jobs.Decrement();
529       });
530     }
531   }
532 
533   // Current thread will do the post filters.
534   std::condition_variable* const superblock_row_progress_condvar =
535       frame_scratch_buffer->superblock_row_progress_condvar.get();
536   const std::unique_ptr<Tile>* tile_row_base = &tiles[0];
537   for (int row4x4 = 0, index = 0; row4x4 < frame_header.rows4x4;
538        row4x4 += block_width4x4, ++index) {
539     if (!tile_row_base[0]->IsRow4x4Inside(row4x4)) {
540       tile_row_base += tile_columns;
541     }
542     {
543       std::unique_lock<std::mutex> lock(
544           frame_scratch_buffer->superblock_row_mutex);
545       while (superblock_row_progress[index] != tile_columns &&
546              !frame_scratch_buffer->tile_decoding_failed) {
547         superblock_row_progress_condvar[index].wait(lock);
548       }
549       if (frame_scratch_buffer->tile_decoding_failed) break;
550     }
551     if (post_filter->DoDeblock()) {
552       // Apply deblocking filter for the tile boundaries of this superblock row.
553       // The deblocking filter for the internal blocks will be applied in the
554       // tile worker threads. In this thread, we will only have to apply
555       // deblocking filter for the tile boundaries.
556       ApplyDeblockingFilterForTileBoundaries(
557           post_filter, tile_row_base, frame_header, row4x4, block_width4x4,
558           tile_columns, decode_entire_tiles_in_worker_threads);
559     }
560     // Apply all the post filters other than deblocking.
561     const int progress_row = post_filter->ApplyFilteringForOneSuperBlockRow(
562         row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
563         /*do_deblock=*/false);
564     if (progress_row >= 0) {
565       current_frame->SetProgress(progress_row);
566     }
567   }
568   // Wait until all the pending jobs are done. This ensures that all the tiles
569   // have been decoded and wrapped up.
570   pending_jobs.Wait();
571   {
572     std::lock_guard<std::mutex> lock(
573         frame_scratch_buffer->superblock_row_mutex);
574     if (frame_scratch_buffer->tile_decoding_failed) {
575       return kLibgav1StatusUnknownError;
576     }
577   }
578 
579   current_frame->SetFrameState(kFrameStateDecoded);
580   return kStatusOk;
581 }
582 
583 }  // namespace
584 
585 // static
Create(const DecoderSettings * settings,std::unique_ptr<DecoderImpl> * output)586 StatusCode DecoderImpl::Create(const DecoderSettings* settings,
587                                std::unique_ptr<DecoderImpl>* output) {
588   if (settings->threads <= 0) {
589     LIBGAV1_DLOG(ERROR, "Invalid settings->threads: %d.", settings->threads);
590     return kStatusInvalidArgument;
591   }
592   if (settings->frame_parallel) {
593     if (settings->release_input_buffer == nullptr) {
594       LIBGAV1_DLOG(ERROR,
595                    "release_input_buffer callback must not be null when "
596                    "frame_parallel is true.");
597       return kStatusInvalidArgument;
598     }
599   }
600   std::unique_ptr<DecoderImpl> impl(new (std::nothrow) DecoderImpl(settings));
601   if (impl == nullptr) {
602     LIBGAV1_DLOG(ERROR, "Failed to allocate DecoderImpl.");
603     return kStatusOutOfMemory;
604   }
605   const StatusCode status = impl->Init();
606   if (status != kStatusOk) return status;
607   *output = std::move(impl);
608   return kStatusOk;
609 }
610 
DecoderImpl(const DecoderSettings * settings)611 DecoderImpl::DecoderImpl(const DecoderSettings* settings)
612     : buffer_pool_(settings->on_frame_buffer_size_changed,
613                    settings->get_frame_buffer, settings->release_frame_buffer,
614                    settings->callback_private_data),
615       settings_(*settings) {
616   dsp::DspInit();
617 }
618 
~DecoderImpl()619 DecoderImpl::~DecoderImpl() {
620   // Clean up and wait until all the threads have stopped. We just have to pass
621   // in a dummy status that is not kStatusOk or kStatusTryAgain to trigger the
622   // path that clears all the threads and structs.
623   SignalFailure(kStatusUnknownError);
624   // Release any other frame buffer references that we may be holding on to.
625   ReleaseOutputFrame();
626   output_frame_queue_.Clear();
627   for (auto& reference_frame : state_.reference_frame) {
628     reference_frame = nullptr;
629   }
630 }
631 
Init()632 StatusCode DecoderImpl::Init() {
633   if (!output_frame_queue_.Init(kMaxLayers)) {
634     LIBGAV1_DLOG(ERROR, "output_frame_queue_.Init() failed.");
635     return kStatusOutOfMemory;
636   }
637   return kStatusOk;
638 }
639 
InitializeFrameThreadPoolAndTemporalUnitQueue(const uint8_t * data,size_t size)640 StatusCode DecoderImpl::InitializeFrameThreadPoolAndTemporalUnitQueue(
641     const uint8_t* data, size_t size) {
642   is_frame_parallel_ = false;
643   if (settings_.frame_parallel) {
644     DecoderState state;
645     std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
646         data, size, settings_.operating_point, &buffer_pool_, &state));
647     if (obu == nullptr) {
648       LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
649       return kStatusOutOfMemory;
650     }
651     RefCountedBufferPtr current_frame;
652     const StatusCode status = obu->ParseOneFrame(&current_frame);
653     if (status != kStatusOk) {
654       LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
655       return status;
656     }
657     current_frame = nullptr;
658     // We assume that the first frame that was parsed will contain the frame
659     // header. This assumption is usually true in practice. So we will simply
660     // not use frame parallel mode if this is not the case.
661     if (settings_.threads > 1 &&
662         !InitializeThreadPoolsForFrameParallel(
663             settings_.threads, obu->frame_header().tile_info.tile_count,
664             obu->frame_header().tile_info.tile_columns, &frame_thread_pool_,
665             &frame_scratch_buffer_pool_)) {
666       return kStatusOutOfMemory;
667     }
668   }
669   const int max_allowed_frames =
670       (frame_thread_pool_ != nullptr) ? frame_thread_pool_->num_threads() : 1;
671   assert(max_allowed_frames > 0);
672   if (!temporal_units_.Init(max_allowed_frames)) {
673     LIBGAV1_DLOG(ERROR, "temporal_units_.Init() failed.");
674     return kStatusOutOfMemory;
675   }
676   is_frame_parallel_ = frame_thread_pool_ != nullptr;
677   return kStatusOk;
678 }
679 
EnqueueFrame(const uint8_t * data,size_t size,int64_t user_private_data,void * buffer_private_data)680 StatusCode DecoderImpl::EnqueueFrame(const uint8_t* data, size_t size,
681                                      int64_t user_private_data,
682                                      void* buffer_private_data) {
683   if (data == nullptr || size == 0) return kStatusInvalidArgument;
684   if (HasFailure()) return kStatusUnknownError;
685   if (!seen_first_frame_) {
686     seen_first_frame_ = true;
687     const StatusCode status =
688         InitializeFrameThreadPoolAndTemporalUnitQueue(data, size);
689     if (status != kStatusOk) {
690       return SignalFailure(status);
691     }
692   }
693   if (temporal_units_.Full()) {
694     return kStatusTryAgain;
695   }
696   if (is_frame_parallel_) {
697     return ParseAndSchedule(data, size, user_private_data, buffer_private_data);
698   }
699   TemporalUnit temporal_unit(data, size, user_private_data,
700                              buffer_private_data);
701   temporal_units_.Push(std::move(temporal_unit));
702   return kStatusOk;
703 }
704 
SignalFailure(StatusCode status)705 StatusCode DecoderImpl::SignalFailure(StatusCode status) {
706   if (status == kStatusOk || status == kStatusTryAgain) return status;
707   // Set the |failure_status_| first so that any pending jobs in
708   // |frame_thread_pool_| will exit right away when the thread pool is being
709   // released below.
710   {
711     std::lock_guard<std::mutex> lock(mutex_);
712     failure_status_ = status;
713   }
714   // Make sure all waiting threads exit.
715   buffer_pool_.Abort();
716   frame_thread_pool_ = nullptr;
717   while (!temporal_units_.Empty()) {
718     if (settings_.release_input_buffer != nullptr) {
719       settings_.release_input_buffer(
720           settings_.callback_private_data,
721           temporal_units_.Front().buffer_private_data);
722     }
723     temporal_units_.Pop();
724   }
725   return status;
726 }
727 
728 // DequeueFrame() follows the following policy to avoid holding unnecessary
729 // frame buffer references in output_frame_: output_frame_ must be null when
730 // DequeueFrame() returns false.
DequeueFrame(const DecoderBuffer ** out_ptr)731 StatusCode DecoderImpl::DequeueFrame(const DecoderBuffer** out_ptr) {
732   if (out_ptr == nullptr) {
733     LIBGAV1_DLOG(ERROR, "Invalid argument: out_ptr == nullptr.");
734     return kStatusInvalidArgument;
735   }
736   // We assume a call to DequeueFrame() indicates that the caller is no longer
737   // using the previous output frame, so we can release it.
738   ReleaseOutputFrame();
739   if (temporal_units_.Empty()) {
740     // No input frames to decode.
741     *out_ptr = nullptr;
742     return kStatusNothingToDequeue;
743   }
744   TemporalUnit& temporal_unit = temporal_units_.Front();
745   if (!is_frame_parallel_) {
746     // If |output_frame_queue_| is not empty, then return the first frame from
747     // that queue.
748     if (!output_frame_queue_.Empty()) {
749       RefCountedBufferPtr frame = std::move(output_frame_queue_.Front());
750       output_frame_queue_.Pop();
751       buffer_.user_private_data = temporal_unit.user_private_data;
752       if (output_frame_queue_.Empty()) {
753         temporal_units_.Pop();
754       }
755       const StatusCode status = CopyFrameToOutputBuffer(frame);
756       if (status != kStatusOk) {
757         return status;
758       }
759       *out_ptr = &buffer_;
760       return kStatusOk;
761     }
762     // Decode the next available temporal unit and return.
763     const StatusCode status = DecodeTemporalUnit(temporal_unit, out_ptr);
764     if (status != kStatusOk) {
765       // In case of failure, discard all the output frames that we may be
766       // holding on references to.
767       output_frame_queue_.Clear();
768     }
769     if (settings_.release_input_buffer != nullptr) {
770       settings_.release_input_buffer(settings_.callback_private_data,
771                                      temporal_unit.buffer_private_data);
772     }
773     if (output_frame_queue_.Empty()) {
774       temporal_units_.Pop();
775     }
776     return status;
777   }
778   {
779     std::unique_lock<std::mutex> lock(mutex_);
780     if (settings_.blocking_dequeue) {
781       while (!temporal_unit.decoded && failure_status_ == kStatusOk) {
782         decoded_condvar_.wait(lock);
783       }
784     } else {
785       if (!temporal_unit.decoded && failure_status_ == kStatusOk) {
786         return kStatusTryAgain;
787       }
788     }
789     if (failure_status_ != kStatusOk) {
790       const StatusCode failure_status = failure_status_;
791       lock.unlock();
792       return SignalFailure(failure_status);
793     }
794   }
795   if (settings_.release_input_buffer != nullptr &&
796       !temporal_unit.released_input_buffer) {
797     temporal_unit.released_input_buffer = true;
798     settings_.release_input_buffer(settings_.callback_private_data,
799                                    temporal_unit.buffer_private_data);
800   }
801   if (temporal_unit.status != kStatusOk) {
802     temporal_units_.Pop();
803     return SignalFailure(temporal_unit.status);
804   }
805   if (!temporal_unit.has_displayable_frame) {
806     *out_ptr = nullptr;
807     temporal_units_.Pop();
808     return kStatusOk;
809   }
810   assert(temporal_unit.output_layer_count > 0);
811   StatusCode status = CopyFrameToOutputBuffer(
812       temporal_unit.output_layers[temporal_unit.output_layer_count - 1].frame);
813   temporal_unit.output_layers[temporal_unit.output_layer_count - 1].frame =
814       nullptr;
815   if (status != kStatusOk) {
816     temporal_units_.Pop();
817     return SignalFailure(status);
818   }
819   buffer_.user_private_data = temporal_unit.user_private_data;
820   *out_ptr = &buffer_;
821   if (--temporal_unit.output_layer_count == 0) {
822     temporal_units_.Pop();
823   }
824   return kStatusOk;
825 }
826 
ParseAndSchedule(const uint8_t * data,size_t size,int64_t user_private_data,void * buffer_private_data)827 StatusCode DecoderImpl::ParseAndSchedule(const uint8_t* data, size_t size,
828                                          int64_t user_private_data,
829                                          void* buffer_private_data) {
830   TemporalUnit temporal_unit(data, size, user_private_data,
831                              buffer_private_data);
832   std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
833       temporal_unit.data, temporal_unit.size, settings_.operating_point,
834       &buffer_pool_, &state_));
835   if (obu == nullptr) {
836     LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
837     return kStatusOutOfMemory;
838   }
839   if (has_sequence_header_) {
840     obu->set_sequence_header(sequence_header_);
841   }
842   StatusCode status;
843   int position_in_temporal_unit = 0;
844   while (obu->HasData()) {
845     RefCountedBufferPtr current_frame;
846     status = obu->ParseOneFrame(&current_frame);
847     if (status != kStatusOk) {
848       LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
849       return status;
850     }
851     if (!MaybeInitializeQuantizerMatrix(obu->frame_header())) {
852       LIBGAV1_DLOG(ERROR, "InitializeQuantizerMatrix() failed.");
853       return kStatusOutOfMemory;
854     }
855     if (!MaybeInitializeWedgeMasks(obu->frame_header().frame_type)) {
856       LIBGAV1_DLOG(ERROR, "InitializeWedgeMasks() failed.");
857       return kStatusOutOfMemory;
858     }
859     if (IsNewSequenceHeader(*obu)) {
860       const ObuSequenceHeader& sequence_header = obu->sequence_header();
861       const Libgav1ImageFormat image_format =
862           ComposeImageFormat(sequence_header.color_config.is_monochrome,
863                              sequence_header.color_config.subsampling_x,
864                              sequence_header.color_config.subsampling_y);
865       const int max_bottom_border = GetBottomBorderPixels(
866           /*do_cdef=*/true, /*do_restoration=*/true,
867           /*do_superres=*/true, sequence_header.color_config.subsampling_y);
868       // TODO(vigneshv): This may not be the right place to call this callback
869       // for the frame parallel case. Investigate and fix it.
870       if (!buffer_pool_.OnFrameBufferSizeChanged(
871               sequence_header.color_config.bitdepth, image_format,
872               sequence_header.max_frame_width, sequence_header.max_frame_height,
873               kBorderPixels, kBorderPixels, kBorderPixels, max_bottom_border)) {
874         LIBGAV1_DLOG(ERROR, "buffer_pool_.OnFrameBufferSizeChanged failed.");
875         return kStatusUnknownError;
876       }
877     }
878     // This can happen when there are multiple spatial/temporal layers and if
879     // all the layers are outside the current operating point.
880     if (current_frame == nullptr) {
881       continue;
882     }
883     // Note that we cannot set EncodedFrame.temporal_unit here. It will be set
884     // in the code below after |temporal_unit| is std::move'd into the
885     // |temporal_units_| queue.
886     if (!temporal_unit.frames.emplace_back(obu.get(), state_, current_frame,
887                                            position_in_temporal_unit++)) {
888       LIBGAV1_DLOG(ERROR, "temporal_unit.frames.emplace_back failed.");
889       return kStatusOutOfMemory;
890     }
891     state_.UpdateReferenceFrames(current_frame,
892                                  obu->frame_header().refresh_frame_flags);
893   }
894   // This function cannot fail after this point. So it is okay to move the
895   // |temporal_unit| into |temporal_units_| queue.
896   temporal_units_.Push(std::move(temporal_unit));
897   if (temporal_units_.Back().frames.empty()) {
898     std::lock_guard<std::mutex> lock(mutex_);
899     temporal_units_.Back().has_displayable_frame = false;
900     temporal_units_.Back().decoded = true;
901     return kStatusOk;
902   }
903   for (auto& frame : temporal_units_.Back().frames) {
904     EncodedFrame* const encoded_frame = &frame;
905     encoded_frame->temporal_unit = &temporal_units_.Back();
906     frame_thread_pool_->Schedule([this, encoded_frame]() {
907       if (HasFailure()) return;
908       const StatusCode status = DecodeFrame(encoded_frame);
909       encoded_frame->state = {};
910       encoded_frame->frame = nullptr;
911       TemporalUnit& temporal_unit = *encoded_frame->temporal_unit;
912       std::lock_guard<std::mutex> lock(mutex_);
913       if (failure_status_ != kStatusOk) return;
914       // temporal_unit's status defaults to kStatusOk. So we need to set it only
915       // on error. If |failure_status_| is not kStatusOk at this point, it means
916       // that there has already been a failure. So we don't care about this
917       // subsequent failure.  We will simply return the error code of the first
918       // failure.
919       if (status != kStatusOk) {
920         temporal_unit.status = status;
921         if (failure_status_ == kStatusOk) {
922           failure_status_ = status;
923         }
924       }
925       temporal_unit.decoded =
926           ++temporal_unit.decoded_count == temporal_unit.frames.size();
927       if (temporal_unit.decoded && settings_.output_all_layers &&
928           temporal_unit.output_layer_count > 1) {
929         std::sort(
930             temporal_unit.output_layers,
931             temporal_unit.output_layers + temporal_unit.output_layer_count);
932       }
933       if (temporal_unit.decoded || failure_status_ != kStatusOk) {
934         decoded_condvar_.notify_one();
935       }
936     });
937   }
938   return kStatusOk;
939 }
940 
DecodeFrame(EncodedFrame * const encoded_frame)941 StatusCode DecoderImpl::DecodeFrame(EncodedFrame* const encoded_frame) {
942   const ObuSequenceHeader& sequence_header = encoded_frame->sequence_header;
943   const ObuFrameHeader& frame_header = encoded_frame->frame_header;
944   RefCountedBufferPtr current_frame = std::move(encoded_frame->frame);
945 
946   std::unique_ptr<FrameScratchBuffer> frame_scratch_buffer =
947       frame_scratch_buffer_pool_.Get();
948   if (frame_scratch_buffer == nullptr) {
949     LIBGAV1_DLOG(ERROR, "Error when getting FrameScratchBuffer.");
950     return kStatusOutOfMemory;
951   }
952   // |frame_scratch_buffer| will be released when this local variable goes out
953   // of scope (i.e.) on any return path in this function.
954   FrameScratchBufferReleaser frame_scratch_buffer_releaser(
955       &frame_scratch_buffer_pool_, &frame_scratch_buffer);
956 
957   StatusCode status;
958   if (!frame_header.show_existing_frame) {
959     if (encoded_frame->tile_buffers.empty()) {
960       // This means that the last call to ParseOneFrame() did not actually
961       // have any tile groups. This could happen in rare cases (for example,
962       // if there is a Metadata OBU after the TileGroup OBU). We currently do
963       // not have a reason to handle those cases, so we simply continue.
964       return kStatusOk;
965     }
966     status = DecodeTiles(sequence_header, frame_header,
967                          encoded_frame->tile_buffers, encoded_frame->state,
968                          frame_scratch_buffer.get(), current_frame.get());
969     if (status != kStatusOk) {
970       return status;
971     }
972   } else {
973     if (!current_frame->WaitUntilDecoded()) {
974       return kStatusUnknownError;
975     }
976   }
977   if (!frame_header.show_frame && !frame_header.show_existing_frame) {
978     // This frame is not displayable. Not an error.
979     return kStatusOk;
980   }
981   RefCountedBufferPtr film_grain_frame;
982   status = ApplyFilmGrain(
983       sequence_header, frame_header, current_frame, &film_grain_frame,
984       frame_scratch_buffer->threading_strategy.thread_pool());
985   if (status != kStatusOk) {
986     return status;
987   }
988 
989   TemporalUnit& temporal_unit = *encoded_frame->temporal_unit;
990   std::lock_guard<std::mutex> lock(mutex_);
991   if (temporal_unit.has_displayable_frame && !settings_.output_all_layers) {
992     assert(temporal_unit.output_frame_position >= 0);
993     // A displayable frame was already found in this temporal unit. This can
994     // happen if there are multiple spatial/temporal layers. Since
995     // |settings_.output_all_layers| is false, we will output only the last
996     // displayable frame.
997     if (temporal_unit.output_frame_position >
998         encoded_frame->position_in_temporal_unit) {
999       return kStatusOk;
1000     }
1001     // Replace any output frame that we may have seen before with the current
1002     // frame.
1003     assert(temporal_unit.output_layer_count == 1);
1004     --temporal_unit.output_layer_count;
1005   }
1006   temporal_unit.has_displayable_frame = true;
1007   temporal_unit.output_layers[temporal_unit.output_layer_count].frame =
1008       std::move(film_grain_frame);
1009   temporal_unit.output_layers[temporal_unit.output_layer_count]
1010       .position_in_temporal_unit = encoded_frame->position_in_temporal_unit;
1011   ++temporal_unit.output_layer_count;
1012   temporal_unit.output_frame_position =
1013       encoded_frame->position_in_temporal_unit;
1014   return kStatusOk;
1015 }
1016 
DecodeTemporalUnit(const TemporalUnit & temporal_unit,const DecoderBuffer ** out_ptr)1017 StatusCode DecoderImpl::DecodeTemporalUnit(const TemporalUnit& temporal_unit,
1018                                            const DecoderBuffer** out_ptr) {
1019   std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
1020       temporal_unit.data, temporal_unit.size, settings_.operating_point,
1021       &buffer_pool_, &state_));
1022   if (obu == nullptr) {
1023     LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
1024     return kStatusOutOfMemory;
1025   }
1026   if (has_sequence_header_) {
1027     obu->set_sequence_header(sequence_header_);
1028   }
1029   StatusCode status;
1030   std::unique_ptr<FrameScratchBuffer> frame_scratch_buffer =
1031       frame_scratch_buffer_pool_.Get();
1032   if (frame_scratch_buffer == nullptr) {
1033     LIBGAV1_DLOG(ERROR, "Error when getting FrameScratchBuffer.");
1034     return kStatusOutOfMemory;
1035   }
1036   // |frame_scratch_buffer| will be released when this local variable goes out
1037   // of scope (i.e.) on any return path in this function.
1038   FrameScratchBufferReleaser frame_scratch_buffer_releaser(
1039       &frame_scratch_buffer_pool_, &frame_scratch_buffer);
1040 
1041   while (obu->HasData()) {
1042     RefCountedBufferPtr current_frame;
1043     status = obu->ParseOneFrame(&current_frame);
1044     if (status != kStatusOk) {
1045       LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
1046       return status;
1047     }
1048     if (!MaybeInitializeQuantizerMatrix(obu->frame_header())) {
1049       LIBGAV1_DLOG(ERROR, "InitializeQuantizerMatrix() failed.");
1050       return kStatusOutOfMemory;
1051     }
1052     if (!MaybeInitializeWedgeMasks(obu->frame_header().frame_type)) {
1053       LIBGAV1_DLOG(ERROR, "InitializeWedgeMasks() failed.");
1054       return kStatusOutOfMemory;
1055     }
1056     if (IsNewSequenceHeader(*obu)) {
1057       const ObuSequenceHeader& sequence_header = obu->sequence_header();
1058       const Libgav1ImageFormat image_format =
1059           ComposeImageFormat(sequence_header.color_config.is_monochrome,
1060                              sequence_header.color_config.subsampling_x,
1061                              sequence_header.color_config.subsampling_y);
1062       const int max_bottom_border = GetBottomBorderPixels(
1063           /*do_cdef=*/true, /*do_restoration=*/true,
1064           /*do_superres=*/true, sequence_header.color_config.subsampling_y);
1065       if (!buffer_pool_.OnFrameBufferSizeChanged(
1066               sequence_header.color_config.bitdepth, image_format,
1067               sequence_header.max_frame_width, sequence_header.max_frame_height,
1068               kBorderPixels, kBorderPixels, kBorderPixels, max_bottom_border)) {
1069         LIBGAV1_DLOG(ERROR, "buffer_pool_.OnFrameBufferSizeChanged failed.");
1070         return kStatusUnknownError;
1071       }
1072     }
1073     if (!obu->frame_header().show_existing_frame) {
1074       if (obu->tile_buffers().empty()) {
1075         // This means that the last call to ParseOneFrame() did not actually
1076         // have any tile groups. This could happen in rare cases (for example,
1077         // if there is a Metadata OBU after the TileGroup OBU). We currently do
1078         // not have a reason to handle those cases, so we simply continue.
1079         continue;
1080       }
1081       status = DecodeTiles(obu->sequence_header(), obu->frame_header(),
1082                            obu->tile_buffers(), state_,
1083                            frame_scratch_buffer.get(), current_frame.get());
1084       if (status != kStatusOk) {
1085         return status;
1086       }
1087     }
1088     state_.UpdateReferenceFrames(current_frame,
1089                                  obu->frame_header().refresh_frame_flags);
1090     if (obu->frame_header().show_frame ||
1091         obu->frame_header().show_existing_frame) {
1092       if (!output_frame_queue_.Empty() && !settings_.output_all_layers) {
1093         // There is more than one displayable frame in the current operating
1094         // point and |settings_.output_all_layers| is false. In this case, we
1095         // simply return the last displayable frame as the output frame and
1096         // ignore the rest.
1097         assert(output_frame_queue_.Size() == 1);
1098         output_frame_queue_.Pop();
1099       }
1100       RefCountedBufferPtr film_grain_frame;
1101       status = ApplyFilmGrain(
1102           obu->sequence_header(), obu->frame_header(), current_frame,
1103           &film_grain_frame,
1104           frame_scratch_buffer->threading_strategy.film_grain_thread_pool());
1105       if (status != kStatusOk) return status;
1106       output_frame_queue_.Push(std::move(film_grain_frame));
1107     }
1108   }
1109   if (output_frame_queue_.Empty()) {
1110     // No displayable frame in the temporal unit. Not an error.
1111     *out_ptr = nullptr;
1112     return kStatusOk;
1113   }
1114   status = CopyFrameToOutputBuffer(output_frame_queue_.Front());
1115   output_frame_queue_.Pop();
1116   if (status != kStatusOk) {
1117     return status;
1118   }
1119   buffer_.user_private_data = temporal_unit.user_private_data;
1120   *out_ptr = &buffer_;
1121   return kStatusOk;
1122 }
1123 
CopyFrameToOutputBuffer(const RefCountedBufferPtr & frame)1124 StatusCode DecoderImpl::CopyFrameToOutputBuffer(
1125     const RefCountedBufferPtr& frame) {
1126   YuvBuffer* yuv_buffer = frame->buffer();
1127 
1128   buffer_.chroma_sample_position = frame->chroma_sample_position();
1129 
1130   if (yuv_buffer->is_monochrome()) {
1131     buffer_.image_format = kImageFormatMonochrome400;
1132   } else {
1133     if (yuv_buffer->subsampling_x() == 0 && yuv_buffer->subsampling_y() == 0) {
1134       buffer_.image_format = kImageFormatYuv444;
1135     } else if (yuv_buffer->subsampling_x() == 1 &&
1136                yuv_buffer->subsampling_y() == 0) {
1137       buffer_.image_format = kImageFormatYuv422;
1138     } else if (yuv_buffer->subsampling_x() == 1 &&
1139                yuv_buffer->subsampling_y() == 1) {
1140       buffer_.image_format = kImageFormatYuv420;
1141     } else {
1142       LIBGAV1_DLOG(ERROR,
1143                    "Invalid chroma subsampling values: cannot determine buffer "
1144                    "image format.");
1145       return kStatusInvalidArgument;
1146     }
1147   }
1148   buffer_.color_range = sequence_header_.color_config.color_range;
1149   buffer_.color_primary = sequence_header_.color_config.color_primary;
1150   buffer_.transfer_characteristics =
1151       sequence_header_.color_config.transfer_characteristics;
1152   buffer_.matrix_coefficients =
1153       sequence_header_.color_config.matrix_coefficients;
1154 
1155   buffer_.bitdepth = yuv_buffer->bitdepth();
1156   const int num_planes =
1157       yuv_buffer->is_monochrome() ? kMaxPlanesMonochrome : kMaxPlanes;
1158   int plane = kPlaneY;
1159   for (; plane < num_planes; ++plane) {
1160     buffer_.stride[plane] = yuv_buffer->stride(plane);
1161     buffer_.plane[plane] = yuv_buffer->data(plane);
1162     buffer_.displayed_width[plane] = yuv_buffer->width(plane);
1163     buffer_.displayed_height[plane] = yuv_buffer->height(plane);
1164   }
1165   for (; plane < kMaxPlanes; ++plane) {
1166     buffer_.stride[plane] = 0;
1167     buffer_.plane[plane] = nullptr;
1168     buffer_.displayed_width[plane] = 0;
1169     buffer_.displayed_height[plane] = 0;
1170   }
1171   buffer_.spatial_id = frame->spatial_id();
1172   buffer_.temporal_id = frame->temporal_id();
1173   buffer_.buffer_private_data = frame->buffer_private_data();
1174   if (frame->hdr_cll_set()) {
1175     buffer_.has_hdr_cll = 1;
1176     buffer_.hdr_cll = frame->hdr_cll();
1177   } else {
1178     buffer_.has_hdr_cll = 0;
1179   }
1180   if (frame->hdr_mdcv_set()) {
1181     buffer_.has_hdr_mdcv = 1;
1182     buffer_.hdr_mdcv = frame->hdr_mdcv();
1183   } else {
1184     buffer_.has_hdr_mdcv = 0;
1185   }
1186   if (frame->itut_t35_set()) {
1187     buffer_.has_itut_t35 = 1;
1188     buffer_.itut_t35 = frame->itut_t35();
1189   } else {
1190     buffer_.has_itut_t35 = 0;
1191   }
1192   output_frame_ = frame;
1193   return kStatusOk;
1194 }
1195 
ReleaseOutputFrame()1196 void DecoderImpl::ReleaseOutputFrame() {
1197   for (auto& plane : buffer_.plane) {
1198     plane = nullptr;
1199   }
1200   output_frame_ = nullptr;
1201 }
1202 
DecodeTiles(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const Vector<TileBuffer> & tile_buffers,const DecoderState & state,FrameScratchBuffer * const frame_scratch_buffer,RefCountedBuffer * const current_frame)1203 StatusCode DecoderImpl::DecodeTiles(
1204     const ObuSequenceHeader& sequence_header,
1205     const ObuFrameHeader& frame_header, const Vector<TileBuffer>& tile_buffers,
1206     const DecoderState& state, FrameScratchBuffer* const frame_scratch_buffer,
1207     RefCountedBuffer* const current_frame) {
1208   frame_scratch_buffer->tile_scratch_buffer_pool.Reset(
1209       sequence_header.color_config.bitdepth);
1210   if (!frame_scratch_buffer->loop_restoration_info.Reset(
1211           &frame_header.loop_restoration, frame_header.upscaled_width,
1212           frame_header.height, sequence_header.color_config.subsampling_x,
1213           sequence_header.color_config.subsampling_y,
1214           sequence_header.color_config.is_monochrome)) {
1215     LIBGAV1_DLOG(ERROR,
1216                  "Failed to allocate memory for loop restoration info units.");
1217     return kStatusOutOfMemory;
1218   }
1219   ThreadingStrategy& threading_strategy =
1220       frame_scratch_buffer->threading_strategy;
1221   if (!is_frame_parallel_ &&
1222       !threading_strategy.Reset(frame_header, settings_.threads)) {
1223     return kStatusOutOfMemory;
1224   }
1225   const bool do_cdef =
1226       PostFilter::DoCdef(frame_header, settings_.post_filter_mask);
1227   const int num_planes = sequence_header.color_config.is_monochrome
1228                              ? kMaxPlanesMonochrome
1229                              : kMaxPlanes;
1230   const bool do_restoration = PostFilter::DoRestoration(
1231       frame_header.loop_restoration, settings_.post_filter_mask, num_planes);
1232   const bool do_superres =
1233       PostFilter::DoSuperRes(frame_header, settings_.post_filter_mask);
1234   // Use kBorderPixels for the left, right, and top borders. Only the bottom
1235   // border may need to be bigger. Cdef border is needed only if we apply Cdef
1236   // without multithreading.
1237   const int bottom_border = GetBottomBorderPixels(
1238       do_cdef && threading_strategy.post_filter_thread_pool() == nullptr,
1239       do_restoration, do_superres, sequence_header.color_config.subsampling_y);
1240   current_frame->set_chroma_sample_position(
1241       sequence_header.color_config.chroma_sample_position);
1242   if (!current_frame->Realloc(sequence_header.color_config.bitdepth,
1243                               sequence_header.color_config.is_monochrome,
1244                               frame_header.upscaled_width, frame_header.height,
1245                               sequence_header.color_config.subsampling_x,
1246                               sequence_header.color_config.subsampling_y,
1247                               /*left_border=*/kBorderPixels,
1248                               /*right_border=*/kBorderPixels,
1249                               /*top_border=*/kBorderPixels, bottom_border)) {
1250     LIBGAV1_DLOG(ERROR, "Failed to allocate memory for the decoder buffer.");
1251     return kStatusOutOfMemory;
1252   }
1253   if (frame_header.cdef.bits > 0) {
1254     if (!frame_scratch_buffer->cdef_index.Reset(
1255             DivideBy16(frame_header.rows4x4 + kMaxBlockHeight4x4),
1256             DivideBy16(frame_header.columns4x4 + kMaxBlockWidth4x4),
1257             /*zero_initialize=*/false)) {
1258       LIBGAV1_DLOG(ERROR, "Failed to allocate memory for cdef index.");
1259       return kStatusOutOfMemory;
1260     }
1261   }
1262   if (do_cdef) {
1263     if (!frame_scratch_buffer->cdef_skip.Reset(
1264             DivideBy2(frame_header.rows4x4 + kMaxBlockHeight4x4),
1265             DivideBy16(frame_header.columns4x4 + kMaxBlockWidth4x4),
1266             /*zero_initialize=*/true)) {
1267       LIBGAV1_DLOG(ERROR, "Failed to allocate memory for cdef skip.");
1268       return kStatusOutOfMemory;
1269     }
1270   }
1271   if (!frame_scratch_buffer->inter_transform_sizes.Reset(
1272           frame_header.rows4x4 + kMaxBlockHeight4x4,
1273           frame_header.columns4x4 + kMaxBlockWidth4x4,
1274           /*zero_initialize=*/false)) {
1275     LIBGAV1_DLOG(ERROR, "Failed to allocate memory for inter_transform_sizes.");
1276     return kStatusOutOfMemory;
1277   }
1278   if (frame_header.use_ref_frame_mvs) {
1279     if (!frame_scratch_buffer->motion_field.mv.Reset(
1280             DivideBy2(frame_header.rows4x4), DivideBy2(frame_header.columns4x4),
1281             /*zero_initialize=*/false) ||
1282         !frame_scratch_buffer->motion_field.reference_offset.Reset(
1283             DivideBy2(frame_header.rows4x4), DivideBy2(frame_header.columns4x4),
1284             /*zero_initialize=*/false)) {
1285       LIBGAV1_DLOG(ERROR,
1286                    "Failed to allocate memory for temporal motion vectors.");
1287       return kStatusOutOfMemory;
1288     }
1289 
1290     // For each motion vector, only mv[0] needs to be initialized to
1291     // kInvalidMvValue, mv[1] is not necessary to be initialized and can be
1292     // set to an arbitrary value. For simplicity, mv[1] is set to 0.
1293     // The following memory initialization of contiguous memory is very fast. It
1294     // is not recommended to make the initialization multi-threaded, unless the
1295     // memory which needs to be initialized in each thread is still contiguous.
1296     MotionVector invalid_mv;
1297     invalid_mv.mv[0] = kInvalidMvValue;
1298     invalid_mv.mv[1] = 0;
1299     MotionVector* const motion_field_mv =
1300         &frame_scratch_buffer->motion_field.mv[0][0];
1301     std::fill(motion_field_mv,
1302               motion_field_mv + frame_scratch_buffer->motion_field.mv.size(),
1303               invalid_mv);
1304   }
1305 
1306   // The addition of kMaxBlockHeight4x4 and kMaxBlockWidth4x4 is necessary so
1307   // that the block parameters cache can be filled in for the last row/column
1308   // without having to check for boundary conditions.
1309   if (!frame_scratch_buffer->block_parameters_holder.Reset(
1310           frame_header.rows4x4 + kMaxBlockHeight4x4,
1311           frame_header.columns4x4 + kMaxBlockWidth4x4)) {
1312     return kStatusOutOfMemory;
1313   }
1314   const dsp::Dsp* const dsp =
1315       dsp::GetDspTable(sequence_header.color_config.bitdepth);
1316   if (dsp == nullptr) {
1317     LIBGAV1_DLOG(ERROR, "Failed to get the dsp table for bitdepth %d.",
1318                  sequence_header.color_config.bitdepth);
1319     return kStatusInternalError;
1320   }
1321 
1322   const int tile_count = frame_header.tile_info.tile_count;
1323   assert(tile_count >= 1);
1324   Vector<std::unique_ptr<Tile>> tiles;
1325   if (!tiles.reserve(tile_count)) {
1326     LIBGAV1_DLOG(ERROR, "tiles.reserve(%d) failed.\n", tile_count);
1327     return kStatusOutOfMemory;
1328   }
1329 
1330   if (threading_strategy.row_thread_pool(0) != nullptr || is_frame_parallel_) {
1331     if (frame_scratch_buffer->residual_buffer_pool == nullptr) {
1332       frame_scratch_buffer->residual_buffer_pool.reset(
1333           new (std::nothrow) ResidualBufferPool(
1334               sequence_header.use_128x128_superblock,
1335               sequence_header.color_config.subsampling_x,
1336               sequence_header.color_config.subsampling_y,
1337               sequence_header.color_config.bitdepth == 8 ? sizeof(int16_t)
1338                                                          : sizeof(int32_t)));
1339       if (frame_scratch_buffer->residual_buffer_pool == nullptr) {
1340         LIBGAV1_DLOG(ERROR, "Failed to allocate residual buffer.\n");
1341         return kStatusOutOfMemory;
1342       }
1343     } else {
1344       frame_scratch_buffer->residual_buffer_pool->Reset(
1345           sequence_header.use_128x128_superblock,
1346           sequence_header.color_config.subsampling_x,
1347           sequence_header.color_config.subsampling_y,
1348           sequence_header.color_config.bitdepth == 8 ? sizeof(int16_t)
1349                                                      : sizeof(int32_t));
1350     }
1351   }
1352 
1353   if (threading_strategy.post_filter_thread_pool() != nullptr && do_cdef) {
1354     // We need to store 4 rows per 64x64 unit.
1355     const int num_units =
1356         MultiplyBy4(RightShiftWithCeiling(frame_header.rows4x4, 4));
1357     // subsampling_y is set to zero irrespective of the actual frame's
1358     // subsampling since we need to store exactly |num_units| rows of the loop
1359     // restoration border pixels.
1360     if (!frame_scratch_buffer->cdef_border.Realloc(
1361             sequence_header.color_config.bitdepth,
1362             sequence_header.color_config.is_monochrome,
1363             MultiplyBy4(frame_header.columns4x4), num_units,
1364             sequence_header.color_config.subsampling_x,
1365             /*subsampling_y=*/0, kBorderPixels, kBorderPixels, kBorderPixels,
1366             kBorderPixels, nullptr, nullptr, nullptr)) {
1367       return kStatusOutOfMemory;
1368     }
1369   }
1370 
1371   if (do_restoration &&
1372       (do_cdef || threading_strategy.post_filter_thread_pool() != nullptr)) {
1373     // We need to store 4 rows per 64x64 unit.
1374     const int num_units =
1375         MultiplyBy4(RightShiftWithCeiling(frame_header.rows4x4, 4));
1376     // subsampling_y is set to zero irrespective of the actual frame's
1377     // subsampling since we need to store exactly |num_units| rows of the loop
1378     // restoration border pixels.
1379     if (!frame_scratch_buffer->loop_restoration_border.Realloc(
1380             sequence_header.color_config.bitdepth,
1381             sequence_header.color_config.is_monochrome,
1382             frame_header.upscaled_width, num_units,
1383             sequence_header.color_config.subsampling_x,
1384             /*subsampling_y=*/0, kBorderPixels, kBorderPixels, kBorderPixels,
1385             kBorderPixels, nullptr, nullptr, nullptr)) {
1386       return kStatusOutOfMemory;
1387     }
1388   }
1389 
1390   if (do_superres) {
1391     const int pixel_size = sequence_header.color_config.bitdepth == 8
1392                                ? sizeof(uint8_t)
1393                                : sizeof(uint16_t);
1394     const int coefficients_size = kSuperResFilterTaps *
1395                                   Align(frame_header.upscaled_width, 16) *
1396                                   pixel_size;
1397     if (!frame_scratch_buffer->superres_coefficients[kPlaneTypeY].Resize(
1398             coefficients_size)) {
1399       LIBGAV1_DLOG(ERROR,
1400                    "Failed to Resize superres_coefficients[kPlaneTypeY].");
1401       return kStatusOutOfMemory;
1402     }
1403 #if LIBGAV1_MSAN
1404     // Quiet SuperRes_NEON() msan warnings.
1405     memset(frame_scratch_buffer->superres_coefficients[kPlaneTypeY].get(), 0,
1406            coefficients_size);
1407 #endif
1408     const int uv_coefficients_size =
1409         kSuperResFilterTaps *
1410         Align(SubsampledValue(frame_header.upscaled_width, 1), 16) * pixel_size;
1411     if (!sequence_header.color_config.is_monochrome &&
1412         sequence_header.color_config.subsampling_x != 0 &&
1413         !frame_scratch_buffer->superres_coefficients[kPlaneTypeUV].Resize(
1414             uv_coefficients_size)) {
1415       LIBGAV1_DLOG(ERROR,
1416                    "Failed to Resize superres_coefficients[kPlaneTypeUV].");
1417       return kStatusOutOfMemory;
1418     }
1419 #if LIBGAV1_MSAN
1420     if (!sequence_header.color_config.is_monochrome &&
1421         sequence_header.color_config.subsampling_x != 0) {
1422       // Quiet SuperRes_NEON() msan warnings.
1423       memset(frame_scratch_buffer->superres_coefficients[kPlaneTypeUV].get(), 0,
1424              uv_coefficients_size);
1425     }
1426 #endif
1427   }
1428 
1429   if (do_superres && threading_strategy.post_filter_thread_pool() != nullptr) {
1430     const int num_threads =
1431         threading_strategy.post_filter_thread_pool()->num_threads() + 1;
1432     // subsampling_y is set to zero irrespective of the actual frame's
1433     // subsampling since we need to store exactly |num_threads| rows of the
1434     // down-scaled pixels.
1435     // Left and right borders are for line extension. They are doubled for the Y
1436     // plane to make sure the U and V planes have enough space after possible
1437     // subsampling.
1438     if (!frame_scratch_buffer->superres_line_buffer.Realloc(
1439             sequence_header.color_config.bitdepth,
1440             sequence_header.color_config.is_monochrome,
1441             MultiplyBy4(frame_header.columns4x4), num_threads,
1442             sequence_header.color_config.subsampling_x,
1443             /*subsampling_y=*/0, 2 * kSuperResHorizontalBorder,
1444             2 * (kSuperResHorizontalBorder + kSuperResHorizontalPadding), 0, 0,
1445             nullptr, nullptr, nullptr)) {
1446       LIBGAV1_DLOG(ERROR, "Failed to resize superres line buffer.\n");
1447       return kStatusOutOfMemory;
1448     }
1449   }
1450 
1451   if (is_frame_parallel_ && !IsIntraFrame(frame_header.frame_type)) {
1452     // We can parse the current frame if all the reference frames have been
1453     // parsed.
1454     for (const int index : frame_header.reference_frame_index) {
1455       if (!state.reference_frame[index]->WaitUntilParsed()) {
1456         return kStatusUnknownError;
1457       }
1458     }
1459   }
1460 
1461   // If prev_segment_ids is a null pointer, it is treated as if it pointed to
1462   // a segmentation map containing all 0s.
1463   const SegmentationMap* prev_segment_ids = nullptr;
1464   if (frame_header.primary_reference_frame == kPrimaryReferenceNone) {
1465     frame_scratch_buffer->symbol_decoder_context.Initialize(
1466         frame_header.quantizer.base_index);
1467   } else {
1468     const int index =
1469         frame_header
1470             .reference_frame_index[frame_header.primary_reference_frame];
1471     assert(index != -1);
1472     const RefCountedBuffer* prev_frame = state.reference_frame[index].get();
1473     frame_scratch_buffer->symbol_decoder_context = prev_frame->FrameContext();
1474     if (frame_header.segmentation.enabled &&
1475         prev_frame->columns4x4() == frame_header.columns4x4 &&
1476         prev_frame->rows4x4() == frame_header.rows4x4) {
1477       prev_segment_ids = prev_frame->segmentation_map();
1478     }
1479   }
1480 
1481   // The Tile class must make use of a separate buffer to store the unfiltered
1482   // pixels for the intra prediction of the next superblock row. This is done
1483   // only when one of the following conditions are true:
1484   //   * is_frame_parallel_ is true.
1485   //   * settings_.threads == 1.
1486   // In the non-frame-parallel multi-threaded case, we do not run the post
1487   // filters in the decode loop. So this buffer need not be used.
1488   const bool use_intra_prediction_buffer =
1489       is_frame_parallel_ || settings_.threads == 1;
1490   if (use_intra_prediction_buffer) {
1491     if (!frame_scratch_buffer->intra_prediction_buffers.Resize(
1492             frame_header.tile_info.tile_rows)) {
1493       LIBGAV1_DLOG(ERROR, "Failed to Resize intra_prediction_buffers.");
1494       return kStatusOutOfMemory;
1495     }
1496     IntraPredictionBuffer* const intra_prediction_buffers =
1497         frame_scratch_buffer->intra_prediction_buffers.get();
1498     for (int plane = kPlaneY; plane < num_planes; ++plane) {
1499       const int subsampling =
1500           (plane == kPlaneY) ? 0 : sequence_header.color_config.subsampling_x;
1501       const size_t intra_prediction_buffer_size =
1502           ((MultiplyBy4(frame_header.columns4x4) >> subsampling) *
1503            (sequence_header.color_config.bitdepth == 8 ? sizeof(uint8_t)
1504                                                        : sizeof(uint16_t)));
1505       for (int tile_row = 0; tile_row < frame_header.tile_info.tile_rows;
1506            ++tile_row) {
1507         if (!intra_prediction_buffers[tile_row][plane].Resize(
1508                 intra_prediction_buffer_size)) {
1509           LIBGAV1_DLOG(ERROR,
1510                        "Failed to allocate intra prediction buffer for tile "
1511                        "row %d plane %d.\n",
1512                        tile_row, plane);
1513           return kStatusOutOfMemory;
1514         }
1515       }
1516     }
1517   }
1518 
1519   PostFilter post_filter(frame_header, sequence_header, frame_scratch_buffer,
1520                          current_frame->buffer(), dsp,
1521                          settings_.post_filter_mask);
1522   SymbolDecoderContext saved_symbol_decoder_context;
1523   BlockingCounterWithStatus pending_tiles(tile_count);
1524   for (int tile_number = 0; tile_number < tile_count; ++tile_number) {
1525     std::unique_ptr<Tile> tile = Tile::Create(
1526         tile_number, tile_buffers[tile_number].data,
1527         tile_buffers[tile_number].size, sequence_header, frame_header,
1528         current_frame, state, frame_scratch_buffer, wedge_masks_,
1529         quantizer_matrix_, &saved_symbol_decoder_context, prev_segment_ids,
1530         &post_filter, dsp, threading_strategy.row_thread_pool(tile_number),
1531         &pending_tiles, is_frame_parallel_, use_intra_prediction_buffer);
1532     if (tile == nullptr) {
1533       LIBGAV1_DLOG(ERROR, "Failed to create tile.");
1534       return kStatusOutOfMemory;
1535     }
1536     tiles.push_back_unchecked(std::move(tile));
1537   }
1538   assert(tiles.size() == static_cast<size_t>(tile_count));
1539   if (is_frame_parallel_) {
1540     if (frame_scratch_buffer->threading_strategy.thread_pool() == nullptr) {
1541       return DecodeTilesFrameParallel(
1542           sequence_header, frame_header, tiles, saved_symbol_decoder_context,
1543           prev_segment_ids, frame_scratch_buffer, &post_filter, current_frame);
1544     }
1545     return DecodeTilesThreadedFrameParallel(
1546         sequence_header, frame_header, tiles, saved_symbol_decoder_context,
1547         prev_segment_ids, frame_scratch_buffer, &post_filter, current_frame);
1548   }
1549   StatusCode status;
1550   if (settings_.threads == 1) {
1551     status = DecodeTilesNonFrameParallel(sequence_header, frame_header, tiles,
1552                                          frame_scratch_buffer, &post_filter);
1553   } else {
1554     status = DecodeTilesThreadedNonFrameParallel(tiles, frame_scratch_buffer,
1555                                                  &post_filter, &pending_tiles);
1556   }
1557   if (status != kStatusOk) return status;
1558   if (frame_header.enable_frame_end_update_cdf) {
1559     frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
1560   }
1561   current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
1562   SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
1563   return kStatusOk;
1564 }
1565 
ApplyFilmGrain(const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,const RefCountedBufferPtr & displayable_frame,RefCountedBufferPtr * film_grain_frame,ThreadPool * thread_pool)1566 StatusCode DecoderImpl::ApplyFilmGrain(
1567     const ObuSequenceHeader& sequence_header,
1568     const ObuFrameHeader& frame_header,
1569     const RefCountedBufferPtr& displayable_frame,
1570     RefCountedBufferPtr* film_grain_frame, ThreadPool* thread_pool) {
1571   if (!sequence_header.film_grain_params_present ||
1572       !displayable_frame->film_grain_params().apply_grain ||
1573       (settings_.post_filter_mask & 0x10) == 0) {
1574     *film_grain_frame = displayable_frame;
1575     return kStatusOk;
1576   }
1577   if (!frame_header.show_existing_frame &&
1578       frame_header.refresh_frame_flags == 0) {
1579     // If show_existing_frame is true, then the current frame is a previously
1580     // saved reference frame. If refresh_frame_flags is nonzero, then the
1581     // state_.UpdateReferenceFrames() call above has saved the current frame as
1582     // a reference frame. Therefore, if both of these conditions are false, then
1583     // the current frame is not saved as a reference frame. displayable_frame
1584     // should hold the only reference to the current frame.
1585     assert(displayable_frame.use_count() == 1);
1586     // Add film grain noise in place.
1587     *film_grain_frame = displayable_frame;
1588   } else {
1589     *film_grain_frame = buffer_pool_.GetFreeBuffer();
1590     if (*film_grain_frame == nullptr) {
1591       LIBGAV1_DLOG(ERROR,
1592                    "Could not get film_grain_frame from the buffer pool.");
1593       return kStatusResourceExhausted;
1594     }
1595     if (!(*film_grain_frame)
1596              ->Realloc(displayable_frame->buffer()->bitdepth(),
1597                        displayable_frame->buffer()->is_monochrome(),
1598                        displayable_frame->upscaled_width(),
1599                        displayable_frame->frame_height(),
1600                        displayable_frame->buffer()->subsampling_x(),
1601                        displayable_frame->buffer()->subsampling_y(),
1602                        kBorderPixelsFilmGrain, kBorderPixelsFilmGrain,
1603                        kBorderPixelsFilmGrain, kBorderPixelsFilmGrain)) {
1604       LIBGAV1_DLOG(ERROR, "film_grain_frame->Realloc() failed.");
1605       return kStatusOutOfMemory;
1606     }
1607     (*film_grain_frame)
1608         ->set_chroma_sample_position(
1609             displayable_frame->chroma_sample_position());
1610     (*film_grain_frame)->set_spatial_id(displayable_frame->spatial_id());
1611     (*film_grain_frame)->set_temporal_id(displayable_frame->temporal_id());
1612   }
1613   const bool color_matrix_is_identity =
1614       sequence_header.color_config.matrix_coefficients ==
1615       kMatrixCoefficientsIdentity;
1616   assert(displayable_frame->buffer()->stride(kPlaneU) ==
1617          displayable_frame->buffer()->stride(kPlaneV));
1618   const int input_stride_uv = displayable_frame->buffer()->stride(kPlaneU);
1619   assert((*film_grain_frame)->buffer()->stride(kPlaneU) ==
1620          (*film_grain_frame)->buffer()->stride(kPlaneV));
1621   const int output_stride_uv = (*film_grain_frame)->buffer()->stride(kPlaneU);
1622 #if LIBGAV1_MAX_BITDEPTH >= 10
1623   if (displayable_frame->buffer()->bitdepth() == 10) {
1624     FilmGrain<10> film_grain(displayable_frame->film_grain_params(),
1625                              displayable_frame->buffer()->is_monochrome(),
1626                              color_matrix_is_identity,
1627                              displayable_frame->buffer()->subsampling_x(),
1628                              displayable_frame->buffer()->subsampling_y(),
1629                              displayable_frame->upscaled_width(),
1630                              displayable_frame->frame_height(), thread_pool);
1631     if (!film_grain.AddNoise(
1632             displayable_frame->buffer()->data(kPlaneY),
1633             displayable_frame->buffer()->stride(kPlaneY),
1634             displayable_frame->buffer()->data(kPlaneU),
1635             displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1636             (*film_grain_frame)->buffer()->data(kPlaneY),
1637             (*film_grain_frame)->buffer()->stride(kPlaneY),
1638             (*film_grain_frame)->buffer()->data(kPlaneU),
1639             (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1640       LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1641       return kStatusOutOfMemory;
1642     }
1643     return kStatusOk;
1644   }
1645 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
1646 #if LIBGAV1_MAX_BITDEPTH == 12
1647   if (displayable_frame->buffer()->bitdepth() == 12) {
1648     FilmGrain<12> film_grain(displayable_frame->film_grain_params(),
1649                              displayable_frame->buffer()->is_monochrome(),
1650                              color_matrix_is_identity,
1651                              displayable_frame->buffer()->subsampling_x(),
1652                              displayable_frame->buffer()->subsampling_y(),
1653                              displayable_frame->upscaled_width(),
1654                              displayable_frame->frame_height(), thread_pool);
1655     if (!film_grain.AddNoise(
1656             displayable_frame->buffer()->data(kPlaneY),
1657             displayable_frame->buffer()->stride(kPlaneY),
1658             displayable_frame->buffer()->data(kPlaneU),
1659             displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1660             (*film_grain_frame)->buffer()->data(kPlaneY),
1661             (*film_grain_frame)->buffer()->stride(kPlaneY),
1662             (*film_grain_frame)->buffer()->data(kPlaneU),
1663             (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1664       LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1665       return kStatusOutOfMemory;
1666     }
1667     return kStatusOk;
1668   }
1669 #endif  // LIBGAV1_MAX_BITDEPTH == 12
1670   FilmGrain<8> film_grain(displayable_frame->film_grain_params(),
1671                           displayable_frame->buffer()->is_monochrome(),
1672                           color_matrix_is_identity,
1673                           displayable_frame->buffer()->subsampling_x(),
1674                           displayable_frame->buffer()->subsampling_y(),
1675                           displayable_frame->upscaled_width(),
1676                           displayable_frame->frame_height(), thread_pool);
1677   if (!film_grain.AddNoise(
1678           displayable_frame->buffer()->data(kPlaneY),
1679           displayable_frame->buffer()->stride(kPlaneY),
1680           displayable_frame->buffer()->data(kPlaneU),
1681           displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
1682           (*film_grain_frame)->buffer()->data(kPlaneY),
1683           (*film_grain_frame)->buffer()->stride(kPlaneY),
1684           (*film_grain_frame)->buffer()->data(kPlaneU),
1685           (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
1686     LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
1687     return kStatusOutOfMemory;
1688   }
1689   return kStatusOk;
1690 }
1691 
IsNewSequenceHeader(const ObuParser & obu)1692 bool DecoderImpl::IsNewSequenceHeader(const ObuParser& obu) {
1693   if (std::find_if(obu.obu_headers().begin(), obu.obu_headers().end(),
1694                    [](const ObuHeader& obu_header) {
1695                      return obu_header.type == kObuSequenceHeader;
1696                    }) == obu.obu_headers().end()) {
1697     return false;
1698   }
1699   const ObuSequenceHeader sequence_header = obu.sequence_header();
1700   const bool sequence_header_changed =
1701       !has_sequence_header_ ||
1702       sequence_header_.color_config.bitdepth !=
1703           sequence_header.color_config.bitdepth ||
1704       sequence_header_.color_config.is_monochrome !=
1705           sequence_header.color_config.is_monochrome ||
1706       sequence_header_.color_config.subsampling_x !=
1707           sequence_header.color_config.subsampling_x ||
1708       sequence_header_.color_config.subsampling_y !=
1709           sequence_header.color_config.subsampling_y ||
1710       sequence_header_.max_frame_width != sequence_header.max_frame_width ||
1711       sequence_header_.max_frame_height != sequence_header.max_frame_height;
1712   sequence_header_ = sequence_header;
1713   has_sequence_header_ = true;
1714   return sequence_header_changed;
1715 }
1716 
MaybeInitializeWedgeMasks(FrameType frame_type)1717 bool DecoderImpl::MaybeInitializeWedgeMasks(FrameType frame_type) {
1718   if (IsIntraFrame(frame_type) || wedge_masks_initialized_) {
1719     return true;
1720   }
1721   if (!GenerateWedgeMask(&wedge_masks_)) {
1722     return false;
1723   }
1724   wedge_masks_initialized_ = true;
1725   return true;
1726 }
1727 
MaybeInitializeQuantizerMatrix(const ObuFrameHeader & frame_header)1728 bool DecoderImpl::MaybeInitializeQuantizerMatrix(
1729     const ObuFrameHeader& frame_header) {
1730   if (quantizer_matrix_initialized_ || !frame_header.quantizer.use_matrix) {
1731     return true;
1732   }
1733   if (!InitializeQuantizerMatrix(&quantizer_matrix_)) {
1734     return false;
1735   }
1736   quantizer_matrix_initialized_ = true;
1737   return true;
1738 }
1739 
1740 }  // namespace libgav1
1741