1 /* 2 * Copyright 2019 The libgav1 Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBGAV1_SRC_TILE_H_ 18 #define LIBGAV1_SRC_TILE_H_ 19 20 #include <algorithm> 21 #include <array> 22 #include <cassert> 23 #include <condition_variable> // NOLINT (unapproved c++11 header) 24 #include <cstddef> 25 #include <cstdint> 26 #include <memory> 27 #include <mutex> // NOLINT (unapproved c++11 header) 28 #include <vector> 29 30 #include "src/buffer_pool.h" 31 #include "src/decoder_state.h" 32 #include "src/dsp/common.h" 33 #include "src/dsp/constants.h" 34 #include "src/dsp/dsp.h" 35 #include "src/frame_scratch_buffer.h" 36 #include "src/loop_restoration_info.h" 37 #include "src/obu_parser.h" 38 #include "src/post_filter.h" 39 #include "src/quantizer.h" 40 #include "src/residual_buffer_pool.h" 41 #include "src/symbol_decoder_context.h" 42 #include "src/tile_scratch_buffer.h" 43 #include "src/utils/array_2d.h" 44 #include "src/utils/block_parameters_holder.h" 45 #include "src/utils/blocking_counter.h" 46 #include "src/utils/common.h" 47 #include "src/utils/compiler_attributes.h" 48 #include "src/utils/constants.h" 49 #include "src/utils/entropy_decoder.h" 50 #include "src/utils/memory.h" 51 #include "src/utils/parameter_tree.h" 52 #include "src/utils/segmentation_map.h" 53 #include "src/utils/threadpool.h" 54 #include "src/utils/types.h" 55 #include "src/yuv_buffer.h" 56 57 namespace libgav1 { 58 59 // Indicates what the 
ProcessSuperBlock() and TransformBlock() functions should 60 // do. "Parse" refers to consuming the bitstream, reading the transform 61 // coefficients and performing the dequantization. "Decode" refers to computing 62 // the prediction, applying the inverse transforms and adding the residual. 63 enum ProcessingMode { 64 kProcessingModeParseOnly, 65 kProcessingModeDecodeOnly, 66 kProcessingModeParseAndDecode, 67 }; 68 69 class Tile : public Allocable { 70 public: Create(int tile_number,const uint8_t * const data,size_t size,const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,RefCountedBuffer * const current_frame,const DecoderState & state,FrameScratchBuffer * const frame_scratch_buffer,const WedgeMaskArray & wedge_masks,SymbolDecoderContext * const saved_symbol_decoder_context,const SegmentationMap * prev_segment_ids,PostFilter * const post_filter,const dsp::Dsp * const dsp,ThreadPool * const thread_pool,BlockingCounterWithStatus * const pending_tiles,bool frame_parallel,bool use_intra_prediction_buffer)71 static std::unique_ptr<Tile> Create( 72 int tile_number, const uint8_t* const data, size_t size, 73 const ObuSequenceHeader& sequence_header, 74 const ObuFrameHeader& frame_header, RefCountedBuffer* const current_frame, 75 const DecoderState& state, FrameScratchBuffer* const frame_scratch_buffer, 76 const WedgeMaskArray& wedge_masks, 77 SymbolDecoderContext* const saved_symbol_decoder_context, 78 const SegmentationMap* prev_segment_ids, PostFilter* const post_filter, 79 const dsp::Dsp* const dsp, ThreadPool* const thread_pool, 80 BlockingCounterWithStatus* const pending_tiles, bool frame_parallel, 81 bool use_intra_prediction_buffer) { 82 std::unique_ptr<Tile> tile(new (std::nothrow) Tile( 83 tile_number, data, size, sequence_header, frame_header, current_frame, 84 state, frame_scratch_buffer, wedge_masks, saved_symbol_decoder_context, 85 prev_segment_ids, post_filter, dsp, thread_pool, pending_tiles, 86 frame_parallel, 
use_intra_prediction_buffer)); 87 return (tile != nullptr && tile->Init()) ? std::move(tile) : nullptr; 88 } 89 90 // Move only. 91 Tile(Tile&& tile) noexcept; 92 Tile& operator=(Tile&& tile) noexcept; 93 Tile(const Tile&) = delete; 94 Tile& operator=(const Tile&) = delete; 95 96 struct Block; // Defined after this class. 97 98 // Parses the entire tile. 99 bool Parse(); 100 // Decodes the entire tile. |superblock_row_progress| and 101 // |superblock_row_progress_condvar| are arrays of size equal to the number of 102 // superblock rows in the frame. Increments |superblock_row_progress[i]| after 103 // each superblock row at index |i| is decoded. If the count reaches the 104 // number of tile columns, then it notifies 105 // |superblock_row_progress_condvar[i]|. 106 bool Decode(std::mutex* mutex, int* superblock_row_progress, 107 std::condition_variable* superblock_row_progress_condvar); 108 // Parses and decodes the entire tile. Depending on the configuration of this 109 // Tile, this function may do multithreaded decoding. 110 bool ParseAndDecode(); // 5.11.2. 111 // Processes all the columns of the superblock row at |row4x4| that are within 112 // this Tile. If |save_symbol_decoder_context| is true, then 113 // SaveSymbolDecoderContext() is invoked for the last superblock row. 
114 template <ProcessingMode processing_mode, bool save_symbol_decoder_context> 115 bool ProcessSuperBlockRow(int row4x4, TileScratchBuffer* scratch_buffer); 116 sequence_header()117 const ObuSequenceHeader& sequence_header() const { return sequence_header_; } frame_header()118 const ObuFrameHeader& frame_header() const { return frame_header_; } current_frame()119 const RefCountedBuffer& current_frame() const { return current_frame_; } motion_field()120 const TemporalMotionField& motion_field() const { return motion_field_; } reference_frame_sign_bias()121 const std::array<bool, kNumReferenceFrameTypes>& reference_frame_sign_bias() 122 const { 123 return reference_frame_sign_bias_; 124 } 125 IsRow4x4Inside(int row4x4)126 bool IsRow4x4Inside(int row4x4) const { 127 return row4x4 >= row4x4_start_ && row4x4 < row4x4_end_; 128 } 129 130 // 5.11.51. IsInside(int row4x4,int column4x4)131 bool IsInside(int row4x4, int column4x4) const { 132 return IsRow4x4Inside(row4x4) && column4x4 >= column4x4_start_ && 133 column4x4 < column4x4_end_; 134 } 135 IsLeftInside(int column4x4)136 bool IsLeftInside(int column4x4) const { 137 // We use "larger than" as the condition. Don't pass in the left column 138 // offset column4x4 - 1. 139 assert(column4x4 <= column4x4_end_); 140 return column4x4 > column4x4_start_; 141 } 142 IsTopInside(int row4x4)143 bool IsTopInside(int row4x4) const { 144 // We use "larger than" as the condition. Don't pass in the top row offset 145 // row4x4 - 1. 146 assert(row4x4 <= row4x4_end_); 147 return row4x4 > row4x4_start_; 148 } 149 IsTopLeftInside(int row4x4,int column4x4)150 bool IsTopLeftInside(int row4x4, int column4x4) const { 151 // We use "larger than" as the condition. Don't pass in the top row offset 152 // row4x4 - 1 or the left column offset column4x4 - 1. 
153 assert(row4x4 <= row4x4_end_); 154 assert(column4x4 <= column4x4_end_); 155 return row4x4 > row4x4_start_ && column4x4 > column4x4_start_; 156 } 157 IsBottomRightInside(int row4x4,int column4x4)158 bool IsBottomRightInside(int row4x4, int column4x4) const { 159 assert(row4x4 >= row4x4_start_); 160 assert(column4x4 >= column4x4_start_); 161 return row4x4 < row4x4_end_ && column4x4 < column4x4_end_; 162 } 163 BlockParametersAddress(int row4x4,int column4x4)164 BlockParameters** BlockParametersAddress(int row4x4, int column4x4) const { 165 return block_parameters_holder_.Address(row4x4, column4x4); 166 } 167 BlockParametersStride()168 int BlockParametersStride() const { 169 return block_parameters_holder_.columns4x4(); 170 } 171 172 // Returns true if Parameters() can be called with |row| and |column| as 173 // inputs, false otherwise. HasParameters(int row,int column)174 bool HasParameters(int row, int column) const { 175 return block_parameters_holder_.Find(row, column) != nullptr; 176 } Parameters(int row,int column)177 const BlockParameters& Parameters(int row, int column) const { 178 return *block_parameters_holder_.Find(row, column); 179 } 180 number()181 int number() const { return number_; } superblock_rows()182 int superblock_rows() const { return superblock_rows_; } superblock_columns()183 int superblock_columns() const { return superblock_columns_; } row4x4_start()184 int row4x4_start() const { return row4x4_start_; } column4x4_start()185 int column4x4_start() const { return column4x4_start_; } column4x4_end()186 int column4x4_end() const { return column4x4_end_; } 187 188 private: 189 Tile(int tile_number, const uint8_t* data, size_t size, 190 const ObuSequenceHeader& sequence_header, 191 const ObuFrameHeader& frame_header, RefCountedBuffer* current_frame, 192 const DecoderState& state, FrameScratchBuffer* frame_scratch_buffer, 193 const WedgeMaskArray& wedge_masks, 194 SymbolDecoderContext* saved_symbol_decoder_context, 195 const SegmentationMap* 
prev_segment_ids, PostFilter* post_filter, 196 const dsp::Dsp* dsp, ThreadPool* thread_pool, 197 BlockingCounterWithStatus* pending_tiles, bool frame_parallel, 198 bool use_intra_prediction_buffer); 199 200 // Stores the transform tree state when reading variable size transform trees 201 // and when applying the transform tree. When applying the transform tree, 202 // |depth| is not used. 203 struct TransformTreeNode { 204 // The default constructor is invoked by the Stack<TransformTreeNode, n> 205 // constructor. Stack<> does not use the default-constructed elements, so it 206 // is safe for the default constructor to not initialize the members. 207 TransformTreeNode() = default; 208 TransformTreeNode(int x, int y, TransformSize tx_size, int depth = -1) xTransformTreeNode209 : x(x), y(y), tx_size(tx_size), depth(depth) {} 210 211 int x; 212 int y; 213 TransformSize tx_size; 214 int depth; 215 }; 216 217 // Enum to track the processing state of a superblock. 218 enum SuperBlockState : uint8_t { 219 kSuperBlockStateNone, // Not yet parsed or decoded. 220 kSuperBlockStateParsed, // Parsed but not yet decoded. 221 kSuperBlockStateScheduled, // Scheduled for decoding. 222 kSuperBlockStateDecoded // Parsed and decoded. 223 }; 224 225 // Parameters used to facilitate multi-threading within the Tile. 226 struct ThreadingParameters { 227 std::mutex mutex; 228 // 2d array of size |superblock_rows_| by |superblock_columns_| containing 229 // the processing state of each superblock. 230 Array2D<SuperBlockState> sb_state LIBGAV1_GUARDED_BY(mutex); 231 // Variable used to indicate either parse or decode failure. 232 bool abort LIBGAV1_GUARDED_BY(mutex) = false; 233 int pending_jobs LIBGAV1_GUARDED_BY(mutex) = 0; 234 std::condition_variable pending_jobs_zero_condvar; 235 }; 236 237 // The residual pointer is used to traverse the |residual_buffer_|. It is 238 // used in two different ways. 
// If |split_parse_and_decode_| is true:
//    The pointer points to the beginning of the |residual_buffer_| when the
//    "parse" and "decode" steps begin. It is then moved forward tx_size in
//    each iteration of the "parse" and the "decode" steps. In this case, the
//    ResidualPtr variable passed into various functions starting from
//    ProcessSuperBlock is used as an in/out parameter to keep track of the
//    residual pointer.
// If |split_parse_and_decode_| is false:
//    The pointer is reset to the beginning of the |residual_buffer_| for
//    every transform block.
using ResidualPtr = uint8_t*;

// Performs member initializations that may fail. Helper function used by
// Create().
LIBGAV1_MUST_USE_RESULT bool Init();

// Saves the symbol decoder context of this tile into
// |saved_symbol_decoder_context_| if necessary.
void SaveSymbolDecoderContext();

// Entry point for multi-threaded decoding. This function performs the same
// functionality as ParseAndDecode(). The current thread does the "parse" step
// while the worker threads do the "decode" step.
bool ThreadedParseAndDecode();

// Returns whether or not the prerequisites for decoding the superblock at
// |row_index| and |column_index| are satisfied. |threading_.mutex| must be
// held when calling this function.
bool CanDecode(int row_index, int column_index) const;

// This function is run by the worker threads when multi-threaded decoding is
// enabled. Once a superblock is decoded, this function will set the
// corresponding |threading_.sb_state| entry to kSuperBlockStateDecoded. On
// failure, |threading_.abort| will be set to true. If at any point
// |threading_.abort| becomes true, this function will return as early as it
// can. If the decoding succeeds, this function will also schedule the
// decoding jobs for the superblock to the bottom-left and the superblock to
// the right of this superblock (if it is allowed).
void DecodeSuperBlock(int row_index, int column_index, int block_width4x4);

// If |use_intra_prediction_buffer_| is true, then this function copies the
// last row of the superblock row starting at |row4x4| into the
// |intra_prediction_buffer_| (which may be used by the intra prediction
// process for the next superblock row).
void PopulateIntraPredictionBuffer(int row4x4);

uint16_t* GetPartitionCdf(int row4x4, int column4x4, BlockSize block_size);
// |partition| is an output parameter.
// NOTE(review): the bool return presumably signals success; confirm in the
// definition.
bool ReadPartition(int row4x4, int column4x4, BlockSize block_size,
                   bool has_rows, bool has_columns, Partition* partition);
// Processes the Partition starting at |row4x4_start|, |column4x4_start|
// iteratively. It performs a DFS traversal over the partition tree to process
// the blocks in the right order.
bool ProcessPartition(
    int row4x4_start, int column4x4_start, ParameterTree* root,
    TileScratchBuffer* scratch_buffer,
    ResidualPtr* residual);  // Iterative implementation of 5.11.4.
bool ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
                  ParameterTree* tree, TileScratchBuffer* scratch_buffer,
                  ResidualPtr* residual);   // 5.11.5.
void ResetCdef(int row4x4, int column4x4);  // 5.11.55.

// This function is used to decode a superblock when the parsing has already
// been done for that superblock.
bool DecodeSuperBlock(ParameterTree* tree, TileScratchBuffer* scratch_buffer,
                      ResidualPtr* residual);
// Helper function used by DecodeSuperBlock(). Note that the decode_block()
// function in the spec is equivalent to ProcessBlock() in the code.
bool DecodeBlock(ParameterTree* tree, TileScratchBuffer* scratch_buffer,
                 ResidualPtr* residual);

void ClearBlockDecoded(TileScratchBuffer* scratch_buffer, int row4x4,
                       int column4x4);  // 5.11.3.
// |mode| selects whether the superblock is parsed, decoded, or both (see
// ProcessingMode).
bool ProcessSuperBlock(int row4x4, int column4x4, int block_width4x4,
                       TileScratchBuffer* scratch_buffer,
                       ProcessingMode mode);
void ResetLoopRestorationParams();
void ReadLoopRestorationCoefficients(int row4x4, int column4x4,
                                     BlockSize block_size);  // 5.11.57.

// Helper functions for DecodeBlock. The trailing numbers are the spec
// sections that each function implements.
bool ReadSegmentId(const Block& block);       // 5.11.9.
bool ReadIntraSegmentId(const Block& block);  // 5.11.8.
void ReadSkip(const Block& block);            // 5.11.11.
void ReadSkipMode(const Block& block);        // 5.11.10.
void ReadCdef(const Block& block);            // 5.11.56.
// Returns the new value. |cdf| is an array of size kDeltaSymbolCount + 1.
int ReadAndClipDelta(uint16_t* cdf, int delta_small, int scale, int min_value,
                     int max_value, int value);
void ReadQuantizerIndexDelta(const Block& block);  // 5.11.12.
void ReadLoopFilterDelta(const Block& block);      // 5.11.13.
// Populates |BlockParameters::deblock_filter_level| for the given |block|
// using |deblock_filter_levels_|.
void PopulateDeblockFilterLevel(const Block& block);
void ReadPredictionModeY(const Block& block, bool intra_y_mode);
void ReadIntraAngleInfo(const Block& block,
                        PlaneType plane_type);  // 5.11.42 and 5.11.43.
void ReadPredictionModeUV(const Block& block);
void ReadCflAlpha(const Block& block);  // 5.11.45.
// NOTE(review): |cache| looks like an output array and the return value its
// entry count; confirm in the definition.
int GetPaletteCache(const Block& block, PlaneType plane_type,
                    uint16_t* cache);
void ReadPaletteColors(const Block& block, Plane plane);
void ReadPaletteModeInfo(const Block& block);      // 5.11.46.
void ReadFilterIntraModeInfo(const Block& block);  // 5.11.24.
int ReadMotionVectorComponent(const Block& block,
                              int component);  // 5.11.32.
// Mode info parsing helpers. The trailing numbers are the spec sections that
// each function implements.
void ReadMotionVector(const Block& block, int index);        // 5.11.31.
bool DecodeIntraModeInfo(const Block& block);                // 5.11.7.
int8_t ComputePredictedSegmentId(const Block& block) const;  // 5.11.21.
bool ReadInterSegmentId(const Block& block, bool pre_skip);  // 5.11.19.
void ReadIsInter(const Block& block);                        // 5.11.20.
bool ReadIntraBlockModeInfo(const Block& block,
                            bool intra_y_mode);  // 5.11.22.
CompoundReferenceType ReadCompoundReferenceType(const Block& block);
// |type| defaults to kNumCompoundReferenceTypes.
// NOTE(review): the default presumably means "no compound reference type
// given"; confirm in the definition.
template <bool is_single, bool is_backward, int index>
uint16_t* GetReferenceCdf(const Block& block, CompoundReferenceType type =
                                                  kNumCompoundReferenceTypes);
void ReadReferenceFrames(const Block& block);  // 5.11.25.
void ReadInterPredictionModeY(const Block& block,
                              const MvContexts& mode_contexts);
void ReadRefMvIndex(const Block& block);
void ReadInterIntraMode(const Block& block, bool is_compound);  // 5.11.28.
bool IsScaled(ReferenceFrameType type) const;  // Part of 5.11.27.
void ReadMotionMode(const Block& block, bool is_compound);  // 5.11.27.
uint16_t* GetIsExplicitCompoundTypeCdf(const Block& block);
uint16_t* GetIsCompoundTypeAverageCdf(const Block& block);
void ReadCompoundType(const Block& block, bool is_compound);  // 5.11.29.
uint16_t* GetInterpolationFilterCdf(const Block& block, int direction);
void ReadInterpolationFilter(const Block& block);
bool ReadInterBlockModeInfo(const Block& block);             // 5.11.23.
bool DecodeInterModeInfo(const Block& block);                // 5.11.18.
bool DecodeModeInfo(const Block& block);                     // 5.11.6.
bool IsMvValid(const Block& block, bool is_compound) const;  // 6.10.25.
bool AssignInterMv(const Block& block, bool is_compound);    // 5.11.26.
bool AssignIntraMv(const Block& block);                      // 5.11.26.
// Transform size and transform type helpers. The trailing numbers are the
// spec sections that each function implements.
int GetTopTransformWidth(const Block& block, int row4x4, int column4x4,
                         bool ignore_skip);
int GetLeftTransformHeight(const Block& block, int row4x4, int column4x4,
                           bool ignore_skip);
TransformSize ReadFixedTransformSize(const Block& block);  // 5.11.15.
// Iterative implementation of 5.11.17.
void ReadVariableTransformTree(const Block& block, int row4x4, int column4x4,
                               TransformSize tx_size);
void DecodeTransformSize(const Block& block);  // 5.11.16.
bool ComputePrediction(const Block& block);    // 5.11.33.
// |x4| and |y4| are the column and row positions of the 4x4 block. |w4| and
// |h4| are the width and height in 4x4 units of |tx_size|.
int GetTransformAllZeroContext(const Block& block, Plane plane,
                               TransformSize tx_size, int x4, int y4, int w4,
                               int h4);
TransformSet GetTransformSet(TransformSize tx_size,
                             bool is_inter) const;  // 5.11.48.
TransformType ComputeTransformType(const Block& block, Plane plane,
                                   TransformSize tx_size, int block_x,
                                   int block_y);  // 5.11.40.
void ReadTransformType(const Block& block, int x4, int y4,
                       TransformSize tx_size);  // 5.11.47.
// The three ReadCoeffBase*() functions share one signature. |ResidualType| is
// the element type of |quantized_buffer|.
// NOTE(review): the 2D / Horizontal / Vertical variants presumably correspond
// to the transform class of the block being read; confirm in the definitions.
template <typename ResidualType>
void ReadCoeffBase2D(
    const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
    int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    ResidualType* quantized_buffer);
template <typename ResidualType>
void ReadCoeffBaseHorizontal(
    const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
    int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    ResidualType* quantized_buffer);
template <typename ResidualType>
void ReadCoeffBaseVertical(
    const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
    int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    ResidualType* quantized_buffer);
// As documented for GetTransformAllZeroContext(): |x4| and |y4| are the
// column and row positions of the 4x4 block, |w4| and |h4| the width and
// height in 4x4 units.
int GetDcSignContext(int x4, int y4, int w4, int h4, Plane plane);
void SetEntropyContexts(int x4, int y4, int w4, int h4, Plane plane,
                        uint8_t coefficient_level, int8_t dc_category);
void InterIntraPrediction(
    uint16_t* prediction_0, const uint8_t* prediction_mask,
    ptrdiff_t prediction_mask_stride,
    const PredictionParameters& prediction_parameters, int prediction_width,
    int prediction_height, int subsampling_x, int subsampling_y,
    uint8_t* dest,
    ptrdiff_t dest_stride);  // Part of section 7.11.3.1 in the spec.
void CompoundInterPrediction(
    const Block& block, const uint8_t* prediction_mask,
    ptrdiff_t prediction_mask_stride, int prediction_width,
    int prediction_height, int subsampling_x, int subsampling_y,
    int candidate_row, int candidate_column, uint8_t* dest,
    ptrdiff_t dest_stride);  // Part of section 7.11.3.1 in the spec.
// Inter prediction helpers. The trailing numbers are the spec sections that
// each function implements.
GlobalMotion* GetWarpParams(const Block& block, Plane plane,
                            int prediction_width, int prediction_height,
                            const PredictionParameters& prediction_parameters,
                            ReferenceFrameType reference_type,
                            bool* is_local_valid,
                            GlobalMotion* global_motion_params,
                            GlobalMotion* local_warp_params)
    const;  // Part of section 7.11.3.1 in the spec.
bool InterPrediction(const Block& block, Plane plane, int x, int y,
                     int prediction_width, int prediction_height,
                     int candidate_row, int candidate_column,
                     bool* is_local_valid,
                     GlobalMotion* local_warp_params);  // 7.11.3.1.
// |start_x|, |start_y|, |step_x| and |step_y| are output parameters.
void ScaleMotionVector(const MotionVector& mv, Plane plane,
                       int reference_frame_index, int x, int y, int* start_x,
                       int* start_y, int* step_x, int* step_y);  // 7.11.3.3.
// If the method returns false, the caller only uses the output parameters
// *ref_block_start_x and *ref_block_start_y. If the method returns true, the
// caller uses all three output parameters.
static bool GetReferenceBlockPosition(
    int reference_frame_index, bool is_scaled, int width, int height,
    int ref_start_x, int ref_last_x, int ref_start_y, int ref_last_y,
    int start_x, int start_y, int step_x, int step_y, int left_border,
    int right_border, int top_border, int bottom_border,
    int* ref_block_start_x, int* ref_block_start_y, int* ref_block_end_x);

template <typename Pixel>
void BuildConvolveBlock(Plane plane, int reference_frame_index,
                        bool is_scaled, int height, int ref_start_x,
                        int ref_last_x, int ref_start_y, int ref_last_y,
                        int step_y, int ref_block_start_x,
                        int ref_block_end_x, int ref_block_start_y,
                        uint8_t* block_buffer,
                        ptrdiff_t convolve_buffer_stride,
                        ptrdiff_t block_extended_width);
bool BlockInterPrediction(const Block& block, Plane plane,
                          int reference_frame_index, const MotionVector& mv,
                          int x, int y, int width, int height,
                          int candidate_row, int candidate_column,
                          uint16_t* prediction, bool is_compound,
                          bool is_inter_intra, uint8_t* dest,
                          ptrdiff_t dest_stride);  // 7.11.3.4.
bool BlockWarpProcess(const Block& block, Plane plane, int index,
                      int block_start_x, int block_start_y, int width,
                      int height, GlobalMotion* warp_params, bool is_compound,
                      bool is_inter_intra, uint8_t* dest,
                      ptrdiff_t dest_stride);  // 7.11.3.5.
bool ObmcBlockPrediction(const Block& block, const MotionVector& mv,
                         Plane plane, int reference_frame_index, int width,
                         int height, int x, int y, int candidate_row,
                         int candidate_column,
                         ObmcDirection blending_direction);
bool ObmcPrediction(const Block& block, Plane plane, int width,
                    int height);  // 7.11.3.9.
void DistanceWeightedPrediction(void* prediction_0, void* prediction_1,
                                int width, int height, int candidate_row,
                                int candidate_column, uint8_t* dest,
                                ptrdiff_t dest_stride);  // 7.11.3.15.
// This function specializes the parsing of DC coefficient by removing some of
// the branches when i == 0 (since scan[0] is always 0 and scan[i] is always
// non-zero for all other possible values of i). |dc_category| is an output
// parameter that is populated when |is_dc_coefficient| is true.
// |coefficient_level| is an output parameter which accumulates the
// coefficient level.
template <typename ResidualType, bool is_dc_coefficient>
LIBGAV1_ALWAYS_INLINE bool ReadSignAndApplyDequantization(
    const uint16_t* scan, int i, int q_value, const uint8_t* quantizer_matrix,
    int shift, int max_value, uint16_t* dc_sign_cdf, int8_t* dc_category,
    int* coefficient_level,
    ResidualType* residual_buffer);  // Part of 5.11.39.
int ReadCoeffBaseRange(int clamped_tx_size_context, int cdf_context,
                       int plane_type);  // Part of 5.11.39.
// Returns the number of non-zero coefficients that were read. |tx_type| is an
// output parameter that stores the computed transform type for the plane
// whose coefficients were read. Returns -1 on failure.
template <typename ResidualType>
int ReadTransformCoefficients(const Block& block, Plane plane, int start_x,
                              int start_y, TransformSize tx_size,
                              TransformType* tx_type);  // 5.11.39.
// |mode| selects whether the transform block is parsed, decoded, or both
// (see ProcessingMode).
bool TransformBlock(const Block& block, Plane plane, int base_x, int base_y,
                    TransformSize tx_size, int x, int y,
                    ProcessingMode mode);  // 5.11.35.
// Iterative implementation of 5.11.36.
bool TransformTree(const Block& block, int start_x, int start_y,
                   BlockSize plane_size, ProcessingMode mode);
void ReconstructBlock(const Block& block, Plane plane, int start_x,
                      int start_y, TransformSize tx_size,
                      TransformType tx_type,
                      int non_zero_coeff_count);  // Part of 7.12.3.
bool Residual(const Block& block, ProcessingMode mode);  // 5.11.34.
// Part of 5.11.5 (reset_block_context() in the spec).
void ResetEntropyContext(const Block& block);
// Populates the |color_context| and |color_order| for the |i|th iteration
// with entries counting down from |start| to |end| (|start| > |end|).
void PopulatePaletteColorContexts(
    const Block& block, PlaneType plane_type, int i, int start, int end,
    uint8_t color_order[kMaxPaletteSquare][kMaxPaletteSize],
    uint8_t color_context[kMaxPaletteSquare]);  // 5.11.50.
bool ReadPaletteTokens(const Block& block);     // 5.11.49.
template <typename Pixel>
void IntraPrediction(const Block& block, Plane plane, int x, int y,
                     bool has_left, bool has_top, bool has_top_right,
                     bool has_bottom_left, PredictionMode mode,
                     TransformSize tx_size);
bool IsSmoothPrediction(int row, int column, Plane plane) const;
int GetIntraEdgeFilterType(const Block& block,
                           Plane plane) const;  // 7.11.2.8.
536 template <typename Pixel> 537 void DirectionalPrediction(const Block& block, Plane plane, int x, int y, 538 bool has_left, bool has_top, bool needs_left, 539 bool needs_top, int prediction_angle, int width, 540 int height, int max_x, int max_y, 541 TransformSize tx_size, Pixel* top_row, 542 Pixel* left_column); // 7.11.2.4. 543 template <typename Pixel> 544 void PalettePrediction(const Block& block, Plane plane, int start_x, 545 int start_y, int x, int y, 546 TransformSize tx_size); // 7.11.4. 547 template <typename Pixel> 548 void ChromaFromLumaPrediction(const Block& block, Plane plane, int start_x, 549 int start_y, 550 TransformSize tx_size); // 7.11.5. 551 // Section 7.19. Applies some filtering and reordering to the motion vectors 552 // for the given |block| and stores them into |current_frame_|. 553 void StoreMotionFieldMvsIntoCurrentFrame(const Block& block); 554 555 // Returns the zero-based index of the super block that contains |row4x4| 556 // relative to the start of this tile. SuperBlockRowIndex(int row4x4)557 int SuperBlockRowIndex(int row4x4) const { 558 return (row4x4 - row4x4_start_) >> 559 (sequence_header_.use_128x128_superblock ? 5 : 4); 560 } 561 562 // Returns the zero-based index of the super block that contains |column4x4| 563 // relative to the start of this tile. SuperBlockColumnIndex(int column4x4)564 int SuperBlockColumnIndex(int column4x4) const { 565 return (column4x4 - column4x4_start_) >> 566 (sequence_header_.use_128x128_superblock ? 5 : 4); 567 } 568 SuperBlockSize()569 BlockSize SuperBlockSize() const { 570 return sequence_header_.use_128x128_superblock ? kBlock128x128 571 : kBlock64x64; 572 } PlaneCount()573 int PlaneCount() const { 574 return sequence_header_.color_config.is_monochrome ? 
kMaxPlanesMonochrome 575 : kMaxPlanes; 576 } 577 578 const int number_; 579 const int row_; 580 const int column_; 581 const uint8_t* const data_; 582 size_t size_; 583 int row4x4_start_; 584 int row4x4_end_; 585 int column4x4_start_; 586 int column4x4_end_; 587 int superblock_rows_; 588 int superblock_columns_; 589 bool read_deltas_; 590 const int8_t subsampling_x_[kMaxPlanes]; 591 const int8_t subsampling_y_[kMaxPlanes]; 592 int deblock_row_limit_[kMaxPlanes]; 593 int deblock_column_limit_[kMaxPlanes]; 594 595 // The dimensions (in order) are: segment_id, level_index (based on plane and 596 // direction), reference_frame and mode_id. 597 uint8_t deblock_filter_levels_[kMaxSegments][kFrameLfCount] 598 [kNumReferenceFrameTypes][2]; 599 600 // current_quantizer_index_ is in the range [0, 255]. 601 uint8_t current_quantizer_index_; 602 // These two arrays (|coefficient_levels_| and |dc_categories_|) are used to 603 // store the entropy context. Their dimensions are as follows: First - 604 // left/top; Second - plane; Third - row4x4 (if first dimension is 605 // left)/column4x4 (if first dimension is top). 606 // 607 // This is equivalent to the LeftLevelContext and AboveLevelContext arrays in 608 // the spec. In the spec, it stores values from 0 through 63 (inclusive). The 609 // stored values are used to compute the left and top contexts in 610 // GetTransformAllZeroContext. In that function, we only care about the 611 // following values: 0, 1, 2, 3 and >= 4. So instead of clamping to 63, we 612 // clamp to 4 (i.e.) all the values greater than 4 are stored as 4. 613 std::array<Array2D<uint8_t>, 2> coefficient_levels_; 614 // This is equivalent to the LeftDcContext and AboveDcContext arrays in the 615 // spec. In the spec, it can store 3 possible values: 0, 1 and 2 (where 1 616 // means the value is < 0, 2 means the value is > 0 and 0 means the value is 617 // equal to 0). 
//
// The stored values are used in two places:
//   * GetTransformAllZeroContext: Here, we only care about whether the
//   value is 0 or not (whether it is 1 or 2 is irrelevant).
//   * GetDcSignContext: Here, we do the following computation: if the
//   stored value is 1, we decrement a counter. If the stored value is 2
//   we increment a counter.
//
// Based on this usage, we can simply replace 1 with -1 and 2 with 1 and
// use that value to compute the counter.
//
// The usage on GetTransformAllZeroContext is unaffected since there we
// only care about whether it is 0 or not.
std::array<Array2D<int8_t>, 2> dc_categories_;
// The reference members below are not owned by the Tile.
const ObuSequenceHeader& sequence_header_;
const ObuFrameHeader& frame_header_;
const std::array<bool, kNumReferenceFrameTypes>& reference_frame_sign_bias_;
const std::array<RefCountedBufferPtr, kNumReferenceFrameTypes>&
    reference_frames_;
TemporalMotionField& motion_field_;
const std::array<uint8_t, kNumReferenceFrameTypes>& reference_order_hint_;
const WedgeMaskArray& wedge_masks_;
DaalaBitReader reader_;
SymbolDecoderContext symbol_decoder_context_;
// Destination used by SaveSymbolDecoderContext().
SymbolDecoderContext* const saved_symbol_decoder_context_;
const SegmentationMap* prev_segment_ids_;
const dsp::Dsp& dsp_;
PostFilter& post_filter_;
BlockParametersHolder& block_parameters_holder_;
Quantizer quantizer_;
// When there is no multi-threading within the Tile, |residual_buffer_| is
// used. When there is multi-threading within the Tile,
// |residual_buffer_threaded_| is used. In the following comment,
// |residual_buffer| refers to either |residual_buffer_| or
// |residual_buffer_threaded_| depending on whether multi-threading is enabled
// within the Tile or not.
// The |residual_buffer| is used to help with the dequantization and the
// inverse transform processes. It is declared as a uint8_t, but is always
// accessed either as an int16_t or int32_t depending on |bitdepth|. Here is
// what it stores at various stages of the decoding process (in the order
// which they happen):
//   1) In ReadTransformCoefficients(), this buffer is used to store the
//      dequantized values.
//   2) In Reconstruct(), this buffer is used as the input to the row
//      transform process.
// The size of this buffer would be:
//    For |residual_buffer_|: (4096 + 32 * |kResidualPaddingVertical|) *
//        |residual_size_|. Where 4096 = 64x64 which is the maximum transform
//        size, and 32 * |kResidualPaddingVertical| is the padding to avoid
//        bottom boundary checks when parsing quantized coefficients. This
//        memory is allocated and owned by the Tile class.
//    For |residual_buffer_threaded_|: See the comment below. This memory is
//        not allocated or owned by the Tile class.
AlignedUniquePtr<uint8_t> residual_buffer_;
// This is a 2d array of pointers of size |superblock_rows_| by
// |superblock_columns_| where each pointer points to a ResidualBuffer for a
// single super block. The array is populated when the parsing process begins
// by calling |residual_buffer_pool_->Get()| and the memory is released back
// to the pool by calling |residual_buffer_pool_->Release()| when the decoding
// process is complete.
Array2D<std::unique_ptr<ResidualBuffer>> residual_buffer_threaded_;
// sizeof(int16_t or int32_t) depending on |bitdepth|.
const size_t residual_size_;
// Number of superblocks on the top-right that will have to be decoded before
// the current superblock can be decoded. This will be 1 if allow_intrabc is
// false. If allow_intrabc is true, then this value will be
// use_128x128_superblock ? 3 : 5. This is the allowed range of reference for
// the top rows for intrabc.
686 const int intra_block_copy_lag_; 687 688 // In the Tile class, we use the "current_frame" in two ways: 689 // 1) To write the decoded output into (using the |buffer_| view). 690 // 2) To read the pixels for intra block copy (using the |current_frame_| 691 // reference). 692 // 693 // When intra block copy is off, |buffer_| and |current_frame_| may or may not 694 // point to the same plane pointers. But it is okay since |current_frame_| is 695 // never used in this case. 696 // 697 // When intra block copy is on, |buffer_| and |current_frame_| always point to 698 // the same plane pointers (since post filtering is disabled). So the usage in 699 // both case 1 and case 2 remain valid. 700 Array2DView<uint8_t> buffer_[kMaxPlanes]; 701 RefCountedBuffer& current_frame_; 702 703 Array2D<int16_t>& cdef_index_; 704 Array2D<TransformSize>& inter_transform_sizes_; 705 std::array<RestorationUnitInfo, kMaxPlanes> reference_unit_info_; 706 // If |thread_pool_| is nullptr, the calling thread will do the parsing and 707 // the decoding in one pass. If |thread_pool_| is not nullptr, then the main 708 // thread will do the parsing while the thread pool workers will do the 709 // decoding. 710 ThreadPool* const thread_pool_; 711 ThreadingParameters threading_; 712 ResidualBufferPool* const residual_buffer_pool_; 713 TileScratchBufferPool* const tile_scratch_buffer_pool_; 714 BlockingCounterWithStatus* const pending_tiles_; 715 bool split_parse_and_decode_; 716 // This is used only when |split_parse_and_decode_| is false. 717 std::unique_ptr<PredictionParameters> prediction_parameters_ = nullptr; 718 // Stores the |transform_type| for the super block being decoded at a 4x4 719 // granularity. The spec uses absolute indices for this array but it is 720 // sufficient to use indices relative to the super block being decoded. 721 TransformType transform_types_[32][32]; 722 // delta_lf_[i] is in the range [-63, 63]. 
723 int8_t delta_lf_[kFrameLfCount]; 724 // True if all the values in |delta_lf_| are zero. False otherwise. 725 bool delta_lf_all_zero_; 726 const bool frame_parallel_; 727 const bool use_intra_prediction_buffer_; 728 // Buffer used to store the unfiltered pixels that are necessary for decoding 729 // the next superblock row (for the intra prediction process). Used only if 730 // |use_intra_prediction_buffer_| is true. The |frame_scratch_buffer| contains 731 // one row buffer for each tile row. This tile will have to use the buffer 732 // corresponding to this tile's row. 733 IntraPredictionBuffer* const intra_prediction_buffer_; 734 // Stores the progress of the reference frames. This will be used to avoid 735 // unnecessary calls into RefCountedBuffer::WaitUntil(). 736 std::array<int, kNumReferenceFrameTypes> reference_frame_progress_cache_; 737 }; 738 739 struct Tile::Block { BlockBlock740 Block(const Tile& tile, BlockSize size, int row4x4, int column4x4, 741 TileScratchBuffer* const scratch_buffer, ResidualPtr* residual) 742 : tile(tile), 743 size(size), 744 row4x4(row4x4), 745 column4x4(column4x4), 746 width(kBlockWidthPixels[size]), 747 height(kBlockHeightPixels[size]), 748 width4x4(width >> 2), 749 height4x4(height >> 2), 750 scratch_buffer(scratch_buffer), 751 residual(residual) { 752 assert(size != kBlockInvalid); 753 residual_size[kPlaneY] = kPlaneResidualSize[size][0][0]; 754 residual_size[kPlaneU] = residual_size[kPlaneV] = 755 kPlaneResidualSize[size][tile.subsampling_x_[kPlaneU]] 756 [tile.subsampling_y_[kPlaneU]]; 757 assert(residual_size[kPlaneY] != kBlockInvalid); 758 if (tile.PlaneCount() > 1) { 759 assert(residual_size[kPlaneU] != kBlockInvalid); 760 } 761 if ((row4x4 & 1) == 0 && 762 (tile.sequence_header_.color_config.subsampling_y & height4x4) == 1) { 763 has_chroma = false; 764 } else if ((column4x4 & 1) == 0 && 765 (tile.sequence_header_.color_config.subsampling_x & width4x4) == 766 1) { 767 has_chroma = false; 768 } else { 769 has_chroma = 
!tile.sequence_header_.color_config.is_monochrome; 770 } 771 top_available[kPlaneY] = tile.IsTopInside(row4x4); 772 left_available[kPlaneY] = tile.IsLeftInside(column4x4); 773 if (has_chroma) { 774 // top_available[kPlaneU] and top_available[kPlaneV] are valid only if 775 // has_chroma is true. 776 // The next 3 lines are equivalent to: 777 // top_available[kPlaneU] = top_available[kPlaneV] = 778 // top_available[kPlaneY] && 779 // ((tile.sequence_header_.color_config.subsampling_y & height4x4) == 780 // 0 || tile.IsTopInside(row4x4 - 1)); 781 top_available[kPlaneU] = top_available[kPlaneV] = tile.IsTopInside( 782 row4x4 - 783 (tile.sequence_header_.color_config.subsampling_y & height4x4)); 784 // left_available[kPlaneU] and left_available[kPlaneV] are valid only if 785 // has_chroma is true. 786 // The next 3 lines are equivalent to: 787 // left_available[kPlaneU] = left_available[kPlaneV] = 788 // left_available[kPlaneY] && 789 // ((tile.sequence_header_.color_config.subsampling_x & width4x4) == 0 790 // || tile.IsLeftInside(column4x4 - 1)); 791 left_available[kPlaneU] = left_available[kPlaneV] = tile.IsLeftInside( 792 column4x4 - 793 (tile.sequence_header_.color_config.subsampling_x & width4x4)); 794 } 795 const ptrdiff_t stride = tile.BlockParametersStride(); 796 BlockParameters** const bps = 797 tile.BlockParametersAddress(row4x4, column4x4); 798 bp = *bps; 799 // bp_top is valid only if top_available[kPlaneY] is true. 800 if (top_available[kPlaneY]) { 801 bp_top = *(bps - stride); 802 } 803 // bp_left is valid only if left_available[kPlaneY] is true. 804 if (left_available[kPlaneY]) { 805 bp_left = *(bps - 1); 806 } 807 } 808 HasChromaBlock809 bool HasChroma() const { return has_chroma; } 810 811 // These return values of these group of functions are valid only if the 812 // corresponding top_available or left_available is true. 
TopReferenceBlock813 ReferenceFrameType TopReference(int index) const { 814 return bp_top->reference_frame[index]; 815 } 816 LeftReferenceBlock817 ReferenceFrameType LeftReference(int index) const { 818 return bp_left->reference_frame[index]; 819 } 820 IsTopIntraBlock821 bool IsTopIntra() const { return TopReference(0) <= kReferenceFrameIntra; } IsLeftIntraBlock822 bool IsLeftIntra() const { return LeftReference(0) <= kReferenceFrameIntra; } 823 IsTopSingleBlock824 bool IsTopSingle() const { return TopReference(1) <= kReferenceFrameIntra; } IsLeftSingleBlock825 bool IsLeftSingle() const { return LeftReference(1) <= kReferenceFrameIntra; } 826 CountReferencesBlock827 int CountReferences(ReferenceFrameType type) const { 828 return static_cast<int>(top_available[kPlaneY] && 829 bp_top->reference_frame[0] == type) + 830 static_cast<int>(top_available[kPlaneY] && 831 bp_top->reference_frame[1] == type) + 832 static_cast<int>(left_available[kPlaneY] && 833 bp_left->reference_frame[0] == type) + 834 static_cast<int>(left_available[kPlaneY] && 835 bp_left->reference_frame[1] == type); 836 } 837 838 // 7.10.3. 839 // Checks if there are any inter blocks to the left or above. If so, it 840 // returns true indicating that the block has neighbors that are suitable for 841 // use by overlapped motion compensation. 
HasOverlappableCandidatesBlock842 bool HasOverlappableCandidates() const { 843 const ptrdiff_t stride = tile.BlockParametersStride(); 844 BlockParameters** const bps = tile.BlockParametersAddress(0, 0); 845 if (top_available[kPlaneY]) { 846 BlockParameters** bps_top = bps + (row4x4 - 1) * stride + (column4x4 | 1); 847 const int columns = std::min(tile.frame_header_.columns4x4 - column4x4, 848 static_cast<int>(width4x4)); 849 BlockParameters** const bps_top_end = bps_top + columns; 850 do { 851 if ((*bps_top)->reference_frame[0] > kReferenceFrameIntra) { 852 return true; 853 } 854 bps_top += 2; 855 } while (bps_top < bps_top_end); 856 } 857 if (left_available[kPlaneY]) { 858 BlockParameters** bps_left = bps + (row4x4 | 1) * stride + column4x4 - 1; 859 const int rows = std::min(tile.frame_header_.rows4x4 - row4x4, 860 static_cast<int>(height4x4)); 861 BlockParameters** const bps_left_end = bps_left + rows * stride; 862 do { 863 if ((*bps_left)->reference_frame[0] > kReferenceFrameIntra) { 864 return true; 865 } 866 bps_left += 2 * stride; 867 } while (bps_left < bps_left_end); 868 } 869 return false; 870 } 871 872 const Tile& tile; 873 bool has_chroma; 874 const BlockSize size; 875 bool top_available[kMaxPlanes]; 876 bool left_available[kMaxPlanes]; 877 BlockSize residual_size[kMaxPlanes]; 878 const int row4x4; 879 const int column4x4; 880 const int width; 881 const int height; 882 const int width4x4; 883 const int height4x4; 884 const BlockParameters* bp_top; 885 const BlockParameters* bp_left; 886 BlockParameters* bp; 887 TileScratchBuffer* const scratch_buffer; 888 ResidualPtr* const residual; 889 }; 890 891 extern template bool 892 Tile::ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>( 893 int row4x4, TileScratchBuffer* scratch_buffer); 894 extern template bool 895 Tile::ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>( 896 int row4x4, TileScratchBuffer* scratch_buffer); 897 898 } // namespace libgav1 899 900 #endif // LIBGAV1_SRC_TILE_H_ 901