1 /* 2 * Copyright 2019 The libgav1 Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBGAV1_SRC_TILE_H_ 18 #define LIBGAV1_SRC_TILE_H_ 19 20 #include <algorithm> 21 #include <array> 22 #include <cassert> 23 #include <condition_variable> // NOLINT (unapproved c++11 header) 24 #include <cstddef> 25 #include <cstdint> 26 #include <memory> 27 #include <mutex> // NOLINT (unapproved c++11 header) 28 #include <vector> 29 30 #include "src/buffer_pool.h" 31 #include "src/decoder_state.h" 32 #include "src/dsp/common.h" 33 #include "src/dsp/constants.h" 34 #include "src/dsp/dsp.h" 35 #include "src/frame_scratch_buffer.h" 36 #include "src/loop_restoration_info.h" 37 #include "src/obu_parser.h" 38 #include "src/post_filter.h" 39 #include "src/quantizer.h" 40 #include "src/residual_buffer_pool.h" 41 #include "src/symbol_decoder_context.h" 42 #include "src/tile_scratch_buffer.h" 43 #include "src/utils/array_2d.h" 44 #include "src/utils/block_parameters_holder.h" 45 #include "src/utils/blocking_counter.h" 46 #include "src/utils/common.h" 47 #include "src/utils/compiler_attributes.h" 48 #include "src/utils/constants.h" 49 #include "src/utils/entropy_decoder.h" 50 #include "src/utils/memory.h" 51 #include "src/utils/segmentation_map.h" 52 #include "src/utils/threadpool.h" 53 #include "src/utils/types.h" 54 #include "src/yuv_buffer.h" 55 56 namespace libgav1 { 57 58 // Indicates what the ProcessSuperBlock() and TransformBlock() functions should 59 // do. "Parse" refers to consuming the bitstream, reading the transform 60 // coefficients and performing the dequantization. "Decode" refers to computing 61 // the prediction, applying the inverse transforms and adding the residual. 62 enum ProcessingMode { 63 kProcessingModeParseOnly, 64 kProcessingModeDecodeOnly, 65 kProcessingModeParseAndDecode, 66 }; 67 68 class Tile : public Allocable { 69 public: Create(int tile_number,const uint8_t * const data,size_t size,const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,RefCountedBuffer * const current_frame,const DecoderState & state,FrameScratchBuffer * const frame_scratch_buffer,const WedgeMaskArray & wedge_masks,const QuantizerMatrix & quantizer_matrix,SymbolDecoderContext * const saved_symbol_decoder_context,const SegmentationMap * prev_segment_ids,PostFilter * const post_filter,const dsp::Dsp * const dsp,ThreadPool * const thread_pool,BlockingCounterWithStatus * const pending_tiles,bool frame_parallel,bool use_intra_prediction_buffer)70 static std::unique_ptr<Tile> Create( 71 int tile_number, const uint8_t* const data, size_t size, 72 const ObuSequenceHeader& sequence_header, 73 const ObuFrameHeader& frame_header, RefCountedBuffer* const current_frame, 74 const DecoderState& state, FrameScratchBuffer* const frame_scratch_buffer, 75 const WedgeMaskArray& wedge_masks, 76 const QuantizerMatrix& quantizer_matrix, 77 SymbolDecoderContext* const saved_symbol_decoder_context, 78 const SegmentationMap* prev_segment_ids, PostFilter* const post_filter, 79 const dsp::Dsp* const dsp, ThreadPool* const thread_pool, 80 BlockingCounterWithStatus* const pending_tiles, bool frame_parallel, 81 bool use_intra_prediction_buffer) { 82 std::unique_ptr<Tile> tile(new (std::nothrow) Tile( 83 tile_number, data, size, sequence_header, frame_header, current_frame, 84 state, frame_scratch_buffer, wedge_masks, quantizer_matrix, 85 saved_symbol_decoder_context, prev_segment_ids, post_filter, dsp, 86 thread_pool, pending_tiles, frame_parallel, 87 use_intra_prediction_buffer)); 88 return (tile != nullptr && tile->Init()) ? std::move(tile) : nullptr; 89 } 90 91 // Move only. 92 Tile(Tile&& tile) noexcept; 93 Tile& operator=(Tile&& tile) noexcept; 94 Tile(const Tile&) = delete; 95 Tile& operator=(const Tile&) = delete; 96 97 struct Block; // Defined after this class. 98 99 // Parses the entire tile. 100 bool Parse(); 101 // Decodes the entire tile. |superblock_row_progress| and 102 // |superblock_row_progress_condvar| are arrays of size equal to the number of 103 // superblock rows in the frame. Increments |superblock_row_progress[i]| after 104 // each superblock row at index |i| is decoded. If the count reaches the 105 // number of tile columns, then it notifies 106 // |superblock_row_progress_condvar[i]|. 107 bool Decode(std::mutex* mutex, int* superblock_row_progress, 108 std::condition_variable* superblock_row_progress_condvar); 109 // Parses and decodes the entire tile. Depending on the configuration of this 110 // Tile, this function may do multithreaded decoding. 111 bool ParseAndDecode(); // 5.11.2. 112 // Processes all the columns of the superblock row at |row4x4| that are within 113 // this Tile. If |save_symbol_decoder_context| is true, then 114 // SaveSymbolDecoderContext() is invoked for the last superblock row. 115 template <ProcessingMode processing_mode, bool save_symbol_decoder_context> 116 bool ProcessSuperBlockRow(int row4x4, TileScratchBuffer* scratch_buffer); 117 sequence_header()118 const ObuSequenceHeader& sequence_header() const { return sequence_header_; } frame_header()119 const ObuFrameHeader& frame_header() const { return frame_header_; } current_frame()120 const RefCountedBuffer& current_frame() const { return current_frame_; } motion_field()121 const TemporalMotionField& motion_field() const { return motion_field_; } reference_frame_sign_bias()122 const std::array<bool, kNumReferenceFrameTypes>& reference_frame_sign_bias() 123 const { 124 return reference_frame_sign_bias_; 125 } 126 IsRow4x4Inside(int row4x4)127 bool IsRow4x4Inside(int row4x4) const { 128 return row4x4 >= row4x4_start_ && row4x4 < row4x4_end_; 129 } 130 131 // 5.11.51. IsInside(int row4x4,int column4x4)132 bool IsInside(int row4x4, int column4x4) const { 133 return IsRow4x4Inside(row4x4) && column4x4 >= column4x4_start_ && 134 column4x4 < column4x4_end_; 135 } 136 IsLeftInside(int column4x4)137 bool IsLeftInside(int column4x4) const { 138 // We use "larger than" as the condition. Don't pass in the left column 139 // offset column4x4 - 1. 140 assert(column4x4 <= column4x4_end_); 141 return column4x4 > column4x4_start_; 142 } 143 IsTopInside(int row4x4)144 bool IsTopInside(int row4x4) const { 145 // We use "larger than" as the condition. Don't pass in the top row offset 146 // row4x4 - 1. 147 assert(row4x4 <= row4x4_end_); 148 return row4x4 > row4x4_start_; 149 } 150 IsTopLeftInside(int row4x4,int column4x4)151 bool IsTopLeftInside(int row4x4, int column4x4) const { 152 // We use "larger than" as the condition. Don't pass in the top row offset 153 // row4x4 - 1 or the left column offset column4x4 - 1. 154 assert(row4x4 <= row4x4_end_); 155 assert(column4x4 <= column4x4_end_); 156 return row4x4 > row4x4_start_ && column4x4 > column4x4_start_; 157 } 158 IsBottomRightInside(int row4x4,int column4x4)159 bool IsBottomRightInside(int row4x4, int column4x4) const { 160 assert(row4x4 >= row4x4_start_); 161 assert(column4x4 >= column4x4_start_); 162 return row4x4 < row4x4_end_ && column4x4 < column4x4_end_; 163 } 164 BlockParametersAddress(int row4x4,int column4x4)165 BlockParameters** BlockParametersAddress(int row4x4, int column4x4) const { 166 return block_parameters_holder_.Address(row4x4, column4x4); 167 } 168 BlockParametersStride()169 int BlockParametersStride() const { 170 return block_parameters_holder_.columns4x4(); 171 } 172 173 // Returns true if Parameters() can be called with |row| and |column| as 174 // inputs, false otherwise. HasParameters(int row,int column)175 bool HasParameters(int row, int column) const { 176 return block_parameters_holder_.Find(row, column) != nullptr; 177 } Parameters(int row,int column)178 const BlockParameters& Parameters(int row, int column) const { 179 return *block_parameters_holder_.Find(row, column); 180 } 181 number()182 int number() const { return number_; } superblock_rows()183 int superblock_rows() const { return superblock_rows_; } superblock_columns()184 int superblock_columns() const { return superblock_columns_; } row4x4_start()185 int row4x4_start() const { return row4x4_start_; } column4x4_start()186 int column4x4_start() const { return column4x4_start_; } column4x4_end()187 int column4x4_end() const { return column4x4_end_; } 188 189 private: 190 // Stores the transform tree state when reading variable size transform trees 191 // and when applying the transform tree. When applying the transform tree, 192 // |depth| is not used. 193 struct TransformTreeNode { 194 // The default constructor is invoked by the Stack<TransformTreeNode, n> 195 // constructor. Stack<> does not use the default-constructed elements, so it 196 // is safe for the default constructor to not initialize the members. 197 TransformTreeNode() = default; 198 TransformTreeNode(int x, int y, TransformSize tx_size, int depth = -1) xTransformTreeNode199 : x(x), y(y), tx_size(tx_size), depth(depth) {} 200 201 int x; 202 int y; 203 TransformSize tx_size; 204 int depth; 205 }; 206 207 // Enum to track the processing state of a superblock. 208 enum SuperBlockState : uint8_t { 209 kSuperBlockStateNone, // Not yet parsed or decoded. 210 kSuperBlockStateParsed, // Parsed but not yet decoded. 211 kSuperBlockStateScheduled, // Scheduled for decoding. 212 kSuperBlockStateDecoded // Parsed and decoded. 213 }; 214 215 // Parameters used to facilitate multi-threading within the Tile. 216 struct ThreadingParameters { 217 std::mutex mutex; 218 // 2d array of size |superblock_rows_| by |superblock_columns_| containing 219 // the processing state of each superblock. 220 Array2D<SuperBlockState> sb_state LIBGAV1_GUARDED_BY(mutex); 221 // Variable used to indicate either parse or decode failure. 222 bool abort LIBGAV1_GUARDED_BY(mutex) = false; 223 int pending_jobs LIBGAV1_GUARDED_BY(mutex) = 0; 224 std::condition_variable pending_jobs_zero_condvar; 225 }; 226 227 // The residual pointer is used to traverse the |residual_buffer_|. It is 228 // used in two different ways. 229 // If |split_parse_and_decode_| is true: 230 // The pointer points to the beginning of the |residual_buffer_| when the 231 // "parse" and "decode" steps begin. It is then moved forward tx_size in 232 // each iteration of the "parse" and the "decode" steps. In this case, the 233 // ResidualPtr variable passed into various functions starting from 234 // ProcessSuperBlock is used as an in/out parameter to keep track of the 235 // residual pointer. 236 // If |split_parse_and_decode_| is false: 237 // The pointer is reset to the beginning of the |residual_buffer_| for 238 // every transform block. 239 using ResidualPtr = uint8_t*; 240 241 Tile(int tile_number, const uint8_t* data, size_t size, 242 const ObuSequenceHeader& sequence_header, 243 const ObuFrameHeader& frame_header, RefCountedBuffer* current_frame, 244 const DecoderState& state, FrameScratchBuffer* frame_scratch_buffer, 245 const WedgeMaskArray& wedge_masks, 246 const QuantizerMatrix& quantizer_matrix, 247 SymbolDecoderContext* saved_symbol_decoder_context, 248 const SegmentationMap* prev_segment_ids, PostFilter* post_filter, 249 const dsp::Dsp* dsp, ThreadPool* thread_pool, 250 BlockingCounterWithStatus* pending_tiles, bool frame_parallel, 251 bool use_intra_prediction_buffer); 252 253 // Performs member initializations that may fail. Helper function used by 254 // Create(). 255 LIBGAV1_MUST_USE_RESULT bool Init(); 256 257 // Saves the symbol decoder context of this tile into 258 // |saved_symbol_decoder_context_| if necessary. 259 void SaveSymbolDecoderContext(); 260 261 // Entry point for multi-threaded decoding. This function performs the same 262 // functionality as ParseAndDecode(). The current thread does the "parse" step 263 // while the worker threads do the "decode" step. 264 bool ThreadedParseAndDecode(); 265 266 // Returns whether or not the prerequisites for decoding the superblock at 267 // |row_index| and |column_index| are satisfied. |threading_.mutex| must be 268 // held when calling this function. 269 bool CanDecode(int row_index, int column_index) const; 270 271 // This function is run by the worker threads when multi-threaded decoding is 272 // enabled. Once a superblock is decoded, this function will set the 273 // corresponding |threading_.sb_state| entry to kSuperBlockStateDecoded. On 274 // failure, |threading_.abort| will be set to true. If at any point 275 // |threading_.abort| becomes true, this function will return as early as it 276 // can. If the decoding succeeds, this function will also schedule the 277 // decoding jobs for the superblock to the bottom-left and the superblock to 278 // the right of this superblock (if it is allowed). 279 void DecodeSuperBlock(int row_index, int column_index, int block_width4x4); 280 281 // If |use_intra_prediction_buffer_| is true, then this function copies the 282 // last row of the superblockrow starting at |row4x4| into the 283 // |intra_prediction_buffer_| (which may be used by the intra prediction 284 // process for the next superblock row). 285 void PopulateIntraPredictionBuffer(int row4x4); 286 287 uint16_t* GetPartitionCdf(int row4x4, int column4x4, BlockSize block_size); 288 bool ReadPartition(int row4x4, int column4x4, BlockSize block_size, 289 bool has_rows, bool has_columns, Partition* partition); 290 // Processes the Partition starting at |row4x4_start|, |column4x4_start| 291 // iteratively. It performs a DFS traversal over the partition tree to process 292 // the blocks in the right order. 293 bool ProcessPartition( 294 int row4x4_start, int column4x4_start, TileScratchBuffer* scratch_buffer, 295 ResidualPtr* residual); // Iterative implementation of 5.11.4. 296 bool ProcessBlock(int row4x4, int column4x4, BlockSize block_size, 297 TileScratchBuffer* scratch_buffer, 298 ResidualPtr* residual); // 5.11.5. 299 void ResetCdef(int row4x4, int column4x4); // 5.11.55. 300 301 // This function is used to decode a superblock when the parsing has already 302 // been done for that superblock. 303 bool DecodeSuperBlock(int sb_row_index, int sb_column_index, 304 TileScratchBuffer* scratch_buffer); 305 // Helper function used by DecodeSuperBlock(). Note that the decode_block() 306 // function in the spec is equivalent to ProcessBlock() in the code. 307 bool DecodeBlock(int row4x4, int column4x4, BlockSize block_size, 308 TileScratchBuffer* scratch_buffer, ResidualPtr* residual); 309 310 void ClearBlockDecoded(TileScratchBuffer* scratch_buffer, int row4x4, 311 int column4x4); // 5.11.3. 312 bool ProcessSuperBlock(int row4x4, int column4x4, 313 TileScratchBuffer* scratch_buffer, 314 ProcessingMode mode); 315 void ResetLoopRestorationParams(); 316 void ReadLoopRestorationCoefficients(int row4x4, int column4x4, 317 BlockSize block_size); // 5.11.57. 318 319 // Helper functions for DecodeBlock. 320 bool ReadSegmentId(const Block& block); // 5.11.9. 321 bool ReadIntraSegmentId(const Block& block); // 5.11.8. 322 void ReadSkip(const Block& block); // 5.11.11. 323 void ReadSkipMode(const Block& block); // 5.11.10. 324 void ReadCdef(const Block& block); // 5.11.56. 325 // Returns the new value. |cdf| is an array of size kDeltaSymbolCount + 1. 326 int ReadAndClipDelta(uint16_t* cdf, int delta_small, int scale, int min_value, 327 int max_value, int value); 328 void ReadQuantizerIndexDelta(const Block& block); // 5.11.12. 329 void ReadLoopFilterDelta(const Block& block); // 5.11.13. 330 // Populates |BlockParameters::deblock_filter_level| for the given |block| 331 // using |deblock_filter_levels_|. 332 void PopulateDeblockFilterLevel(const Block& block); 333 void ReadPredictionModeY(const Block& block, bool intra_y_mode); 334 void ReadIntraAngleInfo(const Block& block, 335 PlaneType plane_type); // 5.11.42 and 5.11.43. 336 void ReadPredictionModeUV(const Block& block); 337 void ReadCflAlpha(const Block& block); // 5.11.45. 338 int GetPaletteCache(const Block& block, PlaneType plane_type, 339 uint16_t* cache); 340 void ReadPaletteColors(const Block& block, Plane plane); 341 void ReadPaletteModeInfo(const Block& block); // 5.11.46. 342 void ReadFilterIntraModeInfo(const Block& block); // 5.11.24. 343 int ReadMotionVectorComponent(const Block& block, 344 int component); // 5.11.32. 345 void ReadMotionVector(const Block& block, int index); // 5.11.31. 346 bool DecodeIntraModeInfo(const Block& block); // 5.11.7. 347 int8_t ComputePredictedSegmentId(const Block& block) const; // 5.11.21. 348 bool ReadInterSegmentId(const Block& block, bool pre_skip); // 5.11.19. 349 void ReadIsInter(const Block& block); // 5.11.20. 350 bool ReadIntraBlockModeInfo(const Block& block, 351 bool intra_y_mode); // 5.11.22. 352 CompoundReferenceType ReadCompoundReferenceType(const Block& block); 353 template <bool is_single, bool is_backward, int index> 354 uint16_t* GetReferenceCdf(const Block& block, CompoundReferenceType type = 355 kNumCompoundReferenceTypes); 356 void ReadReferenceFrames(const Block& block); // 5.11.25. 357 void ReadInterPredictionModeY(const Block& block, 358 const MvContexts& mode_contexts); 359 void ReadRefMvIndex(const Block& block); 360 void ReadInterIntraMode(const Block& block, bool is_compound); // 5.11.28. IsScaled(ReferenceFrameType type)361 bool IsScaled(ReferenceFrameType type) const { // Part of 5.11.27. 362 const int index = 363 frame_header_.reference_frame_index[type - kReferenceFrameLast]; 364 return reference_frames_[index]->upscaled_width() != frame_header_.width || 365 reference_frames_[index]->frame_height() != frame_header_.height; 366 } 367 void ReadMotionMode(const Block& block, bool is_compound); // 5.11.27. 368 uint16_t* GetIsExplicitCompoundTypeCdf(const Block& block); 369 uint16_t* GetIsCompoundTypeAverageCdf(const Block& block); 370 void ReadCompoundType(const Block& block, bool is_compound); // 5.11.29. 371 uint16_t* GetInterpolationFilterCdf(const Block& block, int direction); 372 void ReadInterpolationFilter(const Block& block); 373 bool ReadInterBlockModeInfo(const Block& block); // 5.11.23. 374 bool DecodeInterModeInfo(const Block& block); // 5.11.18. 375 bool DecodeModeInfo(const Block& block); // 5.11.6. 376 bool IsMvValid(const Block& block, bool is_compound) const; // 6.10.25. 377 bool AssignInterMv(const Block& block, bool is_compound); // 5.11.26. 378 bool AssignIntraMv(const Block& block); // 5.11.26. 379 int GetTopTransformWidth(const Block& block, int row4x4, int column4x4, 380 bool ignore_skip); 381 int GetLeftTransformHeight(const Block& block, int row4x4, int column4x4, 382 bool ignore_skip); 383 TransformSize ReadFixedTransformSize(const Block& block); // 5.11.15. 384 // Iterative implementation of 5.11.17. 385 void ReadVariableTransformTree(const Block& block, int row4x4, int column4x4, 386 TransformSize tx_size); 387 void DecodeTransformSize(const Block& block); // 5.11.16. 388 bool ComputePrediction(const Block& block); // 5.11.33. 389 // |x4| and |y4| are the column and row positions of the 4x4 block. |w4| and 390 // |h4| are the width and height in 4x4 units of |tx_size|. 391 int GetTransformAllZeroContext(const Block& block, Plane plane, 392 TransformSize tx_size, int x4, int y4, int w4, 393 int h4); 394 TransformSet GetTransformSet(TransformSize tx_size, 395 bool is_inter) const; // 5.11.48. 396 TransformType ComputeTransformType(const Block& block, Plane plane, 397 TransformSize tx_size, int block_x, 398 int block_y); // 5.11.40. 399 void ReadTransformType(const Block& block, int x4, int y4, 400 TransformSize tx_size); // 5.11.47. 401 template <typename ResidualType> 402 void ReadCoeffBase2D( 403 const uint16_t* scan, TransformSize tx_size, int adjusted_tx_width_log2, 404 int eob, 405 uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1], 406 uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts] 407 [kCoeffBaseRangeSymbolCount + 1], 408 ResidualType* quantized_buffer, uint8_t* level_buffer); 409 template <typename ResidualType> 410 void ReadCoeffBaseHorizontal( 411 const uint16_t* scan, TransformSize tx_size, int adjusted_tx_width_log2, 412 int eob, 413 uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1], 414 uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts] 415 [kCoeffBaseRangeSymbolCount + 1], 416 ResidualType* quantized_buffer, uint8_t* level_buffer); 417 template <typename ResidualType> 418 void ReadCoeffBaseVertical( 419 const uint16_t* scan, TransformSize tx_size, int adjusted_tx_width_log2, 420 int eob, 421 uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1], 422 uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts] 423 [kCoeffBaseRangeSymbolCount + 1], 424 ResidualType* quantized_buffer, uint8_t* level_buffer); 425 int GetDcSignContext(int x4, int y4, int w4, int h4, Plane plane); 426 void SetEntropyContexts(int x4, int y4, int w4, int h4, Plane plane, 427 uint8_t coefficient_level, int8_t dc_category); 428 void InterIntraPrediction( 429 uint16_t* prediction_0, const uint8_t* prediction_mask, 430 ptrdiff_t prediction_mask_stride, 431 const PredictionParameters& prediction_parameters, int prediction_width, 432 int prediction_height, int subsampling_x, int subsampling_y, 433 uint8_t* dest, 434 ptrdiff_t dest_stride); // Part of section 7.11.3.1 in the spec. 435 void CompoundInterPrediction( 436 const Block& block, const uint8_t* prediction_mask, 437 ptrdiff_t prediction_mask_stride, int prediction_width, 438 int prediction_height, int subsampling_x, int subsampling_y, 439 int candidate_row, int candidate_column, uint8_t* dest, 440 ptrdiff_t dest_stride); // Part of section 7.11.3.1 in the spec. 441 GlobalMotion* GetWarpParams(const Block& block, Plane plane, 442 int prediction_width, int prediction_height, 443 const PredictionParameters& prediction_parameters, 444 ReferenceFrameType reference_type, 445 bool* is_local_valid, 446 GlobalMotion* global_motion_params, 447 GlobalMotion* local_warp_params) 448 const; // Part of section 7.11.3.1 in the spec. 449 bool InterPrediction(const Block& block, Plane plane, int x, int y, 450 int prediction_width, int prediction_height, 451 int candidate_row, int candidate_column, 452 bool* is_local_valid, 453 GlobalMotion* local_warp_params); // 7.11.3.1. 454 void ScaleMotionVector(const MotionVector& mv, Plane plane, 455 int reference_frame_index, int x, int y, int* start_x, 456 int* start_y, int* step_x, int* step_y); // 7.11.3.3. 457 // If the method returns false, the caller only uses the output parameters 458 // *ref_block_start_x and *ref_block_start_y. If the method returns true, the 459 // caller uses all three output parameters. 460 static bool GetReferenceBlockPosition( 461 int reference_frame_index, bool is_scaled, int width, int height, 462 int ref_start_x, int ref_last_x, int ref_start_y, int ref_last_y, 463 int start_x, int start_y, int step_x, int step_y, int left_border, 464 int right_border, int top_border, int bottom_border, 465 int* ref_block_start_x, int* ref_block_start_y, int* ref_block_end_x); 466 467 template <typename Pixel> 468 void BuildConvolveBlock(Plane plane, int reference_frame_index, 469 bool is_scaled, int height, int ref_start_x, 470 int ref_last_x, int ref_start_y, int ref_last_y, 471 int step_y, int ref_block_start_x, 472 int ref_block_end_x, int ref_block_start_y, 473 uint8_t* block_buffer, 474 ptrdiff_t convolve_buffer_stride, 475 ptrdiff_t block_extended_width); 476 bool BlockInterPrediction(const Block& block, Plane plane, 477 int reference_frame_index, const MotionVector& mv, 478 int x, int y, int width, int height, 479 int candidate_row, int candidate_column, 480 uint16_t* prediction, bool is_compound, 481 bool is_inter_intra, uint8_t* dest, 482 ptrdiff_t dest_stride); // 7.11.3.4. 483 bool BlockWarpProcess(const Block& block, Plane plane, int index, 484 int block_start_x, int block_start_y, int width, 485 int height, GlobalMotion* warp_params, bool is_compound, 486 bool is_inter_intra, uint8_t* dest, 487 ptrdiff_t dest_stride); // 7.11.3.5. 488 bool ObmcBlockPrediction(const Block& block, const MotionVector& mv, 489 Plane plane, int reference_frame_index, int width, 490 int height, int x, int y, int candidate_row, 491 int candidate_column, 492 ObmcDirection blending_direction); 493 bool ObmcPrediction(const Block& block, Plane plane, int width, 494 int height); // 7.11.3.9. 495 void DistanceWeightedPrediction(void* prediction_0, void* prediction_1, 496 int width, int height, int candidate_row, 497 int candidate_column, uint8_t* dest, 498 ptrdiff_t dest_stride); // 7.11.3.15. 499 // This function specializes the parsing of DC coefficient by removing some of 500 // the branches when i == 0 (since scan[0] is always 0 and scan[i] is always 501 // non-zero for all other possible values of i). |dc_category| is an output 502 // parameter that is populated when |is_dc_coefficient| is true. 503 // |coefficient_level| is an output parameter which accumulates the 504 // coefficient level. 505 template <typename ResidualType, bool is_dc_coefficient> 506 LIBGAV1_ALWAYS_INLINE bool ReadSignAndApplyDequantization( 507 const uint16_t* scan, int i, int q_value, const uint8_t* quantizer_matrix, 508 int shift, int max_value, uint16_t* dc_sign_cdf, int8_t* dc_category, 509 int* coefficient_level, 510 ResidualType* residual_buffer); // Part of 5.11.39. 511 int ReadCoeffBaseRange(uint16_t* cdf); // Part of 5.11.39. 512 // Returns the number of non-zero coefficients that were read. |tx_type| is an 513 // output parameter that stores the computed transform type for the plane 514 // whose coefficients were read. Returns -1 on failure. 515 template <typename ResidualType> 516 int ReadTransformCoefficients(const Block& block, Plane plane, int start_x, 517 int start_y, TransformSize tx_size, 518 TransformType* tx_type); // 5.11.39. 519 bool TransformBlock(const Block& block, Plane plane, int base_x, int base_y, 520 TransformSize tx_size, int x, int y, 521 ProcessingMode mode); // 5.11.35. 522 // Iterative implementation of 5.11.36. 523 bool TransformTree(const Block& block, int start_x, int start_y, 524 BlockSize plane_size, ProcessingMode mode); 525 void ReconstructBlock(const Block& block, Plane plane, int start_x, 526 int start_y, TransformSize tx_size, 527 TransformType tx_type, 528 int non_zero_coeff_count); // Part of 7.12.3. 529 bool Residual(const Block& block, ProcessingMode mode); // 5.11.34. 530 // part of 5.11.5 (reset_block_context() in the spec). 531 void ResetEntropyContext(const Block& block); 532 // Populates the |color_context| and |color_order| for the |i|th iteration 533 // with entries counting down from |start| to |end| (|start| > |end|). 534 void PopulatePaletteColorContexts( 535 const Block& block, PlaneType plane_type, int i, int start, int end, 536 uint8_t color_order[kMaxPaletteSquare][kMaxPaletteSize], 537 uint8_t color_context[kMaxPaletteSquare]); // 5.11.50. 538 bool ReadPaletteTokens(const Block& block); // 5.11.49. 539 template <typename Pixel> 540 void IntraPrediction(const Block& block, Plane plane, int x, int y, 541 bool has_left, bool has_top, bool has_top_right, 542 bool has_bottom_left, PredictionMode mode, 543 TransformSize tx_size); 544 bool IsSmoothPrediction(int row, int column, Plane plane) const; 545 int GetIntraEdgeFilterType(const Block& block, 546 Plane plane) const; // 7.11.2.8. 547 template <typename Pixel> 548 void DirectionalPrediction(const Block& block, Plane plane, int x, int y, 549 bool has_left, bool has_top, bool needs_left, 550 bool needs_top, int prediction_angle, int width, 551 int height, int max_x, int max_y, 552 TransformSize tx_size, Pixel* top_row, 553 Pixel* left_column); // 7.11.2.4. 554 template <typename Pixel> 555 void PalettePrediction(const Block& block, Plane plane, int start_x, 556 int start_y, int x, int y, 557 TransformSize tx_size); // 7.11.4. 558 template <typename Pixel> 559 void ChromaFromLumaPrediction(const Block& block, Plane plane, int start_x, 560 int start_y, 561 TransformSize tx_size); // 7.11.5. 562 // Section 7.19. Applies some filtering and reordering to the motion vectors 563 // for the given |block| and stores them into |current_frame_|. 564 void StoreMotionFieldMvsIntoCurrentFrame(const Block& block); 565 566 // Returns the zero-based index of the super block that contains |row4x4| 567 // relative to the start of this tile. SuperBlockRowIndex(int row4x4)568 int SuperBlockRowIndex(int row4x4) const { 569 return (row4x4 - row4x4_start_) >> 570 (sequence_header_.use_128x128_superblock ? 5 : 4); 571 } 572 573 // Returns the zero-based index of the super block that contains |column4x4| 574 // relative to the start of this tile. SuperBlockColumnIndex(int column4x4)575 int SuperBlockColumnIndex(int column4x4) const { 576 return (column4x4 - column4x4_start_) >> 577 (sequence_header_.use_128x128_superblock ? 5 : 4); 578 } 579 SuperBlockSize()580 BlockSize SuperBlockSize() const { 581 return sequence_header_.use_128x128_superblock ? kBlock128x128 582 : kBlock64x64; 583 } PlaneCount()584 int PlaneCount() const { 585 return sequence_header_.color_config.is_monochrome ? kMaxPlanesMonochrome 586 : kMaxPlanes; 587 } 588 589 const int number_; 590 const int row_; 591 const int column_; 592 const uint8_t* const data_; 593 size_t size_; 594 int row4x4_start_; 595 int row4x4_end_; 596 int column4x4_start_; 597 int column4x4_end_; 598 int superblock_rows_; 599 int superblock_columns_; 600 bool read_deltas_; 601 const int8_t subsampling_x_[kMaxPlanes]; 602 const int8_t subsampling_y_[kMaxPlanes]; 603 int deblock_row_limit_[kMaxPlanes]; 604 int deblock_column_limit_[kMaxPlanes]; 605 606 // The dimensions (in order) are: segment_id, level_index (based on plane and 607 // direction), reference_frame and mode_id. 608 uint8_t deblock_filter_levels_[kMaxSegments][kFrameLfCount] 609 [kNumReferenceFrameTypes][2]; 610 611 // current_quantizer_index_ is in the range [0, 255]. 612 uint8_t current_quantizer_index_; 613 // These two arrays (|coefficient_levels_| and |dc_categories_|) are used to 614 // store the entropy context. Their dimensions are as follows: First - 615 // left/top; Second - plane; Third - row4x4 (if first dimension is 616 // left)/column4x4 (if first dimension is top). 617 // 618 // This is equivalent to the LeftLevelContext and AboveLevelContext arrays in 619 // the spec. In the spec, it stores values from 0 through 63 (inclusive). The 620 // stored values are used to compute the left and top contexts in 621 // GetTransformAllZeroContext. In that function, we only care about the 622 // following values: 0, 1, 2, 3 and >= 4. So instead of clamping to 63, we 623 // clamp to 4 (i.e.) all the values greater than 4 are stored as 4. 624 std::array<Array2D<uint8_t>, 2> coefficient_levels_; 625 // This is equivalent to the LeftDcContext and AboveDcContext arrays in the 626 // spec. In the spec, it can store 3 possible values: 0, 1 and 2 (where 1 627 // means the value is < 0, 2 means the value is > 0 and 0 means the value is 628 // equal to 0). 629 // 630 // The stored values are used in two places: 631 // * GetTransformAllZeroContext: Here, we only care about whether the 632 // value is 0 or not (whether it is 1 or 2 is irrelevant). 633 // * GetDcSignContext: Here, we do the following computation: if the 634 // stored value is 1, we decrement a counter. If the stored value is 2 635 // we increment a counter. 636 // 637 // Based on this usage, we can simply replace 1 with -1 and 2 with 1 and 638 // use that value to compute the counter. 639 // 640 // The usage on GetTransformAllZeroContext is unaffected since there we 641 // only care about whether it is 0 or not. 642 std::array<Array2D<int8_t>, 2> dc_categories_; 643 const ObuSequenceHeader& sequence_header_; 644 const ObuFrameHeader& frame_header_; 645 const std::array<bool, kNumReferenceFrameTypes>& reference_frame_sign_bias_; 646 const std::array<RefCountedBufferPtr, kNumReferenceFrameTypes>& 647 reference_frames_; 648 TemporalMotionField& motion_field_; 649 const std::array<uint8_t, kNumReferenceFrameTypes>& reference_order_hint_; 650 const WedgeMaskArray& wedge_masks_; 651 const QuantizerMatrix& quantizer_matrix_; 652 DaalaBitReader reader_; 653 SymbolDecoderContext symbol_decoder_context_; 654 SymbolDecoderContext* const saved_symbol_decoder_context_; 655 const SegmentationMap* prev_segment_ids_; 656 const dsp::Dsp& dsp_; 657 PostFilter& post_filter_; 658 BlockParametersHolder& block_parameters_holder_; 659 Quantizer quantizer_; 660 // When there is no multi-threading within the Tile, |residual_buffer_| is 661 // used. When there is multi-threading within the Tile, 662 // |residual_buffer_threaded_| is used. In the following comment, 663 // |residual_buffer| refers to either |residual_buffer_| or 664 // |residual_buffer_threaded_| depending on whether multi-threading is enabled 665 // within the Tile or not. 666 // The |residual_buffer| is used to help with the dequantization and the 667 // inverse transform processes. It is declared as a uint8_t, but is always 668 // accessed either as an int16_t or int32_t depending on |bitdepth|. Here is 669 // what it stores at various stages of the decoding process (in the order 670 // which they happen): 671 // 1) In ReadTransformCoefficients(), this buffer is used to store the 672 // dequantized values. 673 // 2) In Reconstruct(), this buffer is used as the input to the row 674 // transform process. 675 // The size of this buffer would be: 676 // For |residual_buffer_|: (4096 + 32 * |kResidualPaddingVertical|) * 677 // |residual_size_|. Where 4096 = 64x64 which is the maximum transform 678 // size, and 32 * |kResidualPaddingVertical| is the padding to avoid 679 // bottom boundary checks when parsing quantized coefficients. This 680 // memory is allocated and owned by the Tile class. 681 // For |residual_buffer_threaded_|: See the comment below. This memory is 682 // not allocated or owned by the Tile class. 683 AlignedUniquePtr<uint8_t> residual_buffer_; 684 // This is a 2d array of pointers of size |superblock_rows_| by 685 // |superblock_columns_| where each pointer points to a ResidualBuffer for a 686 // single super block. The array is populated when the parsing process begins 687 // by calling |residual_buffer_pool_->Get()| and the memory is released back 688 // to the pool by calling |residual_buffer_pool_->Release()| when the decoding 689 // process is complete. 690 Array2D<std::unique_ptr<ResidualBuffer>> residual_buffer_threaded_; 691 // sizeof(int16_t or int32_t) depending on |bitdepth|. 692 const size_t residual_size_; 693 // Number of superblocks on the top-right that will have to be decoded before 694 // the current superblock can be decoded. This will be 1 if allow_intrabc is 695 // false. If allow_intrabc is true, then this value will be 696 // use_128x128_superblock ? 3 : 5. This is the allowed range of reference for 697 // the top rows for intrabc. 698 const int intra_block_copy_lag_; 699 700 // In the Tile class, we use the "current_frame" in two ways: 701 // 1) To write the decoded output into (using the |buffer_| view). 702 // 2) To read the pixels for intra block copy (using the |current_frame_| 703 // reference). 704 // 705 // When intra block copy is off, |buffer_| and |current_frame_| may or may not 706 // point to the same plane pointers. But it is okay since |current_frame_| is 707 // never used in this case. 708 // 709 // When intra block copy is on, |buffer_| and |current_frame_| always point to 710 // the same plane pointers (since post filtering is disabled). So the usage in 711 // both case 1 and case 2 remain valid. 712 Array2DView<uint8_t> buffer_[kMaxPlanes]; 713 RefCountedBuffer& current_frame_; 714 715 Array2D<int16_t>& cdef_index_; 716 Array2D<TransformSize>& inter_transform_sizes_; 717 std::array<RestorationUnitInfo, kMaxPlanes> reference_unit_info_; 718 // If |thread_pool_| is nullptr, the calling thread will do the parsing and 719 // the decoding in one pass. If |thread_pool_| is not nullptr, then the main 720 // thread will do the parsing while the thread pool workers will do the 721 // decoding. 722 ThreadPool* const thread_pool_; 723 ThreadingParameters threading_; 724 ResidualBufferPool* const residual_buffer_pool_; 725 TileScratchBufferPool* const tile_scratch_buffer_pool_; 726 BlockingCounterWithStatus* const pending_tiles_; 727 bool split_parse_and_decode_; 728 // This is used only when |split_parse_and_decode_| is false. 729 std::unique_ptr<PredictionParameters> prediction_parameters_ = nullptr; 730 // Stores the |transform_type| for the super block being decoded at a 4x4 731 // granularity. The spec uses absolute indices for this array but it is 732 // sufficient to use indices relative to the super block being decoded. 733 TransformType transform_types_[32][32]; 734 // delta_lf_[i] is in the range [-63, 63]. 735 int8_t delta_lf_[kFrameLfCount]; 736 // True if all the values in |delta_lf_| are zero. False otherwise. 737 bool delta_lf_all_zero_; 738 const bool frame_parallel_; 739 const bool use_intra_prediction_buffer_; 740 // Buffer used to store the unfiltered pixels that are necessary for decoding 741 // the next superblock row (for the intra prediction process). Used only if 742 // |use_intra_prediction_buffer_| is true. The |frame_scratch_buffer| contains 743 // one row buffer for each tile row. This tile will have to use the buffer 744 // corresponding to this tile's row. 745 IntraPredictionBuffer* const intra_prediction_buffer_; 746 // Stores the progress of the reference frames. This will be used to avoid 747 // unnecessary calls into RefCountedBuffer::WaitUntil(). 748 std::array<int, kNumReferenceFrameTypes> reference_frame_progress_cache_; 749 }; 750 751 struct Tile::Block { BlockBlock752 Block(const Tile& tile, BlockSize size, int row4x4, int column4x4, 753 TileScratchBuffer* const scratch_buffer, ResidualPtr* residual) 754 : tile(tile), 755 size(size), 756 row4x4(row4x4), 757 column4x4(column4x4), 758 width(kBlockWidthPixels[size]), 759 height(kBlockHeightPixels[size]), 760 width4x4(width >> 2), 761 height4x4(height >> 2), 762 scratch_buffer(scratch_buffer), 763 residual(residual) { 764 assert(size != kBlockInvalid); 765 residual_size[kPlaneY] = kPlaneResidualSize[size][0][0]; 766 residual_size[kPlaneU] = residual_size[kPlaneV] = 767 kPlaneResidualSize[size][tile.subsampling_x_[kPlaneU]] 768 [tile.subsampling_y_[kPlaneU]]; 769 assert(residual_size[kPlaneY] != kBlockInvalid); 770 if (tile.PlaneCount() > 1) { 771 assert(residual_size[kPlaneU] != kBlockInvalid); 772 } 773 if ((row4x4 & 1) == 0 && 774 (tile.sequence_header_.color_config.subsampling_y & height4x4) == 1) { 775 has_chroma = false; 776 } else if ((column4x4 & 1) == 0 && 777 (tile.sequence_header_.color_config.subsampling_x & width4x4) == 778 1) { 779 has_chroma = false; 780 } else { 781 has_chroma = !tile.sequence_header_.color_config.is_monochrome; 782 } 783 top_available[kPlaneY] = tile.IsTopInside(row4x4); 784 left_available[kPlaneY] = tile.IsLeftInside(column4x4); 785 if (has_chroma) { 786 // top_available[kPlaneU] and top_available[kPlaneV] are valid only if 787 // has_chroma is true. 788 // The next 3 lines are equivalent to: 789 // top_available[kPlaneU] = top_available[kPlaneV] = 790 // top_available[kPlaneY] && 791 // ((tile.sequence_header_.color_config.subsampling_y & height4x4) == 792 // 0 || tile.IsTopInside(row4x4 - 1)); 793 top_available[kPlaneU] = top_available[kPlaneV] = tile.IsTopInside( 794 row4x4 - 795 (tile.sequence_header_.color_config.subsampling_y & height4x4)); 796 // left_available[kPlaneU] and left_available[kPlaneV] are valid only if 797 // has_chroma is true. 798 // The next 3 lines are equivalent to: 799 // left_available[kPlaneU] = left_available[kPlaneV] = 800 // left_available[kPlaneY] && 801 // ((tile.sequence_header_.color_config.subsampling_x & width4x4) == 0 802 // || tile.IsLeftInside(column4x4 - 1)); 803 left_available[kPlaneU] = left_available[kPlaneV] = tile.IsLeftInside( 804 column4x4 - 805 (tile.sequence_header_.color_config.subsampling_x & width4x4)); 806 } 807 const ptrdiff_t stride = tile.BlockParametersStride(); 808 BlockParameters** const bps = 809 tile.BlockParametersAddress(row4x4, column4x4); 810 bp = *bps; 811 // bp_top is valid only if top_available[kPlaneY] is true. 812 if (top_available[kPlaneY]) { 813 bp_top = *(bps - stride); 814 } 815 // bp_left is valid only if left_available[kPlaneY] is true. 816 if (left_available[kPlaneY]) { 817 bp_left = *(bps - 1); 818 } 819 } 820 HasChromaBlock821 bool HasChroma() const { return has_chroma; } 822 823 // These return values of these group of functions are valid only if the 824 // corresponding top_available or left_available is true. TopReferenceBlock825 ReferenceFrameType TopReference(int index) const { 826 return bp_top->reference_frame[index]; 827 } 828 LeftReferenceBlock829 ReferenceFrameType LeftReference(int index) const { 830 return bp_left->reference_frame[index]; 831 } 832 IsTopIntraBlock833 bool IsTopIntra() const { return TopReference(0) <= kReferenceFrameIntra; } IsLeftIntraBlock834 bool IsLeftIntra() const { return LeftReference(0) <= kReferenceFrameIntra; } 835 IsTopSingleBlock836 bool IsTopSingle() const { return TopReference(1) <= kReferenceFrameIntra; } IsLeftSingleBlock837 bool IsLeftSingle() const { return LeftReference(1) <= kReferenceFrameIntra; } 838 CountReferencesBlock839 int CountReferences(ReferenceFrameType type) const { 840 return static_cast<int>(top_available[kPlaneY] && 841 bp_top->reference_frame[0] == type) + 842 static_cast<int>(top_available[kPlaneY] && 843 bp_top->reference_frame[1] == type) + 844 static_cast<int>(left_available[kPlaneY] && 845 bp_left->reference_frame[0] == type) + 846 static_cast<int>(left_available[kPlaneY] && 847 bp_left->reference_frame[1] == type); 848 } 849 850 // 7.10.3. 851 // Checks if there are any inter blocks to the left or above. If so, it 852 // returns true indicating that the block has neighbors that are suitable for 853 // use by overlapped motion compensation. HasOverlappableCandidatesBlock854 bool HasOverlappableCandidates() const { 855 const ptrdiff_t stride = tile.BlockParametersStride(); 856 BlockParameters** const bps = tile.BlockParametersAddress(0, 0); 857 if (top_available[kPlaneY]) { 858 BlockParameters** bps_top = bps + (row4x4 - 1) * stride + (column4x4 | 1); 859 const int columns = std::min(tile.frame_header_.columns4x4 - column4x4, 860 static_cast<int>(width4x4)); 861 BlockParameters** const bps_top_end = bps_top + columns; 862 do { 863 if ((*bps_top)->reference_frame[0] > kReferenceFrameIntra) { 864 return true; 865 } 866 bps_top += 2; 867 } while (bps_top < bps_top_end); 868 } 869 if (left_available[kPlaneY]) { 870 BlockParameters** bps_left = bps + (row4x4 | 1) * stride + column4x4 - 1; 871 const int rows = std::min(tile.frame_header_.rows4x4 - row4x4, 872 static_cast<int>(height4x4)); 873 BlockParameters** const bps_left_end = bps_left + rows * stride; 874 do { 875 if ((*bps_left)->reference_frame[0] > kReferenceFrameIntra) { 876 return true; 877 } 878 bps_left += 2 * stride; 879 } while (bps_left < bps_left_end); 880 } 881 return false; 882 } 883 884 const Tile& tile; 885 bool has_chroma; 886 const BlockSize size; 887 bool top_available[kMaxPlanes]; 888 bool left_available[kMaxPlanes]; 889 BlockSize residual_size[kMaxPlanes]; 890 const int row4x4; 891 const int column4x4; 892 const int width; 893 const int height; 894 const int width4x4; 895 const int height4x4; 896 const BlockParameters* bp_top; 897 const BlockParameters* bp_left; 898 BlockParameters* bp; 899 TileScratchBuffer* const scratch_buffer; 900 ResidualPtr* const residual; 901 }; 902 903 extern template bool 904 Tile::ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>( 905 int row4x4, TileScratchBuffer* scratch_buffer); 906 extern template bool 907 Tile::ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>( 908 int row4x4, TileScratchBuffer* scratch_buffer); 909 910 } // namespace libgav1 911 912 #endif // LIBGAV1_SRC_TILE_H_ 913