1 /* 2 * Copyright 2019 The libgav1 Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBGAV1_SRC_UTILS_TYPES_H_ 18 #define LIBGAV1_SRC_UTILS_TYPES_H_ 19 20 #include <array> 21 #include <cstddef> 22 #include <cstdint> 23 #include <memory> 24 25 #include "src/utils/array_2d.h" 26 #include "src/utils/constants.h" 27 #include "src/utils/memory.h" 28 29 namespace libgav1 { 30 31 struct MotionVector : public Allocable { 32 static constexpr int kRow = 0; 33 static constexpr int kColumn = 1; 34 35 MotionVector() = default; 36 MotionVector(const MotionVector& mv) = default; 37 38 MotionVector& operator=(const MotionVector& rhs) { 39 mv32 = rhs.mv32; 40 return *this; 41 } 42 43 bool operator==(const MotionVector& rhs) const { return mv32 == rhs.mv32; } 44 45 union { 46 // Motion vectors will always fit in int16_t and using int16_t here instead 47 // of int saves significant memory since some of the frame sized structures 48 // store motion vectors. 49 int16_t mv[2]; 50 // A uint32_t view into the |mv| array. Useful for cases where both the 51 // motion vectors have to be copied or compared with a single 32 bit 52 // instruction. 53 uint32_t mv32; 54 }; 55 }; 56 57 union CompoundMotionVector { 58 CompoundMotionVector() = default; 59 CompoundMotionVector(const CompoundMotionVector& mv) = default; 60 61 CompoundMotionVector& operator=(const CompoundMotionVector& rhs) { 62 mv64 = rhs.mv64; 63 return *this; 64 } 65 66 bool operator==(const CompoundMotionVector& rhs) const { 67 return mv64 == rhs.mv64; 68 } 69 70 MotionVector mv[2]; 71 // A uint64_t view into the |mv| array. Useful for cases where all the motion 72 // vectors have to be copied or compared with a single 64 bit instruction. 73 uint64_t mv64; 74 }; 75 76 // Stores the motion information used for motion field estimation. 77 struct TemporalMotionField : public Allocable { 78 Array2D<MotionVector> mv; 79 Array2D<int8_t> reference_offset; 80 }; 81 82 // MvContexts contains the contexts used to decode portions of an inter block 83 // mode info to set the y_mode field in BlockParameters. 84 // 85 // The contexts in the struct correspond to the ZeroMvContext, RefMvContext, 86 // and NewMvContext variables in the spec. 87 struct MvContexts { 88 int zero_mv; 89 int reference_mv; 90 int new_mv; 91 }; 92 93 struct PaletteModeInfo { 94 uint8_t size[kNumPlaneTypes]; 95 uint16_t color[kMaxPlanes][kMaxPaletteSize]; 96 }; 97 98 // Stores the parameters used by the prediction process. The members of the 99 // struct are filled in when parsing the bitstream and used when the prediction 100 // is computed. The information in this struct is associated with a single 101 // block. 102 // While both BlockParameters and PredictionParameters store information 103 // pertaining to a Block, the only difference is that BlockParameters outlives 104 // the block itself (for example, some of the variables in BlockParameters are 105 // used to compute the context for reading elements in the subsequent blocks). 106 struct PredictionParameters : public Allocable { 107 // Restore the index in the unsorted mv stack from the least 3 bits of sorted 108 // |weight_index_stack|. reference_mvPredictionParameters109 const MotionVector& reference_mv(int stack_index) const { 110 return ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)]; 111 } reference_mvPredictionParameters112 const MotionVector& reference_mv(int stack_index, int mv_index) const { 113 return compound_ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)] 114 .mv[mv_index]; 115 } 116 IncreaseWeightPredictionParameters117 void IncreaseWeight(ptrdiff_t index, int weight) { 118 weight_index_stack[index] += weight << 3; 119 } 120 SetWeightIndexStackEntryPredictionParameters121 void SetWeightIndexStackEntry(int index, int weight) { 122 weight_index_stack[index] = (weight << 3) + 7 - index; 123 } 124 125 bool use_filter_intra; 126 FilterIntraPredictor filter_intra_mode; 127 int angle_delta[kNumPlaneTypes]; 128 int8_t cfl_alpha_u; 129 int8_t cfl_alpha_v; 130 int max_luma_width; 131 int max_luma_height; 132 Array2D<uint8_t> color_index_map[kNumPlaneTypes]; 133 bool use_intra_block_copy; 134 InterIntraMode inter_intra_mode; 135 bool is_wedge_inter_intra; 136 int wedge_index; 137 int wedge_sign; 138 bool mask_is_inverse; 139 MotionMode motion_mode; 140 CompoundPredictionType compound_prediction_type; 141 union { 142 // |ref_mv_stack| and |compound_ref_mv_stack| are not sorted after 143 // construction. reference_mv() must be called to get the correct element. 144 MotionVector ref_mv_stack[kMaxRefMvStackSize]; 145 CompoundMotionVector compound_ref_mv_stack[kMaxRefMvStackSize]; 146 }; 147 // The least 3 bits of |weight_index_stack| store the index information, and 148 // the other bits store the weight. The index information is actually 7 - 149 // index to make the descending order sort stable (preserves the original 150 // order for elements with the same weight). Sorting an int16_t array is much 151 // faster than sorting a struct array with weight and index stored separately. 152 int16_t weight_index_stack[kMaxRefMvStackSize]; 153 // In the spec, the weights of all the nearest mvs are incremented by a bonus 154 // weight which is larger than any natural weight, and later the weights of 155 // the mvs are compared with this bonus weight to determine their contexts. We 156 // replace this procedure by introducing |nearest_mv_count|, which records the 157 // count of the nearest mvs. Since all the nearest mvs are in the beginning of 158 // the mv stack, the index of a mv in the mv stack can be compared with 159 // |nearest_mv_count| to get that mv's context. 160 int nearest_mv_count; 161 int ref_mv_count; 162 int ref_mv_index; 163 MotionVector global_mv[2]; 164 int num_warp_samples; 165 int warp_estimate_candidates[kMaxLeastSquaresSamples][4]; 166 }; 167 168 // A lot of BlockParameters objects are created, so the smallest type is used 169 // for each field. The ranges of some fields are documented to justify why 170 // their types are large enough. 171 struct BlockParameters : public Allocable { 172 BlockSize size; 173 bool skip; 174 // True means that this block will use some default settings (that 175 // correspond to compound prediction) and so most of the mode info is 176 // skipped. False means that the mode info is not skipped. 177 bool skip_mode; 178 bool is_inter; 179 bool is_explicit_compound_type; // comp_group_idx in the spec. 180 bool is_compound_type_average; // compound_idx in the spec. 181 bool is_global_mv_block; 182 bool use_predicted_segment_id; // only valid with temporal update enabled. 183 int8_t segment_id; // segment_id is in the range [0, 7]. 184 PredictionMode y_mode; 185 PredictionMode uv_mode; 186 TransformSize transform_size; 187 TransformSize uv_transform_size; 188 InterpolationFilter interpolation_filter[2]; 189 ReferenceFrameType reference_frame[2]; 190 // The index of this array is as follows: 191 // 0 - Y plane vertical filtering. 192 // 1 - Y plane horizontal filtering. 193 // 2 - U plane (both directions). 194 // 3 - V plane (both directions). 195 uint8_t deblock_filter_level[kFrameLfCount]; 196 CompoundMotionVector mv; 197 PaletteModeInfo palette_mode_info; 198 // When |Tile::split_parse_and_decode_| is true, each block gets its own 199 // instance of |prediction_parameters|. When it is false, all the blocks point 200 // to |Tile::prediction_parameters_|. This field is valid only as long as the 201 // block is *being* decoded. The lifetime and usage of this field can be 202 // better understood by following its flow in tile.cc. 203 std::unique_ptr<PredictionParameters> prediction_parameters; 204 }; 205 206 // A five dimensional array used to store the wedge masks. The dimensions are: 207 // - block_size_index (returned by GetWedgeBlockSizeIndex() in prediction.cc). 208 // - flip_sign (0 or 1). 209 // - wedge_index (0 to 15). 210 // - each of those three dimensions is a 2d array of block_width by 211 // block_height. 212 using WedgeMaskArray = 213 std::array<std::array<std::array<Array2D<uint8_t>, 16>, 2>, 9>; 214 215 enum GlobalMotionTransformationType : uint8_t { 216 kGlobalMotionTransformationTypeIdentity, 217 kGlobalMotionTransformationTypeTranslation, 218 kGlobalMotionTransformationTypeRotZoom, 219 kGlobalMotionTransformationTypeAffine, 220 kNumGlobalMotionTransformationTypes 221 }; 222 223 // Global motion and warped motion parameters. See the paper for more info: 224 // S. Parker, Y. Chen, D. Barker, P. de Rivaz, D. Mukherjee, "Global and locally 225 // adaptive warped motion compensation in video compression", Proc. IEEE 226 // International Conference on Image Processing (ICIP), pp. 275-279, Sep. 2017. 227 struct GlobalMotion { 228 GlobalMotionTransformationType type; 229 int32_t params[6]; 230 231 // Represent two shearing operations. Computed from |params| by SetupShear(). 232 // 233 // The least significant six (= kWarpParamRoundingBits) bits are all zeros. 234 // (This means alpha, beta, gamma, and delta could be represented by a 10-bit 235 // signed integer.) The minimum value is INT16_MIN (= -32768) and the maximum 236 // value is 32704 = 0x7fc0, the largest int16_t value whose least significant 237 // six bits are all zeros. 238 // 239 // Valid warp parameters (as validated by SetupShear()) have smaller ranges. 240 // Their absolute values are less than 2^14 (= 16384). (This follows from 241 // the warpValid check at the end of Section 7.11.3.6.) 242 // 243 // NOTE: Section 7.11.3.6 of the spec allows a maximum value of 32768, which 244 // is outside the range of int16_t. When cast to int16_t, 32768 becomes 245 // -32768. This potential int16_t overflow does not matter because either 246 // 32768 or -32768 causes SetupShear() to return false, 247 int16_t alpha; 248 int16_t beta; 249 int16_t gamma; 250 int16_t delta; 251 }; 252 253 // Loop filter parameters: 254 // 255 // If level[0] and level[1] are both equal to 0, the loop filter process is 256 // not invoked. 257 // 258 // |sharpness| and |delta_enabled| are only used by the loop filter process. 259 // 260 // The |ref_deltas| and |mode_deltas| arrays are used not only by the loop 261 // filter process but also by the reference frame update and loading 262 // processes. The loop filter process uses |ref_deltas| and |mode_deltas| only 263 // when |delta_enabled| is true. 264 struct LoopFilter { 265 // Contains loop filter strength values in the range of [0, 63]. 266 std::array<int8_t, kFrameLfCount> level; 267 // Indicates the sharpness level in the range of [0, 7]. 268 int8_t sharpness; 269 // Whether the filter level depends on the mode and reference frame used to 270 // predict a block. 271 bool delta_enabled; 272 // Whether additional syntax elements were read that specify which mode and 273 // reference frame deltas are to be updated. loop_filter_delta_update field in 274 // Section 5.9.11 of the spec. 275 bool delta_update; 276 // Contains the adjustment needed for the filter level based on the chosen 277 // reference frame, in the range of [-64, 63]. 278 std::array<int8_t, kNumReferenceFrameTypes> ref_deltas; 279 // Contains the adjustment needed for the filter level based on the chosen 280 // mode, in the range of [-64, 63]. 281 std::array<int8_t, kLoopFilterMaxModeDeltas> mode_deltas; 282 }; 283 284 struct Delta { 285 bool present; 286 uint8_t scale; 287 bool multi; 288 }; 289 290 struct Cdef { 291 uint8_t damping; // damping value from the spec + (bitdepth - 8). 292 uint8_t bits; 293 // All the strength values are the values from the spec and left shifted by 294 // (bitdepth - 8). 295 uint8_t y_primary_strength[kMaxCdefStrengths]; 296 uint8_t y_secondary_strength[kMaxCdefStrengths]; 297 uint8_t uv_primary_strength[kMaxCdefStrengths]; 298 uint8_t uv_secondary_strength[kMaxCdefStrengths]; 299 }; 300 301 struct TileInfo { 302 bool uniform_spacing; 303 int sb_rows; 304 int sb_columns; 305 int tile_count; 306 int tile_columns_log2; 307 int tile_columns; 308 int tile_column_start[kMaxTileColumns + 1]; 309 // This field is not used by libgav1, but is populated for use by some 310 // hardware decoders. So it must not be removed. 311 int tile_column_width_in_superblocks[kMaxTileColumns + 1]; 312 int tile_rows_log2; 313 int tile_rows; 314 int tile_row_start[kMaxTileRows + 1]; 315 // This field is not used by libgav1, but is populated for use by some 316 // hardware decoders. So it must not be removed. 317 int tile_row_height_in_superblocks[kMaxTileRows + 1]; 318 int16_t context_update_id; 319 uint8_t tile_size_bytes; 320 }; 321 322 struct LoopRestoration { 323 LoopRestorationType type[kMaxPlanes]; 324 int unit_size_log2[kMaxPlanes]; 325 }; 326 327 // Stores the quantization parameters of Section 5.9.12. 328 struct QuantizerParameters { 329 // base_index is in the range [0, 255]. 330 uint8_t base_index; 331 int8_t delta_dc[kMaxPlanes]; 332 // delta_ac[kPlaneY] is always 0. 333 int8_t delta_ac[kMaxPlanes]; 334 bool use_matrix; 335 // The |matrix_level| array is used only when |use_matrix| is true. 336 // matrix_level[plane] specifies the level in the quantizer matrix that 337 // should be used for decoding |plane|. The quantizer matrix has 15 levels, 338 // from 0 to 14. The range of matrix_level[plane] is [0, 15]. If 339 // matrix_level[plane] is 15, the quantizer matrix is not used. 340 int8_t matrix_level[kMaxPlanes]; 341 }; 342 343 // The corresponding segment feature constants in the AV1 spec are named 344 // SEG_LVL_xxx. 345 enum SegmentFeature : uint8_t { 346 kSegmentFeatureQuantizer, 347 kSegmentFeatureLoopFilterYVertical, 348 kSegmentFeatureLoopFilterYHorizontal, 349 kSegmentFeatureLoopFilterU, 350 kSegmentFeatureLoopFilterV, 351 kSegmentFeatureReferenceFrame, 352 kSegmentFeatureSkip, 353 kSegmentFeatureGlobalMv, 354 kSegmentFeatureMax 355 }; 356 357 struct Segmentation { 358 // 5.11.14. 359 // Returns true if the feature is enabled in the segment. FeatureActiveSegmentation360 bool FeatureActive(int segment_id, SegmentFeature feature) const { 361 return enabled && segment_id < kMaxSegments && 362 feature_enabled[segment_id][feature]; 363 } 364 365 // Returns true if the feature is signed. FeatureSignedSegmentation366 static bool FeatureSigned(SegmentFeature feature) { 367 // Only the first five segment features are signed, so this comparison 368 // suffices. 369 return feature <= kSegmentFeatureLoopFilterV; 370 } 371 372 bool enabled; 373 bool update_map; 374 bool update_data; 375 bool temporal_update; 376 // True if the segment id will be read before the skip syntax element. False 377 // if the skip syntax element will be read first. 378 bool segment_id_pre_skip; 379 // The highest numbered segment id that has some enabled feature. Used as 380 // the upper bound for decoding segment ids. 381 int8_t last_active_segment_id; 382 383 bool feature_enabled[kMaxSegments][kSegmentFeatureMax]; 384 int16_t feature_data[kMaxSegments][kSegmentFeatureMax]; 385 bool lossless[kMaxSegments]; 386 // Cached values of get_qindex(1, segmentId), to be consumed by 387 // Tile::ReadTransformType(). The values are in the range [0, 255]. 388 uint8_t qindex[kMaxSegments]; 389 }; 390 391 // Section 6.8.20. 392 // Note: In spec, film grain section uses YCbCr to denote variable names, 393 // such as num_cb_points, num_cr_points. To keep it consistent with other 394 // parts of code, we use YUV, i.e., num_u_points, num_v_points, etc. 395 struct FilmGrainParams { 396 bool apply_grain; 397 bool update_grain; 398 bool chroma_scaling_from_luma; 399 bool overlap_flag; 400 bool clip_to_restricted_range; 401 402 uint8_t num_y_points; // [0, 14]. 403 uint8_t num_u_points; // [0, 10]. 404 uint8_t num_v_points; // [0, 10]. 405 // Must be [0, 255]. 10/12 bit /= 4 or 16. Must be in increasing order. 406 uint8_t point_y_value[14]; 407 uint8_t point_y_scaling[14]; 408 uint8_t point_u_value[10]; 409 uint8_t point_u_scaling[10]; 410 uint8_t point_v_value[10]; 411 uint8_t point_v_scaling[10]; 412 413 uint8_t chroma_scaling; // [8, 11]. 414 uint8_t auto_regression_coeff_lag; // [0, 3]. 415 int8_t auto_regression_coeff_y[24]; // [-128, 127] 416 int8_t auto_regression_coeff_u[25]; // [-128, 127] 417 int8_t auto_regression_coeff_v[25]; // [-128, 127] 418 // Shift value: auto regression coeffs range 419 // 6: [-2, 2) 420 // 7: [-1, 1) 421 // 8: [-0.5, 0.5) 422 // 9: [-0.25, 0.25) 423 uint8_t auto_regression_shift; 424 425 uint16_t grain_seed; 426 int reference_index; 427 int grain_scale_shift; 428 // These multipliers are encoded as nonnegative values by adding 128 first. 429 // The 128 is subtracted during parsing. 430 int8_t u_multiplier; // [-128, 127] 431 int8_t u_luma_multiplier; // [-128, 127] 432 // These offsets are encoded as nonnegative values by adding 256 first. The 433 // 256 is subtracted during parsing. 434 int16_t u_offset; // [-256, 255] 435 int8_t v_multiplier; // [-128, 127] 436 int8_t v_luma_multiplier; // [-128, 127] 437 int16_t v_offset; // [-256, 255] 438 }; 439 440 struct ObuFrameHeader { 441 uint16_t display_frame_id; 442 uint16_t current_frame_id; 443 int64_t frame_offset; 444 uint16_t expected_frame_id[kNumInterReferenceFrameTypes]; 445 int32_t width; 446 int32_t height; 447 int32_t columns4x4; 448 int32_t rows4x4; 449 // The render size (render_width and render_height) is a hint to the 450 // application about the desired display size. It has no effect on the 451 // decoding process. 452 int32_t render_width; 453 int32_t render_height; 454 int32_t upscaled_width; 455 LoopRestoration loop_restoration; 456 uint32_t buffer_removal_time[kMaxOperatingPoints]; 457 uint32_t frame_presentation_time; 458 // Note: global_motion[0] (for kReferenceFrameIntra) is not used. 459 std::array<GlobalMotion, kNumReferenceFrameTypes> global_motion; 460 TileInfo tile_info; 461 QuantizerParameters quantizer; 462 Segmentation segmentation; 463 bool show_existing_frame; 464 // frame_to_show is in the range [0, 7]. Only used if show_existing_frame is 465 // true. 466 int8_t frame_to_show; 467 FrameType frame_type; 468 bool show_frame; 469 bool showable_frame; 470 bool error_resilient_mode; 471 bool enable_cdf_update; 472 bool frame_size_override_flag; 473 // The order_hint syntax element in the uncompressed header. If 474 // show_existing_frame is false, the OrderHint variable in the spec is equal 475 // to this field, and so this field can be used in place of OrderHint when 476 // show_existing_frame is known to be false, such as during tile decoding. 477 uint8_t order_hint; 478 int8_t primary_reference_frame; 479 bool render_and_frame_size_different; 480 bool use_superres; 481 uint8_t superres_scale_denominator; 482 bool allow_screen_content_tools; 483 bool allow_intrabc; 484 bool frame_refs_short_signaling; 485 // A bitmask that specifies which reference frame slots will be updated with 486 // the current frame after it is decoded. 487 uint8_t refresh_frame_flags; 488 static_assert(sizeof(ObuFrameHeader::refresh_frame_flags) * 8 == 489 kNumReferenceFrameTypes, 490 ""); 491 bool found_reference; 492 int8_t force_integer_mv; 493 bool allow_high_precision_mv; 494 InterpolationFilter interpolation_filter; 495 bool is_motion_mode_switchable; 496 bool use_ref_frame_mvs; 497 bool enable_frame_end_update_cdf; 498 // True if all segments are losslessly encoded at the coded resolution. 499 bool coded_lossless; 500 // True if all segments are losslessly encoded at the upscaled resolution. 501 bool upscaled_lossless; 502 TxMode tx_mode; 503 // True means that the mode info for inter blocks contains the syntax 504 // element comp_mode that indicates whether to use single or compound 505 // prediction. False means that all inter blocks will use single prediction. 506 bool reference_mode_select; 507 // The frames to use for compound prediction when skip_mode is true. 508 ReferenceFrameType skip_mode_frame[2]; 509 bool skip_mode_present; 510 bool reduced_tx_set; 511 bool allow_warped_motion; 512 Delta delta_q; 513 Delta delta_lf; 514 // A valid value of reference_frame_index[i] is in the range [0, 7]. -1 515 // indicates an invalid value. 516 // 517 // NOTE: When the frame is an intra frame (frame_type is kFrameKey or 518 // kFrameIntraOnly), reference_frame_index is not used and may be 519 // uninitialized. 520 int8_t reference_frame_index[kNumInterReferenceFrameTypes]; 521 // The ref_order_hint[ i ] syntax element in the uncompressed header. 522 // Specifies the expected output order hint for each reference frame. 523 uint8_t reference_order_hint[kNumReferenceFrameTypes]; 524 LoopFilter loop_filter; 525 Cdef cdef; 526 FilmGrainParams film_grain_params; 527 }; 528 529 // Structure used for traversing the partition tree. 530 struct PartitionTreeNode { 531 PartitionTreeNode() = default; PartitionTreeNodePartitionTreeNode532 PartitionTreeNode(int row4x4, int column4x4, BlockSize block_size) 533 : row4x4(row4x4), column4x4(column4x4), block_size(block_size) {} 534 int row4x4 = -1; 535 int column4x4 = -1; 536 BlockSize block_size = kBlockInvalid; 537 }; 538 539 // Structure used for storing the transform parameters in a superblock. 540 struct TransformParameters { 541 TransformParameters() = default; TransformParametersTransformParameters542 TransformParameters(TransformType type, int non_zero_coeff_count) 543 : type(type), non_zero_coeff_count(non_zero_coeff_count) {} 544 TransformType type; 545 int non_zero_coeff_count; 546 }; 547 548 } // namespace libgav1 549 #endif // LIBGAV1_SRC_UTILS_TYPES_H_ 550