1 /* 2 * Copyright 2019 The libgav1 Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBGAV1_SRC_UTILS_TYPES_H_ 18 #define LIBGAV1_SRC_UTILS_TYPES_H_ 19 20 #include <array> 21 #include <cstddef> 22 #include <cstdint> 23 #include <memory> 24 25 #include "src/utils/array_2d.h" 26 #include "src/utils/constants.h" 27 #include "src/utils/memory.h" 28 29 namespace libgav1 { 30 31 union MotionVector { 32 // Motion vectors will always fit in int16_t and using int16_t here instead 33 // of int saves significant memory since some of the frame sized structures 34 // store motion vectors. 35 // Index 0 is the entry for row (horizontal direction) motion vector. 36 // Index 1 is the entry for column (vertical direction) motion vector. 37 int16_t mv[2]; 38 // A uint32_t view into the |mv| array. Useful for cases where both the 39 // motion vectors have to be copied or compared with a single 32 bit 40 // instruction. 41 uint32_t mv32; 42 }; 43 44 union CompoundMotionVector { 45 MotionVector mv[2]; 46 // A uint64_t view into the |mv| array. Useful for cases where all the motion 47 // vectors have to be copied or compared with a single 64 bit instruction. 48 uint64_t mv64; 49 }; 50 51 // Stores the motion information used for motion field estimation. 52 struct TemporalMotionField : public Allocable { 53 Array2D<MotionVector> mv; 54 Array2D<int8_t> reference_offset; 55 }; 56 57 // MvContexts contains the contexts used to decode portions of an inter block 58 // mode info to set the y_mode field in BlockParameters. 59 // 60 // The contexts in the struct correspond to the ZeroMvContext, RefMvContext, 61 // and NewMvContext variables in the spec. 62 struct MvContexts { 63 int zero_mv; 64 int reference_mv; 65 int new_mv; 66 }; 67 68 struct PaletteModeInfo { 69 uint8_t size[kNumPlaneTypes]; 70 uint16_t color[kMaxPlanes][kMaxPaletteSize]; 71 }; 72 73 // Stores the parameters used by the prediction process. The members of the 74 // struct are filled in when parsing the bitstream and used when the prediction 75 // is computed. The information in this struct is associated with a single 76 // block. 77 // While both BlockParameters and PredictionParameters store information 78 // pertaining to a Block, the only difference is that BlockParameters outlives 79 // the block itself (for example, some of the variables in BlockParameters are 80 // used to compute the context for reading elements in the subsequent blocks). 81 struct PredictionParameters : public Allocable { 82 // Restore the index in the unsorted mv stack from the least 3 bits of sorted 83 // |weight_index_stack|. reference_mvPredictionParameters84 const MotionVector& reference_mv(int stack_index) const { 85 return ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)]; 86 } reference_mvPredictionParameters87 const MotionVector& reference_mv(int stack_index, int mv_index) const { 88 return compound_ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)] 89 .mv[mv_index]; 90 } 91 IncreaseWeightPredictionParameters92 void IncreaseWeight(ptrdiff_t index, int weight) { 93 weight_index_stack[index] += weight << 3; 94 } 95 SetWeightIndexStackEntryPredictionParameters96 void SetWeightIndexStackEntry(int index, int weight) { 97 weight_index_stack[index] = (weight << 3) + 7 - index; 98 } 99 100 bool use_filter_intra; 101 FilterIntraPredictor filter_intra_mode; 102 int angle_delta[kNumPlaneTypes]; 103 int8_t cfl_alpha_u; 104 int8_t cfl_alpha_v; 105 int max_luma_width; 106 int max_luma_height; 107 Array2D<uint8_t> color_index_map[kNumPlaneTypes]; 108 bool use_intra_block_copy; 109 InterIntraMode inter_intra_mode; 110 bool is_wedge_inter_intra; 111 int wedge_index; 112 int wedge_sign; 113 bool mask_is_inverse; 114 MotionMode motion_mode; 115 CompoundPredictionType compound_prediction_type; 116 union { 117 // |ref_mv_stack| and |compound_ref_mv_stack| are not sorted after 118 // construction. reference_mv() must be called to get the correct element. 119 MotionVector ref_mv_stack[kMaxRefMvStackSize]; 120 CompoundMotionVector compound_ref_mv_stack[kMaxRefMvStackSize]; 121 }; 122 // The least 3 bits of |weight_index_stack| store the index information, and 123 // the other bits store the weight. The index information is actually 7 - 124 // index to make the descending order sort stable (preserves the original 125 // order for elements with the same weight). Sorting an int16_t array is much 126 // faster than sorting a struct array with weight and index stored separately. 127 int16_t weight_index_stack[kMaxRefMvStackSize]; 128 // In the spec, the weights of all the nearest mvs are incremented by a bonus 129 // weight which is larger than any natural weight, and later the weights of 130 // the mvs are compared with this bonus weight to determine their contexts. We 131 // replace this procedure by introducing |nearest_mv_count|, which records the 132 // count of the nearest mvs. Since all the nearest mvs are in the beginning of 133 // the mv stack, the index of a mv in the mv stack can be compared with 134 // |nearest_mv_count| to get that mv's context. 135 int nearest_mv_count; 136 int ref_mv_count; 137 int ref_mv_index; 138 MotionVector global_mv[2]; 139 int num_warp_samples; 140 int warp_estimate_candidates[kMaxLeastSquaresSamples][4]; 141 PaletteModeInfo palette_mode_info; 142 int8_t segment_id; // segment_id is in the range [0, 7]. 143 PredictionMode uv_mode; 144 bool chroma_top_uses_smooth_prediction; 145 bool chroma_left_uses_smooth_prediction; 146 }; 147 148 // A lot of BlockParameters objects are created, so the smallest type is used 149 // for each field. The ranges of some fields are documented to justify why 150 // their types are large enough. 151 struct BlockParameters : public Allocable { 152 BlockSize size; 153 bool skip; 154 bool is_inter; 155 PredictionMode y_mode; 156 TransformSize uv_transform_size; 157 InterpolationFilter interpolation_filter[2]; 158 ReferenceFrameType reference_frame[2]; 159 // The index of this array is as follows: 160 // 0 - Y plane vertical filtering. 161 // 1 - Y plane horizontal filtering. 162 // 2 - U plane (both directions). 163 // 3 - V plane (both directions). 164 uint8_t deblock_filter_level[kFrameLfCount]; 165 CompoundMotionVector mv; 166 // When |Tile::split_parse_and_decode_| is true, each block gets its own 167 // instance of |prediction_parameters|. When it is false, all the blocks point 168 // to |Tile::prediction_parameters_|. This field is valid only as long as the 169 // block is *being* decoded. The lifetime and usage of this field can be 170 // better understood by following its flow in tile.cc. 171 std::unique_ptr<PredictionParameters> prediction_parameters; 172 }; 173 174 // Used to store the left and top block parameters that are used for computing 175 // the cdf context of the subsequent blocks. 176 struct BlockCdfContext { 177 bool use_predicted_segment_id[32]; 178 bool is_explicit_compound_type[32]; // comp_group_idx in the spec. 179 bool is_compound_type_average[32]; // compound_idx in the spec. 180 bool skip_mode[32]; 181 uint8_t palette_size[kNumPlaneTypes][32]; 182 uint16_t palette_color[32][kNumPlaneTypes][kMaxPaletteSize]; 183 PredictionMode uv_mode[32]; 184 }; 185 186 // A five dimensional array used to store the wedge masks. The dimensions are: 187 // - block_size_index (returned by GetWedgeBlockSizeIndex() in prediction.cc). 188 // - flip_sign (0 or 1). 189 // - wedge_index (0 to 15). 190 // - each of those three dimensions is a 2d array of block_width by 191 // block_height. 192 using WedgeMaskArray = 193 std::array<std::array<std::array<Array2D<uint8_t>, 16>, 2>, 9>; 194 195 enum GlobalMotionTransformationType : uint8_t { 196 kGlobalMotionTransformationTypeIdentity, 197 kGlobalMotionTransformationTypeTranslation, 198 kGlobalMotionTransformationTypeRotZoom, 199 kGlobalMotionTransformationTypeAffine, 200 kNumGlobalMotionTransformationTypes 201 }; 202 203 // Global motion and warped motion parameters. See the paper for more info: 204 // S. Parker, Y. Chen, D. Barker, P. de Rivaz, D. Mukherjee, "Global and locally 205 // adaptive warped motion compensation in video compression", Proc. IEEE 206 // International Conference on Image Processing (ICIP), pp. 275-279, Sep. 2017. 207 struct GlobalMotion { 208 GlobalMotionTransformationType type; 209 int32_t params[6]; 210 211 // Represent two shearing operations. Computed from |params| by SetupShear(). 212 // 213 // The least significant six (= kWarpParamRoundingBits) bits are all zeros. 214 // (This means alpha, beta, gamma, and delta could be represented by a 10-bit 215 // signed integer.) The minimum value is INT16_MIN (= -32768) and the maximum 216 // value is 32704 = 0x7fc0, the largest int16_t value whose least significant 217 // six bits are all zeros. 218 // 219 // Valid warp parameters (as validated by SetupShear()) have smaller ranges. 220 // Their absolute values are less than 2^14 (= 16384). (This follows from 221 // the warpValid check at the end of Section 7.11.3.6.) 222 // 223 // NOTE: Section 7.11.3.6 of the spec allows a maximum value of 32768, which 224 // is outside the range of int16_t. When cast to int16_t, 32768 becomes 225 // -32768. This potential int16_t overflow does not matter because either 226 // 32768 or -32768 causes SetupShear() to return false, 227 int16_t alpha; 228 int16_t beta; 229 int16_t gamma; 230 int16_t delta; 231 }; 232 233 // Loop filter parameters: 234 // 235 // If level[0] and level[1] are both equal to 0, the loop filter process is 236 // not invoked. 237 // 238 // |sharpness| and |delta_enabled| are only used by the loop filter process. 239 // 240 // The |ref_deltas| and |mode_deltas| arrays are used not only by the loop 241 // filter process but also by the reference frame update and loading 242 // processes. The loop filter process uses |ref_deltas| and |mode_deltas| only 243 // when |delta_enabled| is true. 244 struct LoopFilter { 245 // Contains loop filter strength values in the range of [0, 63]. 246 std::array<int8_t, kFrameLfCount> level; 247 // Indicates the sharpness level in the range of [0, 7]. 248 int8_t sharpness; 249 // Whether the filter level depends on the mode and reference frame used to 250 // predict a block. 251 bool delta_enabled; 252 // Whether additional syntax elements were read that specify which mode and 253 // reference frame deltas are to be updated. loop_filter_delta_update field in 254 // Section 5.9.11 of the spec. 255 bool delta_update; 256 // Contains the adjustment needed for the filter level based on the chosen 257 // reference frame, in the range of [-64, 63]. 258 std::array<int8_t, kNumReferenceFrameTypes> ref_deltas; 259 // Contains the adjustment needed for the filter level based on the chosen 260 // mode, in the range of [-64, 63]. 261 std::array<int8_t, kLoopFilterMaxModeDeltas> mode_deltas; 262 }; 263 264 struct Delta { 265 bool present; 266 uint8_t scale; 267 bool multi; 268 }; 269 270 struct Cdef { 271 uint8_t damping; // damping value from the spec + (bitdepth - 8). 272 uint8_t bits; 273 // All the strength values are the values from the spec and left shifted by 274 // (bitdepth - 8). 275 uint8_t y_primary_strength[kMaxCdefStrengths]; 276 uint8_t y_secondary_strength[kMaxCdefStrengths]; 277 uint8_t uv_primary_strength[kMaxCdefStrengths]; 278 uint8_t uv_secondary_strength[kMaxCdefStrengths]; 279 }; 280 281 struct TileInfo { 282 bool uniform_spacing; 283 int sb_rows; 284 int sb_columns; 285 int tile_count; 286 int tile_columns_log2; 287 int tile_columns; 288 int tile_column_start[kMaxTileColumns + 1]; 289 // This field is not used by libgav1, but is populated for use by some 290 // hardware decoders. So it must not be removed. 291 int tile_column_width_in_superblocks[kMaxTileColumns + 1]; 292 int tile_rows_log2; 293 int tile_rows; 294 int tile_row_start[kMaxTileRows + 1]; 295 // This field is not used by libgav1, but is populated for use by some 296 // hardware decoders. So it must not be removed. 297 int tile_row_height_in_superblocks[kMaxTileRows + 1]; 298 int16_t context_update_id; 299 uint8_t tile_size_bytes; 300 }; 301 302 struct LoopRestoration { 303 LoopRestorationType type[kMaxPlanes]; 304 int unit_size_log2[kMaxPlanes]; 305 }; 306 307 // Stores the quantization parameters of Section 5.9.12. 308 struct QuantizerParameters { 309 // base_index is in the range [0, 255]. 310 uint8_t base_index; 311 int8_t delta_dc[kMaxPlanes]; 312 // delta_ac[kPlaneY] is always 0. 313 int8_t delta_ac[kMaxPlanes]; 314 bool use_matrix; 315 // The |matrix_level| array is used only when |use_matrix| is true. 316 // matrix_level[plane] specifies the level in the quantizer matrix that 317 // should be used for decoding |plane|. The quantizer matrix has 15 levels, 318 // from 0 to 14. The range of matrix_level[plane] is [0, 15]. If 319 // matrix_level[plane] is 15, the quantizer matrix is not used. 320 int8_t matrix_level[kMaxPlanes]; 321 }; 322 323 // The corresponding segment feature constants in the AV1 spec are named 324 // SEG_LVL_xxx. 325 enum SegmentFeature : uint8_t { 326 kSegmentFeatureQuantizer, 327 kSegmentFeatureLoopFilterYVertical, 328 kSegmentFeatureLoopFilterYHorizontal, 329 kSegmentFeatureLoopFilterU, 330 kSegmentFeatureLoopFilterV, 331 kSegmentFeatureReferenceFrame, 332 kSegmentFeatureSkip, 333 kSegmentFeatureGlobalMv, 334 kSegmentFeatureMax 335 }; 336 337 struct Segmentation { 338 // 5.11.14. 339 // Returns true if the feature is enabled in the segment. FeatureActiveSegmentation340 bool FeatureActive(int segment_id, SegmentFeature feature) const { 341 return enabled && segment_id < kMaxSegments && 342 feature_enabled[segment_id][feature]; 343 } 344 345 // Returns true if the feature is signed. FeatureSignedSegmentation346 static bool FeatureSigned(SegmentFeature feature) { 347 // Only the first five segment features are signed, so this comparison 348 // suffices. 349 return feature <= kSegmentFeatureLoopFilterV; 350 } 351 352 bool enabled; 353 bool update_map; 354 bool update_data; 355 bool temporal_update; 356 // True if the segment id will be read before the skip syntax element. False 357 // if the skip syntax element will be read first. 358 bool segment_id_pre_skip; 359 // The highest numbered segment id that has some enabled feature. Used as 360 // the upper bound for decoding segment ids. 361 int8_t last_active_segment_id; 362 363 bool feature_enabled[kMaxSegments][kSegmentFeatureMax]; 364 int16_t feature_data[kMaxSegments][kSegmentFeatureMax]; 365 bool lossless[kMaxSegments]; 366 // Cached values of get_qindex(1, segmentId), to be consumed by 367 // Tile::ReadTransformType(). The values are in the range [0, 255]. 368 uint8_t qindex[kMaxSegments]; 369 }; 370 371 // Section 6.8.20. 372 // Note: In spec, film grain section uses YCbCr to denote variable names, 373 // such as num_cb_points, num_cr_points. To keep it consistent with other 374 // parts of code, we use YUV, i.e., num_u_points, num_v_points, etc. 375 struct FilmGrainParams { 376 bool apply_grain; 377 bool update_grain; 378 bool chroma_scaling_from_luma; 379 bool overlap_flag; 380 bool clip_to_restricted_range; 381 382 uint8_t num_y_points; // [0, 14]. 383 uint8_t num_u_points; // [0, 10]. 384 uint8_t num_v_points; // [0, 10]. 385 // Must be [0, 255]. 10/12 bit /= 4 or 16. Must be in increasing order. 386 uint8_t point_y_value[14]; 387 uint8_t point_y_scaling[14]; 388 uint8_t point_u_value[10]; 389 uint8_t point_u_scaling[10]; 390 uint8_t point_v_value[10]; 391 uint8_t point_v_scaling[10]; 392 393 uint8_t chroma_scaling; // [8, 11]. 394 uint8_t auto_regression_coeff_lag; // [0, 3]. 395 int8_t auto_regression_coeff_y[24]; // [-128, 127] 396 int8_t auto_regression_coeff_u[25]; // [-128, 127] 397 int8_t auto_regression_coeff_v[25]; // [-128, 127] 398 // Shift value: auto regression coeffs range 399 // 6: [-2, 2) 400 // 7: [-1, 1) 401 // 8: [-0.5, 0.5) 402 // 9: [-0.25, 0.25) 403 uint8_t auto_regression_shift; 404 405 uint16_t grain_seed; 406 int reference_index; 407 int grain_scale_shift; 408 // These multipliers are encoded as nonnegative values by adding 128 first. 409 // The 128 is subtracted during parsing. 410 int8_t u_multiplier; // [-128, 127] 411 int8_t u_luma_multiplier; // [-128, 127] 412 // These offsets are encoded as nonnegative values by adding 256 first. The 413 // 256 is subtracted during parsing. 414 int16_t u_offset; // [-256, 255] 415 int8_t v_multiplier; // [-128, 127] 416 int8_t v_luma_multiplier; // [-128, 127] 417 int16_t v_offset; // [-256, 255] 418 }; 419 420 struct ObuFrameHeader { 421 uint16_t display_frame_id; 422 uint16_t current_frame_id; 423 int64_t frame_offset; 424 uint16_t expected_frame_id[kNumInterReferenceFrameTypes]; 425 int32_t width; 426 int32_t height; 427 int32_t columns4x4; 428 int32_t rows4x4; 429 // The render size (render_width and render_height) is a hint to the 430 // application about the desired display size. It has no effect on the 431 // decoding process. 432 int32_t render_width; 433 int32_t render_height; 434 int32_t upscaled_width; 435 LoopRestoration loop_restoration; 436 uint32_t buffer_removal_time[kMaxOperatingPoints]; 437 uint32_t frame_presentation_time; 438 // Note: global_motion[0] (for kReferenceFrameIntra) is not used. 439 std::array<GlobalMotion, kNumReferenceFrameTypes> global_motion; 440 TileInfo tile_info; 441 QuantizerParameters quantizer; 442 Segmentation segmentation; 443 bool show_existing_frame; 444 // frame_to_show is in the range [0, 7]. Only used if show_existing_frame is 445 // true. 446 int8_t frame_to_show; 447 FrameType frame_type; 448 bool show_frame; 449 bool showable_frame; 450 bool error_resilient_mode; 451 bool enable_cdf_update; 452 bool frame_size_override_flag; 453 // The order_hint syntax element in the uncompressed header. If 454 // show_existing_frame is false, the OrderHint variable in the spec is equal 455 // to this field, and so this field can be used in place of OrderHint when 456 // show_existing_frame is known to be false, such as during tile decoding. 457 uint8_t order_hint; 458 int8_t primary_reference_frame; 459 bool render_and_frame_size_different; 460 bool use_superres; 461 uint8_t superres_scale_denominator; 462 bool allow_screen_content_tools; 463 bool allow_intrabc; 464 bool frame_refs_short_signaling; 465 // A bitmask that specifies which reference frame slots will be updated with 466 // the current frame after it is decoded. 467 uint8_t refresh_frame_flags; 468 static_assert(sizeof(ObuFrameHeader::refresh_frame_flags) * 8 == 469 kNumReferenceFrameTypes, 470 ""); 471 bool found_reference; 472 int8_t force_integer_mv; 473 bool allow_high_precision_mv; 474 InterpolationFilter interpolation_filter; 475 bool is_motion_mode_switchable; 476 bool use_ref_frame_mvs; 477 bool enable_frame_end_update_cdf; 478 // True if all segments are losslessly encoded at the coded resolution. 479 bool coded_lossless; 480 // True if all segments are losslessly encoded at the upscaled resolution. 481 bool upscaled_lossless; 482 TxMode tx_mode; 483 // True means that the mode info for inter blocks contains the syntax 484 // element comp_mode that indicates whether to use single or compound 485 // prediction. False means that all inter blocks will use single prediction. 486 bool reference_mode_select; 487 // The frames to use for compound prediction when skip_mode is true. 488 ReferenceFrameType skip_mode_frame[2]; 489 bool skip_mode_present; 490 bool reduced_tx_set; 491 bool allow_warped_motion; 492 Delta delta_q; 493 Delta delta_lf; 494 // A valid value of reference_frame_index[i] is in the range [0, 7]. -1 495 // indicates an invalid value. 496 // 497 // NOTE: When the frame is an intra frame (frame_type is kFrameKey or 498 // kFrameIntraOnly), reference_frame_index is not used and may be 499 // uninitialized. 500 int8_t reference_frame_index[kNumInterReferenceFrameTypes]; 501 // The ref_order_hint[ i ] syntax element in the uncompressed header. 502 // Specifies the expected output order hint for each reference frame. 503 uint8_t reference_order_hint[kNumReferenceFrameTypes]; 504 LoopFilter loop_filter; 505 Cdef cdef; 506 FilmGrainParams film_grain_params; 507 }; 508 509 // Structure used for traversing the partition tree. 510 struct PartitionTreeNode { 511 PartitionTreeNode() = default; PartitionTreeNodePartitionTreeNode512 PartitionTreeNode(int row4x4, int column4x4, BlockSize block_size) 513 : row4x4(row4x4), column4x4(column4x4), block_size(block_size) {} 514 int row4x4 = -1; 515 int column4x4 = -1; 516 BlockSize block_size = kBlockInvalid; 517 }; 518 519 // Structure used for storing the transform parameters in a superblock. 520 struct TransformParameters { 521 TransformParameters() = default; TransformParametersTransformParameters522 TransformParameters(TransformType type, int non_zero_coeff_count) 523 : type(type), non_zero_coeff_count(non_zero_coeff_count) {} 524 TransformType type; 525 int non_zero_coeff_count; 526 }; 527 528 } // namespace libgav1 529 #endif // LIBGAV1_SRC_UTILS_TYPES_H_ 530