1 /* 2 * Copyright 2019 The libgav1 Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBGAV1_SRC_UTILS_TYPES_H_ 18 #define LIBGAV1_SRC_UTILS_TYPES_H_ 19 20 #include <array> 21 #include <cstdint> 22 #include <memory> 23 24 #include "src/utils/array_2d.h" 25 #include "src/utils/constants.h" 26 #include "src/utils/memory.h" 27 28 namespace libgav1 { 29 30 struct MotionVector : public Allocable { 31 static constexpr int kRow = 0; 32 static constexpr int kColumn = 1; 33 34 MotionVector() = default; 35 MotionVector(const MotionVector& mv) = default; 36 37 MotionVector& operator=(const MotionVector& rhs) { 38 mv32 = rhs.mv32; 39 return *this; 40 } 41 42 bool operator==(const MotionVector& rhs) const { return mv32 == rhs.mv32; } 43 44 union { 45 // Motion vectors will always fit in int16_t and using int16_t here instead 46 // of int saves significant memory since some of the frame sized structures 47 // store motion vectors. 48 int16_t mv[2]; 49 // A uint32_t view into the |mv| array. Useful for cases where both the 50 // motion vectors have to be copied or compared with a single 32 bit 51 // instruction. 52 uint32_t mv32; 53 }; 54 }; 55 56 union CompoundMotionVector { 57 CompoundMotionVector() = default; 58 CompoundMotionVector(const CompoundMotionVector& mv) = default; 59 60 CompoundMotionVector& operator=(const CompoundMotionVector& rhs) { 61 mv64 = rhs.mv64; 62 return *this; 63 } 64 65 bool operator==(const CompoundMotionVector& rhs) const { 66 return mv64 == rhs.mv64; 67 } 68 69 MotionVector mv[2]; 70 // A uint64_t view into the |mv| array. Useful for cases where all the motion 71 // vectors have to be copied or compared with a single 64 bit instruction. 72 uint64_t mv64; 73 }; 74 75 // Stores the motion information used for motion field estimation. 76 struct TemporalMotionField : public Allocable { 77 Array2D<MotionVector> mv; 78 Array2D<int8_t> reference_offset; 79 }; 80 81 // MvContexts contains the contexts used to decode portions of an inter block 82 // mode info to set the y_mode field in BlockParameters. 83 // 84 // The contexts in the struct correspond to the ZeroMvContext, RefMvContext, 85 // and NewMvContext variables in the spec. 86 struct MvContexts { 87 int zero_mv; 88 int reference_mv; 89 int new_mv; 90 }; 91 92 struct PaletteModeInfo { 93 uint8_t size[kNumPlaneTypes]; 94 uint16_t color[kMaxPlanes][kMaxPaletteSize]; 95 }; 96 97 // Stores the parameters used by the prediction process. The members of the 98 // struct are filled in when parsing the bitstream and used when the prediction 99 // is computed. The information in this struct is associated with a single 100 // block. 101 // While both BlockParameters and PredictionParameters store information 102 // pertaining to a Block, the only difference is that BlockParameters outlives 103 // the block itself (for example, some of the variables in BlockParameters are 104 // used to compute the context for reading elements in the subsequent blocks). 105 struct PredictionParameters : public Allocable { 106 // Restore the index in the unsorted mv stack from the least 3 bits of sorted 107 // |weight_index_stack|. reference_mvPredictionParameters108 const MotionVector& reference_mv(int stack_index) const { 109 return ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)]; 110 } reference_mvPredictionParameters111 const MotionVector& reference_mv(int stack_index, int mv_index) const { 112 return compound_ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)] 113 .mv[mv_index]; 114 } 115 IncreaseWeightPredictionParameters116 void IncreaseWeight(ptrdiff_t index, int weight) { 117 weight_index_stack[index] += weight << 3; 118 } 119 SetWeightIndexStackEntryPredictionParameters120 void SetWeightIndexStackEntry(int index, int weight) { 121 weight_index_stack[index] = (weight << 3) + 7 - index; 122 } 123 124 bool use_filter_intra; 125 FilterIntraPredictor filter_intra_mode; 126 int angle_delta[kNumPlaneTypes]; 127 int8_t cfl_alpha_u; 128 int8_t cfl_alpha_v; 129 int max_luma_width; 130 int max_luma_height; 131 Array2D<uint8_t> color_index_map[kNumPlaneTypes]; 132 bool use_intra_block_copy; 133 InterIntraMode inter_intra_mode; 134 bool is_wedge_inter_intra; 135 int wedge_index; 136 int wedge_sign; 137 bool mask_is_inverse; 138 MotionMode motion_mode; 139 CompoundPredictionType compound_prediction_type; 140 union { 141 // |ref_mv_stack| and |compound_ref_mv_stack| are not sorted after 142 // construction. reference_mv() must be called to get the correct element. 143 MotionVector ref_mv_stack[kMaxRefMvStackSize]; 144 CompoundMotionVector compound_ref_mv_stack[kMaxRefMvStackSize]; 145 }; 146 // The least 3 bits of |weight_index_stack| store the index information, and 147 // the other bits store the weight. The index information is actually 7 - 148 // index to make the descending order sort stable (preserves the original 149 // order for elements with the same weight). Sorting an int16_t array is much 150 // faster than sorting a struct array with weight and index stored separately. 151 int16_t weight_index_stack[kMaxRefMvStackSize]; 152 // In the spec, the weights of all the nearest mvs are incremented by a bonus 153 // weight which is larger than any natural weight, and later the weights of 154 // the mvs are compared with this bonus weight to determine their contexts. We 155 // replace this procedure by introducing |nearest_mv_count|, which records the 156 // count of the nearest mvs. Since all the nearest mvs are in the beginning of 157 // the mv stack, the index of a mv in the mv stack can be compared with 158 // |nearest_mv_count| to get that mv's context. 159 int nearest_mv_count; 160 int ref_mv_count; 161 int ref_mv_index; 162 MotionVector global_mv[2]; 163 int num_warp_samples; 164 int warp_estimate_candidates[kMaxLeastSquaresSamples][4]; 165 }; 166 167 // A lot of BlockParameters objects are created, so the smallest type is used 168 // for each field. The ranges of some fields are documented to justify why 169 // their types are large enough. 170 struct BlockParameters : public Allocable { 171 BlockSize size; 172 bool skip; 173 // True means that this block will use some default settings (that 174 // correspond to compound prediction) and so most of the mode info is 175 // skipped. False means that the mode info is not skipped. 176 bool skip_mode; 177 bool is_inter; 178 bool is_explicit_compound_type; // comp_group_idx in the spec. 179 bool is_compound_type_average; // compound_idx in the spec. 180 bool is_global_mv_block; 181 bool use_predicted_segment_id; // only valid with temporal update enabled. 182 int8_t segment_id; // segment_id is in the range [0, 7]. 183 PredictionMode y_mode; 184 PredictionMode uv_mode; 185 TransformSize transform_size; 186 TransformSize uv_transform_size; 187 InterpolationFilter interpolation_filter[2]; 188 ReferenceFrameType reference_frame[2]; 189 // The index of this array is as follows: 190 // 0 - Y plane vertical filtering. 191 // 1 - Y plane horizontal filtering. 192 // 2 - U plane (both directions). 193 // 3 - V plane (both directions). 194 uint8_t deblock_filter_level[kFrameLfCount]; 195 CompoundMotionVector mv; 196 PaletteModeInfo palette_mode_info; 197 // When |Tile::split_parse_and_decode_| is true, each block gets its own 198 // instance of |prediction_parameters|. When it is false, all the blocks point 199 // to |Tile::prediction_parameters_|. This field is valid only as long as the 200 // block is *being* decoded. The lifetime and usage of this field can be 201 // better understood by following its flow in tile.cc. 202 std::unique_ptr<PredictionParameters> prediction_parameters; 203 }; 204 205 // A five dimensional array used to store the wedge masks. The dimensions are: 206 // - block_size_index (returned by GetWedgeBlockSizeIndex() in prediction.cc). 207 // - flip_sign (0 or 1). 208 // - wedge_index (0 to 15). 209 // - each of those three dimensions is a 2d array of block_width by 210 // block_height. 211 using WedgeMaskArray = 212 std::array<std::array<std::array<Array2D<uint8_t>, 16>, 2>, 9>; 213 214 enum GlobalMotionTransformationType : uint8_t { 215 kGlobalMotionTransformationTypeIdentity, 216 kGlobalMotionTransformationTypeTranslation, 217 kGlobalMotionTransformationTypeRotZoom, 218 kGlobalMotionTransformationTypeAffine, 219 kNumGlobalMotionTransformationTypes 220 }; 221 222 // Global motion and warped motion parameters. See the paper for more info: 223 // S. Parker, Y. Chen, D. Barker, P. de Rivaz, D. Mukherjee, "Global and locally 224 // adaptive warped motion compensation in video compression", Proc. IEEE 225 // International Conference on Image Processing (ICIP), pp. 275-279, Sep. 2017. 226 struct GlobalMotion { 227 GlobalMotionTransformationType type; 228 int32_t params[6]; 229 230 // Represent two shearing operations. Computed from |params| by SetupShear(). 231 // 232 // The least significant six (= kWarpParamRoundingBits) bits are all zeros. 233 // (This means alpha, beta, gamma, and delta could be represented by a 10-bit 234 // signed integer.) The minimum value is INT16_MIN (= -32768) and the maximum 235 // value is 32704 = 0x7fc0, the largest int16_t value whose least significant 236 // six bits are all zeros. 237 // 238 // Valid warp parameters (as validated by SetupShear()) have smaller ranges. 239 // Their absolute values are less than 2^14 (= 16384). (This follows from 240 // the warpValid check at the end of Section 7.11.3.6.) 241 // 242 // NOTE: Section 7.11.3.6 of the spec allows a maximum value of 32768, which 243 // is outside the range of int16_t. When cast to int16_t, 32768 becomes 244 // -32768. This potential int16_t overflow does not matter because either 245 // 32768 or -32768 causes SetupShear() to return false, 246 int16_t alpha; 247 int16_t beta; 248 int16_t gamma; 249 int16_t delta; 250 }; 251 252 // Loop filter parameters: 253 // 254 // If level[0] and level[1] are both equal to 0, the loop filter process is 255 // not invoked. 256 // 257 // |sharpness| and |delta_enabled| are only used by the loop filter process. 258 // 259 // The |ref_deltas| and |mode_deltas| arrays are used not only by the loop 260 // filter process but also by the reference frame update and loading 261 // processes. The loop filter process uses |ref_deltas| and |mode_deltas| only 262 // when |delta_enabled| is true. 263 struct LoopFilter { 264 // Contains loop filter strength values in the range of [0, 63]. 265 std::array<int8_t, kFrameLfCount> level; 266 // Indicates the sharpness level in the range of [0, 7]. 267 int8_t sharpness; 268 // Whether the filter level depends on the mode and reference frame used to 269 // predict a block. 270 bool delta_enabled; 271 // Whether additional syntax elements were read that specify which mode and 272 // reference frame deltas are to be updated. loop_filter_delta_update field in 273 // Section 5.9.11 of the spec. 274 bool delta_update; 275 // Contains the adjustment needed for the filter level based on the chosen 276 // reference frame, in the range of [-64, 63]. 277 std::array<int8_t, kNumReferenceFrameTypes> ref_deltas; 278 // Contains the adjustment needed for the filter level based on the chosen 279 // mode, in the range of [-64, 63]. 280 std::array<int8_t, kLoopFilterMaxModeDeltas> mode_deltas; 281 }; 282 283 struct Delta { 284 bool present; 285 uint8_t scale; 286 bool multi; 287 }; 288 289 struct Cdef { 290 uint8_t damping; // damping value from the spec + (bitdepth - 8). 291 uint8_t bits; 292 // All the strength values are the values from the spec and left shifted by 293 // (bitdepth - 8). 294 uint8_t y_primary_strength[kMaxCdefStrengths]; 295 uint8_t y_secondary_strength[kMaxCdefStrengths]; 296 uint8_t uv_primary_strength[kMaxCdefStrengths]; 297 uint8_t uv_secondary_strength[kMaxCdefStrengths]; 298 }; 299 300 struct TileInfo { 301 bool uniform_spacing; 302 int sb_rows; 303 int sb_columns; 304 int tile_count; 305 int tile_columns_log2; 306 int tile_columns; 307 int tile_column_start[kMaxTileColumns + 1]; 308 // This field is not used by libgav1, but is populated for use by some 309 // hardware decoders. So it must not be removed. 310 int tile_column_width_in_superblocks[kMaxTileColumns + 1]; 311 int tile_rows_log2; 312 int tile_rows; 313 int tile_row_start[kMaxTileRows + 1]; 314 // This field is not used by libgav1, but is populated for use by some 315 // hardware decoders. So it must not be removed. 316 int tile_row_height_in_superblocks[kMaxTileRows + 1]; 317 int16_t context_update_id; 318 uint8_t tile_size_bytes; 319 }; 320 321 struct LoopRestoration { 322 LoopRestorationType type[kMaxPlanes]; 323 int unit_size[kMaxPlanes]; 324 }; 325 326 // Stores the quantization parameters of Section 5.9.12. 327 struct QuantizerParameters { 328 // base_index is in the range [0, 255]. 329 uint8_t base_index; 330 int8_t delta_dc[kMaxPlanes]; 331 // delta_ac[kPlaneY] is always 0. 332 int8_t delta_ac[kMaxPlanes]; 333 bool use_matrix; 334 // The |matrix_level| array is used only when |use_matrix| is true. 335 // matrix_level[plane] specifies the level in the quantizer matrix that 336 // should be used for decoding |plane|. The quantizer matrix has 15 levels, 337 // from 0 to 14. The range of matrix_level[plane] is [0, 15]. If 338 // matrix_level[plane] is 15, the quantizer matrix is not used. 339 int8_t matrix_level[kMaxPlanes]; 340 }; 341 342 // The corresponding segment feature constants in the AV1 spec are named 343 // SEG_LVL_xxx. 344 enum SegmentFeature : uint8_t { 345 kSegmentFeatureQuantizer, 346 kSegmentFeatureLoopFilterYVertical, 347 kSegmentFeatureLoopFilterYHorizontal, 348 kSegmentFeatureLoopFilterU, 349 kSegmentFeatureLoopFilterV, 350 kSegmentFeatureReferenceFrame, 351 kSegmentFeatureSkip, 352 kSegmentFeatureGlobalMv, 353 kSegmentFeatureMax 354 }; 355 356 struct Segmentation { 357 // 5.11.14. 358 // Returns true if the feature is enabled in the segment. FeatureActiveSegmentation359 bool FeatureActive(int segment_id, SegmentFeature feature) const { 360 return enabled && segment_id < kMaxSegments && 361 feature_enabled[segment_id][feature]; 362 } 363 364 // Returns true if the feature is signed. FeatureSignedSegmentation365 static bool FeatureSigned(SegmentFeature feature) { 366 // Only the first five segment features are signed, so this comparison 367 // suffices. 368 return feature <= kSegmentFeatureLoopFilterV; 369 } 370 371 bool enabled; 372 bool update_map; 373 bool update_data; 374 bool temporal_update; 375 // True if the segment id will be read before the skip syntax element. False 376 // if the skip syntax element will be read first. 377 bool segment_id_pre_skip; 378 // The highest numbered segment id that has some enabled feature. Used as 379 // the upper bound for decoding segment ids. 380 int8_t last_active_segment_id; 381 382 bool feature_enabled[kMaxSegments][kSegmentFeatureMax]; 383 int16_t feature_data[kMaxSegments][kSegmentFeatureMax]; 384 bool lossless[kMaxSegments]; 385 // Cached values of get_qindex(1, segmentId), to be consumed by 386 // Tile::ReadTransformType(). The values are in the range [0, 255]. 387 uint8_t qindex[kMaxSegments]; 388 }; 389 390 // Section 6.8.20. 391 // Note: In spec, film grain section uses YCbCr to denote variable names, 392 // such as num_cb_points, num_cr_points. To keep it consistent with other 393 // parts of code, we use YUV, i.e., num_u_points, num_v_points, etc. 394 struct FilmGrainParams { 395 bool apply_grain; 396 bool update_grain; 397 bool chroma_scaling_from_luma; 398 bool overlap_flag; 399 bool clip_to_restricted_range; 400 401 uint8_t num_y_points; // [0, 14]. 402 uint8_t num_u_points; // [0, 10]. 403 uint8_t num_v_points; // [0, 10]. 404 // Must be [0, 255]. 10/12 bit /= 4 or 16. Must be in increasing order. 405 uint8_t point_y_value[14]; 406 uint8_t point_y_scaling[14]; 407 uint8_t point_u_value[10]; 408 uint8_t point_u_scaling[10]; 409 uint8_t point_v_value[10]; 410 uint8_t point_v_scaling[10]; 411 412 uint8_t chroma_scaling; // [8, 11]. 413 uint8_t auto_regression_coeff_lag; // [0, 3]. 414 int8_t auto_regression_coeff_y[24]; // [-128, 127] 415 int8_t auto_regression_coeff_u[25]; // [-128, 127] 416 int8_t auto_regression_coeff_v[25]; // [-128, 127] 417 // Shift value: auto regression coeffs range 418 // 6: [-2, 2) 419 // 7: [-1, 1) 420 // 8: [-0.5, 0.5) 421 // 9: [-0.25, 0.25) 422 uint8_t auto_regression_shift; 423 424 uint16_t grain_seed; 425 int reference_index; 426 int grain_scale_shift; 427 // These multipliers are encoded as nonnegative values by adding 128 first. 428 // The 128 is subtracted during parsing. 429 int8_t u_multiplier; // [-128, 127] 430 int8_t u_luma_multiplier; // [-128, 127] 431 // These offsets are encoded as nonnegative values by adding 256 first. The 432 // 256 is subtracted during parsing. 433 int16_t u_offset; // [-256, 255] 434 int8_t v_multiplier; // [-128, 127] 435 int8_t v_luma_multiplier; // [-128, 127] 436 int16_t v_offset; // [-256, 255] 437 }; 438 439 struct ObuFrameHeader { 440 uint16_t display_frame_id; 441 uint16_t current_frame_id; 442 int64_t frame_offset; 443 uint16_t expected_frame_id[kNumInterReferenceFrameTypes]; 444 int32_t width; 445 int32_t height; 446 int32_t columns4x4; 447 int32_t rows4x4; 448 // The render size (render_width and render_height) is a hint to the 449 // application about the desired display size. It has no effect on the 450 // decoding process. 451 int32_t render_width; 452 int32_t render_height; 453 int32_t upscaled_width; 454 LoopRestoration loop_restoration; 455 uint32_t buffer_removal_time[kMaxOperatingPoints]; 456 uint32_t frame_presentation_time; 457 // Note: global_motion[0] (for kReferenceFrameIntra) is not used. 458 std::array<GlobalMotion, kNumReferenceFrameTypes> global_motion; 459 TileInfo tile_info; 460 QuantizerParameters quantizer; 461 Segmentation segmentation; 462 bool show_existing_frame; 463 // frame_to_show is in the range [0, 7]. Only used if show_existing_frame is 464 // true. 465 int8_t frame_to_show; 466 FrameType frame_type; 467 bool show_frame; 468 bool showable_frame; 469 bool error_resilient_mode; 470 bool enable_cdf_update; 471 bool frame_size_override_flag; 472 // The order_hint syntax element in the uncompressed header. If 473 // show_existing_frame is false, the OrderHint variable in the spec is equal 474 // to this field, and so this field can be used in place of OrderHint when 475 // show_existing_frame is known to be false, such as during tile decoding. 476 uint8_t order_hint; 477 int8_t primary_reference_frame; 478 bool render_and_frame_size_different; 479 bool use_superres; 480 uint8_t superres_scale_denominator; 481 bool allow_screen_content_tools; 482 bool allow_intrabc; 483 bool frame_refs_short_signaling; 484 // A bitmask that specifies which reference frame slots will be updated with 485 // the current frame after it is decoded. 486 uint8_t refresh_frame_flags; 487 static_assert(sizeof(ObuFrameHeader::refresh_frame_flags) * 8 == 488 kNumReferenceFrameTypes, 489 ""); 490 bool found_reference; 491 int8_t force_integer_mv; 492 bool allow_high_precision_mv; 493 InterpolationFilter interpolation_filter; 494 bool is_motion_mode_switchable; 495 bool use_ref_frame_mvs; 496 bool enable_frame_end_update_cdf; 497 // True if all segments are losslessly encoded at the coded resolution. 498 bool coded_lossless; 499 // True if all segments are losslessly encoded at the upscaled resolution. 500 bool upscaled_lossless; 501 TxMode tx_mode; 502 // True means that the mode info for inter blocks contains the syntax 503 // element comp_mode that indicates whether to use single or compound 504 // prediction. False means that all inter blocks will use single prediction. 505 bool reference_mode_select; 506 // The frames to use for compound prediction when skip_mode is true. 507 ReferenceFrameType skip_mode_frame[2]; 508 bool skip_mode_present; 509 bool reduced_tx_set; 510 bool allow_warped_motion; 511 Delta delta_q; 512 Delta delta_lf; 513 // A valid value of reference_frame_index[i] is in the range [0, 7]. -1 514 // indicates an invalid value. 515 int8_t reference_frame_index[kNumInterReferenceFrameTypes]; 516 // The ref_order_hint[ i ] syntax element in the uncompressed header. 517 // Specifies the expected output order hint for each reference frame. 518 uint8_t reference_order_hint[kNumReferenceFrameTypes]; 519 LoopFilter loop_filter; 520 Cdef cdef; 521 FilmGrainParams film_grain_params; 522 }; 523 524 } // namespace libgav1 525 #endif // LIBGAV1_SRC_UTILS_TYPES_H_ 526