• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2019 The libgav1 Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBGAV1_SRC_UTILS_TYPES_H_
18 #define LIBGAV1_SRC_UTILS_TYPES_H_
19 
20 #include <array>
21 #include <cstddef>
22 #include <cstdint>
23 #include <memory>
24 
25 #include "src/utils/array_2d.h"
26 #include "src/utils/constants.h"
27 #include "src/utils/memory.h"
28 
29 namespace libgav1 {
30 
// A pair of 16-bit motion vector components that can also be accessed as one
// 32-bit word.
union MotionVector {
  // Motion vectors will always fit in int16_t and using int16_t here instead
  // of int saves significant memory since some of the frame sized structures
  // store motion vectors.
  // Index 0 is the entry for row (horizontal direction) motion vector.
  // Index 1 is the entry for column (vertical direction) motion vector.
  int16_t mv[2];
  // A uint32_t view into the |mv| array (it shares the same storage). Useful
  // for cases where both the motion vectors have to be copied or compared with
  // a single 32 bit instruction.
  uint32_t mv32;
};
43 
44 union CompoundMotionVector {
45   MotionVector mv[2];
46   // A uint64_t view into the |mv| array. Useful for cases where all the motion
47   // vectors have to be copied or compared with a single 64 bit instruction.
48   uint64_t mv64;
49 };
50 
51 // Stores the motion information used for motion field estimation.
52 struct TemporalMotionField : public Allocable {
53   Array2D<MotionVector> mv;
54   Array2D<int8_t> reference_offset;
55 };
56 
// MvContexts contains the contexts used to decode portions of an inter block
// mode info to set the y_mode field in BlockParameters.
//
// The contexts in the struct correspond to the ZeroMvContext, RefMvContext,
// and NewMvContext variables in the spec.
struct MvContexts {
  int zero_mv;       // ZeroMvContext in the spec.
  int reference_mv;  // RefMvContext in the spec.
  int new_mv;        // NewMvContext in the spec.
};
67 
68 struct PaletteModeInfo {
69   uint8_t size[kNumPlaneTypes];
70   uint16_t color[kMaxPlanes][kMaxPaletteSize];
71 };
72 
73 // Stores the parameters used by the prediction process. The members of the
74 // struct are filled in when parsing the bitstream and used when the prediction
75 // is computed. The information in this struct is associated with a single
76 // block.
77 // While both BlockParameters and PredictionParameters store information
78 // pertaining to a Block, the only difference is that BlockParameters outlives
79 // the block itself (for example, some of the variables in BlockParameters are
80 // used to compute the context for reading elements in the subsequent blocks).
81 struct PredictionParameters : public Allocable {
82   // Restore the index in the unsorted mv stack from the least 3 bits of sorted
83   // |weight_index_stack|.
reference_mvPredictionParameters84   const MotionVector& reference_mv(int stack_index) const {
85     return ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)];
86   }
reference_mvPredictionParameters87   const MotionVector& reference_mv(int stack_index, int mv_index) const {
88     return compound_ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)]
89         .mv[mv_index];
90   }
91 
IncreaseWeightPredictionParameters92   void IncreaseWeight(ptrdiff_t index, int weight) {
93     weight_index_stack[index] += weight << 3;
94   }
95 
SetWeightIndexStackEntryPredictionParameters96   void SetWeightIndexStackEntry(int index, int weight) {
97     weight_index_stack[index] = (weight << 3) + 7 - index;
98   }
99 
100   bool use_filter_intra;
101   FilterIntraPredictor filter_intra_mode;
102   int angle_delta[kNumPlaneTypes];
103   int8_t cfl_alpha_u;
104   int8_t cfl_alpha_v;
105   int max_luma_width;
106   int max_luma_height;
107   Array2D<uint8_t> color_index_map[kNumPlaneTypes];
108   bool use_intra_block_copy;
109   InterIntraMode inter_intra_mode;
110   bool is_wedge_inter_intra;
111   int wedge_index;
112   int wedge_sign;
113   bool mask_is_inverse;
114   MotionMode motion_mode;
115   CompoundPredictionType compound_prediction_type;
116   union {
117     // |ref_mv_stack| and |compound_ref_mv_stack| are not sorted after
118     // construction. reference_mv() must be called to get the correct element.
119     MotionVector ref_mv_stack[kMaxRefMvStackSize];
120     CompoundMotionVector compound_ref_mv_stack[kMaxRefMvStackSize];
121   };
122   // The least 3 bits of |weight_index_stack| store the index information, and
123   // the other bits store the weight. The index information is actually 7 -
124   // index to make the descending order sort stable (preserves the original
125   // order for elements with the same weight). Sorting an int16_t array is much
126   // faster than sorting a struct array with weight and index stored separately.
127   int16_t weight_index_stack[kMaxRefMvStackSize];
128   // In the spec, the weights of all the nearest mvs are incremented by a bonus
129   // weight which is larger than any natural weight, and later the weights of
130   // the mvs are compared with this bonus weight to determine their contexts. We
131   // replace this procedure by introducing |nearest_mv_count|, which records the
132   // count of the nearest mvs. Since all the nearest mvs are in the beginning of
133   // the mv stack, the index of a mv in the mv stack can be compared with
134   // |nearest_mv_count| to get that mv's context.
135   int nearest_mv_count;
136   int ref_mv_count;
137   int ref_mv_index;
138   MotionVector global_mv[2];
139   int num_warp_samples;
140   int warp_estimate_candidates[kMaxLeastSquaresSamples][4];
141   PaletteModeInfo palette_mode_info;
142   int8_t segment_id;  // segment_id is in the range [0, 7].
143   PredictionMode uv_mode;
144   bool chroma_top_uses_smooth_prediction;
145   bool chroma_left_uses_smooth_prediction;
146 };
147 
148 // A lot of BlockParameters objects are created, so the smallest type is used
149 // for each field. The ranges of some fields are documented to justify why
150 // their types are large enough.
151 struct BlockParameters : public Allocable {
152   BlockSize size;
153   bool skip;
154   bool is_inter;
155   PredictionMode y_mode;
156   TransformSize uv_transform_size;
157   InterpolationFilter interpolation_filter[2];
158   ReferenceFrameType reference_frame[2];
159   // The index of this array is as follows:
160   //  0 - Y plane vertical filtering.
161   //  1 - Y plane horizontal filtering.
162   //  2 - U plane (both directions).
163   //  3 - V plane (both directions).
164   uint8_t deblock_filter_level[kFrameLfCount];
165   CompoundMotionVector mv;
166   // When |Tile::split_parse_and_decode_| is true, each block gets its own
167   // instance of |prediction_parameters|. When it is false, all the blocks point
168   // to |Tile::prediction_parameters_|. This field is valid only as long as the
169   // block is *being* decoded. The lifetime and usage of this field can be
170   // better understood by following its flow in tile.cc.
171   std::unique_ptr<PredictionParameters> prediction_parameters;
172 };
173 
174 // Used to store the left and top block parameters that are used for computing
175 // the cdf context of the subsequent blocks.
176 struct BlockCdfContext {
177   bool use_predicted_segment_id[32];
178   bool is_explicit_compound_type[32];  // comp_group_idx in the spec.
179   bool is_compound_type_average[32];   // compound_idx in the spec.
180   bool skip_mode[32];
181   uint8_t palette_size[kNumPlaneTypes][32];
182   uint16_t palette_color[32][kNumPlaneTypes][kMaxPaletteSize];
183   PredictionMode uv_mode[32];
184 };
185 
186 // A five dimensional array used to store the wedge masks. The dimensions are:
187 //   - block_size_index (returned by GetWedgeBlockSizeIndex() in prediction.cc).
188 //   - flip_sign (0 or 1).
189 //   - wedge_index (0 to 15).
190 //   - each of those three dimensions is a 2d array of block_width by
191 //     block_height.
192 using WedgeMaskArray =
193     std::array<std::array<std::array<Array2D<uint8_t>, 16>, 2>, 9>;
194 
// The kind of transformation described by a GlobalMotion. Stored in a single
// byte; the enumerators take consecutive values starting at 0.
enum GlobalMotionTransformationType : uint8_t {
  kGlobalMotionTransformationTypeIdentity,
  kGlobalMotionTransformationTypeTranslation,
  kGlobalMotionTransformationTypeRotZoom,
  kGlobalMotionTransformationTypeAffine,
  // Count of the transformation types above; not a transformation itself.
  kNumGlobalMotionTransformationTypes
};
202 
203 // Global motion and warped motion parameters. See the paper for more info:
204 // S. Parker, Y. Chen, D. Barker, P. de Rivaz, D. Mukherjee, "Global and locally
205 // adaptive warped motion compensation in video compression", Proc. IEEE
206 // International Conference on Image Processing (ICIP), pp. 275-279, Sep. 2017.
207 struct GlobalMotion {
208   GlobalMotionTransformationType type;
209   int32_t params[6];
210 
211   // Represent two shearing operations. Computed from |params| by SetupShear().
212   //
213   // The least significant six (= kWarpParamRoundingBits) bits are all zeros.
214   // (This means alpha, beta, gamma, and delta could be represented by a 10-bit
215   // signed integer.) The minimum value is INT16_MIN (= -32768) and the maximum
216   // value is 32704 = 0x7fc0, the largest int16_t value whose least significant
217   // six bits are all zeros.
218   //
219   // Valid warp parameters (as validated by SetupShear()) have smaller ranges.
220   // Their absolute values are less than 2^14 (= 16384). (This follows from
221   // the warpValid check at the end of Section 7.11.3.6.)
222   //
223   // NOTE: Section 7.11.3.6 of the spec allows a maximum value of 32768, which
224   // is outside the range of int16_t. When cast to int16_t, 32768 becomes
225   // -32768. This potential int16_t overflow does not matter because either
226   // 32768 or -32768 causes SetupShear() to return false,
227   int16_t alpha;
228   int16_t beta;
229   int16_t gamma;
230   int16_t delta;
231 };
232 
233 // Loop filter parameters:
234 //
235 // If level[0] and level[1] are both equal to 0, the loop filter process is
236 // not invoked.
237 //
238 // |sharpness| and |delta_enabled| are only used by the loop filter process.
239 //
240 // The |ref_deltas| and |mode_deltas| arrays are used not only by the loop
241 // filter process but also by the reference frame update and loading
242 // processes. The loop filter process uses |ref_deltas| and |mode_deltas| only
243 // when |delta_enabled| is true.
244 struct LoopFilter {
245   // Contains loop filter strength values in the range of [0, 63].
246   std::array<int8_t, kFrameLfCount> level;
247   // Indicates the sharpness level in the range of [0, 7].
248   int8_t sharpness;
249   // Whether the filter level depends on the mode and reference frame used to
250   // predict a block.
251   bool delta_enabled;
252   // Whether additional syntax elements were read that specify which mode and
253   // reference frame deltas are to be updated. loop_filter_delta_update field in
254   // Section 5.9.11 of the spec.
255   bool delta_update;
256   // Contains the adjustment needed for the filter level based on the chosen
257   // reference frame, in the range of [-64, 63].
258   std::array<int8_t, kNumReferenceFrameTypes> ref_deltas;
259   // Contains the adjustment needed for the filter level based on the chosen
260   // mode, in the range of [-64, 63].
261   std::array<int8_t, kLoopFilterMaxModeDeltas> mode_deltas;
262 };
263 
// A group of three delta settings. ObuFrameHeader holds two instances of this
// struct, |delta_q| and |delta_lf|.
struct Delta {
  bool present;
  uint8_t scale;
  bool multi;
};
269 
270 struct Cdef {
271   uint8_t damping;  // damping value from the spec + (bitdepth - 8).
272   uint8_t bits;
273   // All the strength values are the values from the spec and left shifted by
274   // (bitdepth - 8).
275   uint8_t y_primary_strength[kMaxCdefStrengths];
276   uint8_t y_secondary_strength[kMaxCdefStrengths];
277   uint8_t uv_primary_strength[kMaxCdefStrengths];
278   uint8_t uv_secondary_strength[kMaxCdefStrengths];
279 };
280 
281 struct TileInfo {
282   bool uniform_spacing;
283   int sb_rows;
284   int sb_columns;
285   int tile_count;
286   int tile_columns_log2;
287   int tile_columns;
288   int tile_column_start[kMaxTileColumns + 1];
289   // This field is not used by libgav1, but is populated for use by some
290   // hardware decoders. So it must not be removed.
291   int tile_column_width_in_superblocks[kMaxTileColumns + 1];
292   int tile_rows_log2;
293   int tile_rows;
294   int tile_row_start[kMaxTileRows + 1];
295   // This field is not used by libgav1, but is populated for use by some
296   // hardware decoders. So it must not be removed.
297   int tile_row_height_in_superblocks[kMaxTileRows + 1];
298   int16_t context_update_id;
299   uint8_t tile_size_bytes;
300 };
301 
302 struct LoopRestoration {
303   LoopRestorationType type[kMaxPlanes];
304   int unit_size_log2[kMaxPlanes];
305 };
306 
307 // Stores the quantization parameters of Section 5.9.12.
308 struct QuantizerParameters {
309   // base_index is in the range [0, 255].
310   uint8_t base_index;
311   int8_t delta_dc[kMaxPlanes];
312   // delta_ac[kPlaneY] is always 0.
313   int8_t delta_ac[kMaxPlanes];
314   bool use_matrix;
315   // The |matrix_level| array is used only when |use_matrix| is true.
316   // matrix_level[plane] specifies the level in the quantizer matrix that
317   // should be used for decoding |plane|. The quantizer matrix has 15 levels,
318   // from 0 to 14. The range of matrix_level[plane] is [0, 15]. If
319   // matrix_level[plane] is 15, the quantizer matrix is not used.
320   int8_t matrix_level[kMaxPlanes];
321 };
322 
// The corresponding segment feature constants in the AV1 spec are named
// SEG_LVL_xxx. The enumerators take consecutive values starting at 0, and the
// order matters: code may compare against an enumerator to test a whole range
// of features.
enum SegmentFeature : uint8_t {
  kSegmentFeatureQuantizer,
  kSegmentFeatureLoopFilterYVertical,
  kSegmentFeatureLoopFilterYHorizontal,
  kSegmentFeatureLoopFilterU,
  kSegmentFeatureLoopFilterV,
  kSegmentFeatureReferenceFrame,
  kSegmentFeatureSkip,
  kSegmentFeatureGlobalMv,
  // Count of the features above; not a feature itself.
  kSegmentFeatureMax
};
336 
337 struct Segmentation {
338   // 5.11.14.
339   // Returns true if the feature is enabled in the segment.
FeatureActiveSegmentation340   bool FeatureActive(int segment_id, SegmentFeature feature) const {
341     return enabled && segment_id < kMaxSegments &&
342            feature_enabled[segment_id][feature];
343   }
344 
345   // Returns true if the feature is signed.
FeatureSignedSegmentation346   static bool FeatureSigned(SegmentFeature feature) {
347     // Only the first five segment features are signed, so this comparison
348     // suffices.
349     return feature <= kSegmentFeatureLoopFilterV;
350   }
351 
352   bool enabled;
353   bool update_map;
354   bool update_data;
355   bool temporal_update;
356   // True if the segment id will be read before the skip syntax element. False
357   // if the skip syntax element will be read first.
358   bool segment_id_pre_skip;
359   // The highest numbered segment id that has some enabled feature. Used as
360   // the upper bound for decoding segment ids.
361   int8_t last_active_segment_id;
362 
363   bool feature_enabled[kMaxSegments][kSegmentFeatureMax];
364   int16_t feature_data[kMaxSegments][kSegmentFeatureMax];
365   bool lossless[kMaxSegments];
366   // Cached values of get_qindex(1, segmentId), to be consumed by
367   // Tile::ReadTransformType(). The values are in the range [0, 255].
368   uint8_t qindex[kMaxSegments];
369 };
370 
// Section 6.8.20.
// Note: In spec, film grain section uses YCbCr to denote variable names,
// such as num_cb_points, num_cr_points. To keep it consistent with other
// parts of code, we use YUV, i.e., num_u_points, num_v_points, etc.
struct FilmGrainParams {
  bool apply_grain;
  bool update_grain;
  bool chroma_scaling_from_luma;
  bool overlap_flag;
  bool clip_to_restricted_range;

  uint8_t num_y_points;  // [0, 14].
  uint8_t num_u_points;  // [0, 10].
  uint8_t num_v_points;  // [0, 10].
  // Must be [0, 255]. 10/12 bit /= 4 or 16. Must be in increasing order.
  uint8_t point_y_value[14];
  uint8_t point_y_scaling[14];
  uint8_t point_u_value[10];
  uint8_t point_u_scaling[10];
  uint8_t point_v_value[10];
  uint8_t point_v_scaling[10];

  uint8_t chroma_scaling;              // [8, 11].
  uint8_t auto_regression_coeff_lag;   // [0, 3].
  int8_t auto_regression_coeff_y[24];  // [-128, 127]
  int8_t auto_regression_coeff_u[25];  // [-128, 127]
  int8_t auto_regression_coeff_v[25];  // [-128, 127]
  // Shift value: auto regression coeffs range
  // 6: [-2, 2)
  // 7: [-1, 1)
  // 8: [-0.5, 0.5)
  // 9: [-0.25, 0.25)
  uint8_t auto_regression_shift;

  uint16_t grain_seed;
  int reference_index;
  int grain_scale_shift;
  // These multipliers are encoded as nonnegative values by adding 128 first.
  // The 128 is subtracted during parsing.
  int8_t u_multiplier;       // [-128, 127]
  int8_t u_luma_multiplier;  // [-128, 127]
  // These offsets are encoded as nonnegative values by adding 256 first. The
  // 256 is subtracted during parsing.
  int16_t u_offset;          // [-256, 255]
  int8_t v_multiplier;       // [-128, 127]
  int8_t v_luma_multiplier;  // [-128, 127]
  int16_t v_offset;          // [-256, 255]
};
419 
420 struct ObuFrameHeader {
421   uint16_t display_frame_id;
422   uint16_t current_frame_id;
423   int64_t frame_offset;
424   uint16_t expected_frame_id[kNumInterReferenceFrameTypes];
425   int32_t width;
426   int32_t height;
427   int32_t columns4x4;
428   int32_t rows4x4;
429   // The render size (render_width and render_height) is a hint to the
430   // application about the desired display size. It has no effect on the
431   // decoding process.
432   int32_t render_width;
433   int32_t render_height;
434   int32_t upscaled_width;
435   LoopRestoration loop_restoration;
436   uint32_t buffer_removal_time[kMaxOperatingPoints];
437   uint32_t frame_presentation_time;
438   // Note: global_motion[0] (for kReferenceFrameIntra) is not used.
439   std::array<GlobalMotion, kNumReferenceFrameTypes> global_motion;
440   TileInfo tile_info;
441   QuantizerParameters quantizer;
442   Segmentation segmentation;
443   bool show_existing_frame;
444   // frame_to_show is in the range [0, 7]. Only used if show_existing_frame is
445   // true.
446   int8_t frame_to_show;
447   FrameType frame_type;
448   bool show_frame;
449   bool showable_frame;
450   bool error_resilient_mode;
451   bool enable_cdf_update;
452   bool frame_size_override_flag;
453   // The order_hint syntax element in the uncompressed header. If
454   // show_existing_frame is false, the OrderHint variable in the spec is equal
455   // to this field, and so this field can be used in place of OrderHint when
456   // show_existing_frame is known to be false, such as during tile decoding.
457   uint8_t order_hint;
458   int8_t primary_reference_frame;
459   bool render_and_frame_size_different;
460   bool use_superres;
461   uint8_t superres_scale_denominator;
462   bool allow_screen_content_tools;
463   bool allow_intrabc;
464   bool frame_refs_short_signaling;
465   // A bitmask that specifies which reference frame slots will be updated with
466   // the current frame after it is decoded.
467   uint8_t refresh_frame_flags;
468   static_assert(sizeof(ObuFrameHeader::refresh_frame_flags) * 8 ==
469                     kNumReferenceFrameTypes,
470                 "");
471   bool found_reference;
472   int8_t force_integer_mv;
473   bool allow_high_precision_mv;
474   InterpolationFilter interpolation_filter;
475   bool is_motion_mode_switchable;
476   bool use_ref_frame_mvs;
477   bool enable_frame_end_update_cdf;
478   // True if all segments are losslessly encoded at the coded resolution.
479   bool coded_lossless;
480   // True if all segments are losslessly encoded at the upscaled resolution.
481   bool upscaled_lossless;
482   TxMode tx_mode;
483   // True means that the mode info for inter blocks contains the syntax
484   // element comp_mode that indicates whether to use single or compound
485   // prediction. False means that all inter blocks will use single prediction.
486   bool reference_mode_select;
487   // The frames to use for compound prediction when skip_mode is true.
488   ReferenceFrameType skip_mode_frame[2];
489   bool skip_mode_present;
490   bool reduced_tx_set;
491   bool allow_warped_motion;
492   Delta delta_q;
493   Delta delta_lf;
494   // A valid value of reference_frame_index[i] is in the range [0, 7]. -1
495   // indicates an invalid value.
496   //
497   // NOTE: When the frame is an intra frame (frame_type is kFrameKey or
498   // kFrameIntraOnly), reference_frame_index is not used and may be
499   // uninitialized.
500   int8_t reference_frame_index[kNumInterReferenceFrameTypes];
501   // The ref_order_hint[ i ] syntax element in the uncompressed header.
502   // Specifies the expected output order hint for each reference frame.
503   uint8_t reference_order_hint[kNumReferenceFrameTypes];
504   LoopFilter loop_filter;
505   Cdef cdef;
506   FilmGrainParams film_grain_params;
507 };
508 
509 // Structure used for traversing the partition tree.
510 struct PartitionTreeNode {
511   PartitionTreeNode() = default;
PartitionTreeNodePartitionTreeNode512   PartitionTreeNode(int row4x4, int column4x4, BlockSize block_size)
513       : row4x4(row4x4), column4x4(column4x4), block_size(block_size) {}
514   int row4x4 = -1;
515   int column4x4 = -1;
516   BlockSize block_size = kBlockInvalid;
517 };
518 
519 // Structure used for storing the transform parameters in a superblock.
520 struct TransformParameters {
521   TransformParameters() = default;
TransformParametersTransformParameters522   TransformParameters(TransformType type, int non_zero_coeff_count)
523       : type(type), non_zero_coeff_count(non_zero_coeff_count) {}
524   TransformType type;
525   int non_zero_coeff_count;
526 };
527 
528 }  // namespace libgav1
529 #endif  // LIBGAV1_SRC_UTILS_TYPES_H_
530