• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2019 The libgav1 Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBGAV1_SRC_UTILS_TYPES_H_
18 #define LIBGAV1_SRC_UTILS_TYPES_H_
19 
#include <array>
#include <cstddef>
#include <cstdint>
#include <memory>

#include "src/utils/array_2d.h"
#include "src/utils/constants.h"
#include "src/utils/memory.h"
27 
28 namespace libgav1 {
29 
30 struct MotionVector : public Allocable {
31   static constexpr int kRow = 0;
32   static constexpr int kColumn = 1;
33 
34   MotionVector() = default;
35   MotionVector(const MotionVector& mv) = default;
36 
37   MotionVector& operator=(const MotionVector& rhs) {
38     mv32 = rhs.mv32;
39     return *this;
40   }
41 
42   bool operator==(const MotionVector& rhs) const { return mv32 == rhs.mv32; }
43 
44   union {
45     // Motion vectors will always fit in int16_t and using int16_t here instead
46     // of int saves significant memory since some of the frame sized structures
47     // store motion vectors.
48     int16_t mv[2];
49     // A uint32_t view into the |mv| array. Useful for cases where both the
50     // motion vectors have to be copied or compared with a single 32 bit
51     // instruction.
52     uint32_t mv32;
53   };
54 };
55 
56 union CompoundMotionVector {
57   CompoundMotionVector() = default;
58   CompoundMotionVector(const CompoundMotionVector& mv) = default;
59 
60   CompoundMotionVector& operator=(const CompoundMotionVector& rhs) {
61     mv64 = rhs.mv64;
62     return *this;
63   }
64 
65   bool operator==(const CompoundMotionVector& rhs) const {
66     return mv64 == rhs.mv64;
67   }
68 
69   MotionVector mv[2];
70   // A uint64_t view into the |mv| array. Useful for cases where all the motion
71   // vectors have to be copied or compared with a single 64 bit instruction.
72   uint64_t mv64;
73 };
74 
75 // Stores the motion information used for motion field estimation.
76 struct TemporalMotionField : public Allocable {
77   Array2D<MotionVector> mv;
78   Array2D<int8_t> reference_offset;
79 };
80 
// The contexts needed to decode the portions of an inter block's mode info
// that set the y_mode field in BlockParameters.
//
// The members correspond to the spec variables ZeroMvContext, RefMvContext,
// and NewMvContext, in that order.
struct MvContexts {
  int zero_mv;
  int reference_mv;
  int new_mv;
};
91 
92 struct PaletteModeInfo {
93   uint8_t size[kNumPlaneTypes];
94   uint16_t color[kMaxPlanes][kMaxPaletteSize];
95 };
96 
97 // Stores the parameters used by the prediction process. The members of the
98 // struct are filled in when parsing the bitstream and used when the prediction
99 // is computed. The information in this struct is associated with a single
100 // block.
101 // While both BlockParameters and PredictionParameters store information
102 // pertaining to a Block, the only difference is that BlockParameters outlives
103 // the block itself (for example, some of the variables in BlockParameters are
104 // used to compute the context for reading elements in the subsequent blocks).
105 struct PredictionParameters : public Allocable {
106   // Restore the index in the unsorted mv stack from the least 3 bits of sorted
107   // |weight_index_stack|.
reference_mvPredictionParameters108   const MotionVector& reference_mv(int stack_index) const {
109     return ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)];
110   }
reference_mvPredictionParameters111   const MotionVector& reference_mv(int stack_index, int mv_index) const {
112     return compound_ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)]
113         .mv[mv_index];
114   }
115 
IncreaseWeightPredictionParameters116   void IncreaseWeight(ptrdiff_t index, int weight) {
117     weight_index_stack[index] += weight << 3;
118   }
119 
SetWeightIndexStackEntryPredictionParameters120   void SetWeightIndexStackEntry(int index, int weight) {
121     weight_index_stack[index] = (weight << 3) + 7 - index;
122   }
123 
124   bool use_filter_intra;
125   FilterIntraPredictor filter_intra_mode;
126   int angle_delta[kNumPlaneTypes];
127   int8_t cfl_alpha_u;
128   int8_t cfl_alpha_v;
129   int max_luma_width;
130   int max_luma_height;
131   Array2D<uint8_t> color_index_map[kNumPlaneTypes];
132   bool use_intra_block_copy;
133   InterIntraMode inter_intra_mode;
134   bool is_wedge_inter_intra;
135   int wedge_index;
136   int wedge_sign;
137   bool mask_is_inverse;
138   MotionMode motion_mode;
139   CompoundPredictionType compound_prediction_type;
140   union {
141     // |ref_mv_stack| and |compound_ref_mv_stack| are not sorted after
142     // construction. reference_mv() must be called to get the correct element.
143     MotionVector ref_mv_stack[kMaxRefMvStackSize];
144     CompoundMotionVector compound_ref_mv_stack[kMaxRefMvStackSize];
145   };
146   // The least 3 bits of |weight_index_stack| store the index information, and
147   // the other bits store the weight. The index information is actually 7 -
148   // index to make the descending order sort stable (preserves the original
149   // order for elements with the same weight). Sorting an int16_t array is much
150   // faster than sorting a struct array with weight and index stored separately.
151   int16_t weight_index_stack[kMaxRefMvStackSize];
152   // In the spec, the weights of all the nearest mvs are incremented by a bonus
153   // weight which is larger than any natural weight, and later the weights of
154   // the mvs are compared with this bonus weight to determine their contexts. We
155   // replace this procedure by introducing |nearest_mv_count|, which records the
156   // count of the nearest mvs. Since all the nearest mvs are in the beginning of
157   // the mv stack, the index of a mv in the mv stack can be compared with
158   // |nearest_mv_count| to get that mv's context.
159   int nearest_mv_count;
160   int ref_mv_count;
161   int ref_mv_index;
162   MotionVector global_mv[2];
163   int num_warp_samples;
164   int warp_estimate_candidates[kMaxLeastSquaresSamples][4];
165 };
166 
167 // A lot of BlockParameters objects are created, so the smallest type is used
168 // for each field. The ranges of some fields are documented to justify why
169 // their types are large enough.
170 struct BlockParameters : public Allocable {
171   BlockSize size;
172   bool skip;
173   // True means that this block will use some default settings (that
174   // correspond to compound prediction) and so most of the mode info is
175   // skipped. False means that the mode info is not skipped.
176   bool skip_mode;
177   bool is_inter;
178   bool is_explicit_compound_type;  // comp_group_idx in the spec.
179   bool is_compound_type_average;   // compound_idx in the spec.
180   bool is_global_mv_block;
181   bool use_predicted_segment_id;  // only valid with temporal update enabled.
182   int8_t segment_id;              // segment_id is in the range [0, 7].
183   PredictionMode y_mode;
184   PredictionMode uv_mode;
185   TransformSize transform_size;
186   TransformSize uv_transform_size;
187   InterpolationFilter interpolation_filter[2];
188   ReferenceFrameType reference_frame[2];
189   // The index of this array is as follows:
190   //  0 - Y plane vertical filtering.
191   //  1 - Y plane horizontal filtering.
192   //  2 - U plane (both directions).
193   //  3 - V plane (both directions).
194   uint8_t deblock_filter_level[kFrameLfCount];
195   CompoundMotionVector mv;
196   PaletteModeInfo palette_mode_info;
197   // When |Tile::split_parse_and_decode_| is true, each block gets its own
198   // instance of |prediction_parameters|. When it is false, all the blocks point
199   // to |Tile::prediction_parameters_|. This field is valid only as long as the
200   // block is *being* decoded. The lifetime and usage of this field can be
201   // better understood by following its flow in tile.cc.
202   std::unique_ptr<PredictionParameters> prediction_parameters;
203 };
204 
205 // A five dimensional array used to store the wedge masks. The dimensions are:
206 //   - block_size_index (returned by GetWedgeBlockSizeIndex() in prediction.cc).
207 //   - flip_sign (0 or 1).
208 //   - wedge_index (0 to 15).
209 //   - each of those three dimensions is a 2d array of block_width by
210 //     block_height.
211 using WedgeMaskArray =
212     std::array<std::array<std::array<Array2D<uint8_t>, 16>, 2>, 9>;
213 
// The kinds of global motion transformation, stored in a single byte. The
// final enumerator is the number of transformation types.
enum GlobalMotionTransformationType : uint8_t {
  kGlobalMotionTransformationTypeIdentity = 0,
  kGlobalMotionTransformationTypeTranslation = 1,
  kGlobalMotionTransformationTypeRotZoom = 2,
  kGlobalMotionTransformationTypeAffine = 3,
  kNumGlobalMotionTransformationTypes = 4
};
221 
222 // Global motion and warped motion parameters. See the paper for more info:
223 // S. Parker, Y. Chen, D. Barker, P. de Rivaz, D. Mukherjee, "Global and locally
224 // adaptive warped motion compensation in video compression", Proc. IEEE
225 // International Conference on Image Processing (ICIP), pp. 275-279, Sep. 2017.
226 struct GlobalMotion {
227   GlobalMotionTransformationType type;
228   int32_t params[6];
229 
230   // Represent two shearing operations. Computed from |params| by SetupShear().
231   //
232   // The least significant six (= kWarpParamRoundingBits) bits are all zeros.
233   // (This means alpha, beta, gamma, and delta could be represented by a 10-bit
234   // signed integer.) The minimum value is INT16_MIN (= -32768) and the maximum
235   // value is 32704 = 0x7fc0, the largest int16_t value whose least significant
236   // six bits are all zeros.
237   //
238   // Valid warp parameters (as validated by SetupShear()) have smaller ranges.
239   // Their absolute values are less than 2^14 (= 16384). (This follows from
240   // the warpValid check at the end of Section 7.11.3.6.)
241   //
242   // NOTE: Section 7.11.3.6 of the spec allows a maximum value of 32768, which
243   // is outside the range of int16_t. When cast to int16_t, 32768 becomes
244   // -32768. This potential int16_t overflow does not matter because either
245   // 32768 or -32768 causes SetupShear() to return false,
246   int16_t alpha;
247   int16_t beta;
248   int16_t gamma;
249   int16_t delta;
250 };
251 
252 // Loop filter parameters:
253 //
254 // If level[0] and level[1] are both equal to 0, the loop filter process is
255 // not invoked.
256 //
257 // |sharpness| and |delta_enabled| are only used by the loop filter process.
258 //
259 // The |ref_deltas| and |mode_deltas| arrays are used not only by the loop
260 // filter process but also by the reference frame update and loading
261 // processes. The loop filter process uses |ref_deltas| and |mode_deltas| only
262 // when |delta_enabled| is true.
263 struct LoopFilter {
264   // Contains loop filter strength values in the range of [0, 63].
265   std::array<int8_t, kFrameLfCount> level;
266   // Indicates the sharpness level in the range of [0, 7].
267   int8_t sharpness;
268   // Whether the filter level depends on the mode and reference frame used to
269   // predict a block.
270   bool delta_enabled;
271   // Whether additional syntax elements were read that specify which mode and
272   // reference frame deltas are to be updated. loop_filter_delta_update field in
273   // Section 5.9.11 of the spec.
274   bool delta_update;
275   // Contains the adjustment needed for the filter level based on the chosen
276   // reference frame, in the range of [-64, 63].
277   std::array<int8_t, kNumReferenceFrameTypes> ref_deltas;
278   // Contains the adjustment needed for the filter level based on the chosen
279   // mode, in the range of [-64, 63].
280   std::array<int8_t, kLoopFilterMaxModeDeltas> mode_deltas;
281 };
282 
// A group of delta settings. ObuFrameHeader holds one instance for delta_q
// and one for delta_lf.
// NOTE(review): the field meanings are not documented here; presumably they
// mirror the delta_*_present, delta_*_res and delta_lf_multi syntax elements
// -- confirm against the header parsing code.
struct Delta {
  bool present;
  uint8_t scale;
  bool multi;
};
288 
289 struct Cdef {
290   uint8_t damping;  // damping value from the spec + (bitdepth - 8).
291   uint8_t bits;
292   // All the strength values are the values from the spec and left shifted by
293   // (bitdepth - 8).
294   uint8_t y_primary_strength[kMaxCdefStrengths];
295   uint8_t y_secondary_strength[kMaxCdefStrengths];
296   uint8_t uv_primary_strength[kMaxCdefStrengths];
297   uint8_t uv_secondary_strength[kMaxCdefStrengths];
298 };
299 
300 struct TileInfo {
301   bool uniform_spacing;
302   int sb_rows;
303   int sb_columns;
304   int tile_count;
305   int tile_columns_log2;
306   int tile_columns;
307   int tile_column_start[kMaxTileColumns + 1];
308   // This field is not used by libgav1, but is populated for use by some
309   // hardware decoders. So it must not be removed.
310   int tile_column_width_in_superblocks[kMaxTileColumns + 1];
311   int tile_rows_log2;
312   int tile_rows;
313   int tile_row_start[kMaxTileRows + 1];
314   // This field is not used by libgav1, but is populated for use by some
315   // hardware decoders. So it must not be removed.
316   int tile_row_height_in_superblocks[kMaxTileRows + 1];
317   int16_t context_update_id;
318   uint8_t tile_size_bytes;
319 };
320 
321 struct LoopRestoration {
322   LoopRestorationType type[kMaxPlanes];
323   int unit_size[kMaxPlanes];
324 };
325 
326 // Stores the quantization parameters of Section 5.9.12.
327 struct QuantizerParameters {
328   // base_index is in the range [0, 255].
329   uint8_t base_index;
330   int8_t delta_dc[kMaxPlanes];
331   // delta_ac[kPlaneY] is always 0.
332   int8_t delta_ac[kMaxPlanes];
333   bool use_matrix;
334   // The |matrix_level| array is used only when |use_matrix| is true.
335   // matrix_level[plane] specifies the level in the quantizer matrix that
336   // should be used for decoding |plane|. The quantizer matrix has 15 levels,
337   // from 0 to 14. The range of matrix_level[plane] is [0, 15]. If
338   // matrix_level[plane] is 15, the quantizer matrix is not used.
339   int8_t matrix_level[kMaxPlanes];
340 };
341 
// Segment features. In the AV1 spec the corresponding constants are named
// SEG_LVL_xxx. kSegmentFeatureMax is the number of features.
enum SegmentFeature : uint8_t {
  kSegmentFeatureQuantizer = 0,
  kSegmentFeatureLoopFilterYVertical = 1,
  kSegmentFeatureLoopFilterYHorizontal = 2,
  kSegmentFeatureLoopFilterU = 3,
  kSegmentFeatureLoopFilterV = 4,
  kSegmentFeatureReferenceFrame = 5,
  kSegmentFeatureSkip = 6,
  kSegmentFeatureGlobalMv = 7,
  kSegmentFeatureMax = 8
};
355 
356 struct Segmentation {
357   // 5.11.14.
358   // Returns true if the feature is enabled in the segment.
FeatureActiveSegmentation359   bool FeatureActive(int segment_id, SegmentFeature feature) const {
360     return enabled && segment_id < kMaxSegments &&
361            feature_enabled[segment_id][feature];
362   }
363 
364   // Returns true if the feature is signed.
FeatureSignedSegmentation365   static bool FeatureSigned(SegmentFeature feature) {
366     // Only the first five segment features are signed, so this comparison
367     // suffices.
368     return feature <= kSegmentFeatureLoopFilterV;
369   }
370 
371   bool enabled;
372   bool update_map;
373   bool update_data;
374   bool temporal_update;
375   // True if the segment id will be read before the skip syntax element. False
376   // if the skip syntax element will be read first.
377   bool segment_id_pre_skip;
378   // The highest numbered segment id that has some enabled feature. Used as
379   // the upper bound for decoding segment ids.
380   int8_t last_active_segment_id;
381 
382   bool feature_enabled[kMaxSegments][kSegmentFeatureMax];
383   int16_t feature_data[kMaxSegments][kSegmentFeatureMax];
384   bool lossless[kMaxSegments];
385   // Cached values of get_qindex(1, segmentId), to be consumed by
386   // Tile::ReadTransformType(). The values are in the range [0, 255].
387   uint8_t qindex[kMaxSegments];
388 };
389 
// Film grain parameters, Section 6.8.20.
// Note: the film grain section of the spec names its variables after YCbCr,
// for example num_cb_points and num_cr_points. For consistency with the rest
// of the code, YUV naming is used here instead: num_u_points, num_v_points,
// and so on.
struct FilmGrainParams {
  bool apply_grain;
  bool update_grain;
  bool chroma_scaling_from_luma;
  bool overlap_flag;
  bool clip_to_restricted_range;

  uint8_t num_y_points;  // [0, 14].
  uint8_t num_u_points;  // [0, 10].
  uint8_t num_v_points;  // [0, 10].
  // Values must lie in [0, 255] (10/12 bit values are divided by 4 or 16) and
  // must be in increasing order.
  uint8_t point_y_value[14];
  uint8_t point_y_scaling[14];
  uint8_t point_u_value[10];
  uint8_t point_u_scaling[10];
  uint8_t point_v_value[10];
  uint8_t point_v_scaling[10];

  uint8_t chroma_scaling;              // [8, 11].
  uint8_t auto_regression_coeff_lag;   // [0, 3].
  int8_t auto_regression_coeff_y[24];  // [-128, 127]
  int8_t auto_regression_coeff_u[25];  // [-128, 127]
  int8_t auto_regression_coeff_v[25];  // [-128, 127]
  // Range of the auto regression coeffs for each shift value:
  // 6: [-2, 2)
  // 7: [-1, 1)
  // 8: [-0.5, 0.5)
  // 9: [-0.25, 0.25)
  uint8_t auto_regression_shift;

  uint16_t grain_seed;
  int reference_index;
  int grain_scale_shift;
  // The multipliers are encoded as nonnegative values by first adding 128.
  // Parsing subtracts the 128 again.
  int8_t u_multiplier;       // [-128, 127]
  int8_t u_luma_multiplier;  // [-128, 127]
  // The offsets are encoded as nonnegative values by first adding 256.
  // Parsing subtracts the 256 again.
  int16_t u_offset;          // [-256, 255]
  int8_t v_multiplier;       // [-128, 127]
  int8_t v_luma_multiplier;  // [-128, 127]
  int16_t v_offset;          // [-256, 255]
};
438 
439 struct ObuFrameHeader {
440   uint16_t display_frame_id;
441   uint16_t current_frame_id;
442   int64_t frame_offset;
443   uint16_t expected_frame_id[kNumInterReferenceFrameTypes];
444   int32_t width;
445   int32_t height;
446   int32_t columns4x4;
447   int32_t rows4x4;
448   // The render size (render_width and render_height) is a hint to the
449   // application about the desired display size. It has no effect on the
450   // decoding process.
451   int32_t render_width;
452   int32_t render_height;
453   int32_t upscaled_width;
454   LoopRestoration loop_restoration;
455   uint32_t buffer_removal_time[kMaxOperatingPoints];
456   uint32_t frame_presentation_time;
457   // Note: global_motion[0] (for kReferenceFrameIntra) is not used.
458   std::array<GlobalMotion, kNumReferenceFrameTypes> global_motion;
459   TileInfo tile_info;
460   QuantizerParameters quantizer;
461   Segmentation segmentation;
462   bool show_existing_frame;
463   // frame_to_show is in the range [0, 7]. Only used if show_existing_frame is
464   // true.
465   int8_t frame_to_show;
466   FrameType frame_type;
467   bool show_frame;
468   bool showable_frame;
469   bool error_resilient_mode;
470   bool enable_cdf_update;
471   bool frame_size_override_flag;
472   // The order_hint syntax element in the uncompressed header. If
473   // show_existing_frame is false, the OrderHint variable in the spec is equal
474   // to this field, and so this field can be used in place of OrderHint when
475   // show_existing_frame is known to be false, such as during tile decoding.
476   uint8_t order_hint;
477   int8_t primary_reference_frame;
478   bool render_and_frame_size_different;
479   bool use_superres;
480   uint8_t superres_scale_denominator;
481   bool allow_screen_content_tools;
482   bool allow_intrabc;
483   bool frame_refs_short_signaling;
484   // A bitmask that specifies which reference frame slots will be updated with
485   // the current frame after it is decoded.
486   uint8_t refresh_frame_flags;
487   static_assert(sizeof(ObuFrameHeader::refresh_frame_flags) * 8 ==
488                     kNumReferenceFrameTypes,
489                 "");
490   bool found_reference;
491   int8_t force_integer_mv;
492   bool allow_high_precision_mv;
493   InterpolationFilter interpolation_filter;
494   bool is_motion_mode_switchable;
495   bool use_ref_frame_mvs;
496   bool enable_frame_end_update_cdf;
497   // True if all segments are losslessly encoded at the coded resolution.
498   bool coded_lossless;
499   // True if all segments are losslessly encoded at the upscaled resolution.
500   bool upscaled_lossless;
501   TxMode tx_mode;
502   // True means that the mode info for inter blocks contains the syntax
503   // element comp_mode that indicates whether to use single or compound
504   // prediction. False means that all inter blocks will use single prediction.
505   bool reference_mode_select;
506   // The frames to use for compound prediction when skip_mode is true.
507   ReferenceFrameType skip_mode_frame[2];
508   bool skip_mode_present;
509   bool reduced_tx_set;
510   bool allow_warped_motion;
511   Delta delta_q;
512   Delta delta_lf;
513   // A valid value of reference_frame_index[i] is in the range [0, 7]. -1
514   // indicates an invalid value.
515   int8_t reference_frame_index[kNumInterReferenceFrameTypes];
516   // The ref_order_hint[ i ] syntax element in the uncompressed header.
517   // Specifies the expected output order hint for each reference frame.
518   uint8_t reference_order_hint[kNumReferenceFrameTypes];
519   LoopFilter loop_filter;
520   Cdef cdef;
521   FilmGrainParams film_grain_params;
522 };
523 
524 }  // namespace libgav1
525 #endif  // LIBGAV1_SRC_UTILS_TYPES_H_
526