1 /* 2 * Copyright (c) 2022, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #ifndef AOM_AV1_QMODE_RC_RATECTRL_QMODE_INTERFACE_H_ 13 #define AOM_AV1_QMODE_RC_RATECTRL_QMODE_INTERFACE_H_ 14 15 #include <array> 16 #include <string> 17 #include <vector> 18 19 #include "aom/aom_codec.h" 20 #include "av1/encoder/firstpass.h" 21 22 namespace aom { 23 24 constexpr int kBlockRefCount = 2; 25 26 struct MotionVector { 27 int row; // subpel row 28 int col; // subpel col 29 // TODO(b/241589513): Move this to TplFrameStats; it's wasteful to code it 30 // separately for each block. 31 int subpel_bits; // number of fractional bits used by row/col 32 }; 33 34 enum class TplPassCount { 35 kOneTplPass = 1, 36 kTwoTplPasses = 2, 37 }; 38 39 struct RateControlParam { 40 // Range of allowed GOP sizes (number of displayed frames). 41 int max_gop_show_frame_count; 42 int min_gop_show_frame_count; 43 // Number of reference frame buffers, i.e., size of the DPB. 44 int ref_frame_table_size; 45 // Maximum number of references a single frame may use. 46 int max_ref_frames; 47 48 int base_q_index; 49 50 // If greater than 1, enables per-superblock q_index, and limits the number of 51 // unique q_index values which may be used in a frame (each of which will have 52 // its own unique rdmult value). 53 int max_distinct_q_indices_per_frame; 54 55 // If per-superblock q_index is enabled and this is greater than 1, enables 56 // additional per-superblock scaling of lambda, and limits the number of 57 // unique lambda scale values which may be used in a frame. 58 int max_distinct_lambda_scales_per_frame; 59 60 int frame_width; 61 int frame_height; 62 63 // Total number of TPL passes. 64 TplPassCount tpl_pass_count = TplPassCount::kOneTplPass; 65 // Current TPL pass number, 0 or 1 (for GetTplPassGopEncodeInfo). 66 int tpl_pass_index = 0; 67 }; 68 69 struct TplBlockStats { 70 int16_t height; // Pixel height. 71 int16_t width; // Pixel width. 72 int16_t row; // Pixel row of the top left corner. 73 int16_t col; // Pixel col of the top left corner. 74 int64_t intra_cost; // Rd cost of the best intra mode. 75 int64_t inter_cost; // Rd cost of the best inter mode. 76 77 // Valid only if TplFrameStats::rate_dist_present is true: 78 int64_t recrf_rate; // Bits when using recon as reference. 79 int64_t recrf_dist; // Distortion when using recon as reference. 80 int64_t intra_pred_err; // Prediction residual of the intra mode. 81 int64_t inter_pred_err; // Prediction residual of the inter mode. 82 83 std::array<MotionVector, kBlockRefCount> mv; 84 std::array<int, kBlockRefCount> ref_frame_index; 85 }; 86 87 // gop frame type used for facilitate setting up GopFrame 88 // TODO(angiebird): Define names for forward key frame and 89 // key frame with overlay 90 enum class GopFrameType { 91 kRegularKey, // High quality key frame without overlay 92 kRegularLeaf, // Regular leaf frame 93 kRegularGolden, // Regular golden frame 94 kRegularArf, // High quality arf with strong filtering followed by an overlay 95 // later 96 kOverlay, // Overlay frame 97 kIntermediateOverlay, // Intermediate overlay frame 98 kIntermediateArf, // Good quality arf with weak or no filtering followed by a 99 // show_existing later 100 }; 101 102 enum class EncodeRefMode { 103 kRegular, 104 kOverlay, 105 kShowExisting, 106 }; 107 108 enum class ReferenceName { 109 kNoneFrame = -1, 110 kIntraFrame = 0, 111 kLastFrame = 1, 112 kLast2Frame = 2, 113 kLast3Frame = 3, 114 kGoldenFrame = 4, 115 kBwdrefFrame = 5, 116 kAltref2Frame = 6, 117 kAltrefFrame = 7, 118 }; 119 120 struct Status { 121 aom_codec_err_t code; 122 std::string message; // Empty if code == AOM_CODEC_OK. okStatus123 bool ok() const { return code == AOM_CODEC_OK; } 124 }; 125 126 // A very simple imitation of absl::StatusOr, this is conceptually a union of a 127 // Status struct and an object of type T. It models an object that is either a 128 // usable object, or an error explaining why such an object is not present. A 129 // StatusOr<T> may never hold a status with a code of AOM_CODEC_OK. 130 template <typename T> 131 class StatusOr { 132 public: StatusOr(const T & value)133 StatusOr(const T &value) : value_(value) {} StatusOr(T && value)134 StatusOr(T &&value) : value_(std::move(value)) {} StatusOr(Status status)135 StatusOr(Status status) : status_(std::move(status)) { 136 assert(status_.code != AOM_CODEC_OK); 137 } 138 status()139 const Status &status() const { return status_; } ok()140 bool ok() const { return status().ok(); } 141 142 // operator* returns the value; it should only be called after checking that 143 // ok() returns true. 144 const T &operator*() const & { return value_; } 145 T &operator*() & { return value_; } 146 const T &&operator*() const && { return value_; } 147 T &&operator*() && { return std::move(value_); } 148 149 // sor->field is equivalent to (*sor).field. 150 const T *operator->() const & { return &value_; } 151 T *operator->() & { return &value_; } 152 153 // value() is equivalent to operator*, but asserts that ok() is true. value()154 const T &value() const & { 155 assert(ok()); 156 return value_; 157 } value()158 T &value() & { 159 assert(ok()); 160 return value_; 161 } value()162 const T &&value() const && { 163 assert(ok()); 164 return value_; 165 } value()166 T &&value() && { 167 assert(ok()); 168 return std::move(value_); 169 } 170 171 private: 172 T value_; // This could be std::optional<T> if it were available. 173 Status status_ = { AOM_CODEC_OK, "" }; 174 }; 175 176 struct ReferenceFrame { 177 int index; // Index of reference slot containing the reference frame 178 ReferenceName name; 179 }; 180 181 struct GopFrame { 182 // basic info 183 bool is_valid; 184 int order_idx; // Index in display order in a GOP 185 int coding_idx; // Index in coding order in a GOP 186 int display_idx; // The number of displayed frames preceding this frame in 187 // a GOP 188 189 int global_order_idx; // Index in display order in the whole video chunk 190 int global_coding_idx; // Index in coding order in the whole video chunk 191 192 bool is_key_frame; // If this is key frame, reset reference buffers are 193 // required 194 bool is_arf_frame; // Is this a forward frame, a frame with order_idx 195 // higher than the current display order 196 bool is_show_frame; // Is this frame a show frame after coding 197 bool is_golden_frame; // Is this a high quality frame 198 199 GopFrameType update_type; // This is a redundant field. It is only used for 200 // easy conversion in SW integration. 201 202 // reference frame info 203 EncodeRefMode encode_ref_mode; 204 int colocated_ref_idx; // colocated_ref_idx == -1 when encode_ref_mode == 205 // EncodeRefMode::kRegular 206 int update_ref_idx; // The reference index that this frame should be 207 // updated to. update_ref_idx == -1 when this frame 208 // will not serve as a reference frame 209 std::vector<ReferenceFrame> 210 ref_frame_list; // A list of available reference frames in priority order 211 // for the current to-be-coded frame. The list size 212 // should be less or equal to ref_frame_table_size. The 213 // reference frames with smaller indices are more likely 214 // to be a good reference frame. Therefore, they should 215 // be prioritized when the reference frame count is 216 // limited. For example, if we plan to use 3 reference 217 // frames, we should choose ref_frame_list[0], 218 // ref_frame_list[1] and ref_frame_list[2]. 219 int layer_depth; // Layer depth in the GOP structure 220 ReferenceFrame primary_ref_frame; // We will use the primary reference frame 221 // to update current frame's initial 222 // probability model 223 }; 224 225 struct GopStruct { 226 int show_frame_count; 227 int global_coding_idx_offset; 228 int global_order_idx_offset; 229 // TODO(jingning): This can be removed once the framework is up running. 230 int display_tracker; // Track the number of frames displayed proceeding a 231 // current coding frame. 232 std::vector<GopFrame> gop_frame_list; 233 }; 234 235 using GopStructList = std::vector<GopStruct>; 236 237 struct SuperblockEncodeParameters { 238 int q_index; 239 int rdmult; 240 }; 241 242 struct FrameEncodeParameters { 243 // Base q_index for the frame. 244 int q_index; 245 246 // Frame level Lagrangian multiplier. 247 int rdmult; 248 249 // If max_distinct_q_indices_per_frame <= 1, this will be empty. 250 // Otherwise: 251 // - There must be one entry per 64x64 superblock, in row-major order 252 // - There may be no more than max_distinct_q_indices_per_frame unique q_index 253 // values 254 // - All entries with the same q_index must have the same rdmult 255 // (If it's desired to use different rdmult values with the same q_index, this 256 // must be done with superblock_lambda_scales.) 257 std::vector<SuperblockEncodeParameters> superblock_encode_params; 258 259 // If max_distinct_q_indices_per_frame <= 1 or 260 // max_distinct_lambda_scales_per_frame <= 1, this will be empty. Otherwise, 261 // it will have one entry per 64x64 superblock, in row-major order, with no 262 // more than max_distinct_lambda_scales_per_frame unique values. Each entry 263 // should be multiplied by the rdmult in the corresponding superblock's entry 264 // in superblock_encode_params. 265 std::vector<float> superblock_lambda_scales; 266 }; 267 268 struct FirstpassInfo { 269 int num_mbs_16x16; // Count of 16x16 unit blocks in each frame. 270 // FIRSTPASS_STATS's unit block size is 16x16 271 std::vector<FIRSTPASS_STATS> stats_list; 272 }; 273 274 // In general, the number of elements in RefFrameTable must always equal 275 // ref_frame_table_size (as specified in RateControlParam), but see 276 // GetGopEncodeInfo for the one exception. 277 using RefFrameTable = std::vector<GopFrame>; 278 279 struct GopEncodeInfo { 280 std::vector<FrameEncodeParameters> param_list; 281 RefFrameTable final_snapshot; // RefFrameTable snapshot after coding this GOP 282 }; 283 284 struct TplFrameStats { 285 int min_block_size; 286 int frame_width; 287 int frame_height; 288 bool rate_dist_present; // True if recrf_rate and recrf_dist are populated. 289 std::vector<TplBlockStats> block_stats_list; 290 // Optional stats computed with different settings, should be empty unless 291 // tpl_pass_count == kTwoTplPasses. 292 std::vector<TplBlockStats> alternate_block_stats_list; 293 }; 294 295 struct TplGopStats { 296 std::vector<TplFrameStats> frame_stats_list; 297 }; 298 299 // Structure and TPL stats for a single GOP, to be used for lookahead. 300 struct LookaheadStats { 301 const GopStruct *gop_struct; // Not owned, may not be nullptr. 302 const TplGopStats *tpl_gop_stats; // Not owned, may not be nullptr. 303 }; 304 305 class AV1RateControlQModeInterface { 306 public: 307 AV1RateControlQModeInterface(); 308 virtual ~AV1RateControlQModeInterface(); 309 310 virtual Status SetRcParam(const RateControlParam &rc_param) = 0; 311 virtual StatusOr<GopStructList> DetermineGopInfo( 312 const FirstpassInfo &firstpass_info) = 0; 313 314 // Accepts GOP structure and TPL info from the encoder and returns q index and 315 // rdmult for each frame. This should be called with consecutive GOPs as 316 // returned by DetermineGopInfo. 317 // 318 // GOP structure and TPL info from zero or more subsequent GOPs may optionally 319 // be passed in lookahead_stats. 320 // 321 // For the first GOP, a default-constructed RefFrameTable may be passed in as 322 // ref_frame_table_snapshot_init; for subsequent GOPs, it should be the 323 // final_snapshot returned on the previous call. 324 // 325 // TODO(b/260859962): Remove these once all callers and overrides are gone. GetGopEncodeInfo(const GopStruct & gop_struct AOM_UNUSED,const TplGopStats & tpl_gop_stats AOM_UNUSED,const std::vector<LookaheadStats> & lookahead_stats AOM_UNUSED,const RefFrameTable & ref_frame_table_snapshot AOM_UNUSED)326 virtual StatusOr<GopEncodeInfo> GetGopEncodeInfo( 327 const GopStruct &gop_struct AOM_UNUSED, 328 const TplGopStats &tpl_gop_stats AOM_UNUSED, 329 const std::vector<LookaheadStats> &lookahead_stats AOM_UNUSED, 330 const RefFrameTable &ref_frame_table_snapshot AOM_UNUSED) { 331 return Status{ AOM_CODEC_UNSUP_FEATURE, "Deprecated" }; 332 } GetTplPassGopEncodeInfo(const GopStruct & gop_struct AOM_UNUSED)333 virtual StatusOr<GopEncodeInfo> GetTplPassGopEncodeInfo( 334 const GopStruct &gop_struct AOM_UNUSED) { 335 return Status{ AOM_CODEC_UNSUP_FEATURE, "Deprecated" }; 336 } 337 338 // Extensions to the API to pass in the first pass info. There should be stats 339 // for all frames starting from the first frame of the GOP and continuing to 340 // the end of the sequence. 341 // TODO(b/260859962): Make pure virtual once all derived classes implement it. GetGopEncodeInfo(const GopStruct & gop_struct AOM_UNUSED,const TplGopStats & tpl_gop_stats AOM_UNUSED,const std::vector<LookaheadStats> & lookahead_stats AOM_UNUSED,const FirstpassInfo & firstpass_info AOM_UNUSED,const RefFrameTable & ref_frame_table_snapshot AOM_UNUSED)342 virtual StatusOr<GopEncodeInfo> GetGopEncodeInfo( 343 const GopStruct &gop_struct AOM_UNUSED, 344 const TplGopStats &tpl_gop_stats AOM_UNUSED, 345 const std::vector<LookaheadStats> &lookahead_stats AOM_UNUSED, 346 const FirstpassInfo &firstpass_info AOM_UNUSED, 347 const RefFrameTable &ref_frame_table_snapshot AOM_UNUSED) { 348 return Status{ AOM_CODEC_UNSUP_FEATURE, "Not yet implemented" }; 349 } GetTplPassGopEncodeInfo(const GopStruct & gop_struct AOM_UNUSED,const FirstpassInfo & firstpass_info AOM_UNUSED)350 virtual StatusOr<GopEncodeInfo> GetTplPassGopEncodeInfo( 351 const GopStruct &gop_struct AOM_UNUSED, 352 const FirstpassInfo &firstpass_info AOM_UNUSED) { 353 return Status{ AOM_CODEC_UNSUP_FEATURE, "Not yet implemented" }; 354 } 355 }; 356 } // namespace aom 357 358 #endif // AOM_AV1_QMODE_RC_RATECTRL_QMODE_INTERFACE_H_ 359