1 /* 2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 3-Clause Clear License 5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear 6 * License was not distributed with this source code in the LICENSE file, you 7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the 8 * Alliance for Open Media Patent License 1.0 was not distributed with this 9 * source code in the PATENTS file, you can obtain it at 10 * www.aomedia.org/license/patent. 11 */ 12 #ifndef OBU_AUDIO_ELEMENT_H_ 13 #define OBU_AUDIO_ELEMENT_H_ 14 15 #include <cstdint> 16 #include <limits> 17 #include <optional> 18 #include <variant> 19 #include <vector> 20 21 #include "absl/log/check.h" 22 #include "absl/status/status.h" 23 #include "absl/status/statusor.h" 24 #include "iamf/common/read_bit_buffer.h" 25 #include "iamf/common/write_bit_buffer.h" 26 #include "iamf/obu/demixing_param_definition.h" 27 #include "iamf/obu/obu_base.h" 28 #include "iamf/obu/obu_header.h" 29 #include "iamf/obu/param_definitions.h" 30 #include "iamf/obu/types.h" 31 32 namespace iamf_tools { 33 34 /*!\brief One of the parameters associated with an Audio Element OBU. */ 35 struct AudioElementParam { 36 friend bool operator==(const AudioElementParam& lhs, 37 const AudioElementParam& rhs) = default; 38 39 /*!\brief Reads from a buffer and validates the resulting output. 40 * 41 * \param rb Buffer to read from. 42 * \return `absl::OkStatus()` if successful. A specific status on failure. 43 */ 44 absl::Status ReadAndValidate(uint32_t audio_element_id, ReadBitBuffer& rb); 45 46 // One of the parameter definition subclasses allowed in an Audio Element. 47 std::variant<DemixingParamDefinition, ReconGainParamDefinition, 48 ExtendedParamDefinition> 49 param_definition; 50 51 /*!\brief Gets the actual type of parameter definition. 52 * 53 * \return Type of the stored parameter definition. 54 */ GetTypeAudioElementParam55 ParamDefinition::ParameterDefinitionType GetType() const { 56 return std::visit( 57 [](const auto& concrete_param_definition) { 58 const auto param_definition_type = 59 concrete_param_definition.GetType(); 60 61 // All alternatives have well-defined types. 62 CHECK(param_definition_type.has_value()); 63 return *param_definition_type; 64 }, 65 param_definition); 66 } 67 }; 68 69 /*!\brief An element of the `ScalableChannelLayoutConfig` vector. 70 * 71 * Implements the `ChannelAudioLayerConfig` as defined by section 3.6.2 of 72 * https://aomediacodec.github.io/iamf/v1.1.0.html. 73 */ 74 struct ChannelAudioLayerConfig { 75 /*!\brief A 4-bit enum for the type of layout. */ 76 enum LoudspeakerLayout : uint8_t { 77 kLayoutMono = 0, // C. 78 kLayoutStereo = 1, // L/R 79 kLayout5_1_ch = 2, // L/C/R/Ls/Rs/LFE. 80 kLayout5_1_2_ch = 3, // L/C/R/Ls/Rs/Ltf/Rtf/LFE. 81 kLayout5_1_4_ch = 4, // L/C/R/Ls/Rs/Ltf/Rtf/Ltr/Rtr/LFE. 82 kLayout7_1_ch = 5, // L/C/R/Lss/Rss/Lrs/Rrs/LFE. 83 kLayout7_1_2_ch = 6, // L/C/R/Lss/Rss/Lrs/Rrs/Ltf/Rtf/LFE. 84 kLayout7_1_4_ch = 7, // L/C/R/Lss/Rss/Lrs/Rrs/Ltf/Rtf/Ltb/Rtb/LFE. 85 kLayout3_1_2_ch = 8, // L/C/R//Ltf/Rtf/LFE. 86 kLayoutBinaural = 9, // L/R. 87 kLayoutReserved10 = 10, 88 kLayoutReserved11 = 11, 89 kLayoutReserved12 = 12, 90 kLayoutReserved13 = 13, 91 kLayoutReserved14 = 14, 92 kLayoutExpanded = 15, 93 }; 94 95 /*!\brief A 8-bit enum for the type of expanded layout. */ 96 enum ExpandedLoudspeakerLayout : uint8_t { 97 kExpandedLayoutLFE = 0, // Low-frequency effects subset (LFE) or 7.1.4. 98 kExpandedLayoutStereoS = 1, // Stereo subset (Ls/Rs) of 5.1.4. 99 kExpandedLayoutStereoSS = 2, // Side surround subset (Lss/Rss) of 7.1.4. 100 kExpandedLayoutStereoRS = 3, // Rear surround subset (Lrs/Rrs) of 7.1.4. 101 kExpandedLayoutStereoTF = 4, // Top front subset (Ltf/Rtf) of 7.1.4. 102 kExpandedLayoutStereoTB = 5, // Top back subset (Ltb/Rtb) of 7.1.4. 103 kExpandedLayoutTop4Ch = 6, // Top four channels (Ltf/Rtf/Ltb/Rtb) of 7.1.4. 104 kExpandedLayout3_0_ch = 7, // Front three channels (L/C/R) of 7.1.4. 105 kExpandedLayout9_1_6_ch = 8, // Subset of Sound System H [ITU-2051-3]. 106 kExpandedLayoutStereoF = 9, // Front stereo subset (FL/FR) of 9.1.6. 107 kExpandedLayoutStereoSi = 10, // Side surround subset (SiL/SiR) of 9.1.6. 108 kExpandedLayoutStereoTpSi = 109 11, // Top surround subset (TpSiL/TpSiR) of 9.1.6. 110 kExpandedLayoutTop6Ch = 111 12, // Top six channels (TpFL/TpFR/TpSiL/TpSiR/TpBL/TpBR) of 9.1.6. 112 kExpandedLayoutReserved13 = 13, 113 kExpandedLayoutReserved255 = 255, 114 }; 115 116 friend bool operator==(const ChannelAudioLayerConfig& lhs, 117 const ChannelAudioLayerConfig& rhs) = default; 118 119 /*!\brief Writes the `ChannelAudioLayerConfig` payload to the buffer. 120 * 121 * \param wb Buffer to write to. 122 * \return `absl::OkStatus()` if the payload is valid. A specific status on 123 * failure. 124 */ 125 absl::Status Write(WriteBitBuffer& wb) const; 126 127 /*!\brief Reads the `ChannelAudioLayerConfig` payload from the buffer. 128 * 129 * \param rb Buffer to read from. 130 * \return `absl::OkStatus()` if successful. A specific status on failure. 131 */ 132 absl::Status Read(ReadBitBuffer& rb); 133 134 LoudspeakerLayout loudspeaker_layout; // 4 bits. 135 uint8_t output_gain_is_present_flag; // 1 bit. 136 uint8_t recon_gain_is_present_flag; // 1 bit. 137 uint8_t reserved_a; // 2 bits. 138 uint8_t substream_count; 139 uint8_t coupled_substream_count; 140 141 // if (output_gain_is_present_flag(i) == 1) { 142 uint8_t output_gain_flag = 0; // 6 bits. 143 uint8_t reserved_b = 0; // 2 bits. 144 int16_t output_gain = 0; 145 // } 146 147 // if (loudspeaker_layout == kLayoutExpanded) { 148 std::optional<ExpandedLoudspeakerLayout> expanded_loudspeaker_layout; 149 // } 150 }; 151 152 /*!\brief Config to reconstruct an Audio Element OBU using a channel layout. 153 * 154 * The metadata required for combining the substreams identified here in order 155 * to reconstruct a scalable channel layout. 156 */ 157 struct ScalableChannelLayoutConfig { 158 friend bool operator==(const ScalableChannelLayoutConfig& lhs, 159 const ScalableChannelLayoutConfig& rhs) = default; 160 161 /*!\brief Validates the configuration. 162 * 163 * \param num_substreams_in_audio_element Number of substreams in the 164 * corresponding OBU. 165 * \return `absl::OkStatus()` if successful. A specific status on failure. 166 */ 167 absl::Status Validate(DecodedUleb128 num_substreams_in_audio_element) const; 168 169 uint8_t num_layers; // 3 bits. 170 uint8_t reserved; // 5 bits. 171 172 // Vector of length `num_layers`. 173 std::vector<ChannelAudioLayerConfig> channel_audio_layer_configs; 174 }; 175 176 /*!\brief Configuration for mono-coded Ambisonics. */ 177 struct AmbisonicsMonoConfig { 178 // RFC 8486 reserves 255 to signal an inactive ACN (ambisonics channel 179 // number). 180 static constexpr uint8_t kInactiveAmbisonicsChannelNumber = 255; 181 182 friend bool operator==(const AmbisonicsMonoConfig& lhs, 183 const AmbisonicsMonoConfig& rhs) = default; 184 185 /*!\brief Validates the configuration. 186 * 187 * \param num_substreams_in_audio_element Number of substreams in the 188 * corresponding OBU. 189 * \return `absl::OkStatus()` if successful. A specific status on failure. 190 */ 191 absl::Status Validate(DecodedUleb128 num_substreams_in_audio_element) const; 192 193 uint8_t output_channel_count; // (C). 194 uint8_t substream_count; // (N). 195 196 // Vector of length (C). 197 std::vector<uint8_t> channel_mapping; 198 }; 199 200 /*!\brief Configuration for projection-coded Ambisonics. */ 201 struct AmbisonicsProjectionConfig { 202 friend bool operator==(const AmbisonicsProjectionConfig& lhs, 203 const AmbisonicsProjectionConfig& rhs) = default; 204 205 /*!\brief Validates the configuration. 206 * 207 * \param num_substreams_in_audio_element Number of substreams in the 208 * corresponding OBU. 209 * \return `absl::OkStatus()` if successful. A specific status on failure. 210 */ 211 absl::Status Validate(DecodedUleb128 num_substreams_in_audio_element) const; 212 213 uint8_t output_channel_count; // (C). 214 uint8_t substream_count; // (N). 215 uint8_t coupled_substream_count; // (M). 216 217 // Vector of length (N + M) * C. 218 std::vector<int16_t> demixing_matrix; 219 }; 220 221 /*!\brief Config to reconstruct an Audio Element OBU using Ambisonics layout. 222 * 223 * The metadata required for combining the substreams identified here in order 224 * to reconstruct an Ambisonics layout. 225 */ 226 struct AmbisonicsConfig { 227 /*!\brief A `DecodedUleb128` enum for the method of coding Ambisonics. */ 228 enum AmbisonicsMode : DecodedUleb128 { 229 kAmbisonicsModeMono = 0, 230 kAmbisonicsModeProjection = 1, 231 kAmbisonicsModeReservedStart = 2, 232 kAmbisonicsModeReservedEnd = std::numeric_limits<DecodedUleb128>::max(), 233 }; 234 friend bool operator==(const AmbisonicsConfig& lhs, 235 const AmbisonicsConfig& rhs) = default; 236 237 /*!\brief Gets the next valid number of output channels. 238 * 239 * \param requested_output_channel_count Requested number of channels. 240 * \param next_valid_output_channel_count Minimum valid `output_channel_count` 241 * that has at least the required number of channels. 242 * \return `absl::OkStatus()` if successful. `kIamfInvalid` argument if 243 * the input is too large. 244 */ 245 static absl::Status GetNextValidOutputChannelCount( 246 uint8_t requested_output_channel_count, 247 uint8_t& next_valid_output_channel_count); 248 249 AmbisonicsMode ambisonics_mode; // Serialized to a ULEB128. 250 251 // The active field depends on `ambisonics_mode`. 252 std::variant<AmbisonicsMonoConfig, AmbisonicsProjectionConfig> 253 ambisonics_config; 254 }; 255 256 struct ExtensionConfig { 257 friend bool operator==(const ExtensionConfig& lhs, 258 const ExtensionConfig& rhs) = default; 259 260 DecodedUleb128 audio_element_config_size; 261 std::vector<uint8_t> audio_element_config_bytes; 262 }; 263 264 /*!\brief Audio Element OBU. 265 * 266 * After constructing, the following MUST be called and return successfully. 267 * 1. `InitializeAudioSubstreams()` and `InitializeParams()`. 268 * 2. Exactly one of [ 269 * `InitializeScalableChannelLayout()`, 270 * `InitializeAmbisonicsMono()`, 271 * `InitializeAmbisonicsProjection()`, 272 * `InitializeExtensionConfig()` 273 * ]. 274 * 275 */ 276 class AudioElementObu : public ObuBase { 277 public: 278 /*!\brief A 3-bit enum for the type of Audio Element. */ 279 enum AudioElementType : uint8_t { 280 kAudioElementChannelBased = 0, 281 kAudioElementSceneBased = 1, 282 // Values in the range of [2 - 7] are reserved. 283 kAudioElementBeginReserved = 2, 284 kAudioElementEndReserved = 7, 285 }; 286 287 typedef std::variant<ScalableChannelLayoutConfig, AmbisonicsConfig, 288 ExtensionConfig> 289 AudioElementConfig; 290 291 /*!\brief Constructor. 292 * 293 * \param header `ObuHeader` of the OBU. 294 * \param audio_element_id `audio_element_id` in the OBU. 295 * \param audio_element_type Type of the OBU. 296 * \param reserved Reserved bits of the OBU. 297 * \param codec_config_id ID of the associated Codec Config OBU. 298 */ 299 AudioElementObu(const ObuHeader& header, DecodedUleb128 audio_element_id, 300 AudioElementType audio_element_type, uint8_t reserved, 301 DecodedUleb128 codec_config_id); 302 303 /*!\brief Creates a `AudioElementObu` from a `ReadBitBuffer`. 304 * 305 * This function is designed to be used from the perspective of the decoder. 306 * It will call `ReadAndValidatePayload` in order to read from the buffer; 307 * therefore it can fail. 308 * 309 * \param header `ObuHeader` of the OBU. 310 * \param payload_size Size of the obu payload in bytes. 311 * \param rb `ReadBitBuffer` where the `AudioElementObu` data is stored. 312 * Data read from the buffer is consumed. 313 * \return an `AudioElementObu` on success. A specific status on failure. 314 */ 315 static absl::StatusOr<AudioElementObu> CreateFromBuffer( 316 const ObuHeader& header, int64_t payload_size, ReadBitBuffer& rb); 317 318 /*!\brief Copy constructor.*/ 319 AudioElementObu(const AudioElementObu& other) = default; 320 321 /*!\brief Move constructor.*/ 322 AudioElementObu(AudioElementObu&& other) = default; 323 324 /*!\brief Destructor. */ 325 ~AudioElementObu() override = default; 326 327 friend bool operator==(const AudioElementObu& lhs, 328 const AudioElementObu& rhs) = default; 329 330 /*!\brief Initializes the `audio_substream_ids_` vector. 331 * 332 * \param num_substreams Number of substreams. 333 * \return `absl::OkStatus()` if successful. A specific status on failure. 334 */ 335 void InitializeAudioSubstreams(uint32_t num_substreams); 336 337 /*!\brief Initializes the `audio_element_params_` vector. 338 * 339 * \param num_parameters Number of parameters. 340 */ 341 void InitializeParams(uint32_t num_parameters); 342 343 /*!\brief Initializes a channel-based Audio Element OBU. 344 * 345 * Must be called after `audio_element_type_` is initialized to 346 * `kAudioElementChannelBased`. 347 * 348 * \param num_layers Number of layers in the `ScalableChannelLayoutConfig`. 349 * \param reserved Reserved bits of the `ScalableChannelLayoutConfig`. 350 * \return `absl::OkStatus()` if successful. A specific status on failure. 351 */ 352 absl::Status InitializeScalableChannelLayout(uint32_t num_layers, 353 uint32_t reserved); 354 355 /*!\brief Initializes an Ambisonics Mono Audio Element OBU. 356 * 357 * Must be called if and only if 358 * `audio_element_type_` == `kAudioElementSceneBased` and 359 * `ambisonics_mode` == `kAmbisonicsModeMono`. 360 * 361 * \param output_channel_count Number of output channels. 362 * \param substream_count Number of substreams. 363 * \return `absl::OkStatus()` if successful. A specific status on failure. 364 */ 365 absl::Status InitializeAmbisonicsMono(uint32_t output_channel_count, 366 uint32_t substream_count); 367 368 /*!\brief Initializes an Ambisonics Projection Audio Element OBU. 369 * 370 * Must be called if and only if 371 * `audio_element_type_` == `kAudioElementSceneBased` and 372 * `ambisonics_mode` == `kAmbisonicsModeProjection`. 373 * 374 * \param output_channel_count Number of output channels. 375 * \param substream_count Number of substreams. 376 * \param coupled_substream_count Number of coupled substreams. 377 * \return `absl::OkStatus()` if successful. A specific status on failure. 378 */ 379 absl::Status InitializeAmbisonicsProjection(uint32_t output_channel_count, 380 uint32_t substream_count, 381 uint32_t coupled_substream_count); 382 383 /*!\brief Initializes an extended type of Audio Element OBU. 384 * 385 * For future use when new `audio_element_type_` values are defined. Must be 386 * called if and only if `audio_element_type_` is in the range of 387 * [`kAudioElementBeginReserved`, `kAudioElementEndReserved`]. 388 * 389 * \param audio_element_config_size Size in bytes of the 390 * `audio_element_config_bytes`. 391 */ 392 void InitializeExtensionConfig(uint32_t audio_element_config_size); 393 394 /*!\brief Prints logging information about the OBU.*/ 395 void PrintObu() const override; 396 GetAudioElementType()397 AudioElementType GetAudioElementType() const { return audio_element_type_; } 398 GetAudioElementId()399 DecodedUleb128 GetAudioElementId() const { return audio_element_id_; } 400 GetCodecConfigId()401 DecodedUleb128 GetCodecConfigId() const { return codec_config_id_; } 402 403 // Length and vector of substream IDs. 404 DecodedUleb128 num_substreams_; 405 std::vector<DecodedUleb128> audio_substream_ids_; 406 407 // Length and vector of audio element parameters. 408 DecodedUleb128 num_parameters_; 409 std::vector<AudioElementParam> audio_element_params_; 410 411 // Active field depends on `audio_element_type_`. 412 AudioElementConfig config_; 413 414 private: 415 DecodedUleb128 audio_element_id_; 416 AudioElementType audio_element_type_; // 3 bits. 417 uint8_t reserved_ = 0; // 5 bits. 418 419 // ID of the associated Codec Config OBU. 420 DecodedUleb128 codec_config_id_; 421 422 // Used only by the factory create function. AudioElementObu(const ObuHeader & header)423 explicit AudioElementObu(const ObuHeader& header) 424 : ObuBase(header, kObuIaAudioElement), 425 audio_element_id_(DecodedUleb128()), 426 audio_element_type_(kAudioElementBeginReserved), 427 codec_config_id_(DecodedUleb128()) {} 428 429 /*!\brief Writes the OBU payload to the buffer. 430 * 431 * \param wb Buffer to write to. 432 * \return `absl::OkStatus()` if the payload is valid. A specific status on 433 * failure. 434 */ 435 absl::Status ValidateAndWritePayload(WriteBitBuffer& wb) const override; 436 437 /*!\brief Reads the OBU payload from the buffer. 438 * 439 * \param payload_size Size of the obu payload in bytes. 440 * \param rb Buffer to read from. 441 * \return `absl::OkStatus()` if the payload is valid. A specific status on 442 * failure. 443 */ 444 absl::Status ReadAndValidatePayloadDerived(int64_t payload_size, 445 ReadBitBuffer& rb) override; 446 }; 447 448 } // namespace iamf_tools 449 450 #endif // OBU_AUDIO_ELEMENT_H_ 451