1 /* 2 * Copyright (c) 2024, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 3-Clause Clear License 5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear 6 * License was not distributed with this source code in the LICENSE file, you 7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the 8 * Alliance for Open Media Patent License 1.0 was not distributed with this 9 * source code in the PATENTS file, you can obtain it at 10 * www.aomedia.org/license/patent. 11 */ 12 13 #ifndef CLI_OBU_PROCESSOR_H_ 14 #define CLI_OBU_PROCESSOR_H_ 15 16 #include <cstdint> 17 #include <list> 18 #include <memory> 19 #include <optional> 20 #include <vector> 21 22 #include "absl/container/flat_hash_map.h" 23 #include "absl/status/status.h" 24 #include "absl/status/statusor.h" 25 #include "absl/types/span.h" 26 #include "iamf/cli/audio_element_with_data.h" 27 #include "iamf/cli/audio_frame_decoder.h" 28 #include "iamf/cli/audio_frame_with_data.h" 29 #include "iamf/cli/demixing_module.h" 30 #include "iamf/cli/global_timing_module.h" 31 #include "iamf/cli/parameter_block_with_data.h" 32 #include "iamf/cli/parameters_manager.h" 33 #include "iamf/cli/rendering_mix_presentation_finalizer.h" 34 #include "iamf/common/read_bit_buffer.h" 35 #include "iamf/obu/codec_config.h" 36 #include "iamf/obu/ia_sequence_header.h" 37 #include "iamf/obu/mix_presentation.h" 38 #include "iamf/obu/param_definition_variant.h" 39 #include "iamf/obu/temporal_delimiter.h" 40 #include "iamf/obu/types.h" 41 42 namespace iamf_tools { 43 44 class ObuProcessor { 45 public: 46 /*!\brief Processes the Descriptor OBUs of an IA Sequence. 47 * 48 * If insufficient data to process all descriptor OBUs is provided, a failing 49 * status will be returned. `insufficient_data` will be set to true, the 50 * read_bit_buffer will not be consumed, and the output parameters will not be 51 * populated. A user should call this function again after providing more 52 * data within the read_bit_buffer. 53 * 54 * \param is_exhaustive_and_exact Whether the bitstream provided is meant to 55 * include all descriptor OBUs and no other data. This should only be 56 * set to true if the user knows the exact boundaries of their set of 57 * descriptor OBUs. 58 * \param read_bit_buffer Buffer containing a portion of an iamf bitstream 59 * containing a sequence of OBUs. The buffer will be consumed up to the 60 * end of the descriptor OBUs if processing is successful. 61 * \param output_sequence_header IA sequence header processed from the 62 * bitstream. 63 * \param output_codec_config_obus Map of Codec Config OBUs processed from the 64 * bitstream. 65 * \param output_audio_elements_with_data Map of Audio Elements and metadata 66 * processed from the bitstream. 67 * \param output_mix_presentation_obus List of Mix Presentation OBUs processed 68 * from the bitstream. 69 * \param insufficient_data Whether the bitstream provided is insufficient to 70 * process all descriptor OBUs. 71 * \return `absl::OkStatus()` if the process is successful. A specific status 72 * on failure. 73 */ 74 [[deprecated( 75 "Remove when all tests are ported. Use the non-static version instead.")]] 76 static absl::Status ProcessDescriptorObus( 77 bool is_exhaustive_and_exact, ReadBitBuffer& read_bit_buffer, 78 IASequenceHeaderObu& output_sequence_header, 79 absl::flat_hash_map<DecodedUleb128, CodecConfigObu>& 80 output_codec_config_obus, 81 absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& 82 output_audio_elements_with_data, 83 std::list<MixPresentationObu>& output_mix_presentation_obus, 84 bool& insufficient_data); 85 86 // TODO(b/330732117): Remove this function and use the non-static version. 87 /*!\brief Processes one Temporal Unit OBU of an IA Sequence. 88 * 89 * This function should only be called after successfully calling 90 * ProcessDescriptorObus. Output audio frames and parameter blocks are 91 * ordered by timestamps first and then by IDs. 92 * 93 * \param audio_elements_with_data Map containing the audio elements that 94 * were present in the descriptor OBUs, keyed by audio element ID. 95 * \param codec_config_obus Map containing the codec configs that were 96 * present in the descriptor OBUs, keyed by codec config ID. 97 * \param substream_id_to_audio_element Mapping from substream IDs to the 98 * audio elements that they belong to. 99 * \param param_definition_variants Map containing the param definitions that 100 * were present in the descriptor OBUs, keyed by parameter ID. 101 * \param parameters_manager Manager of parameters. 102 * \param read_bit_buffer Buffer reader that reads the IAMF bitstream. 103 * \param global_timing_module Module to keep track of the timing of audio 104 * frames and parameters. 105 * \param output_audio_frame_with_data Output Audio Frame with the requisite 106 * data. 107 * \param output_parameter_block_with_data Output parameter Block with the 108 * requisite data. 109 * \param output_temporal_delimiter Output temporal deilimiter OBU. 110 * \param continue_processing Whether the processing should be continued. 111 * \return `absl::OkStatus()` if the process is successful. A specific status 112 * on failure. 113 */ 114 static absl::Status ProcessTemporalUnitObu( 115 const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& 116 audio_elements_with_data, 117 const absl::flat_hash_map<DecodedUleb128, CodecConfigObu>& 118 codec_config_obus, 119 const absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*>& 120 substream_id_to_audio_element, 121 const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>& 122 param_definition_variants, 123 ParametersManager& parameters_manager, ReadBitBuffer& read_bit_buffer, 124 GlobalTimingModule& global_timing_module, 125 std::optional<AudioFrameWithData>& output_audio_frame_with_data, 126 std::optional<ParameterBlockWithData>& output_parameter_block_with_data, 127 std::optional<TemporalDelimiterObu>& output_temporal_delimiter, 128 bool& continue_processing); 129 130 /*!\brief Creates the OBU processor. 131 * 132 * Creation succeeds only if the descriptor OBUs are successfully processed. 133 * 134 * \param is_exhaustive_and_exact Whether the bitstream provided is meant to 135 * include all descriptor OBUs and no other data. This should only be 136 * set to true if the user knows the exact boundaries of their set of 137 * descriptor OBUs. 138 * \param read_bit_buffer Pointer to the read bit buffer that reads the IAMF 139 * bitstream. 140 * \param output_insufficient_data True iff the bitstream provided is 141 * insufficient to process all descriptor OBUs and there is no other 142 * error. 143 * \return std::unique_ptr<ObuProcessor> on success. `nullptr` on failure. 144 */ 145 static std::unique_ptr<ObuProcessor> Create(bool is_exhaustive_and_exact, 146 ReadBitBuffer* read_bit_buffer, 147 bool& output_insufficient_data); 148 149 /*!\brief Move constructor. */ 150 ObuProcessor(ObuProcessor&& obu_processor) = delete; 151 152 /*!\brief Creates the OBU processor for rendering. 153 * 154 * Creation succeeds only if the descriptor OBUs are successfully processed 155 * and all rendering modules are successfully initialized. 156 * 157 * \param desired_layout Specifies the desired layout that will be used to 158 * render the audio, if available in the mix presentations. If not 159 * available, the first layout in the first mix presentation will be 160 * used. 161 * \param sample_processor_factory Factory to create post processors. 162 * \param is_exhaustive_and_exact Whether the bitstream provided is meant to 163 * include all descriptor OBUs and no other data. This should only be 164 * set to true if the user knows the exact boundaries of their set of 165 * descriptor OBUs. 166 * \param read_bit_buffer Pointer to the read bit buffer that reads the IAMF 167 * bitstream. 168 * \param output_layout The layout that will be used to render the audio. This 169 * is the same as `desired_layout` if it is available in the mix 170 * presentations, otherwise a default layout is used. 171 * \param output_insufficient_data True iff the bitstream provided is 172 * insufficient to process all descriptor OBUs and there is no other 173 * error. 174 * \return Pointer to an ObuProcessor on success. `nullptr` on failure. 175 */ 176 static std::unique_ptr<ObuProcessor> CreateForRendering( 177 const Layout& desired_layout, 178 const RenderingMixPresentationFinalizer::SampleProcessorFactory& 179 sample_processor_factory, 180 bool is_exhaustive_and_exact, ReadBitBuffer* read_bit_buffer, 181 Layout& output_layout, bool& output_insufficient_data); 182 183 /*!\brief Gets the sample rate of the output audio. 184 * 185 * \return Sample rate of the output audio, or a specific error code on 186 * failure. 187 */ 188 absl::StatusOr<uint32_t> GetOutputSampleRate() const; 189 190 /*!\brief Gets the frame size of the output audio. 191 * 192 * Useful to determine the maximum number of samples per 193 * `RenderTemporalUnitAndMeasureLoudness` call. 194 * 195 * \return Number of samples in per frame of the output audio, or a specific 196 * specific error code on failure. 197 */ 198 absl::StatusOr<uint32_t> GetOutputFrameSize() const; 199 200 // TODO(b/381072155): Consider removing this one in favor of 201 // `ProcessTemporalUnit()`, which outputs all OBUs 202 // belonging the whole temporal unit. 203 /*!\brief Processes one Temporal Unit OBU from the stored IA Sequence. 204 * 205 * `Initialize()` must be called first to ready the input bitstream. 206 * 207 * \param output_audio_frame_with_data Output Audio Frame with the requisite 208 * data. 209 * \param output_parameter_block_with_data Output Parameter Block with the 210 * requisite data. 211 * \param output_temporal_delimiter Output temporal deilimiter OBU. 212 * \param continue_processing Whether the processing should be continued. 213 * \return `absl::OkStatus()` if the process is successful. A specific status 214 * on failure. 215 */ 216 absl::Status ProcessTemporalUnitObu( 217 std::optional<AudioFrameWithData>& output_audio_frame_with_data, 218 std::optional<ParameterBlockWithData>& output_parameter_block_with_data, 219 std::optional<TemporalDelimiterObu>& output_temporal_delimiter, 220 bool& continue_processing); 221 222 struct OutputTemporalUnit { 223 std::list<AudioFrameWithData> output_audio_frames; 224 std::list<ParameterBlockWithData> output_parameter_blocks; 225 InternalTimestamp output_timestamp; 226 }; 227 228 // TODO(b/379819959): Also handle Temporal Delimiter OBUs. 229 /*!\brief Processes all OBUs from a Temporal Unit from the stored IA Sequence. 230 * 231 * \param eos_is_end_of_sequence Whether reaching the end of the stream 232 * should be considered as the end of the sequence, and therefore the 233 * end of the temporal unit. 234 * \param output_temporal_unit Contains the data from the temporal unit that 235 * is processed. 236 * \param continue_processing Whether the processing should be continued. 237 * \return `absl::OkStatus()` if the process is successful. A specific status 238 * on failure. 239 */ 240 absl::Status ProcessTemporalUnit( 241 bool eos_is_end_of_sequence, 242 std::optional<OutputTemporalUnit>& output_temporal_unit, 243 bool& continue_processing); 244 245 /*!\brief Renders a temporal unit and measures loudness. 246 * 247 * `InitializeForRendering()` must be called before calling this. 248 * 249 * \param timestamp Timestamp of this temporal unit. Used to verify that 250 * the input OBUs actually belong to the same temporal unit. 251 * \param audio_frames_with_data Audio Frames with the requisite data. 252 * \param parameter_blocks_with_data Parameter Blocks with the requisite data. 253 * \param output_rendered_pcm_samples Output rendered PCM samples. These 254 * should be used immediately after this function is called; they will 255 * be invalidated after the next call to 256 * `RenderTemporalUnitAndMeasureLoudness()`, as well as after the 257 * `ObuProcessor` is destroyed. 258 * \return `absl::OkStatus()` if the process is successful. A specific status 259 * on failure. 260 */ 261 absl::Status RenderTemporalUnitAndMeasureLoudness( 262 InternalTimestamp timestamp, 263 const std::list<AudioFrameWithData>& audio_frames, 264 const std::list<ParameterBlockWithData>& parameter_blocks, 265 absl::Span<const std::vector<int32_t>>& output_rendered_pcm_samples); 266 267 IASequenceHeaderObu ia_sequence_header_; 268 absl::flat_hash_map<DecodedUleb128, CodecConfigObu> codec_config_obus_ = {}; 269 absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements_ = 270 {}; 271 std::list<MixPresentationObu> mix_presentations_ = {}; 272 273 private: 274 /*!\brief Private constructor used only by Create() and CreateForRendering(). 275 * 276 * \param read_bit_buffer Pointer to the read bit buffer that reads the IAMF 277 * bitstream. 278 * \return ObuProcessor instance. 279 */ ObuProcessor(ReadBitBuffer * buffer)280 explicit ObuProcessor(ReadBitBuffer* buffer) : read_bit_buffer_(buffer) {} 281 282 /*!\brief Performs internal initialization of the OBU processor. 283 * 284 * Only used by Create() and CreateForRendering(). 285 * 286 * \param is_exhaustive_and_exact Whether the bitstream provided is meant to 287 * include all descriptor OBUs and no other data. This should only be 288 * set to true if the user knows the exact boundaries of their set of 289 * descriptor OBUs. 290 \param output_insufficient_data True iff the bitstream provided is 291 * insufficient to process all descriptor OBUs and there is no other 292 * error. 293 * \return `absl::OkStatus()` if initialization is successful. A specific 294 * status on failure. 295 */ 296 absl::Status InitializeInternal(bool is_exhaustive_and_exact, 297 bool& output_insufficient_data); 298 299 /*!\brief Initializes the OBU processor for rendering. 300 * 301 * Must be called after `Initialize()` is called. 302 * 303 * \param desired_layout Specifies the layout that will be used to render the 304 * audio, if available. 305 * \param sample_processor_factory Factory to create post processors. 306 * \param output_layout The layout that will be used to render the audio. This 307 * is the same as `desired_layout` if it is available, otherwise a 308 * default layout is used. 309 * \return `absl::OkStatus()` if the process is successful. A specific status 310 * on failure. 311 */ 312 absl::Status InitializeForRendering( 313 const Layout& desired_layout, 314 const RenderingMixPresentationFinalizer::SampleProcessorFactory& 315 sample_processor_factory, 316 Layout& output_layout); 317 318 struct DecodingLayoutInfo { 319 DecodedUleb128 mix_presentation_id; 320 int sub_mix_index; 321 int layout_index; 322 }; 323 324 struct TemporalUnitData { 325 std::list<ParameterBlockWithData> parameter_blocks; 326 std::list<AudioFrameWithData> audio_frames; 327 328 std::optional<TemporalDelimiterObu> temporal_delimiter; 329 std::optional<int32_t> timestamp; 330 EmptyTemporalUnitData331 bool Empty() const { 332 return parameter_blocks.empty() && audio_frames.empty(); 333 } 334 ClearTemporalUnitData335 void Clear() { 336 audio_frames.clear(); 337 parameter_blocks.clear(); 338 temporal_delimiter.reset(); 339 timestamp.reset(); 340 } 341 342 template <class T> AddDataToCorrectTemporalUnitTemporalUnitData343 static void AddDataToCorrectTemporalUnit( 344 TemporalUnitData& current_temporal_unit, 345 TemporalUnitData& next_temporal_unit, T&& obu_with_data) { 346 const auto new_timestamp = obu_with_data.start_timestamp; 347 if (!current_temporal_unit.timestamp.has_value()) { 348 current_temporal_unit.timestamp = new_timestamp; 349 } 350 if (*current_temporal_unit.timestamp == new_timestamp) { 351 current_temporal_unit.GetList<T>().push_back( 352 std::forward<T>(obu_with_data)); 353 } else { 354 next_temporal_unit.GetList<T>().push_back( 355 std::forward<T>(obu_with_data)); 356 next_temporal_unit.timestamp = new_timestamp; 357 } 358 } 359 360 private: 361 template <class T> GetListTemporalUnitData362 std::list<T>& GetList() { 363 if constexpr (std::is_same_v<T, ParameterBlockWithData>) { 364 return parameter_blocks; 365 } else if constexpr (std::is_same_v<T, AudioFrameWithData>) { 366 return audio_frames; 367 } 368 }; 369 }; 370 371 std::optional<uint32_t> output_sample_rate_; 372 std::optional<uint32_t> output_frame_size_; 373 374 absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant> 375 param_definition_variants_; 376 absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*> 377 substream_id_to_audio_element_; 378 std::unique_ptr<GlobalTimingModule> global_timing_module_; 379 std::optional<ParametersManager> parameters_manager_; 380 ReadBitBuffer* read_bit_buffer_; 381 382 // Contains target layout information for rendering. 383 DecodingLayoutInfo decoding_layout_info_; 384 385 // Cached data when processing temporal units. 386 TemporalUnitData current_temporal_unit_; 387 TemporalUnitData next_temporal_unit_; 388 std::list<DecodedAudioFrame> decoded_frames_for_temporal_unit_; 389 390 // Modules used for rendering. 391 std::optional<AudioFrameDecoder> audio_frame_decoder_; 392 std::optional<DemixingModule> demixing_module_; 393 std::optional<RenderingMixPresentationFinalizer> mix_presentation_finalizer_; 394 }; 395 } // namespace iamf_tools 396 #endif // CLI_OBU_PROCESSOR_H_ 397