/*
 * Copyright (c) 2024, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 3-Clause Clear License
 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
 * License was not distributed with this source code in the LICENSE file, you
 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
 * Alliance for Open Media Patent License 1.0 was not distributed with this
 * source code in the PATENTS file, you can obtain it at
 * www.aomedia.org/license/patent.
 */
#include "iamf/cli/obu_processor.h"

// NOTE(review): the standard header names were missing from the text this was
// restored from; the list below is what this file uses (`size_t`, `int64_t`,
// `std::list`, `std::unique_ptr`, `std::optional`, `std::string`, `std::move`,
// `std::vector`).
#include <cstddef>
#include <cstdint>
#include <list>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/audio_frame_with_data.h"
#include "iamf/cli/cli_util.h"
#include "iamf/cli/demixing_module.h"
#include "iamf/cli/global_timing_module.h"
#include "iamf/cli/obu_with_data_generator.h"
#include "iamf/cli/parameter_block_with_data.h"
#include "iamf/cli/parameters_manager.h"
#include "iamf/cli/profile_filter.h"
#include "iamf/cli/renderer_factory.h"
#include "iamf/cli/rendering_mix_presentation_finalizer.h"
#include "iamf/cli/sample_processor_base.h"
#include "iamf/common/read_bit_buffer.h"
#include "iamf/common/utils/macros.h"
#include "iamf/common/utils/validation_utils.h"
#include "iamf/obu/audio_element.h"
#include "iamf/obu/audio_frame.h"
#include "iamf/obu/codec_config.h"
#include "iamf/obu/ia_sequence_header.h"
#include "iamf/obu/mix_presentation.h"
#include "iamf/obu/obu_header.h"
#include "iamf/obu/param_definition_variant.h"
#include "iamf/obu/parameter_block.h"
#include "iamf/obu/temporal_delimiter.h"
#include "iamf/obu/types.h"

// NOTE(review): template arguments throughout this file were restored from how
// each object is used (keys inserted, values dereferenced, etc.). Signatures of
// `ObuProcessor` members must match `obu_processor.h` - confirm against it.

namespace iamf_tools {

namespace {

// Gets a CodecConfigObu from `read_bit_buffer` and stores it into
// `codec_config_obu_map`, using the `codec_config_id` as the unique key.
//
// Returns the parse error if the OBU cannot be created. `insert` is used, so
// an entry that already exists under the same ID is left untouched.
absl::Status GetAndStoreCodecConfigObu(
    const ObuHeader& header, int64_t payload_size,
    absl::flat_hash_map<DecodedUleb128, CodecConfigObu>& codec_config_obu_map,
    ReadBitBuffer& read_bit_buffer) {
  absl::StatusOr<CodecConfigObu> codec_config_obu =
      CodecConfigObu::CreateFromBuffer(header, payload_size, read_bit_buffer);
  if (!codec_config_obu.ok()) {
    return codec_config_obu.status();
  }
  codec_config_obu->PrintObu();
  codec_config_obu_map.insert(
      {codec_config_obu->GetCodecConfigId(), *std::move(codec_config_obu)});
  return absl::OkStatus();
}

// Gets an AudioElementObu from `read_bit_buffer` and stores it into
// `audio_element_obu_map`, keyed by its audio element ID.
//
// Returns the parse error if the OBU cannot be created. `insert` is used, so
// an entry that already exists under the same ID is left untouched.
absl::Status GetAndStoreAudioElementObu(
    const ObuHeader& header, int64_t payload_size,
    absl::flat_hash_map<DecodedUleb128, AudioElementObu>& audio_element_obu_map,
    ReadBitBuffer& read_bit_buffer) {
  absl::StatusOr<AudioElementObu> audio_element_obu =
      AudioElementObu::CreateFromBuffer(header, payload_size, read_bit_buffer);
  if (!audio_element_obu.ok()) {
    return audio_element_obu.status();
  }
  audio_element_obu->PrintObu();
  audio_element_obu_map.insert(
      {audio_element_obu->GetAudioElementId(), *std::move(audio_element_obu)});
  return absl::OkStatus();
}

// Gets a MixPresentationObu from `read_bit_buffer` and appends it to
// `mix_presentation_obus`.
//
// Returns the parse error if the OBU cannot be created.
absl::Status GetAndStoreMixPresentationObu(
    const ObuHeader& header, int64_t payload_size,
    std::list<MixPresentationObu>& mix_presentation_obus,
    ReadBitBuffer& read_bit_buffer) {
  absl::StatusOr<MixPresentationObu> mix_presentation_obu =
      MixPresentationObu::CreateFromBuffer(header, payload_size,
                                           read_bit_buffer);
  if (!mix_presentation_obu.ok()) {
    return mix_presentation_obu.status();
  }
  LOG(INFO) << "Mix Presentation OBU successfully parsed.";
  mix_presentation_obu->PrintObu();
  mix_presentation_obus.push_back(*std::move(mix_presentation_obu));
  return absl::OkStatus();
}

// Updates the demixing and recon gain states of every audio element once the
// global timing module reports a global audio frame timestamp.
//
// Does nothing (and returns OK) while no global timestamp is available.
absl::Status UpdateParameterStatesIfNeeded(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements_with_data,
    const GlobalTimingModule& global_timing_module,
    ParametersManager& parameters_manager) {
  std::optional<InternalTimestamp> global_timestamp;
  RETURN_IF_NOT_OK(
      global_timing_module.GetGlobalAudioFrameTimestamp(global_timestamp));
  // Not ready to update the states yet.
  if (!global_timestamp.has_value()) {
    return absl::OkStatus();
  }

  // The audio frames for all audio elements are finished; update the
  // parameters manager.
  for (const auto& [audio_element_id, unused_element] :
       audio_elements_with_data) {
    RETURN_IF_NOT_OK(parameters_manager.UpdateDemixingState(audio_element_id,
                                                            *global_timestamp));
    RETURN_IF_NOT_OK(parameters_manager.UpdateReconGainState(
        audio_element_id, *global_timestamp));
  }
  return absl::OkStatus();
}

// Parses an audio frame OBU from `read_bit_buffer`, pairs it with the audio
// element that owns its substream, and writes the result to
// `output_audio_frame_with_data`.
//
// The output is reset first, so it only holds a value when the frame was
// generated. Returns an error if parsing fails, if no audio element owns the
// substream ID, or if generating the frame or updating the parameter states
// fails.
absl::Status GetAndStoreAudioFrameWithData(
    const ObuHeader& header, const int64_t payload_size,
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements_with_data,
    const absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*>&
        substream_id_to_audio_element,
    ReadBitBuffer& read_bit_buffer, GlobalTimingModule& global_timing_module,
    ParametersManager& parameters_manager,
    std::optional<AudioFrameWithData>& output_audio_frame_with_data) {
  output_audio_frame_with_data.reset();
  auto audio_frame_obu =
      AudioFrameObu::CreateFromBuffer(header, payload_size, read_bit_buffer);
  if (!audio_frame_obu.ok()) {
    return audio_frame_obu.status();
  }

  const auto substream_id = audio_frame_obu->GetSubstreamId();
  const auto audio_element_iter =
      substream_id_to_audio_element.find(substream_id);
  if (audio_element_iter == substream_id_to_audio_element.end()) {
    return absl::InvalidArgumentError(absl::StrCat(
        "No audio element found having substream ID: ", substream_id));
  }
  const auto& audio_element_with_data = *audio_element_iter->second;

  auto audio_frame_with_data = ObuWithDataGenerator::GenerateAudioFrameWithData(
      audio_element_with_data, *audio_frame_obu, global_timing_module,
      parameters_manager);
  if (!audio_frame_with_data.ok()) {
    return audio_frame_with_data.status();
  }
  // Move rather than copy; the local `StatusOr` is not used again.
  output_audio_frame_with_data = *std::move(audio_frame_with_data);

  RETURN_IF_NOT_OK(UpdateParameterStatesIfNeeded(
      audio_elements_with_data, global_timing_module, parameters_manager));
  return absl::OkStatus();
}

// Parses a parameter block OBU from `read_bit_buffer` and writes the generated
// `ParameterBlockWithData` to `output_parameter_block_with_data`.
//
// The output is reset first, so it only holds a value on success. Returns an
// error if parsing fails, if the global timing module has no global timestamp
// yet, or if generating the parameter block fails.
absl::Status GetAndStoreParameterBlockWithData(
    const ObuHeader& header, const int64_t payload_size,
    const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>&
        param_definition_variants,
    ReadBitBuffer& read_bit_buffer, GlobalTimingModule& global_timing_module,
    std::optional<ParameterBlockWithData>& output_parameter_block_with_data) {
  output_parameter_block_with_data.reset();
  auto parameter_block_obu = ParameterBlockObu::CreateFromBuffer(
      header, payload_size, param_definition_variants, read_bit_buffer);
  if (!parameter_block_obu.ok()) {
    return parameter_block_obu.status();
  }

  std::optional<InternalTimestamp> global_timestamp;
  RETURN_IF_NOT_OK(
      global_timing_module.GetGlobalAudioFrameTimestamp(global_timestamp));
  if (!global_timestamp.has_value()) {
    return absl::InvalidArgumentError(
        "Global timestamp has no value while generating a parameter "
        "block");
  }

  // Process the newly parsed parameter block OBU.
  auto parameter_block_with_data =
      ObuWithDataGenerator::GenerateParameterBlockWithData(
          *global_timestamp, global_timing_module,
          std::move(*parameter_block_obu));
  if (!parameter_block_with_data.ok()) {
    return parameter_block_with_data.status();
  }
  output_parameter_block_with_data = std::move(*parameter_block_with_data);
  return absl::OkStatus();
}

// Returns a list of pointers to the supported mix presentations. Empty if none
// are supported.
//
// The returned pointers point into `mix_presentation_obus`, so that list must
// outlive the result.
std::list<MixPresentationObu*> GetSupportedMixPresentations(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements,
    std::list<MixPresentationObu>& mix_presentation_obus) {
  // TODO(b/377554944): Support `ProfileVersion::kIamfBaseEnhancedProfile`.
  // Only permit certain profiles to be used.
  const absl::flat_hash_set<ProfileVersion> kSupportedProfiles = {
      ProfileVersion::kIamfSimpleProfile, ProfileVersion::kIamfBaseProfile};

  std::list<MixPresentationObu*> supported_mix_presentations;
  std::string cumulative_error_message;
  for (auto& mix_presentation_obu : mix_presentation_obus) {
    // The filter takes the profile set by non-const reference, so hand it a
    // fresh copy for every mix presentation.
    auto profiles = kSupportedProfiles;
    const auto status = ProfileFilter::FilterProfilesForMixPresentation(
        audio_elements, mix_presentation_obu, profiles);
    if (status.ok()) {
      supported_mix_presentations.push_back(&mix_presentation_obu);
    }
    absl::StrAppend(&cumulative_error_message, status.message(), "\n");
  }
  LOG(INFO) << "Filtered mix presentations: " << cumulative_error_message;
  return supported_mix_presentations;
}

// Searches for the desired layout in the supported mix presentations. If found,
// the output_playback_layout is the same as the desired_layout. Otherwise, we
// default to the first layout in the first supported mix presentation.
//
// Returns the mix presentation that owns the chosen layout, or an error if the
// list is empty or the fallback mix presentation has no sub-mix or no layout.
absl::StatusOr<MixPresentationObu*> GetPlaybackLayoutAndMixPresentation(
    const std::list<MixPresentationObu*>& supported_mix_presentations,
    const Layout& desired_layout, Layout& output_playback_layout) {
  for (const auto& mix_presentation : supported_mix_presentations) {
    for (const auto& sub_mix : mix_presentation->sub_mixes_) {
      for (const auto& layout : sub_mix.layouts) {
        if (layout.loudness_layout == desired_layout) {
          output_playback_layout = layout.loudness_layout;
          return mix_presentation;
        }
      }
    }
  }

  // If we get here, we didn't find the desired layout in any of the supported
  // mix presentations. We default to the first layout in the first mix
  // presentation.
  if (supported_mix_presentations.empty()) {
    // Calling `front()` on an empty list is undefined behavior.
    return absl::InvalidArgumentError(
        "No supported mix presentations found.");
  }
  MixPresentationObu* output_mix_presentation =
      supported_mix_presentations.front();
  if (output_mix_presentation->sub_mixes_.empty()) {
    return absl::InvalidArgumentError(
        "No submixes found in the first mix presentation.");
  }
  if (output_mix_presentation->sub_mixes_.front().layouts.empty()) {
    return absl::InvalidArgumentError(
        "No layouts found in the first submix of the first mix presentation.");
  }
  output_playback_layout = output_mix_presentation->sub_mixes_.front()
                               .layouts.front()
                               .loudness_layout;
  return output_mix_presentation;
}

// Resets the buffer to `start_position` and sets the `insufficient_data`
// flag to `true`. Clears the output maps.
//
// Returns the seek error if seeking fails, otherwise always returns a
// `ResourceExhausted` error so callers can `return` the result directly.
absl::Status InsufficientDataReset(
    ReadBitBuffer& read_bit_buffer, const int64_t start_position,
    bool& insufficient_data,
    absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
        output_codec_config_obus,
    absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        output_audio_elements_with_data,
    std::list<MixPresentationObu>& output_mix_presentation_obus) {
  LOG(INFO) << "Insufficient data to process all descriptor OBUs.";
  insufficient_data = true;
  output_codec_config_obus.clear();
  output_audio_elements_with_data.clear();
  output_mix_presentation_obus.clear();
  RETURN_IF_NOT_OK(read_bit_buffer.Seek(start_position));
  LOG(INFO) << "Reset the buffer to the beginning.";
  return absl::ResourceExhaustedError(
      "Insufficient data to process all descriptor OBUs. Please provide "
      "more data and try again.");
}

// Writes the output sample rate and frame size of the sole codec config OBU.
//
// When there is not exactly one codec config OBU, a warning is logged and both
// outputs are left unchanged.
//
// NOTE(review): `uint32_t` is assumed for both values - confirm against the
// return types of `CodecConfigObu::GetOutputSampleRate()` and
// `CodecConfigObu::GetNumSamplesPerFrame()`.
void GetSampleRateAndFrameSize(
    const absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
        output_codec_config_obus,
    std::optional<uint32_t>& output_sample_rate,
    std::optional<uint32_t>& output_frame_size) {
  if (output_codec_config_obus.size() != 1) {
    LOG(WARNING) << "Expected exactly one codec config OBUs, but found "
                 << output_codec_config_obus.size();
    return;
  }
  const auto& first_codec_config_obu = output_codec_config_obus.begin()->second;
  output_sample_rate = first_codec_config_obu.GetOutputSampleRate();
  output_frame_size = first_codec_config_obu.GetNumSamplesPerFrame();
}

}  // namespace

// Processes the descriptor OBUs from the stored buffer and builds the state
// needed to process temporal units: parameter definitions, the substream ID to
// audio element mapping, the global timing module and the parameters manager.
//
// `output_insufficient_data` is forwarded to `ProcessDescriptorObus()`.
absl::Status ObuProcessor::InitializeInternal(bool is_exhaustive_and_exact,
                                              bool& output_insufficient_data) {
  // Process the descriptor OBUs.
  LOG(INFO) << "Starting Descriptor OBU processing";
  RETURN_IF_NOT_OK(ObuProcessor::ProcessDescriptorObus(
      is_exhaustive_and_exact, *read_bit_buffer_, ia_sequence_header_,
      codec_config_obus_, audio_elements_, mix_presentations_,
      output_insufficient_data));
  LOG(INFO) << "Processed Descriptor OBUs";
  RETURN_IF_NOT_OK(CollectAndValidateParamDefinitions(
      audio_elements_, mix_presentations_, param_definition_variants_));
  GetSampleRateAndFrameSize(codec_config_obus_, output_sample_rate_,
                            output_frame_size_);

  // Mapping from substream IDs to pointers to audio element with data.
  for (const auto& [audio_element_id, audio_element_with_data] :
       audio_elements_) {
    for (const auto& [substream_id, unused_labels] :
         audio_element_with_data.substream_id_to_labels) {
      auto [unused_iter, inserted] = substream_id_to_audio_element_.insert(
          {substream_id, &audio_element_with_data});
      if (!inserted) {
        return absl::InvalidArgumentError(absl::StrCat(
            "Duplicated substream ID: ", substream_id,
            " associated with audio element ID: ", audio_element_id));
      }
    }
  }

  global_timing_module_ =
      GlobalTimingModule::Create(audio_elements_, param_definition_variants_);
  if (global_timing_module_ == nullptr) {
    return absl::InvalidArgumentError(
        "Failed to initialize the global timing module");
  }
  parameters_manager_.emplace(audio_elements_);
  RETURN_IF_NOT_OK(parameters_manager_->Initialize());
  return absl::OkStatus();
}

// Reads descriptor OBUs from `read_bit_buffer` until a temporal unit OBU, a
// second non-redundant IA Sequence Header, an unhandled OBU type, or (when
// `is_exhaustive_and_exact`) the end of the buffer is reached.
//
// On success the outputs hold the parsed descriptors and the buffer is
// positioned just after the last descriptor OBU consumed. When more data is
// needed, the outputs are cleared, the buffer is rewound to where it started,
// `output_insufficient_data` is set, and a `ResourceExhausted` error is
// returned.
absl::Status ObuProcessor::ProcessDescriptorObus(
    bool is_exhaustive_and_exact, ReadBitBuffer& read_bit_buffer,
    IASequenceHeaderObu& output_sequence_header,
    absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
        output_codec_config_obus,
    absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        output_audio_elements_with_data,
    std::list<MixPresentationObu>& output_mix_presentation_obus,
    bool& output_insufficient_data) {
  // `output_insufficient_data` indicates a specific error condition and so is
  // true iff we've received valid data but need more of it.
  output_insufficient_data = false;
  auto audio_element_obu_map =
      absl::flat_hash_map<DecodedUleb128, AudioElementObu>();
  const int64_t global_position_before_all_obus = read_bit_buffer.Tell();
  bool processed_ia_header = false;
  bool continue_processing = true;
  while (continue_processing) {
    auto header_metadata =
        ObuHeader::PeekObuTypeAndTotalObuSize(read_bit_buffer);
    if (!header_metadata.ok()) {
      if (header_metadata.status().code() ==
          absl::StatusCode::kResourceExhausted) {
        // Can't read header because there is not enough data.
        return InsufficientDataReset(
            read_bit_buffer, global_position_before_all_obus,
            output_insufficient_data, output_codec_config_obus,
            output_audio_elements_with_data, output_mix_presentation_obus);
      } else {
        // Some other error occurred, propagate it.
        return header_metadata.status();
      }
    }

    // Now, we know we were at least able to read obu_type and the total size of
    // the obu.
    if (ObuHeader::IsTemporalUnitObuType(header_metadata->obu_type)) {
      if (is_exhaustive_and_exact) {
        auto error_status = absl::InvalidArgumentError(
            "Descriptor OBUs must not contain a temporal unit OBU when "
            "is_exhaustive_and_exact is true.");
        LOG(ERROR) << error_status;
        RETURN_IF_NOT_OK(read_bit_buffer.Seek(global_position_before_all_obus));
        return error_status;
      }
      // Since it's a temporal unit, we know we are done reading descriptor
      // OBUs. Since we've only peeked on this iteration of the loop, no need to
      // rewind the buffer.
      // Check that we've processed an IA header to ensure it's a valid IA
      // Sequence.
      if (!processed_ia_header) {
        return absl::InvalidArgumentError(
            "An IA Sequence and/or descriptor OBUs must always start with an "
            "IA Header.");
      }
      // Break out of the while loop since we've reached the end of the
      // descriptor OBUs; should not seek back to the beginning of the buffer
      // since this is a successful termination.
      break;
    }

    // Now, we know that this is not a temporal unit OBU.
    if (!read_bit_buffer.CanReadBytes(header_metadata->total_obu_size)) {
      // This is a descriptor OBU for which we don't have enough data.
      return InsufficientDataReset(
          read_bit_buffer, global_position_before_all_obus,
          output_insufficient_data, output_codec_config_obus,
          output_audio_elements_with_data, output_mix_presentation_obus);
    }
    // Now we know we can read the entire obu.
    const int64_t position_before_header = read_bit_buffer.Tell();
    ObuHeader header;
    // Note that `payload_size` is different from the total obu size calculated
    // by `PeekObuTypeAndTotalObuSize`.
    int64_t payload_size;
    RETURN_IF_NOT_OK(header.ReadAndValidate(read_bit_buffer, payload_size));
    switch (header.obu_type) {
      case kObuIaSequenceHeader: {
        if (processed_ia_header && !header.obu_redundant_copy) {
          LOG(WARNING) << "Detected an IA Sequence without temporal units.";
          continue_processing = false;
          break;
        }
        auto ia_sequence_header_obu = IASequenceHeaderObu::CreateFromBuffer(
            header, payload_size, read_bit_buffer);
        if (!ia_sequence_header_obu.ok()) {
          return ia_sequence_header_obu.status();
        }
        output_sequence_header = *std::move(ia_sequence_header_obu);
        output_sequence_header.PrintObu();
        processed_ia_header = true;
        break;
      }
      case kObuIaCodecConfig: {
        RETURN_IF_NOT_OK(GetAndStoreCodecConfigObu(
            header, payload_size, output_codec_config_obus, read_bit_buffer));
        break;
      }
      case kObuIaAudioElement: {
        RETURN_IF_NOT_OK(GetAndStoreAudioElementObu(
            header, payload_size, audio_element_obu_map, read_bit_buffer));
        break;
      }
      case kObuIaMixPresentation: {
        RETURN_IF_NOT_OK(GetAndStoreMixPresentationObu(
            header, payload_size, output_mix_presentation_obus,
            read_bit_buffer));
        break;
      }
      case kObuIaReserved24:
      case kObuIaReserved25:
      case kObuIaReserved26:
      case kObuIaReserved27:
      case kObuIaReserved28:
      case kObuIaReserved29:
      case kObuIaReserved30: {
        // Reserved OBUs may occur in the sequence of Descriptor OBUs. For
        // now, ignore any reserved OBUs by skipping over their bits in the
        // buffer.
        continue_processing = true;
        LOG(INFO) << "Detected a reserved OBU while parsing Descriptor OBUs. "
                  << "Safely ignoring it.";
        std::vector<uint8_t> buffer_to_discard(payload_size);
        RETURN_IF_NOT_OK(
            read_bit_buffer.ReadUint8Span(absl::MakeSpan(buffer_to_discard)));
        break;
      }
      default:
        // TODO(b/387550488): Handle reserved OBUs.
        continue_processing = false;
        break;
    }

    if (!continue_processing) {
      // Rewind the position to before the last header was read.
      LOG(INFO) << "position_before_header: " << position_before_header;
      RETURN_IF_NOT_OK(read_bit_buffer.Seek(position_before_header));
    }
    if (!processed_ia_header) {
      return absl::InvalidArgumentError(
          "An IA Sequence and/or descriptor OBUs must always start with an IA "
          "Header.");
    }
    if (is_exhaustive_and_exact && !read_bit_buffer.IsDataAvailable()) {
      // We've reached the end of the bitstream and we've processed all
      // descriptor OBUs.
      break;
    }
  }

  if (!audio_element_obu_map.empty()) {
    auto audio_elements_with_data =
        ObuWithDataGenerator::GenerateAudioElementsWithData(
            output_codec_config_obus, audio_element_obu_map);
    if (!audio_elements_with_data.ok()) {
      return audio_elements_with_data.status();
    }
    output_audio_elements_with_data = std::move(*audio_elements_with_data);
  }
  return absl::OkStatus();
}

// Processes at most one OBU of a temporal unit from `read_bit_buffer`.
//
// All three outputs are reset on entry; at most one of them holds a value on
// return, depending on the OBU type read. `continue_processing` is set to
// false when there is not enough data for a complete OBU, or when a
// non-redundant IA Sequence Header is found; in the latter case the buffer is
// rewound to just before that header. Reserved OBUs and redundant descriptor
// OBUs are consumed and discarded.
absl::Status ObuProcessor::ProcessTemporalUnitObu(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements_with_data,
    const absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
        codec_config_obus,
    const absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*>&
        substream_id_to_audio_element,
    const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>&
        param_definition_variants,
    ParametersManager& parameters_manager, ReadBitBuffer& read_bit_buffer,
    GlobalTimingModule& global_timing_module,
    std::optional<AudioFrameWithData>& output_audio_frame_with_data,
    std::optional<ParameterBlockWithData>& output_parameter_block_with_data,
    std::optional<TemporalDelimiterObu>& output_temporal_delimiter,
    bool& continue_processing) {
  continue_processing = true;
  output_audio_frame_with_data.reset();
  output_parameter_block_with_data.reset();
  output_temporal_delimiter.reset();

  auto header_metadata = ObuHeader::PeekObuTypeAndTotalObuSize(read_bit_buffer);
  if (!header_metadata.ok()) {
    if (header_metadata.status().code() ==
        absl::StatusCode::kResourceExhausted) {
      // Can't read header because there is not enough data. This is not an
      // error, but we're done processing for now.
      continue_processing = false;
      return absl::OkStatus();
    } else {
      // Some other error occurred, propagate it.
      return header_metadata.status();
    }
  }

  if (!read_bit_buffer.CanReadBytes(header_metadata->total_obu_size)) {
    // This is a temporal unit OBU for which we don't have enough data. This is
    // not an error, but we're done processing for now.
    continue_processing = false;
    return absl::OkStatus();
  }

  const int64_t position_before_header = read_bit_buffer.Tell();

  // Read in the header and determines the size of the payload in bytes.
  ObuHeader header;
  int64_t payload_size;
  RETURN_IF_NOT_OK(header.ReadAndValidate(read_bit_buffer, payload_size));

  // Typically we should expect {`kObuIaAudioFrameX`,`kObuIaParameterBlock`,
  // `kObuIaTemporalDelimiter`}. We also want to detect an `kIaSequenceHeader`
  // which would signal the start of a new IA Sequence, and to gracefully
  // handle "reserved" OBUs.
  switch (header.obu_type) {
    case kObuIaAudioFrame:
    case kObuIaAudioFrameId0:
    case kObuIaAudioFrameId1:
    case kObuIaAudioFrameId2:
    case kObuIaAudioFrameId3:
    case kObuIaAudioFrameId4:
    case kObuIaAudioFrameId5:
    case kObuIaAudioFrameId6:
    case kObuIaAudioFrameId7:
    case kObuIaAudioFrameId8:
    case kObuIaAudioFrameId9:
    case kObuIaAudioFrameId10:
    case kObuIaAudioFrameId11:
    case kObuIaAudioFrameId12:
    case kObuIaAudioFrameId13:
    case kObuIaAudioFrameId14:
    case kObuIaAudioFrameId15:
    case kObuIaAudioFrameId16:
    case kObuIaAudioFrameId17: {
      RETURN_IF_NOT_OK(GetAndStoreAudioFrameWithData(
          header, payload_size, audio_elements_with_data,
          substream_id_to_audio_element, read_bit_buffer, global_timing_module,
          parameters_manager, output_audio_frame_with_data));
      break;
    }
    case kObuIaParameterBlock: {
      RETURN_IF_NOT_OK(GetAndStoreParameterBlockWithData(
          header, payload_size, param_definition_variants, read_bit_buffer,
          global_timing_module, output_parameter_block_with_data));
      break;
    }
    case kObuIaTemporalDelimiter: {
      // This implementation does not process by temporal unit. Safely ignore
      // it.
      const auto& temporal_delimiter = TemporalDelimiterObu::CreateFromBuffer(
          header, payload_size, read_bit_buffer);
      if (!temporal_delimiter.ok()) {
        return temporal_delimiter.status();
      }
      output_temporal_delimiter = *temporal_delimiter;
      break;
    }
    case kObuIaSequenceHeader:
      if (!header.obu_redundant_copy) {
        // OK. The user of this function will need to reconfigure its state to
        // process the next IA sequence.
        LOG(INFO) << "Detected the start of the next IA Sequence.";
        continue_processing = false;
        break;
      }
      // Ok for any IAMF v1.1.0 descriptor OBUs we can skip over redundant
      // copies.
      [[fallthrough]];
    case kObuIaCodecConfig:
    case kObuIaAudioElement:
    case kObuIaMixPresentation:
      if (!header.obu_redundant_copy) {
        return absl::InvalidArgumentError(absl::StrCat(
            "Unexpected non-reserved OBU obu_type= ", header.obu_type));
      }
      // Consume and discard the OBU. IAMF allows us to ignore it (even if the
      // redundant flag is misleading).
      [[fallthrough]];
    default: {
      // TODO(b/329705373): Read in the data as an `ArbitraryOBU` and output
      //                    it from this function.
      LOG(INFO) << "Detected a reserved or redundant OBU. Safely ignoring it.";
      std::vector<uint8_t> buffer_to_discard(payload_size);
      RETURN_IF_NOT_OK(
          read_bit_buffer.ReadUint8Span(absl::MakeSpan(buffer_to_discard)));
      break;
    }
  }

  if (!continue_processing) {
    // Rewind the position to before the last header was read.
    LOG(INFO) << "position_before_header: " << position_before_header;
    RETURN_IF_NOT_OK(read_bit_buffer.Seek(position_before_header));
  }
  return absl::OkStatus();
}

// Creates an `ObuProcessor` that reads from `read_bit_buffer` and processes
// the descriptor OBUs.
//
// Returns `nullptr` if `read_bit_buffer` is null or initialization fails; the
// failure status is logged.
std::unique_ptr<ObuProcessor> ObuProcessor::Create(
    bool is_exhaustive_and_exact, ReadBitBuffer* read_bit_buffer,
    bool& output_insufficient_data) {
  // `output_insufficient_data` indicates a specific error condition and so is
  // true iff we've received valid data but need more of it.
  output_insufficient_data = false;
  if (read_bit_buffer == nullptr) {
    return nullptr;
  }
  // `WrapUnique(new ...)` is kept from the original code.
  // NOTE(review): presumably the constructor is not public - confirm.
  std::unique_ptr<ObuProcessor> obu_processor =
      absl::WrapUnique(new ObuProcessor(read_bit_buffer));
  if (const auto status = obu_processor->InitializeInternal(
          is_exhaustive_and_exact, output_insufficient_data);
      !status.ok()) {
    LOG(ERROR) << status;
    return nullptr;
  }
  return obu_processor;
}

// Same as `Create()`, and additionally prepares the processor for rendering to
// `desired_layout`. The layout actually selected is written to
// `output_layout`.
//
// Returns `nullptr` if `read_bit_buffer` is null or either initialization step
// fails; the failure status is logged.
std::unique_ptr<ObuProcessor> ObuProcessor::CreateForRendering(
    const Layout& desired_layout,
    const RenderingMixPresentationFinalizer::SampleProcessorFactory&
        sample_processor_factory,
    bool is_exhaustive_and_exact, ReadBitBuffer* read_bit_buffer,
    Layout& output_layout, bool& output_insufficient_data) {
  // `output_insufficient_data` indicates a specific error condition and so is
  // true iff we've received valid data but need more of it.
  output_insufficient_data = false;
  if (read_bit_buffer == nullptr) {
    return nullptr;
  }
  std::unique_ptr<ObuProcessor> obu_processor =
      absl::WrapUnique(new ObuProcessor(read_bit_buffer));
  if (const auto status = obu_processor->InitializeInternal(
          is_exhaustive_and_exact, output_insufficient_data);
      !status.ok()) {
    LOG(ERROR) << status;
    return nullptr;
  }

  if (const auto status = obu_processor->InitializeForRendering(
          desired_layout, sample_processor_factory, output_layout);
      !status.ok()) {
    LOG(ERROR) << status;
    return nullptr;
  }
  return obu_processor;
}

// Returns the output sample rate, or an error if it was never set (it is only
// set when exactly one codec config OBU was found).
//
// NOTE(review): `uint32_t` assumed - confirm against `obu_processor.h`.
absl::StatusOr<uint32_t> ObuProcessor::GetOutputSampleRate() const {
  RETURN_IF_NOT_OK(
      ValidateHasValue(output_sample_rate_,
                       "Output sample rate, was this a trivial IA Sequence?"));
  return *output_sample_rate_;
}

// Returns the output frame size, or an error if it was never set (it is only
// set when exactly one codec config OBU was found).
//
// NOTE(review): `uint32_t` assumed - confirm against `obu_processor.h`.
absl::StatusOr<uint32_t> ObuProcessor::GetOutputFrameSize() const {
  RETURN_IF_NOT_OK(
      ValidateHasValue(output_frame_size_,
                       "Output frame size, was this a trivial IA Sequence?"));
  return *output_frame_size_;
}

// Builds the decoding, demixing and rendering state for one playback layout.
//
// Picks a supported mix presentation and layout (preferring `desired_layout`),
// writes the chosen layout to `output_layout`, and creates a mix presentation
// finalizer whose sample processor factory is only forwarded for that layout.
absl::Status ObuProcessor::InitializeForRendering(
    const Layout& desired_layout,
    const RenderingMixPresentationFinalizer::SampleProcessorFactory&
        sample_processor_factory,
    Layout& output_layout) {
  if (mix_presentations_.empty()) {
    return absl::InvalidArgumentError("No mix presentation OBUs found.");
  }
  if (audio_elements_.empty()) {
    return absl::InvalidArgumentError("No audio element OBUs found.");
  }

  // TODO(b/377747704): Decode only the frames selected for the playback
  //                    layout.
  audio_frame_decoder_.emplace();
  for (const auto& [unused_id, audio_element_with_data] : audio_elements_) {
    RETURN_IF_NOT_OK(audio_frame_decoder_->InitDecodersForSubstreams(
        audio_element_with_data.substream_id_to_labels,
        *audio_element_with_data.codec_config));
  }

  {
    auto temp_demixing_module =
        DemixingModule::CreateForReconstruction(audio_elements_);
    if (!temp_demixing_module.ok()) {
      return temp_demixing_module.status();
    }
    demixing_module_.emplace(*std::move(temp_demixing_module));
  }

  // TODO(b/340289717): Add a way to select the mix presentation if multiple
  //                    are supported.
  const std::list<MixPresentationObu*> supported_mix_presentations =
      GetSupportedMixPresentations(audio_elements_, mix_presentations_);
  if (supported_mix_presentations.empty()) {
    return absl::NotFoundError("No supported mix presentation OBUs found.");
  }

  auto mix_presentation_to_render = GetPlaybackLayoutAndMixPresentation(
      supported_mix_presentations, desired_layout, output_layout);
  if (!mix_presentation_to_render.ok()) {
    return mix_presentation_to_render.status();
  }

  int playback_sub_mix_index = 0;
  int playback_layout_index = 0;
  RETURN_IF_NOT_OK(GetIndicesForLayout(
      (*mix_presentation_to_render)->sub_mixes_, output_layout,
      playback_sub_mix_index, playback_layout_index));
  decoding_layout_info_ = {
      .mix_presentation_id =
          (*mix_presentation_to_render)->GetMixPresentationId(),
      .sub_mix_index = playback_sub_mix_index,
      .layout_index = playback_layout_index,
  };

  // Only forward the user's factory for the selected layout; every other
  // layout gets no sample processor.
  // NOTE(review): `sample_processor_factory` is captured by reference; this is
  // only safe if the finalizer does not call the lambda after this function
  // returns - confirm against `RenderingMixPresentationFinalizer::Create`.
  auto forward_on_desired_layout =
      [&sample_processor_factory, mix_presentation_to_render,
       playback_sub_mix_index, playback_layout_index](
          DecodedUleb128 mix_presentation_id, int sub_mix_index,
          int layout_index, const Layout& layout, int num_channels,
          int sample_rate, int bit_depth, size_t max_input_samples_per_frame)
      -> std::unique_ptr<SampleProcessorBase> {
    if (mix_presentation_id ==
            (*mix_presentation_to_render)->GetMixPresentationId() &&
        playback_sub_mix_index == sub_mix_index &&
        playback_layout_index == layout_index) {
      return sample_processor_factory(
          mix_presentation_id, sub_mix_index, layout_index, layout,
          num_channels, sample_rate, bit_depth, max_input_samples_per_frame);
    }
    return nullptr;
  };

  // Create the mix presentation finalizer which is used to render the output
  // files. We neither trust the user-provided loudness, nor care about the
  // calculated loudness.
  const RendererFactory renderer_factory;
  absl::StatusOr<RenderingMixPresentationFinalizer> mix_presentation_finalizer =
      RenderingMixPresentationFinalizer::Create(
          /*renderer_factory=*/&renderer_factory,
          /*loudness_calculator_factory=*/nullptr, audio_elements_,
          forward_on_desired_layout, mix_presentations_);
  if (!mix_presentation_finalizer.ok()) {
    return mix_presentation_finalizer.status();
  }
  mix_presentation_finalizer_.emplace(*std::move(mix_presentation_finalizer));

  return absl::OkStatus();
}

// Member overload: checks that the processor was initialized, then forwards
// the stored state to the static `ProcessTemporalUnitObu()`.
absl::Status ObuProcessor::ProcessTemporalUnitObu(
    std::optional<AudioFrameWithData>& output_audio_frame_with_data,
    std::optional<ParameterBlockWithData>& output_parameter_block_with_data,
    std::optional<TemporalDelimiterObu>& output_temporal_delimiter,
    bool& continue_processing) {
  if (!parameters_manager_.has_value()) {
    return absl::InvalidArgumentError(
        "Parameters manager is not constructed; "
        "remember to call `Initialize()` first.");
  }
  if (global_timing_module_ == nullptr) {
    return absl::InvalidArgumentError(
        "Global timing module is not constructed; "
        "remember to call `Initialize()` first.");
  }
  if (read_bit_buffer_ == nullptr) {
    return absl::InvalidArgumentError(
        "Read bit buffer is not constructed; "
        "remember to call `Initialize()` first.");
  }

  return ObuProcessor::ProcessTemporalUnitObu(
      audio_elements_, codec_config_obus_, substream_id_to_audio_element_,
      param_definition_variants_, *parameters_manager_, *read_bit_buffer_,
      *global_timing_module_, output_audio_frame_with_data,
      output_parameter_block_with_data, output_temporal_delimiter,
      continue_processing);
}

absl::Status ObuProcessor::ProcessTemporalUnit( bool eos_is_end_of_sequence, std::optional& output_temporal_unit, bool& continue_processing) { continue_processing = true; while (continue_processing) { std::optional audio_frame_with_data; std::optional parameter_block_with_data; std::optional temporal_delimiter; RETURN_IF_NOT_OK( ProcessTemporalUnitObu(audio_frame_with_data, parameter_block_with_data, temporal_delimiter, continue_processing)); // Collect OBUs into a temporal unit. if (audio_frame_with_data.has_value()) { TemporalUnitData::AddDataToCorrectTemporalUnit( current_temporal_unit_, next_temporal_unit_, *std::move(audio_frame_with_data)); } else if (parameter_block_with_data.has_value()) { TemporalUnitData::AddDataToCorrectTemporalUnit( current_temporal_unit_, next_temporal_unit_, *std::move(parameter_block_with_data)); } else if (temporal_delimiter.has_value()) { current_temporal_unit_.temporal_delimiter = *temporal_delimiter; } // The current temporal unit is considered finished if any of the // following conditions is met: // - The end of sequence is reached. // - The timestamp has advanced (i.e. when the next temporal unit gets its // timestamp). // - A temporal delimiter is encountered. 
if ((!continue_processing && eos_is_end_of_sequence) || next_temporal_unit_.timestamp.has_value() || current_temporal_unit_.temporal_delimiter.has_value()) { output_temporal_unit = OutputTemporalUnit(); output_temporal_unit->output_audio_frames = std::move(current_temporal_unit_.audio_frames); output_temporal_unit->output_parameter_blocks = std::move(current_temporal_unit_.parameter_blocks); if (current_temporal_unit_.timestamp.has_value()) { output_temporal_unit->output_timestamp = current_temporal_unit_.timestamp.value(); } current_temporal_unit_ = std::move(next_temporal_unit_); next_temporal_unit_ = TemporalUnitData(); break; } } return absl::OkStatus(); } absl::Status ObuProcessor::RenderTemporalUnitAndMeasureLoudness( InternalTimestamp start_timestamp, const std::list& audio_frames, const std::list& parameter_blocks, absl::Span>& output_rendered_pcm_samples) { if (audio_frames.empty()) { // Nothing to decode, render, or measure loudness of. return absl::OkStatus(); } if (!audio_frame_decoder_.has_value()) { return absl::InvalidArgumentError( "Audio frame decoder is not constructed; " "remember to call `InitializeForRendering()` first."); } if (!demixing_module_.has_value()) { return absl::InvalidArgumentError( "Demxing module is not constructed; " "remember to call `InitializeForRendering()` first."); } if (!mix_presentation_finalizer_.has_value()) { return absl::InvalidArgumentError( "Mix presentation finalizer is not constructed; " "remember to call `InitializeForRendering()` first."); } // Decode the temporal unit. std::optional end_timestamp; // This resizing should happen only once per IA sequence, since all the // temporal units should contain the same number of audio frames. 
decoded_frames_for_temporal_unit_.resize(audio_frames.size()); auto decoded_frames_iter = decoded_frames_for_temporal_unit_.begin(); for (const auto& audio_frame : audio_frames) { if (!end_timestamp.has_value()) { end_timestamp = audio_frame.end_timestamp; } RETURN_IF_NOT_OK( CompareTimestamps(start_timestamp, audio_frame.start_timestamp, "Audio frame has a different start timestamp than " "the temporal unit: ")); RETURN_IF_NOT_OK(CompareTimestamps(*end_timestamp, audio_frame.end_timestamp, "Audio frame has a different end " "timestamp than the temporal unit: ")); auto decoded_frame = audio_frame_decoder_->Decode(audio_frame); if (!decoded_frame.ok()) { return decoded_frame.status(); } *decoded_frames_iter = std::move(*decoded_frame); decoded_frames_iter++; } // Reconstruct the temporal unit and store the result in the output map. const auto decoded_labeled_frames_for_temporal_unit = demixing_module_->DemixDecodedAudioSamples( decoded_frames_for_temporal_unit_); if (!decoded_labeled_frames_for_temporal_unit.ok()) { return decoded_labeled_frames_for_temporal_unit.status(); } RETURN_IF_NOT_OK(mix_presentation_finalizer_->PushTemporalUnit( *decoded_labeled_frames_for_temporal_unit, start_timestamp, *end_timestamp, parameter_blocks)); auto rendered_samples = mix_presentation_finalizer_->GetPostProcessedSamplesAsSpan( decoding_layout_info_.mix_presentation_id, decoding_layout_info_.sub_mix_index, decoding_layout_info_.layout_index); if (!rendered_samples.ok()) { return rendered_samples.status(); } output_rendered_pcm_samples = *rendered_samples; // TODO(b/379122580): Add a call to `FinalizePushingTemporalUnits`, then a // final call to `GetPostProcessedSamplesAsSpan` when there // are no more temporal units to push. Those calls may // belong elsewhere in the class depending on the // interface. return absl::OkStatus(); } } // namespace iamf_tools