/*
 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 3-Clause Clear License
 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
 * License was not distributed with this source code in the LICENSE file, you
 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
 * Alliance for Open Media Patent License 1.0 was not distributed with this
 * source code in the PATENTS file, you can obtain it at
 * www.aomedia.org/license/patent.
 */
#include "iamf/cli/encoder_main_lib.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <list>
#include <memory>
#include <optional>
#include <string>
#include <system_error>

#include "absl/container/flat_hash_map.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/audio_frame_with_data.h"
#include "iamf/cli/demixing_module.h"
#include "iamf/cli/iamf_components.h"
#include "iamf/cli/iamf_encoder.h"
#include "iamf/cli/obu_sequencer_base.h"
#include "iamf/cli/parameter_block_partitioner.h"
#include "iamf/cli/parameter_block_with_data.h"
#include "iamf/cli/proto/temporal_delimiter.pb.h"
#include "iamf/cli/proto/test_vector_metadata.pb.h"
#include "iamf/cli/proto/user_metadata.pb.h"
#include "iamf/cli/wav_sample_provider.h"
#include "iamf/cli/wav_writer.h"
#include "iamf/common/utils/macros.h"
#include "iamf/obu/arbitrary_obu.h"
#include "iamf/obu/codec_config.h"
#include "iamf/obu/ia_sequence_header.h"
#include "iamf/obu/mix_presentation.h"
#include "iamf/obu/types.h"
#include "src/google/protobuf/repeated_ptr_field.h"

namespace iamf_tools {

namespace {

using iamf_tools_cli_proto::ParameterBlockObuMetadata;
using iamf_tools_cli_proto::UserMetadata;

// Re-partitions every parameter block metadata entry in `user_metadata`.
//
// The partition duration is derived from the primary profile of the first
// `ia_sequence_header_metadata` and from the first `codec_config_metadata`.
// On success the original `parameter_block_metadata` is replaced by the
// partitioned blocks. Returns `InvalidArgumentError` when either of the two
// required metadata lists is empty, or forwards any partitioner failure.
absl::Status PartitionParameterMetadata(UserMetadata& user_metadata) {
  uint32_t partition_duration = 0;
  if (user_metadata.ia_sequence_header_metadata().empty() ||
      user_metadata.codec_config_metadata().empty()) {
    return absl::InvalidArgumentError(
        "Determining the partition duration requires at least one "
        "`ia_sequence_header_metadata` and one `codec_config_metadata`");
  }

  std::list<ParameterBlockObuMetadata> partitioned_parameter_blocks;
  RETURN_IF_NOT_OK(ParameterBlockPartitioner::FindPartitionDuration(
      user_metadata.ia_sequence_header_metadata(0).primary_profile(),
      user_metadata.codec_config_metadata(0), partition_duration));
  for (const auto& parameter_block_metadata :
       user_metadata.parameter_block_metadata()) {
    RETURN_IF_NOT_OK(ParameterBlockPartitioner::PartitionFrameAligned(
        partition_duration, parameter_block_metadata,
        partitioned_parameter_blocks));
  }

  // Replace the original parameter block metadata.
  user_metadata.clear_parameter_block_metadata();
  for (const auto& partitioned_parameter_block :
       partitioned_parameter_blocks) {
    *user_metadata.add_parameter_block_metadata() = partitioned_parameter_block;
  }

  return absl::OkStatus();
}

// Mapping from the start timestamps to lists of parameter block metadata.
using TimeParameterBlockMetadataMap =
    absl::flat_hash_map<int32_t, std::list<ParameterBlockObuMetadata>>;

// Groups `parameter_block_metadata` by `start_timestamp()`.
//
// Entries are appended to `time_parameter_block_metadata` in input order, so
// blocks sharing a timestamp keep their relative order. Always returns OK.
absl::Status OrganizeParameterBlockMetadata(
    const google::protobuf::RepeatedPtrField<ParameterBlockObuMetadata>&
        parameter_block_metadata,
    TimeParameterBlockMetadataMap& time_parameter_block_metadata) {
  for (const auto& metadata : parameter_block_metadata) {
    time_parameter_block_metadata[metadata.start_timestamp()].push_back(
        metadata);
  }

  return absl::OkStatus();
}

// Reads one frame of samples for every audio element in `audio_elements`.
//
// The samples for each audio element are written to
// `id_to_labeled_samples[audio_element_id]`. `no_more_real_samples` is passed
// through to `WavSampleProvider::ReadFrames()` for every audio element.
// Returns the first read failure, if any.
absl::Status CollectLabeledSamplesForAudioElements(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements,
    WavSampleProvider& wav_sample_provider,
    absl::flat_hash_map<DecodedUleb128, LabelSamplesMap>& id_to_labeled_samples,
    bool& no_more_real_samples) {
  for (const auto& [audio_element_id, unused_audio_element] : audio_elements) {
    RETURN_IF_NOT_OK(wav_sample_provider.ReadFrames(
        audio_element_id, id_to_labeled_samples[audio_element_id],
        no_more_real_samples));
  }
  return absl::OkStatus();
}

// Logs a subset of `audio_frames`: the first, the last, and any audio frames
// with `obu_trimming_status_flag` set.
void PrintAudioFrames(const std::list<AudioFrameWithData>& audio_frames) {
  // Use an unsigned index so the comparison with `size()` is well-defined.
  const size_t num_frames = audio_frames.size();
  size_t i = 0;
  for (const auto& audio_frame_with_data : audio_frames) {
    if (i == 0 || i + 1 == num_frames ||
        audio_frame_with_data.obu.header_.obu_trimming_status_flag) {
      LOG(INFO) << "Audio Frame OBU[" << i << "]";

      audio_frame_with_data.obu.PrintObu();
      LOG(INFO) << " audio frame.start_timestamp= "
                << audio_frame_with_data.start_timestamp;
      LOG(INFO) << " audio frame.end_timestamp= "
                << audio_frame_with_data.end_timestamp;
    }
    ++i;
  }
}

// Ensures `output_directory` exists, creating intermediate directories.
//
// An empty path, an existing directory, or a character file (e.g. a device
// used as a sink) is accepted without touching the filesystem. Returns
// `UnknownError`, including the filesystem error message, when creation fails.
absl::Status CreateOutputDirectory(const std::string& output_directory) {
  // The `error_code` overloads report failures without throwing.
  std::error_code error_code;
  if (output_directory.empty() ||
      std::filesystem::is_directory(output_directory, error_code) ||
      std::filesystem::is_character_file(output_directory, error_code)) {
    return absl::OkStatus();
  }

  // `create_directories` also returns false when the directory already exists
  // (e.g. it was created concurrently), so judge success by `error_code`
  // rather than by the return value.
  error_code.clear();
  std::filesystem::create_directories(output_directory, error_code);
  if (error_code) {
    return absl::UnknownError(
        absl::StrCat("Failed to create output directory = ", output_directory,
                     ": ", error_code.message()));
  }

  return absl::OkStatus();
}

// Drives `iamf_encoder` until it stops generating data OBUs.
//
// Each iteration feeds one frame of samples per audio element (read from the
// WAV files under `input_wav_directory`) plus any parameter block metadata
// starting at the encoder's current input timestamp, then appends whatever the
// encoder outputs to `audio_frames` and `parameter_blocks`. Returns the first
// failure reported by the sample provider or the encoder.
absl::Status GenerateTemporalUnitObus(
    const UserMetadata& user_metadata, const std::string& input_wav_directory,
    IamfEncoder& iamf_encoder,
    absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements,
    std::list<AudioFrameWithData>& audio_frames,
    std::list<ParameterBlockWithData>& parameter_blocks) {
  auto wav_sample_provider =
      WavSampleProvider::Create(user_metadata.audio_frame_metadata(),
                                input_wav_directory, audio_elements);
  if (!wav_sample_provider.ok()) {
    return wav_sample_provider.status();
  }

  // Parameter blocks.
  TimeParameterBlockMetadataMap time_parameter_block_metadata;
  RETURN_IF_NOT_OK(OrganizeParameterBlockMetadata(
      user_metadata.parameter_block_metadata(), time_parameter_block_metadata));

  // TODO(b/329375123): Make two while loops that run on two threads: one for
  //                    adding samples and parameter block metadata, and one for
  //                    outputting OBUs.
  int data_obus_iteration = 0;  // Just for logging purposes.
  while (iamf_encoder.GeneratingDataObus()) {
    LOG_EVERY_N_SEC(INFO, 5)
        << "\n\n============================= Generating Data OBUs Iter #"
        << data_obus_iteration++ << " =============================\n";

    iamf_encoder.BeginTemporalUnit();

    int32_t input_timestamp = 0;
    RETURN_IF_NOT_OK(iamf_encoder.GetInputTimestamp(input_timestamp));

    // Add audio samples.
    absl::flat_hash_map<DecodedUleb128, LabelSamplesMap> id_to_labeled_samples;
    bool no_more_real_samples = false;
    RETURN_IF_NOT_OK(CollectLabeledSamplesForAudioElements(
        audio_elements, *wav_sample_provider, id_to_labeled_samples,
        no_more_real_samples));

    for (const auto& [audio_element_id, labeled_samples] :
         id_to_labeled_samples) {
      for (const auto& [channel_label, samples] : labeled_samples) {
        iamf_encoder.AddSamples(audio_element_id, channel_label, samples);
      }
    }

    // In this program we always use up all samples from a WAV file, so we
    // call `IamfEncoder::FinalizeAddSamples()` only when there are no more
    // real samples. In other applications, the user may decide to stop adding
    // audio samples based on other criteria.
    if (no_more_real_samples) {
      iamf_encoder.FinalizeAddSamples();
    }

    // Add parameter block metadata. Look the timestamp up with `find()` so
    // that timestamps without metadata do not insert empty lists in the map.
    if (const auto metadata_iter =
            time_parameter_block_metadata.find(input_timestamp);
        metadata_iter != time_parameter_block_metadata.end()) {
      for (const auto& metadata : metadata_iter->second) {
        RETURN_IF_NOT_OK(iamf_encoder.AddParameterBlockMetadata(metadata));
      }
    }

    std::list<AudioFrameWithData> temp_audio_frames;
    std::list<ParameterBlockWithData> temp_parameter_blocks;
    RETURN_IF_NOT_OK(iamf_encoder.OutputTemporalUnit(temp_audio_frames,
                                                     temp_parameter_blocks));

    if (temp_audio_frames.empty()) {
      // Some audio codecs will only output an encoded frame after the next
      // frame "pushes" the old one out. So we wait till the next iteration to
      // retrieve it.
      LOG(INFO) << "No audio frame generated in this iteration; continue.";
      continue;
    }

    audio_frames.splice(audio_frames.end(), temp_audio_frames);
    parameter_blocks.splice(parameter_blocks.end(), temp_parameter_blocks);
  }
  LOG(INFO) << "\n============================= END of Generating Data OBUs"
            << " =============================\n\n";
  PrintAudioFrames(audio_frames);

  return absl::OkStatus();
}

// Maps the user-requested output WAV bit-depth to an optional override.
//
// A request of 0 means "no override" and yields `std::nullopt`. Any other
// value is clamped to the range [16, 32].
// TODO(b/390392510): Update control of output wav file bit-depth.
std::optional<uint8_t> GetOverrideBitDepth(uint32_t requested_bit_depth) {
  if (requested_bit_depth == 0) {
    return std::nullopt;
  }

  // Clamp the bit-depth to something supported by wav files.
  constexpr uint32_t kMinWavFileBitDepth = 16;
  constexpr uint32_t kMaxWavFileBitDepth = 32;
  const uint32_t clamped_bit_depth = std::clamp(
      requested_bit_depth, kMinWavFileBitDepth, kMaxWavFileBitDepth);
  return static_cast<uint8_t>(clamped_bit_depth);
}

// Hands all OBUs to every sequencer created by `CreateObuSequencers()`.
//
// Temporal delimiters are included when enabled in
// `user_metadata.temporal_delimiter_metadata()`. Returns the first failure
// reported by a sequencer's `PickAndPlace()`.
absl::Status WriteObus(
    const UserMetadata& user_metadata, const std::string& output_iamf_directory,
    const IASequenceHeaderObu& ia_sequence_header_obu,
    const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements,
    const std::list<MixPresentationObu>& mix_presentation_obus,
    const std::list<AudioFrameWithData>& audio_frames,
    const std::list<ParameterBlockWithData>& parameter_blocks,
    const std::list<ArbitraryObu>& arbitrary_obus) {
  const bool include_temporal_delimiters =
      user_metadata.temporal_delimiter_metadata().enable_temporal_delimiters();

  // TODO(b/349271859): Move the OBU sequencer inside `IamfEncoder`.
  auto obu_sequencers = CreateObuSequencers(
      user_metadata, output_iamf_directory, include_temporal_delimiters);
  for (auto& obu_sequencer : obu_sequencers) {
    RETURN_IF_NOT_OK(obu_sequencer->PickAndPlace(
        ia_sequence_header_obu, codec_config_obus, audio_elements,
        mix_presentation_obus, audio_frames, parameter_blocks, arbitrary_obus));
  }

  return absl::OkStatus();
}

}  // namespace

// Encodes the IA sequence described by `input_user_metadata`.
//
// Audio is read from WAV files under `input_wav_directory`; the resulting
// OBUs, and the rendered WAV files used to measure loudness, are written under
// `output_iamf_directory`. The input metadata is copied before any
// modification. Returns the first error encountered at any stage.
absl::Status TestMain(const UserMetadata& input_user_metadata,
                      const std::string& input_wav_directory,
                      const std::string& output_iamf_directory) {
  // Make a copy before modifying.
  UserMetadata user_metadata(input_user_metadata);

  std::optional<IASequenceHeaderObu> ia_sequence_header_obu;
  absl::flat_hash_map<uint32_t, CodecConfigObu> codec_config_obus;
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  std::list<MixPresentationObu> preliminary_mix_presentation_obus;
  std::list<AudioFrameWithData> audio_frames;
  std::list<ParameterBlockWithData> parameter_blocks;
  std::list<ArbitraryObu> arbitrary_obus;

  // Create output directories.
  RETURN_IF_NOT_OK(CreateOutputDirectory(output_iamf_directory));

  // Partition parameter block metadata if necessary. This will overwrite
  // `user_metadata.mutable_parameter_block_metadata()`.
  if (user_metadata.test_vector_metadata()
          .partition_mix_gain_parameter_blocks()) {
    RETURN_IF_NOT_OK(PartitionParameterMetadata(user_metadata));
  }

  // We want to hold the `IamfEncoder` until all OBUs have been written.
  // Write the output audio streams which were used to measure loudness to the
  // same directory as the IAMF file.
  const std::string output_wav_file_prefix =
      (std::filesystem::path(output_iamf_directory) /
       user_metadata.test_vector_metadata().file_name_prefix())
          .string();
  const std::optional<uint8_t> override_bit_depth =
      GetOverrideBitDepth(user_metadata.test_vector_metadata()
                              .output_wav_file_bit_depth_override());
  LOG(INFO) << "output_wav_file_prefix = " << output_wav_file_prefix;
  const auto ProduceAllWavWriters =
      [output_wav_file_prefix, override_bit_depth](
          DecodedUleb128 mix_presentation_id, int sub_mix_index,
          int layout_index, const Layout&, int num_channels, int sample_rate,
          int bit_depth,
          size_t max_input_samples_per_frame) -> std::unique_ptr<WavWriter> {
    const auto wav_path = absl::StrCat(
        output_wav_file_prefix, "_rendered_id_", mix_presentation_id,
        "_sub_mix_", sub_mix_index, "_layout_", layout_index, ".wav");
    // Obey the override bit depth. But if it is not set, we can infer a good
    // bit-depth from the input audio.
    const uint8_t wav_file_bit_depth = override_bit_depth.value_or(bit_depth);
    return WavWriter::Create(wav_path, num_channels, sample_rate,
                             wav_file_bit_depth, max_input_samples_per_frame);
  };

  auto iamf_encoder = IamfEncoder::Create(
      user_metadata, CreateRendererFactory().get(),
      CreateLoudnessCalculatorFactory().get(), ProduceAllWavWriters,
      ia_sequence_header_obu, codec_config_obus, audio_elements,
      preliminary_mix_presentation_obus, arbitrary_obus);
  if (!iamf_encoder.ok()) {
    return iamf_encoder.status();
  }
  // Fail with a status instead of crashing in `std::optional::value()` if the
  // header was left unset.
  if (!ia_sequence_header_obu.has_value()) {
    return absl::InternalError(
        "`IamfEncoder::Create()` succeeded without producing an IA Sequence "
        "Header OBU");
  }
  // Discard the "preliminary" mix presentation OBUs. We only care about the
  // finalized ones, which are not possible to know until audio encoding is
  // complete.
  preliminary_mix_presentation_obus.clear();

  RETURN_IF_NOT_OK(GenerateTemporalUnitObus(user_metadata, input_wav_directory,
                                            *iamf_encoder, audio_elements,
                                            audio_frames, parameter_blocks));
  // Audio encoding is complete. Retrieve the OBUs which have the finalized
  // loudness information.
  const auto finalized_mix_presentation_obus =
      iamf_encoder->GetFinalizedMixPresentationObus();
  if (!finalized_mix_presentation_obus.ok()) {
    return finalized_mix_presentation_obus.status();
  }

  RETURN_IF_NOT_OK(WriteObus(user_metadata, output_iamf_directory,
                             *ia_sequence_header_obu, codec_config_obus,
                             audio_elements, *finalized_mix_presentation_obus,
                             audio_frames, parameter_blocks, arbitrary_obus));

  return absl::OkStatus();
}

}  // namespace iamf_tools