1 /* 2 * Copyright (c) 2024, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 3-Clause Clear License 5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear 6 * License was not distributed with this source code in the LICENSE file, you 7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the 8 * Alliance for Open Media Patent License 1.0 was not distributed with this 9 * source code in the PATENTS file, you can obtain it at 10 * www.aomedia.org/license/patent. 11 */ 12 #ifndef CLI_IAMF_ENCODER_H_ 13 #define CLI_IAMF_ENCODER_H_ 14 15 #include <cstdint> 16 #include <list> 17 #include <memory> 18 #include <optional> 19 #include <utility> 20 #include <vector> 21 22 #include "absl/base/nullability.h" 23 #include "absl/container/flat_hash_map.h" 24 #include "absl/log/log.h" 25 #include "absl/status/status.h" 26 #include "iamf/cli/audio_element_with_data.h" 27 #include "iamf/cli/audio_frame_decoder.h" 28 #include "iamf/cli/audio_frame_with_data.h" 29 #include "iamf/cli/channel_label.h" 30 #include "iamf/cli/demixing_module.h" 31 #include "iamf/cli/global_timing_module.h" 32 #include "iamf/cli/loudness_calculator_factory_base.h" 33 #include "iamf/cli/parameter_block_with_data.h" 34 #include "iamf/cli/parameters_manager.h" 35 #include "iamf/cli/proto/test_vector_metadata.pb.h" 36 #include "iamf/cli/proto/user_metadata.pb.h" 37 #include "iamf/cli/proto_conversion/proto_to_obu/audio_frame_generator.h" 38 #include "iamf/cli/proto_conversion/proto_to_obu/parameter_block_generator.h" 39 #include "iamf/cli/renderer_factory.h" 40 #include "iamf/cli/rendering_mix_presentation_finalizer.h" 41 #include "iamf/obu/arbitrary_obu.h" 42 #include "iamf/obu/codec_config.h" 43 #include "iamf/obu/ia_sequence_header.h" 44 #include "iamf/obu/mix_presentation.h" 45 #include "iamf/obu/param_definition_variant.h" 46 #include "iamf/obu/types.h" 47 48 namespace iamf_tools { 49 50 /*!\brief A class that encodes an IA Sequence and generates OBUs. 51 * 52 * Descriptor OBUs are generated once at the beginning, and data OBUs are 53 * generated iteratively for each temporal unit (TU). The use pattern of this 54 * class is: 55 * // Call factory function. 56 * absl::StatusOr<IamfEncoder> encoder = IamfEncoder::Create(...); 57 * if(!encoder.ok()) { 58 * // Handle error. 59 * } 60 * 61 * while (encoder->GeneratingDataObus()) { 62 * // Prepare for the next temporal unit; clear state of the previous TU. 63 * encoder->BeginTemporalUnit(); 64 * 65 * // For all audio elements and labels corresponding to this temporal unit: 66 * for each audio element: { 67 * for each channel label from the current element { 68 * encoder->AddSamples(audio_element_id, label, samples); 69 * } 70 * } 71 * 72 * // When all samples (for all temporal units) are added: 73 * if (done_receiving_all_audio) { 74 * encoder->FinalizeAddSamples(); 75 * } 76 * 77 * // For all parameter block metadata corresponding to this temporal unit: 78 * encoder->AddParameterBlockMetadata(...); 79 * 80 * // Get OBUs for next encoded temporal unit. 81 * encoder->OutputTemporalUnit(...); 82 * } 83 * // Get the final mix presentation OBUs, with measured loudness information. 84 * auto mix_presentation_obus = encoder->GetFinalizedMixPresentationObus(); 85 * 86 * Note the timestamps corresponding to `AddSamples()` and 87 * `AddParameterBlockMetadata()` might be different from that of the output 88 * OBUs obtained in `OutputTemporalUnit()`, because some codecs introduce a 89 * frame of delay. We thus distinguish the concepts of input and output 90 * timestamps (`input_timestamp` and `output_timestamp`) in the code below. 91 */ 92 class IamfEncoder { 93 public: 94 /*!\brief Factory function to create an `IamfEncoder`. 95 * 96 * \param user_metadata Input user metadata describing the IAMF stream. 97 * \param renderer_factory Factory to create renderers for use in measuring 98 * the loudness. 99 * \param loudness_calculator_factory Factory to create loudness calculators 100 * to measure the loudness of the output layouts. 101 * \param sample_processor_factory Factory to create processors for use after 102 * rendering. 103 * \param ia_sequence_header_obu Generated IA Sequence Header OBU. 104 * \param codec_config_obus Map of Codec Config ID to generated Codec Config 105 * OBUs. 106 * \param audio_elements Map of Audio Element IDs to generated OBUs with data. 107 * \param preliminary_mix_presentation_obus List of preliminary Mix 108 * Presentation OBUs. Using these directly almost certainly results in 109 * incorrect loudness metadata. It is best practice to replace these 110 * with the result of `GetFinalizedMixPresentationObus()` after all 111 * data OBUs are generated. 112 * \param arbitrary_obus List of generated Arbitrary OBUs. 113 * \return `absl::OkStatus()` if successful. A specific status on failure. 114 */ 115 static absl::StatusOr<IamfEncoder> Create( 116 const iamf_tools_cli_proto::UserMetadata& user_metadata, 117 absl::Nullable<const RendererFactoryBase*> renderer_factory, 118 absl::Nullable<const LoudnessCalculatorFactoryBase*> 119 loudness_calculator_factory, 120 const RenderingMixPresentationFinalizer::SampleProcessorFactory& 121 sample_processor_factory, 122 std::optional<IASequenceHeaderObu>& ia_sequence_header_obu, 123 absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus, 124 absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements, 125 std::list<MixPresentationObu>& preliminary_mix_presentation_obus, 126 std::list<ArbitraryObu>& arbitrary_obus); 127 128 /*!\brief Returns whether this encoder is generating data OBUs. 129 * 130 * \return True if still generating data OBUs. 131 */ 132 bool GeneratingDataObus() const; 133 134 /*!\brief Clears the state, e.g. accumulated samples for next temporal unit. 135 */ 136 void BeginTemporalUnit(); 137 138 /*!\brief Gets the input timestamp of the data OBU generation iteration. 139 * 140 * \param input_timestamp Result of input timestamp. 141 * \return `absl::OkStatus()` if successful. A specific status on failure. 142 */ 143 absl::Status GetInputTimestamp(int32_t& input_timestamp); 144 145 /*!\brief Adds audio samples belonging to the same temporal unit. 146 * 147 * The best practice is to not call this function after 148 * `FinalizeAddSamples()`. But it is OK if you do -- just that the added 149 * samples will be ignored and not encoded. 150 * 151 * \param audio_element_id ID of the audio element to add samples to. 152 * \param label Channel label to add samples to. 153 * \param samples Audio samples to add. 154 */ 155 void AddSamples(DecodedUleb128 audio_element_id, ChannelLabel::Label label, 156 const std::vector<InternalSampleType>& samples); 157 158 /*!\brief Finalizes the process of adding samples. 159 * 160 * This will signal the underlying codecs to flush all remaining samples, 161 * as well as trim samples from the end. 162 */ 163 void FinalizeAddSamples(); 164 165 /*!\brief Adds parameter block metadata belonging to the same temporal unit. 166 * 167 * \param parameter_block_metadata Parameter block metadata to add. 168 * \return `absl::OkStatus()` if successful. A specific status on failure. 169 */ 170 absl::Status AddParameterBlockMetadata( 171 const iamf_tools_cli_proto::ParameterBlockObuMetadata& 172 parameter_block_metadata); 173 174 /*!\brief Outputs data OBUs corresponding to one temporal unit. 175 * 176 * \param audio_frames List of generated audio frames corresponding to this 177 * temporal unit. 178 * \param parameter_blocks List of generated parameter block corresponding 179 * to this temporal unit. 180 * \return `absl::OkStatus()` if successful. A specific status on failure. 181 */ 182 absl::Status OutputTemporalUnit( 183 std::list<AudioFrameWithData>& audio_frames, 184 std::list<ParameterBlockWithData>& parameter_blocks); 185 186 /*!\brief Gets the finalized mix presentation OBUs. 187 * 188 * Mix Presentation OBUs contain loudness information, which is only possible 189 * to know after all data OBUs are generated. 190 * 191 * Must only be called only once and after all data OBUs are generated, i.e. 192 * after `GeneratingDataObus()` returns false. 193 * 194 * \return Finalized Mix Presentation OBUs. A specific status on failure. 195 */ 196 absl::StatusOr<std::list<MixPresentationObu>> 197 GetFinalizedMixPresentationObus(); 198 199 private: 200 /*!\brief Private constructor. 201 * 202 * Moves from the input arguments Some arguments are wrapped in unique 203 * pointers to ensure pointer or reference stability after move. 204 * 205 * \param validate_user_loudness Whether to validate the user-provided 206 * loudness. 207 * \param parameter_id_to_metadata Mapping from parameter IDs to per-ID 208 * parameter metadata. 209 * \param param_definition_variants Parameter definitions for the IA Sequence. 210 * \param parameters_manager Manager to support internal querying 211 * of parameters. 212 * \param demixing_module Module to demix audio elements. 213 * \param audio_frame_generator Audio frame generator. 214 * \param audio_frame_decoder Decodes the original audio frames, to facilitate 215 * recon gain computation. 216 * \param global_timing_module Manages global timing information. 217 */ IamfEncoder(bool validate_user_loudness,std::unique_ptr<absl::flat_hash_map<DecodedUleb128,ParamDefinitionVariant>> param_definition_variants,ParameterBlockGenerator && parameter_block_generator,std::unique_ptr<ParametersManager> parameters_manager,const DemixingModule & demixing_module,std::unique_ptr<AudioFrameGenerator> audio_frame_generator,AudioFrameDecoder && audio_frame_decoder,std::unique_ptr<GlobalTimingModule> global_timing_module,RenderingMixPresentationFinalizer && mix_presentation_finalizer)218 IamfEncoder(bool validate_user_loudness, 219 std::unique_ptr< 220 absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>> 221 param_definition_variants, 222 ParameterBlockGenerator&& parameter_block_generator, 223 std::unique_ptr<ParametersManager> parameters_manager, 224 const DemixingModule& demixing_module, 225 std::unique_ptr<AudioFrameGenerator> audio_frame_generator, 226 AudioFrameDecoder&& audio_frame_decoder, 227 std::unique_ptr<GlobalTimingModule> global_timing_module, 228 RenderingMixPresentationFinalizer&& mix_presentation_finalizer) 229 : validate_user_loudness_(validate_user_loudness), 230 param_definition_variants_(std::move(param_definition_variants)), 231 parameter_block_generator_(std::move(parameter_block_generator)), 232 parameters_manager_(std::move(parameters_manager)), 233 demixing_module_(demixing_module), 234 audio_frame_generator_(std::move(audio_frame_generator)), 235 audio_frame_decoder_(std::move(audio_frame_decoder)), 236 global_timing_module_(std::move(global_timing_module)), 237 mix_presentation_finalizer_(std::move(mix_presentation_finalizer)) {} 238 239 const bool validate_user_loudness_; 240 241 // Mapping from parameter IDs to parameter definitions. 242 // Parameter block generator owns a reference to this map. Wrapped in 243 // `std::unique_ptr` for reference stability after move. 244 absl::Nonnull<std::unique_ptr< 245 const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>>> 246 param_definition_variants_; 247 248 // Saved parameter blocks generated in one iteration. 249 std::list<ParameterBlockWithData> temp_mix_gain_parameter_blocks_; 250 std::list<ParameterBlockWithData> temp_demixing_parameter_blocks_; 251 std::list<ParameterBlockWithData> temp_recon_gain_parameter_blocks_; 252 253 // Cached mapping from Audio Element ID to labeled samples added in the same 254 // iteration. 255 absl::flat_hash_map<DecodedUleb128, LabelSamplesMap> id_to_labeled_samples_; 256 257 // Whether the `FinalizeAddSamples()` has been called. 258 bool add_samples_finalized_ = false; 259 260 // Various generators and modules used when generating data OBUs iteratively. 261 // Some are held in `unique_ptr` for reference stability after move. 262 ParameterBlockGenerator parameter_block_generator_; 263 absl::Nonnull<std::unique_ptr<ParametersManager>> parameters_manager_; 264 const DemixingModule demixing_module_; 265 absl::Nonnull<std::unique_ptr<AudioFrameGenerator>> audio_frame_generator_; 266 AudioFrameDecoder audio_frame_decoder_; 267 absl::Nonnull<std::unique_ptr<GlobalTimingModule>> global_timing_module_; 268 269 // Modules to render the output layouts and measure their loudness. 270 RenderingMixPresentationFinalizer mix_presentation_finalizer_; 271 }; 272 273 } // namespace iamf_tools 274 275 #endif // CLI_IAMF_ENCODER_H_ 276