1 /*
2 * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12 #include "iamf/cli/iamf_encoder.h"
13
14 #include <algorithm>
15 #include <cstdint>
16 #include <list>
17 #include <memory>
18 #include <optional>
19 #include <utility>
20 #include <vector>
21
22 #include "absl/base/nullability.h"
23 #include "absl/container/flat_hash_map.h"
24 #include "absl/log/check.h"
25 #include "absl/log/log.h"
26 #include "absl/status/status.h"
27 #include "absl/strings/string_view.h"
28 #include "iamf/cli/audio_element_with_data.h"
29 #include "iamf/cli/audio_frame_decoder.h"
30 #include "iamf/cli/audio_frame_with_data.h"
31 #include "iamf/cli/channel_label.h"
32 #include "iamf/cli/cli_util.h"
33 #include "iamf/cli/demixing_module.h"
34 #include "iamf/cli/global_timing_module.h"
35 #include "iamf/cli/loudness_calculator_factory_base.h"
36 #include "iamf/cli/parameter_block_with_data.h"
37 #include "iamf/cli/parameters_manager.h"
38 #include "iamf/cli/proto/encoder_control_metadata.pb.h"
39 #include "iamf/cli/proto/test_vector_metadata.pb.h"
40 #include "iamf/cli/proto/user_metadata.pb.h"
41 #include "iamf/cli/proto_conversion/downmixing_reconstruction_util.h"
42 #include "iamf/cli/proto_conversion/proto_to_obu/arbitrary_obu_generator.h"
43 #include "iamf/cli/proto_conversion/proto_to_obu/audio_element_generator.h"
44 #include "iamf/cli/proto_conversion/proto_to_obu/audio_frame_generator.h"
45 #include "iamf/cli/proto_conversion/proto_to_obu/codec_config_generator.h"
46 #include "iamf/cli/proto_conversion/proto_to_obu/ia_sequence_header_generator.h"
47 #include "iamf/cli/proto_conversion/proto_to_obu/mix_presentation_generator.h"
48 #include "iamf/cli/proto_conversion/proto_to_obu/parameter_block_generator.h"
49 #include "iamf/cli/renderer_factory.h"
50 #include "iamf/cli/rendering_mix_presentation_finalizer.h"
51 #include "iamf/common/utils/macros.h"
52 #include "iamf/obu/arbitrary_obu.h"
53 #include "iamf/obu/codec_config.h"
54 #include "iamf/obu/ia_sequence_header.h"
55 #include "iamf/obu/mix_presentation.h"
56 #include "iamf/obu/param_definition_variant.h"
57 #include "iamf/obu/types.h"
58
59 namespace iamf_tools {
60
61 namespace {
62
// Prepares `audio_frame_decoder` to decode the substreams of each entry in
// `audio_elements`.
//
// Entries whose `codec_config` is null are ignored. Any failure reported by
// `InitDecodersForSubstreams` is propagated through `RETURN_IF_NOT_OK`;
// otherwise `absl::OkStatus()` is returned.
absl::Status InitAudioFrameDecoderForAllAudioElements(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements,
    AudioFrameDecoder& audio_frame_decoder) {
  for (const auto& id_and_element : audio_elements) {
    const auto& element = id_and_element.second;

    // A stray audio element has no codec config, so there is no way to know
    // how its substreams should be decoded. Only configure the others.
    if (element.codec_config != nullptr) {
      RETURN_IF_NOT_OK(audio_frame_decoder.InitDecodersForSubstreams(
          element.substream_id_to_labels, *element.codec_config));
    }
  }
  return absl::OkStatus();
}
79
80 } // namespace
81
Create(const iamf_tools_cli_proto::UserMetadata & user_metadata,absl::Nullable<const RendererFactoryBase * > renderer_factory,absl::Nullable<const LoudnessCalculatorFactoryBase * > loudness_calculator_factory,const RenderingMixPresentationFinalizer::SampleProcessorFactory & sample_processor_factory,std::optional<IASequenceHeaderObu> & ia_sequence_header_obu,absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus,absl::flat_hash_map<DecodedUleb128,AudioElementWithData> & audio_elements,std::list<MixPresentationObu> & mix_presentation_obus,std::list<ArbitraryObu> & arbitrary_obus)82 absl::StatusOr<IamfEncoder> IamfEncoder::Create(
83 const iamf_tools_cli_proto::UserMetadata& user_metadata,
84 absl::Nullable<const RendererFactoryBase*> renderer_factory,
85 absl::Nullable<const LoudnessCalculatorFactoryBase*>
86 loudness_calculator_factory,
87 const RenderingMixPresentationFinalizer::SampleProcessorFactory&
88 sample_processor_factory,
89 std::optional<IASequenceHeaderObu>& ia_sequence_header_obu,
90 absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
91 absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements,
92 std::list<MixPresentationObu>& mix_presentation_obus,
93 std::list<ArbitraryObu>& arbitrary_obus) {
94 // IA Sequence Header OBU. Only one is allowed.
95 if (user_metadata.ia_sequence_header_metadata_size() != 1) {
96 return absl::InvalidArgumentError(
97 "Only one IA Sequence Header allowed in an IA Sequence.");
98 }
99 IaSequenceHeaderGenerator ia_sequence_header_generator(
100 user_metadata.ia_sequence_header_metadata(0));
101 RETURN_IF_NOT_OK(
102 ia_sequence_header_generator.Generate(ia_sequence_header_obu));
103
104 // Codec Config OBUs.
105 CodecConfigGenerator codec_config_generator(
106 user_metadata.codec_config_metadata());
107 RETURN_IF_NOT_OK(codec_config_generator.Generate(codec_config_obus));
108
109 // Audio Element OBUs.
110 AudioElementGenerator audio_element_generator(
111 user_metadata.audio_element_metadata());
112 RETURN_IF_NOT_OK(
113 audio_element_generator.Generate(codec_config_obus, audio_elements));
114
115 // Generate the majority of Mix Presentation OBUs - loudness will be
116 // calculated later.
117 MixPresentationGenerator mix_presentation_generator(
118 user_metadata.mix_presentation_metadata());
119 RETURN_IF_NOT_OK(mix_presentation_generator.Generate(
120 user_metadata.encoder_control_metadata().add_build_information_tag(),
121 mix_presentation_obus));
122 // Initialize a mix presentation mix presentation finalizer. Requires
123 // rendering data for every submix to accurately compute loudness.
124 auto mix_presentation_finalizer = RenderingMixPresentationFinalizer::Create(
125 renderer_factory, loudness_calculator_factory, audio_elements,
126 sample_processor_factory, mix_presentation_obus);
127 if (!mix_presentation_finalizer.ok()) {
128 return mix_presentation_finalizer.status();
129 }
130
131 // Generate Arbitrary OBUs.
132 ArbitraryObuGenerator arbitrary_obu_generator(
133 user_metadata.arbitrary_obu_metadata());
134 RETURN_IF_NOT_OK(arbitrary_obu_generator.Generate(arbitrary_obus));
135
136 // Collect and validate consistency of all `ParamDefinition`s in all
137 // Audio Element and Mix Presentation OBUs.
138 auto param_definition_variants = std::make_unique<
139 absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>>();
140
141 RETURN_IF_NOT_OK(CollectAndValidateParamDefinitions(
142 audio_elements, mix_presentation_obus, *param_definition_variants));
143
144 // Initialize the global timing module.
145 auto global_timing_module =
146 GlobalTimingModule::Create(audio_elements, *param_definition_variants);
147 if (global_timing_module == nullptr) {
148 return absl::InvalidArgumentError(
149 "Failed to initialize the global timing module");
150 }
151
152 // Initialize the parameter block generator.
153 ParameterBlockGenerator parameter_block_generator(
154 user_metadata.test_vector_metadata().override_computed_recon_gains(),
155 *param_definition_variants);
156 RETURN_IF_NOT_OK(parameter_block_generator.Initialize(audio_elements));
157
158 // Put generated parameter blocks in a manager that supports easier queries.
159 auto parameters_manager = std::make_unique<ParametersManager>(audio_elements);
160 RETURN_IF_NOT_OK(parameters_manager->Initialize());
161
162 // Down-mix the audio samples and then demix audio samples while decoding
163 // them. This is useful to create multi-layer audio elements and to determine
164 // the recon gain parameters and to measuring loudness.
165 const absl::StatusOr<absl::flat_hash_map<
166 DecodedUleb128, DemixingModule::DownmixingAndReconstructionConfig>>
167 audio_element_id_to_demixing_metadata =
168 CreateAudioElementIdToDemixingMetadata(user_metadata, audio_elements);
169 if (!audio_element_id_to_demixing_metadata.ok()) {
170 return audio_element_id_to_demixing_metadata.status();
171 }
172 auto demixing_module = DemixingModule::CreateForDownMixingAndReconstruction(
173 *std::move(audio_element_id_to_demixing_metadata));
174 if (!demixing_module.ok()) {
175 return demixing_module.status();
176 }
177
178 auto audio_frame_generator = std::make_unique<AudioFrameGenerator>(
179 user_metadata.audio_frame_metadata(),
180 user_metadata.codec_config_metadata(), audio_elements, *demixing_module,
181 *parameters_manager, *global_timing_module);
182 RETURN_IF_NOT_OK(audio_frame_generator->Initialize());
183
184 // Initialize the audio frame decoder. It is needed to determine the recon
185 // gain parameters and measure the loudness of the mixes.
186 AudioFrameDecoder audio_frame_decoder;
187 RETURN_IF_NOT_OK(InitAudioFrameDecoderForAllAudioElements(
188 audio_elements, audio_frame_decoder));
189
190 return IamfEncoder(
191 user_metadata.test_vector_metadata().validate_user_loudness(),
192 std::move(param_definition_variants),
193 std::move(parameter_block_generator), std::move(parameters_manager),
194 *demixing_module, std::move(audio_frame_generator),
195 std::move(audio_frame_decoder), std::move(global_timing_module),
196 std::move(*mix_presentation_finalizer));
197 }
198
GeneratingDataObus() const199 bool IamfEncoder::GeneratingDataObus() const {
200 return (audio_frame_generator_ != nullptr) &&
201 (audio_frame_generator_->TakingSamples() ||
202 audio_frame_generator_->GeneratingFrames());
203 }
204
BeginTemporalUnit()205 void IamfEncoder::BeginTemporalUnit() {
206 // Clear cached samples for this iteration of data OBU generation.
207 for (auto& [audio_element_id, labeled_samples] : id_to_labeled_samples_) {
208 for (auto& [label, samples] : labeled_samples) {
209 samples.clear();
210 }
211 }
212 }
213
GetInputTimestamp(int32_t & input_timestamp)214 absl::Status IamfEncoder::GetInputTimestamp(int32_t& input_timestamp) {
215 std::optional<int32_t> timestamp;
216 RETURN_IF_NOT_OK(
217 global_timing_module_->GetGlobalAudioFrameTimestamp(timestamp));
218 if (!timestamp.has_value()) {
219 return absl::InvalidArgumentError("Global timestamp has no value");
220 }
221 input_timestamp = *timestamp;
222 return absl::OkStatus();
223 }
224
AddSamples(const DecodedUleb128 audio_element_id,ChannelLabel::Label label,const std::vector<InternalSampleType> & samples)225 void IamfEncoder::AddSamples(const DecodedUleb128 audio_element_id,
226 ChannelLabel::Label label,
227 const std::vector<InternalSampleType>& samples) {
228 if (add_samples_finalized_) {
229 LOG_FIRST_N(WARNING, 3)
230 << "Calling `AddSamples()` after `FinalizeAddSamples()` has no effect; "
231 << samples.size() << " input samples discarded.";
232 return;
233 }
234
235 id_to_labeled_samples_[audio_element_id][label] = samples;
236 }
237
FinalizeAddSamples()238 void IamfEncoder::FinalizeAddSamples() { add_samples_finalized_ = true; }
239
AddParameterBlockMetadata(const iamf_tools_cli_proto::ParameterBlockObuMetadata & parameter_block_metadata)240 absl::Status IamfEncoder::AddParameterBlockMetadata(
241 const iamf_tools_cli_proto::ParameterBlockObuMetadata&
242 parameter_block_metadata) {
243 RETURN_IF_NOT_OK(
244 parameter_block_generator_.AddMetadata(parameter_block_metadata));
245 return absl::OkStatus();
246 }
247
OutputTemporalUnit(std::list<AudioFrameWithData> & audio_frames,std::list<ParameterBlockWithData> & parameter_blocks)248 absl::Status IamfEncoder::OutputTemporalUnit(
249 std::list<AudioFrameWithData>& audio_frames,
250 std::list<ParameterBlockWithData>& parameter_blocks) {
251 audio_frames.clear();
252 parameter_blocks.clear();
253
254 // Generate mix gain and demixing parameter blocks.
255 RETURN_IF_NOT_OK(parameter_block_generator_.GenerateDemixing(
256 *global_timing_module_, temp_demixing_parameter_blocks_));
257 RETURN_IF_NOT_OK(parameter_block_generator_.GenerateMixGain(
258 *global_timing_module_, temp_mix_gain_parameter_blocks_));
259
260 // Add the newly generated demixing parameter blocks to the parameters
261 // manager so they can be easily queried by the audio frame generator.
262 for (const auto& demixing_parameter_block : temp_demixing_parameter_blocks_) {
263 parameters_manager_->AddDemixingParameterBlock(&demixing_parameter_block);
264 }
265
266 for (const auto& [audio_element_id, labeled_samples] :
267 id_to_labeled_samples_) {
268 for (const auto& [label, samples] : labeled_samples) {
269 // Skip adding empty `samples` to the audio frame generator.
270 if (samples.empty()) {
271 continue;
272 }
273 RETURN_IF_NOT_OK(
274 audio_frame_generator_->AddSamples(audio_element_id, label, samples));
275 }
276 }
277
278 if (add_samples_finalized_) {
279 RETURN_IF_NOT_OK(audio_frame_generator_->Finalize());
280 }
281
282 RETURN_IF_NOT_OK(audio_frame_generator_->OutputFrames(audio_frames));
283 if (audio_frames.empty()) {
284 // Some audio codec will only output an encoded frame after the next
285 // frame "pushes" the old one out. So we wait till the next iteration to
286 // retrieve it.
287 return absl::OkStatus();
288 }
289 // All generated audio frame should be in the same temporal unit; they all
290 // have the same timestamps.
291 const InternalTimestamp output_start_timestamp =
292 audio_frames.front().start_timestamp;
293 const InternalTimestamp output_end_timestamp =
294 audio_frames.front().end_timestamp;
295
296 // Decode the audio frames. They are required to determine the demixed
297 // frames.
298 std::list<DecodedAudioFrame> decoded_audio_frames;
299 for (const auto& audio_frame : audio_frames) {
300 auto decoded_audio_frame = audio_frame_decoder_.Decode(audio_frame);
301 if (!decoded_audio_frame.ok()) {
302 return decoded_audio_frame.status();
303 }
304 CHECK_EQ(output_start_timestamp, decoded_audio_frame->start_timestamp);
305 CHECK_EQ(output_end_timestamp, decoded_audio_frame->end_timestamp);
306 decoded_audio_frames.emplace_back(*decoded_audio_frame);
307 }
308
309 // Demix the original and decoded audio frames, differences between them are
310 // useful to compute the recon gain parameters.
311 const auto id_to_labeled_frame =
312 demixing_module_.DemixOriginalAudioSamples(audio_frames);
313 if (!id_to_labeled_frame.ok()) {
314 return id_to_labeled_frame.status();
315 }
316 const auto id_to_labeled_decoded_frame =
317 demixing_module_.DemixDecodedAudioSamples(decoded_audio_frames);
318 if (!id_to_labeled_decoded_frame.ok()) {
319 return id_to_labeled_decoded_frame.status();
320 }
321
322 // Recon gain parameter blocks are generated based on the original and
323 // demixed audio frames.
324 RETURN_IF_NOT_OK(parameter_block_generator_.GenerateReconGain(
325 *id_to_labeled_frame, *id_to_labeled_decoded_frame,
326 *global_timing_module_, temp_recon_gain_parameter_blocks_));
327
328 // Move all generated parameter blocks belonging to this temporal unit to
329 // the output.
330 for (auto* temp_parameter_blocks :
331 {&temp_mix_gain_parameter_blocks_, &temp_demixing_parameter_blocks_,
332 &temp_recon_gain_parameter_blocks_}) {
333 auto last_same_timestamp_iter = std::find_if(
334 temp_parameter_blocks->begin(), temp_parameter_blocks->end(),
335 [output_start_timestamp](const auto& parameter_block) {
336 return parameter_block.start_timestamp > output_start_timestamp;
337 });
338 parameter_blocks.splice(parameter_blocks.end(), *temp_parameter_blocks,
339 temp_parameter_blocks->begin(),
340 last_same_timestamp_iter);
341 }
342
343 return mix_presentation_finalizer_.PushTemporalUnit(
344 *id_to_labeled_frame, output_start_timestamp, output_end_timestamp,
345 parameter_blocks);
346 }
347
348 absl::StatusOr<std::list<MixPresentationObu>>
GetFinalizedMixPresentationObus()349 IamfEncoder::GetFinalizedMixPresentationObus() {
350 if (GeneratingDataObus()) {
351 return absl::FailedPreconditionError(
352 "Cannot finalize mix presentation OBUs while generating data OBUs.");
353 }
354
355 RETURN_IF_NOT_OK(mix_presentation_finalizer_.FinalizePushingTemporalUnits());
356 return mix_presentation_finalizer_.GetFinalizedMixPresentationObus(
357 validate_user_loudness_);
358 }
359
360 } // namespace iamf_tools
361