• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 3-Clause Clear License
5  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6  * License was not distributed with this source code in the LICENSE file, you
7  * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8  * Alliance for Open Media Patent License 1.0 was not distributed with this
9  * source code in the PATENTS file, you can obtain it at
10  * www.aomedia.org/license/patent.
11  */
12 #include "iamf/cli/iamf_encoder.h"
13 
14 #include <algorithm>
15 #include <cstdint>
16 #include <list>
17 #include <memory>
18 #include <optional>
19 #include <utility>
20 #include <vector>
21 
22 #include "absl/base/nullability.h"
23 #include "absl/container/flat_hash_map.h"
24 #include "absl/log/check.h"
25 #include "absl/log/log.h"
26 #include "absl/status/status.h"
27 #include "absl/strings/string_view.h"
28 #include "iamf/cli/audio_element_with_data.h"
29 #include "iamf/cli/audio_frame_decoder.h"
30 #include "iamf/cli/audio_frame_with_data.h"
31 #include "iamf/cli/channel_label.h"
32 #include "iamf/cli/cli_util.h"
33 #include "iamf/cli/demixing_module.h"
34 #include "iamf/cli/global_timing_module.h"
35 #include "iamf/cli/loudness_calculator_factory_base.h"
36 #include "iamf/cli/parameter_block_with_data.h"
37 #include "iamf/cli/parameters_manager.h"
38 #include "iamf/cli/proto/encoder_control_metadata.pb.h"
39 #include "iamf/cli/proto/test_vector_metadata.pb.h"
40 #include "iamf/cli/proto/user_metadata.pb.h"
41 #include "iamf/cli/proto_conversion/downmixing_reconstruction_util.h"
42 #include "iamf/cli/proto_conversion/proto_to_obu/arbitrary_obu_generator.h"
43 #include "iamf/cli/proto_conversion/proto_to_obu/audio_element_generator.h"
44 #include "iamf/cli/proto_conversion/proto_to_obu/audio_frame_generator.h"
45 #include "iamf/cli/proto_conversion/proto_to_obu/codec_config_generator.h"
46 #include "iamf/cli/proto_conversion/proto_to_obu/ia_sequence_header_generator.h"
47 #include "iamf/cli/proto_conversion/proto_to_obu/mix_presentation_generator.h"
48 #include "iamf/cli/proto_conversion/proto_to_obu/parameter_block_generator.h"
49 #include "iamf/cli/renderer_factory.h"
50 #include "iamf/cli/rendering_mix_presentation_finalizer.h"
51 #include "iamf/common/utils/macros.h"
52 #include "iamf/obu/arbitrary_obu.h"
53 #include "iamf/obu/codec_config.h"
54 #include "iamf/obu/ia_sequence_header.h"
55 #include "iamf/obu/mix_presentation.h"
56 #include "iamf/obu/param_definition_variant.h"
57 #include "iamf/obu/types.h"
58 
59 namespace iamf_tools {
60 
61 namespace {
62 
// Sets up `audio_frame_decoder` for the substreams of each audio element in
// `audio_elements`.
//
// Audio elements whose `codec_config` is null are ignored. Failures from
// `InitDecodersForSubstreams()` are handled by `RETURN_IF_NOT_OK`; otherwise
// `absl::OkStatus()` is returned after all elements have been visited.
absl::Status InitAudioFrameDecoderForAllAudioElements(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements,
    AudioFrameDecoder& audio_frame_decoder) {
  for (const auto& id_and_element : audio_elements) {
    const auto& element = id_and_element.second;
    // A stray audio element has no codec config, so there is no way to know
    // how its substreams should be decoded.
    const bool is_stray = (element.codec_config == nullptr);
    if (!is_stray) {
      RETURN_IF_NOT_OK(audio_frame_decoder.InitDecodersForSubstreams(
          element.substream_id_to_labels, *element.codec_config));
    }
  }
  return absl::OkStatus();
}
79 
80 }  // namespace
81 
Create(const iamf_tools_cli_proto::UserMetadata & user_metadata,absl::Nullable<const RendererFactoryBase * > renderer_factory,absl::Nullable<const LoudnessCalculatorFactoryBase * > loudness_calculator_factory,const RenderingMixPresentationFinalizer::SampleProcessorFactory & sample_processor_factory,std::optional<IASequenceHeaderObu> & ia_sequence_header_obu,absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus,absl::flat_hash_map<DecodedUleb128,AudioElementWithData> & audio_elements,std::list<MixPresentationObu> & mix_presentation_obus,std::list<ArbitraryObu> & arbitrary_obus)82 absl::StatusOr<IamfEncoder> IamfEncoder::Create(
83     const iamf_tools_cli_proto::UserMetadata& user_metadata,
84     absl::Nullable<const RendererFactoryBase*> renderer_factory,
85     absl::Nullable<const LoudnessCalculatorFactoryBase*>
86         loudness_calculator_factory,
87     const RenderingMixPresentationFinalizer::SampleProcessorFactory&
88         sample_processor_factory,
89     std::optional<IASequenceHeaderObu>& ia_sequence_header_obu,
90     absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
91     absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements,
92     std::list<MixPresentationObu>& mix_presentation_obus,
93     std::list<ArbitraryObu>& arbitrary_obus) {
94   // IA Sequence Header OBU. Only one is allowed.
95   if (user_metadata.ia_sequence_header_metadata_size() != 1) {
96     return absl::InvalidArgumentError(
97         "Only one IA Sequence Header allowed in an IA Sequence.");
98   }
99   IaSequenceHeaderGenerator ia_sequence_header_generator(
100       user_metadata.ia_sequence_header_metadata(0));
101   RETURN_IF_NOT_OK(
102       ia_sequence_header_generator.Generate(ia_sequence_header_obu));
103 
104   // Codec Config OBUs.
105   CodecConfigGenerator codec_config_generator(
106       user_metadata.codec_config_metadata());
107   RETURN_IF_NOT_OK(codec_config_generator.Generate(codec_config_obus));
108 
109   // Audio Element OBUs.
110   AudioElementGenerator audio_element_generator(
111       user_metadata.audio_element_metadata());
112   RETURN_IF_NOT_OK(
113       audio_element_generator.Generate(codec_config_obus, audio_elements));
114 
115   // Generate the majority of Mix Presentation OBUs - loudness will be
116   // calculated later.
117   MixPresentationGenerator mix_presentation_generator(
118       user_metadata.mix_presentation_metadata());
119   RETURN_IF_NOT_OK(mix_presentation_generator.Generate(
120       user_metadata.encoder_control_metadata().add_build_information_tag(),
121       mix_presentation_obus));
122   // Initialize a mix presentation mix presentation finalizer. Requires
123   // rendering data for every submix to accurately compute loudness.
124   auto mix_presentation_finalizer = RenderingMixPresentationFinalizer::Create(
125       renderer_factory, loudness_calculator_factory, audio_elements,
126       sample_processor_factory, mix_presentation_obus);
127   if (!mix_presentation_finalizer.ok()) {
128     return mix_presentation_finalizer.status();
129   }
130 
131   // Generate Arbitrary OBUs.
132   ArbitraryObuGenerator arbitrary_obu_generator(
133       user_metadata.arbitrary_obu_metadata());
134   RETURN_IF_NOT_OK(arbitrary_obu_generator.Generate(arbitrary_obus));
135 
136   // Collect and validate consistency of all `ParamDefinition`s in all
137   // Audio Element and Mix Presentation OBUs.
138   auto param_definition_variants = std::make_unique<
139       absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>>();
140 
141   RETURN_IF_NOT_OK(CollectAndValidateParamDefinitions(
142       audio_elements, mix_presentation_obus, *param_definition_variants));
143 
144   // Initialize the global timing module.
145   auto global_timing_module =
146       GlobalTimingModule::Create(audio_elements, *param_definition_variants);
147   if (global_timing_module == nullptr) {
148     return absl::InvalidArgumentError(
149         "Failed to initialize the global timing module");
150   }
151 
152   // Initialize the parameter block generator.
153   ParameterBlockGenerator parameter_block_generator(
154       user_metadata.test_vector_metadata().override_computed_recon_gains(),
155       *param_definition_variants);
156   RETURN_IF_NOT_OK(parameter_block_generator.Initialize(audio_elements));
157 
158   // Put generated parameter blocks in a manager that supports easier queries.
159   auto parameters_manager = std::make_unique<ParametersManager>(audio_elements);
160   RETURN_IF_NOT_OK(parameters_manager->Initialize());
161 
162   // Down-mix the audio samples and then demix audio samples while decoding
163   // them. This is useful to create multi-layer audio elements and to determine
164   // the recon gain parameters and to measuring loudness.
165   const absl::StatusOr<absl::flat_hash_map<
166       DecodedUleb128, DemixingModule::DownmixingAndReconstructionConfig>>
167       audio_element_id_to_demixing_metadata =
168           CreateAudioElementIdToDemixingMetadata(user_metadata, audio_elements);
169   if (!audio_element_id_to_demixing_metadata.ok()) {
170     return audio_element_id_to_demixing_metadata.status();
171   }
172   auto demixing_module = DemixingModule::CreateForDownMixingAndReconstruction(
173       *std::move(audio_element_id_to_demixing_metadata));
174   if (!demixing_module.ok()) {
175     return demixing_module.status();
176   }
177 
178   auto audio_frame_generator = std::make_unique<AudioFrameGenerator>(
179       user_metadata.audio_frame_metadata(),
180       user_metadata.codec_config_metadata(), audio_elements, *demixing_module,
181       *parameters_manager, *global_timing_module);
182   RETURN_IF_NOT_OK(audio_frame_generator->Initialize());
183 
184   // Initialize the audio frame decoder. It is needed to determine the recon
185   // gain parameters and measure the loudness of the mixes.
186   AudioFrameDecoder audio_frame_decoder;
187   RETURN_IF_NOT_OK(InitAudioFrameDecoderForAllAudioElements(
188       audio_elements, audio_frame_decoder));
189 
190   return IamfEncoder(
191       user_metadata.test_vector_metadata().validate_user_loudness(),
192       std::move(param_definition_variants),
193       std::move(parameter_block_generator), std::move(parameters_manager),
194       *demixing_module, std::move(audio_frame_generator),
195       std::move(audio_frame_decoder), std::move(global_timing_module),
196       std::move(*mix_presentation_finalizer));
197 }
198 
GeneratingDataObus() const199 bool IamfEncoder::GeneratingDataObus() const {
200   return (audio_frame_generator_ != nullptr) &&
201          (audio_frame_generator_->TakingSamples() ||
202           audio_frame_generator_->GeneratingFrames());
203 }
204 
BeginTemporalUnit()205 void IamfEncoder::BeginTemporalUnit() {
206   // Clear cached samples for this iteration of data OBU generation.
207   for (auto& [audio_element_id, labeled_samples] : id_to_labeled_samples_) {
208     for (auto& [label, samples] : labeled_samples) {
209       samples.clear();
210     }
211   }
212 }
213 
GetInputTimestamp(int32_t & input_timestamp)214 absl::Status IamfEncoder::GetInputTimestamp(int32_t& input_timestamp) {
215   std::optional<int32_t> timestamp;
216   RETURN_IF_NOT_OK(
217       global_timing_module_->GetGlobalAudioFrameTimestamp(timestamp));
218   if (!timestamp.has_value()) {
219     return absl::InvalidArgumentError("Global timestamp has no value");
220   }
221   input_timestamp = *timestamp;
222   return absl::OkStatus();
223 }
224 
AddSamples(const DecodedUleb128 audio_element_id,ChannelLabel::Label label,const std::vector<InternalSampleType> & samples)225 void IamfEncoder::AddSamples(const DecodedUleb128 audio_element_id,
226                              ChannelLabel::Label label,
227                              const std::vector<InternalSampleType>& samples) {
228   if (add_samples_finalized_) {
229     LOG_FIRST_N(WARNING, 3)
230         << "Calling `AddSamples()` after `FinalizeAddSamples()` has no effect; "
231         << samples.size() << " input samples discarded.";
232     return;
233   }
234 
235   id_to_labeled_samples_[audio_element_id][label] = samples;
236 }
237 
FinalizeAddSamples()238 void IamfEncoder::FinalizeAddSamples() { add_samples_finalized_ = true; }
239 
AddParameterBlockMetadata(const iamf_tools_cli_proto::ParameterBlockObuMetadata & parameter_block_metadata)240 absl::Status IamfEncoder::AddParameterBlockMetadata(
241     const iamf_tools_cli_proto::ParameterBlockObuMetadata&
242         parameter_block_metadata) {
243   RETURN_IF_NOT_OK(
244       parameter_block_generator_.AddMetadata(parameter_block_metadata));
245   return absl::OkStatus();
246 }
247 
OutputTemporalUnit(std::list<AudioFrameWithData> & audio_frames,std::list<ParameterBlockWithData> & parameter_blocks)248 absl::Status IamfEncoder::OutputTemporalUnit(
249     std::list<AudioFrameWithData>& audio_frames,
250     std::list<ParameterBlockWithData>& parameter_blocks) {
251   audio_frames.clear();
252   parameter_blocks.clear();
253 
254   // Generate mix gain and demixing parameter blocks.
255   RETURN_IF_NOT_OK(parameter_block_generator_.GenerateDemixing(
256       *global_timing_module_, temp_demixing_parameter_blocks_));
257   RETURN_IF_NOT_OK(parameter_block_generator_.GenerateMixGain(
258       *global_timing_module_, temp_mix_gain_parameter_blocks_));
259 
260   // Add the newly generated demixing parameter blocks to the parameters
261   // manager so they can be easily queried by the audio frame generator.
262   for (const auto& demixing_parameter_block : temp_demixing_parameter_blocks_) {
263     parameters_manager_->AddDemixingParameterBlock(&demixing_parameter_block);
264   }
265 
266   for (const auto& [audio_element_id, labeled_samples] :
267        id_to_labeled_samples_) {
268     for (const auto& [label, samples] : labeled_samples) {
269       // Skip adding empty `samples` to the audio frame generator.
270       if (samples.empty()) {
271         continue;
272       }
273       RETURN_IF_NOT_OK(
274           audio_frame_generator_->AddSamples(audio_element_id, label, samples));
275     }
276   }
277 
278   if (add_samples_finalized_) {
279     RETURN_IF_NOT_OK(audio_frame_generator_->Finalize());
280   }
281 
282   RETURN_IF_NOT_OK(audio_frame_generator_->OutputFrames(audio_frames));
283   if (audio_frames.empty()) {
284     // Some audio codec will only output an encoded frame after the next
285     // frame "pushes" the old one out. So we wait till the next iteration to
286     // retrieve it.
287     return absl::OkStatus();
288   }
289   // All generated audio frame should be in the same temporal unit; they all
290   // have the same timestamps.
291   const InternalTimestamp output_start_timestamp =
292       audio_frames.front().start_timestamp;
293   const InternalTimestamp output_end_timestamp =
294       audio_frames.front().end_timestamp;
295 
296   // Decode the audio frames. They are required to determine the demixed
297   // frames.
298   std::list<DecodedAudioFrame> decoded_audio_frames;
299   for (const auto& audio_frame : audio_frames) {
300     auto decoded_audio_frame = audio_frame_decoder_.Decode(audio_frame);
301     if (!decoded_audio_frame.ok()) {
302       return decoded_audio_frame.status();
303     }
304     CHECK_EQ(output_start_timestamp, decoded_audio_frame->start_timestamp);
305     CHECK_EQ(output_end_timestamp, decoded_audio_frame->end_timestamp);
306     decoded_audio_frames.emplace_back(*decoded_audio_frame);
307   }
308 
309   // Demix the original and decoded audio frames, differences between them are
310   // useful to compute the recon gain parameters.
311   const auto id_to_labeled_frame =
312       demixing_module_.DemixOriginalAudioSamples(audio_frames);
313   if (!id_to_labeled_frame.ok()) {
314     return id_to_labeled_frame.status();
315   }
316   const auto id_to_labeled_decoded_frame =
317       demixing_module_.DemixDecodedAudioSamples(decoded_audio_frames);
318   if (!id_to_labeled_decoded_frame.ok()) {
319     return id_to_labeled_decoded_frame.status();
320   }
321 
322   // Recon gain parameter blocks are generated based on the original and
323   // demixed audio frames.
324   RETURN_IF_NOT_OK(parameter_block_generator_.GenerateReconGain(
325       *id_to_labeled_frame, *id_to_labeled_decoded_frame,
326       *global_timing_module_, temp_recon_gain_parameter_blocks_));
327 
328   // Move all generated parameter blocks belonging to this temporal unit to
329   // the output.
330   for (auto* temp_parameter_blocks :
331        {&temp_mix_gain_parameter_blocks_, &temp_demixing_parameter_blocks_,
332         &temp_recon_gain_parameter_blocks_}) {
333     auto last_same_timestamp_iter = std::find_if(
334         temp_parameter_blocks->begin(), temp_parameter_blocks->end(),
335         [output_start_timestamp](const auto& parameter_block) {
336           return parameter_block.start_timestamp > output_start_timestamp;
337         });
338     parameter_blocks.splice(parameter_blocks.end(), *temp_parameter_blocks,
339                             temp_parameter_blocks->begin(),
340                             last_same_timestamp_iter);
341   }
342 
343   return mix_presentation_finalizer_.PushTemporalUnit(
344       *id_to_labeled_frame, output_start_timestamp, output_end_timestamp,
345       parameter_blocks);
346 }
347 
348 absl::StatusOr<std::list<MixPresentationObu>>
GetFinalizedMixPresentationObus()349 IamfEncoder::GetFinalizedMixPresentationObus() {
350   if (GeneratingDataObus()) {
351     return absl::FailedPreconditionError(
352         "Cannot finalize mix presentation OBUs while generating data OBUs.");
353   }
354 
355   RETURN_IF_NOT_OK(mix_presentation_finalizer_.FinalizePushingTemporalUnits());
356   return mix_presentation_finalizer_.GetFinalizedMixPresentationObus(
357       validate_user_loudness_);
358 }
359 
360 }  // namespace iamf_tools
361