• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 3-Clause Clear License
5  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6  * License was not distributed with this source code in the LICENSE file, you
7  * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8  * Alliance for Open Media Patent License 1.0 was not distributed with this
9  * source code in the PATENTS file, you can obtain it at
10  * www.aomedia.org/license/patent.
11  */
12 #include "iamf/cli/encoder_main_lib.h"
13 
14 #include <algorithm>
15 #include <cstddef>
16 #include <cstdint>
17 #include <filesystem>
18 #include <list>
19 #include <memory>
20 #include <optional>
21 #include <string>
22 #include <system_error>
23 
24 #include "absl/container/flat_hash_map.h"
25 #include "absl/log/log.h"
26 #include "absl/status/status.h"
27 #include "absl/strings/str_cat.h"
28 #include "iamf/cli/audio_element_with_data.h"
29 #include "iamf/cli/audio_frame_with_data.h"
30 #include "iamf/cli/demixing_module.h"
31 #include "iamf/cli/iamf_components.h"
32 #include "iamf/cli/iamf_encoder.h"
33 #include "iamf/cli/obu_sequencer_base.h"
34 #include "iamf/cli/parameter_block_partitioner.h"
35 #include "iamf/cli/parameter_block_with_data.h"
36 #include "iamf/cli/proto/temporal_delimiter.pb.h"
37 #include "iamf/cli/proto/test_vector_metadata.pb.h"
38 #include "iamf/cli/proto/user_metadata.pb.h"
39 #include "iamf/cli/wav_sample_provider.h"
40 #include "iamf/cli/wav_writer.h"
41 #include "iamf/common/utils/macros.h"
42 #include "iamf/obu/arbitrary_obu.h"
43 #include "iamf/obu/codec_config.h"
44 #include "iamf/obu/ia_sequence_header.h"
45 #include "iamf/obu/mix_presentation.h"
46 #include "iamf/obu/types.h"
47 #include "src/google/protobuf/repeated_ptr_field.h"
48 
49 namespace iamf_tools {
50 
51 namespace {
52 
53 using iamf_tools_cli_proto::ParameterBlockObuMetadata;
54 using iamf_tools_cli_proto::UserMetadata;
55 
PartitionParameterMetadata(UserMetadata & user_metadata)56 absl::Status PartitionParameterMetadata(UserMetadata& user_metadata) {
57   uint32_t partition_duration = 0;
58   if (user_metadata.ia_sequence_header_metadata().empty() ||
59       user_metadata.codec_config_metadata().empty()) {
60     return absl::InvalidArgumentError(
61         "Determining the partition duration requires at least one "
62         "`ia_sequence_header_metadata` and one `codec_config_metadata`");
63   }
64   std::list<ParameterBlockObuMetadata> partitioned_parameter_blocks;
65   RETURN_IF_NOT_OK(ParameterBlockPartitioner::FindPartitionDuration(
66       user_metadata.ia_sequence_header_metadata(0).primary_profile(),
67       user_metadata.codec_config_metadata(0), partition_duration));
68   for (const auto& parameter_block_metadata :
69        user_metadata.parameter_block_metadata()) {
70     RETURN_IF_NOT_OK(ParameterBlockPartitioner::PartitionFrameAligned(
71         partition_duration, parameter_block_metadata,
72         partitioned_parameter_blocks));
73   }
74 
75   // Replace the original parameter block metadata.
76   user_metadata.clear_parameter_block_metadata();
77   for (const auto& partitioned_parameter_block : partitioned_parameter_blocks) {
78     *user_metadata.add_parameter_block_metadata() = partitioned_parameter_block;
79   }
80 
81   return absl::OkStatus();
82 }
83 
84 // Mapping from the start timestamps to lists of parameter block metadata.
85 typedef absl::flat_hash_map<int32_t, std::list<ParameterBlockObuMetadata>>
86     TimeParameterBlockMetadataMap;
OrganizeParameterBlockMetadata(const google::protobuf::RepeatedPtrField<ParameterBlockObuMetadata> & parameter_block_metadata,TimeParameterBlockMetadataMap & time_parameter_block_metadata)87 absl::Status OrganizeParameterBlockMetadata(
88     const google::protobuf::RepeatedPtrField<ParameterBlockObuMetadata>&
89         parameter_block_metadata,
90     TimeParameterBlockMetadataMap& time_parameter_block_metadata) {
91   for (const auto& metadata : parameter_block_metadata) {
92     time_parameter_block_metadata[metadata.start_timestamp()].push_back(
93         metadata);
94   }
95 
96   return absl::OkStatus();
97 }
98 
// Reads one batch of frames from `wav_sample_provider` for every audio element.
//
// For each ID in `audio_elements`, `WavSampleProvider::ReadFrames` fills the
// entry of `id_to_labeled_samples` keyed by that ID (created if absent) and
// updates `no_more_real_samples`. Each read is wrapped in `RETURN_IF_NOT_OK`.
absl::Status CollectLabeledSamplesForAudioElements(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements,
    WavSampleProvider& wav_sample_provider,
    absl::flat_hash_map<DecodedUleb128, LabelSamplesMap>& id_to_labeled_samples,
    bool& no_more_real_samples) {
  for (const auto& id_and_element : audio_elements) {
    // Only the ID is needed; the audio element itself is not inspected here.
    const auto& element_id = id_and_element.first;
    auto& labeled_samples = id_to_labeled_samples[element_id];
    RETURN_IF_NOT_OK(wav_sample_provider.ReadFrames(element_id, labeled_samples,
                                                    no_more_real_samples));
  }
  return absl::OkStatus();
}
112 
PrintAudioFrames(const std::list<AudioFrameWithData> & audio_frames)113 void PrintAudioFrames(const std::list<AudioFrameWithData>& audio_frames) {
114   // Print the first, last, and any audio frames with `trimming_status_flag`
115   // set.
116   int i = 0;
117   for (const auto& audio_frame_with_data : audio_frames) {
118     if (i == 0 || i == audio_frames.size() - 1 ||
119         audio_frame_with_data.obu.header_.obu_trimming_status_flag) {
120       LOG(INFO) << "Audio Frame OBU[" << i << "]";
121 
122       audio_frame_with_data.obu.PrintObu();
123       LOG(INFO) << "    audio frame.start_timestamp= "
124                 << audio_frame_with_data.start_timestamp;
125       LOG(INFO) << "    audio frame.end_timestamp= "
126                 << audio_frame_with_data.end_timestamp;
127     }
128     i++;
129   }
130 }
131 
CreateOutputDirectory(const std::string & output_directory)132 absl::Status CreateOutputDirectory(const std::string& output_directory) {
133   if (output_directory.empty() ||
134       std::filesystem::is_directory(output_directory) ||
135       std::filesystem::is_character_file(output_directory)) {
136     return absl::OkStatus();
137   }
138 
139   std::error_code error_code;
140   if (!std::filesystem::create_directories(output_directory, error_code)) {
141     return absl::UnknownError(
142         absl::StrCat("Failed to create output directory = ", output_directory));
143   }
144 
145   return absl::OkStatus();
146 }
147 
GenerateTemporalUnitObus(const UserMetadata & user_metadata,const std::string & input_wav_directory,IamfEncoder & iamf_encoder,absl::flat_hash_map<DecodedUleb128,AudioElementWithData> & audio_elements,std::list<AudioFrameWithData> & audio_frames,std::list<ParameterBlockWithData> & parameter_blocks)148 absl::Status GenerateTemporalUnitObus(
149     const UserMetadata& user_metadata, const std::string& input_wav_directory,
150     IamfEncoder& iamf_encoder,
151     absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements,
152     std::list<AudioFrameWithData>& audio_frames,
153     std::list<ParameterBlockWithData>& parameter_blocks) {
154   auto wav_sample_provider =
155       WavSampleProvider::Create(user_metadata.audio_frame_metadata(),
156                                 input_wav_directory, audio_elements);
157   if (!wav_sample_provider.ok()) {
158     return wav_sample_provider.status();
159   }
160 
161   // Parameter blocks.
162   TimeParameterBlockMetadataMap time_parameter_block_metadata;
163   RETURN_IF_NOT_OK(OrganizeParameterBlockMetadata(
164       user_metadata.parameter_block_metadata(), time_parameter_block_metadata));
165 
166   // TODO(b/329375123): Make two while loops that run on two threads: one for
167   //                    adding samples and parameter block metadata, and one for
168   //                    outputing OBUs.
169   int data_obus_iteration = 0;  // Just for logging purposes.
170   while (iamf_encoder.GeneratingDataObus()) {
171     LOG_EVERY_N_SEC(INFO, 5)
172         << "\n\n============================= Generating Data OBUs Iter #"
173         << data_obus_iteration++ << " =============================\n";
174 
175     iamf_encoder.BeginTemporalUnit();
176 
177     int32_t input_timestamp = 0;
178     RETURN_IF_NOT_OK(iamf_encoder.GetInputTimestamp(input_timestamp));
179 
180     // Add audio samples.
181     absl::flat_hash_map<DecodedUleb128, LabelSamplesMap> id_to_labeled_samples;
182     bool no_more_real_samples = false;
183     RETURN_IF_NOT_OK(CollectLabeledSamplesForAudioElements(
184         audio_elements, *wav_sample_provider, id_to_labeled_samples,
185         no_more_real_samples));
186 
187     for (const auto& [audio_element_id, labeled_samples] :
188          id_to_labeled_samples) {
189       for (const auto& [channel_label, samples] : labeled_samples) {
190         iamf_encoder.AddSamples(audio_element_id, channel_label, samples);
191       }
192     }
193 
194     // In this program we always use up all samples from a WAV file, so we
195     // call `IamfEncoder::FinalizeAddSamples()` only when there is no more
196     // real samples. In other applications, the user may decide to stop adding
197     // audio samples based on other criteria.
198     if (no_more_real_samples) {
199       iamf_encoder.FinalizeAddSamples();
200     }
201 
202     // Add parameter block metadata.
203     for (const auto& metadata :
204          time_parameter_block_metadata[input_timestamp]) {
205       RETURN_IF_NOT_OK(iamf_encoder.AddParameterBlockMetadata(metadata));
206     }
207 
208     std::list<AudioFrameWithData> temp_audio_frames;
209     std::list<ParameterBlockWithData> temp_parameter_blocks;
210     IdLabeledFrameMap id_to_labeled_frame;
211     RETURN_IF_NOT_OK(iamf_encoder.OutputTemporalUnit(temp_audio_frames,
212                                                      temp_parameter_blocks));
213 
214     if (temp_audio_frames.empty()) {
215       // Some audio codec will only output an encoded frame after the next
216       // frame "pushes" the old one out. So we wait till the next iteration to
217       // retrieve it.
218       LOG(INFO) << "No audio frame generated in this iteration; continue.";
219       continue;
220     }
221 
222     audio_frames.splice(audio_frames.end(), temp_audio_frames);
223     parameter_blocks.splice(parameter_blocks.end(), temp_parameter_blocks);
224   }
225   LOG(INFO) << "\n============================= END of Generating Data OBUs"
226             << " =============================\n\n";
227   PrintAudioFrames(audio_frames);
228 
229   return absl::OkStatus();
230 }
231 
// TODO(b/390392510): Update control of output wav file bit-depth.
//
// Converts the user-requested output bit-depth into an optional override.
//
// A request of 0 means "no override" and yields `std::nullopt`. Any other
// request is limited to the [16, 32] range before being returned.
std::optional<uint8_t> GetOverrideBitDepth(uint32_t requested_bit_depth) {
  // Bounds of the bit-depths accepted for the output wav files.
  constexpr uint32_t kLowestWavBitDepth = 16;
  constexpr uint32_t kHighestWavBitDepth = 32;

  std::optional<uint8_t> override_bit_depth;
  if (requested_bit_depth != 0) {
    const uint32_t at_least_lowest =
        std::max(requested_bit_depth, kLowestWavBitDepth);
    override_bit_depth =
        static_cast<uint8_t>(std::min(at_least_lowest, kHighestWavBitDepth));
  }
  return override_bit_depth;
}
245 
// Hands all descriptor and data OBUs to every OBU sequencer created for
// `user_metadata` and `output_iamf_directory`.
//
// Temporal delimiters are requested from the sequencers when
// `enable_temporal_delimiters` is set in the user's temporal delimiter
// metadata. Each `PickAndPlace` call is wrapped in `RETURN_IF_NOT_OK`.
absl::Status WriteObus(
    const UserMetadata& user_metadata, const std::string& output_iamf_directory,
    const IASequenceHeaderObu& ia_sequence_header_obu,
    const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
    const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
    const std::list<MixPresentationObu>& mix_presentation_obus,
    const std::list<AudioFrameWithData>& audio_frames,
    const std::list<ParameterBlockWithData>& parameter_blocks,
    const std::list<ArbitraryObu>& arbitrary_obus) {
  const auto& delimiter_metadata = user_metadata.temporal_delimiter_metadata();
  const bool use_temporal_delimiters =
      delimiter_metadata.enable_temporal_delimiters();

  // TODO(b/349271859): Move the OBU sequencer inside `IamfEncoder`.
  auto sequencers = CreateObuSequencers(user_metadata, output_iamf_directory,
                                        use_temporal_delimiters);
  for (auto& sequencer : sequencers) {
    RETURN_IF_NOT_OK(sequencer->PickAndPlace(
        ia_sequence_header_obu, codec_config_obus, audio_elements,
        mix_presentation_obus, audio_frames, parameter_blocks, arbitrary_obus));
  }
  return absl::OkStatus();
}
269 
270 }  // namespace
271 
TestMain(const UserMetadata & input_user_metadata,const std::string & input_wav_directory,const std::string & output_iamf_directory)272 absl::Status TestMain(const UserMetadata& input_user_metadata,
273                       const std::string& input_wav_directory,
274                       const std::string& output_iamf_directory) {
275   // Make a copy before modifying.
276   UserMetadata user_metadata(input_user_metadata);
277 
278   std::optional<IASequenceHeaderObu> ia_sequence_header_obu;
279   absl::flat_hash_map<uint32_t, CodecConfigObu> codec_config_obus;
280   absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
281   std::list<MixPresentationObu> preliminary_mix_presentation_obus;
282   std::list<AudioFrameWithData> audio_frames;
283   std::list<ParameterBlockWithData> parameter_blocks;
284   std::list<ArbitraryObu> arbitrary_obus;
285 
286   // Create output directories.
287   RETURN_IF_NOT_OK(CreateOutputDirectory(output_iamf_directory));
288 
289   // Partition parameter block metadata if necessary. This will overwrite
290   // `user_metadata.mutable_parameter_block_metadata()`.
291   if (user_metadata.test_vector_metadata()
292           .partition_mix_gain_parameter_blocks()) {
293     RETURN_IF_NOT_OK(PartitionParameterMetadata(user_metadata));
294   }
295 
296   // We want to hold the `IamfEncoder` until all OBUs have been written.
297   // Write the output audio streams which were used to measure loudness to the
298   // same directory as the IAMF file.
299   const std::string output_wav_file_prefix =
300       (std::filesystem::path(output_iamf_directory) /
301        user_metadata.test_vector_metadata().file_name_prefix())
302           .string();
303   const std::optional<uint8_t> override_bit_depth =
304       GetOverrideBitDepth(user_metadata.test_vector_metadata()
305                               .output_wav_file_bit_depth_override());
306   LOG(INFO) << "output_wav_file_prefix = " << output_wav_file_prefix;
307   const auto ProduceAllWavWriters =
308       [output_wav_file_prefix, override_bit_depth](
309           DecodedUleb128 mix_presentation_id, int sub_mix_index,
310           int layout_index, const Layout&, int num_channels, int sample_rate,
311           int bit_depth,
312           size_t max_input_samples_per_frame) -> std::unique_ptr<WavWriter> {
313     const auto wav_path = absl::StrCat(
314         output_wav_file_prefix, "_rendered_id_", mix_presentation_id,
315         "_sub_mix_", sub_mix_index, "_layout_", layout_index, ".wav");
316     // Obey the override bit depth. But if it is not set, we can infer a good
317     // bit-depth from the input audio.
318     const uint8_t wav_file_bit_depth = override_bit_depth.value_or(bit_depth);
319     return WavWriter::Create(wav_path, num_channels, sample_rate,
320                              wav_file_bit_depth, max_input_samples_per_frame);
321   };
322 
323   auto iamf_encoder = IamfEncoder::Create(
324       user_metadata, CreateRendererFactory().get(),
325       CreateLoudnessCalculatorFactory().get(), ProduceAllWavWriters,
326       ia_sequence_header_obu, codec_config_obus, audio_elements,
327       preliminary_mix_presentation_obus, arbitrary_obus);
328   if (!iamf_encoder.ok()) {
329     return iamf_encoder.status();
330   }
331   // Discard the "preliminary" mix presentation OBUs. We only care about the
332   // finalized ones, which are not possible to know until audio encoding is
333   // complete.
334   preliminary_mix_presentation_obus.clear();
335   RETURN_IF_NOT_OK(GenerateTemporalUnitObus(user_metadata, input_wav_directory,
336                                             *iamf_encoder, audio_elements,
337                                             audio_frames, parameter_blocks));
338   // Audio encoding is complete. Retrieve the OBUs with have the finalized
339   // loudness information.
340   const auto finalized_mix_presentation_obus =
341       iamf_encoder->GetFinalizedMixPresentationObus();
342   if (!finalized_mix_presentation_obus.ok()) {
343     return finalized_mix_presentation_obus.status();
344   }
345 
346   RETURN_IF_NOT_OK(WriteObus(user_metadata, output_iamf_directory,
347                              ia_sequence_header_obu.value(), codec_config_obus,
348                              audio_elements, *finalized_mix_presentation_obus,
349                              audio_frames, parameter_blocks, arbitrary_obus));
350 
351   return absl::OkStatus();
352 }
353 
354 }  // namespace iamf_tools
355