1 /*
2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12 #include "iamf/cli/encoder_main_lib.h"
13
14 #include <algorithm>
15 #include <cstddef>
16 #include <cstdint>
17 #include <filesystem>
18 #include <list>
19 #include <memory>
20 #include <optional>
21 #include <string>
22 #include <system_error>
23
24 #include "absl/container/flat_hash_map.h"
25 #include "absl/log/log.h"
26 #include "absl/status/status.h"
27 #include "absl/strings/str_cat.h"
28 #include "iamf/cli/audio_element_with_data.h"
29 #include "iamf/cli/audio_frame_with_data.h"
30 #include "iamf/cli/demixing_module.h"
31 #include "iamf/cli/iamf_components.h"
32 #include "iamf/cli/iamf_encoder.h"
33 #include "iamf/cli/obu_sequencer_base.h"
34 #include "iamf/cli/parameter_block_partitioner.h"
35 #include "iamf/cli/parameter_block_with_data.h"
36 #include "iamf/cli/proto/temporal_delimiter.pb.h"
37 #include "iamf/cli/proto/test_vector_metadata.pb.h"
38 #include "iamf/cli/proto/user_metadata.pb.h"
39 #include "iamf/cli/wav_sample_provider.h"
40 #include "iamf/cli/wav_writer.h"
41 #include "iamf/common/utils/macros.h"
42 #include "iamf/obu/arbitrary_obu.h"
43 #include "iamf/obu/codec_config.h"
44 #include "iamf/obu/ia_sequence_header.h"
45 #include "iamf/obu/mix_presentation.h"
46 #include "iamf/obu/types.h"
47 #include "src/google/protobuf/repeated_ptr_field.h"
48
49 namespace iamf_tools {
50
51 namespace {
52
53 using iamf_tools_cli_proto::ParameterBlockObuMetadata;
54 using iamf_tools_cli_proto::UserMetadata;
55
PartitionParameterMetadata(UserMetadata & user_metadata)56 absl::Status PartitionParameterMetadata(UserMetadata& user_metadata) {
57 uint32_t partition_duration = 0;
58 if (user_metadata.ia_sequence_header_metadata().empty() ||
59 user_metadata.codec_config_metadata().empty()) {
60 return absl::InvalidArgumentError(
61 "Determining the partition duration requires at least one "
62 "`ia_sequence_header_metadata` and one `codec_config_metadata`");
63 }
64 std::list<ParameterBlockObuMetadata> partitioned_parameter_blocks;
65 RETURN_IF_NOT_OK(ParameterBlockPartitioner::FindPartitionDuration(
66 user_metadata.ia_sequence_header_metadata(0).primary_profile(),
67 user_metadata.codec_config_metadata(0), partition_duration));
68 for (const auto& parameter_block_metadata :
69 user_metadata.parameter_block_metadata()) {
70 RETURN_IF_NOT_OK(ParameterBlockPartitioner::PartitionFrameAligned(
71 partition_duration, parameter_block_metadata,
72 partitioned_parameter_blocks));
73 }
74
75 // Replace the original parameter block metadata.
76 user_metadata.clear_parameter_block_metadata();
77 for (const auto& partitioned_parameter_block : partitioned_parameter_blocks) {
78 *user_metadata.add_parameter_block_metadata() = partitioned_parameter_block;
79 }
80
81 return absl::OkStatus();
82 }
83
84 // Mapping from the start timestamps to lists of parameter block metadata.
85 typedef absl::flat_hash_map<int32_t, std::list<ParameterBlockObuMetadata>>
86 TimeParameterBlockMetadataMap;
OrganizeParameterBlockMetadata(const google::protobuf::RepeatedPtrField<ParameterBlockObuMetadata> & parameter_block_metadata,TimeParameterBlockMetadataMap & time_parameter_block_metadata)87 absl::Status OrganizeParameterBlockMetadata(
88 const google::protobuf::RepeatedPtrField<ParameterBlockObuMetadata>&
89 parameter_block_metadata,
90 TimeParameterBlockMetadataMap& time_parameter_block_metadata) {
91 for (const auto& metadata : parameter_block_metadata) {
92 time_parameter_block_metadata[metadata.start_timestamp()].push_back(
93 metadata);
94 }
95
96 return absl::OkStatus();
97 }
98
// Reads one round of frames for every audio element.
//
// For each key in `audio_elements`, calls `WavSampleProvider::ReadFrames()`
// with the matching slot of `id_to_labeled_samples` (created on demand) and
// with `no_more_real_samples`. The same flag is handed to every call, so its
// final value is whatever the last call left in it.
//
// Returns the first error reported by the sample provider, or
// `absl::OkStatus()` when every read succeeds.
absl::Status CollectLabeledSamplesForAudioElements(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements,
    WavSampleProvider& wav_sample_provider,
    absl::flat_hash_map<DecodedUleb128, LabelSamplesMap>& id_to_labeled_samples,
    bool& no_more_real_samples) {
  for (const auto& id_and_element : audio_elements) {
    // Only the ID is needed; the audio element itself is not consulted here.
    const DecodedUleb128 element_id = id_and_element.first;
    LabelSamplesMap& samples_for_element = id_to_labeled_samples[element_id];
    RETURN_IF_NOT_OK(wav_sample_provider.ReadFrames(
        element_id, samples_for_element, no_more_real_samples));
  }
  return absl::OkStatus();
}
112
PrintAudioFrames(const std::list<AudioFrameWithData> & audio_frames)113 void PrintAudioFrames(const std::list<AudioFrameWithData>& audio_frames) {
114 // Print the first, last, and any audio frames with `trimming_status_flag`
115 // set.
116 int i = 0;
117 for (const auto& audio_frame_with_data : audio_frames) {
118 if (i == 0 || i == audio_frames.size() - 1 ||
119 audio_frame_with_data.obu.header_.obu_trimming_status_flag) {
120 LOG(INFO) << "Audio Frame OBU[" << i << "]";
121
122 audio_frame_with_data.obu.PrintObu();
123 LOG(INFO) << " audio frame.start_timestamp= "
124 << audio_frame_with_data.start_timestamp;
125 LOG(INFO) << " audio frame.end_timestamp= "
126 << audio_frame_with_data.end_timestamp;
127 }
128 i++;
129 }
130 }
131
CreateOutputDirectory(const std::string & output_directory)132 absl::Status CreateOutputDirectory(const std::string& output_directory) {
133 if (output_directory.empty() ||
134 std::filesystem::is_directory(output_directory) ||
135 std::filesystem::is_character_file(output_directory)) {
136 return absl::OkStatus();
137 }
138
139 std::error_code error_code;
140 if (!std::filesystem::create_directories(output_directory, error_code)) {
141 return absl::UnknownError(
142 absl::StrCat("Failed to create output directory = ", output_directory));
143 }
144
145 return absl::OkStatus();
146 }
147
GenerateTemporalUnitObus(const UserMetadata & user_metadata,const std::string & input_wav_directory,IamfEncoder & iamf_encoder,absl::flat_hash_map<DecodedUleb128,AudioElementWithData> & audio_elements,std::list<AudioFrameWithData> & audio_frames,std::list<ParameterBlockWithData> & parameter_blocks)148 absl::Status GenerateTemporalUnitObus(
149 const UserMetadata& user_metadata, const std::string& input_wav_directory,
150 IamfEncoder& iamf_encoder,
151 absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements,
152 std::list<AudioFrameWithData>& audio_frames,
153 std::list<ParameterBlockWithData>& parameter_blocks) {
154 auto wav_sample_provider =
155 WavSampleProvider::Create(user_metadata.audio_frame_metadata(),
156 input_wav_directory, audio_elements);
157 if (!wav_sample_provider.ok()) {
158 return wav_sample_provider.status();
159 }
160
161 // Parameter blocks.
162 TimeParameterBlockMetadataMap time_parameter_block_metadata;
163 RETURN_IF_NOT_OK(OrganizeParameterBlockMetadata(
164 user_metadata.parameter_block_metadata(), time_parameter_block_metadata));
165
166 // TODO(b/329375123): Make two while loops that run on two threads: one for
167 // adding samples and parameter block metadata, and one for
168 // outputing OBUs.
169 int data_obus_iteration = 0; // Just for logging purposes.
170 while (iamf_encoder.GeneratingDataObus()) {
171 LOG_EVERY_N_SEC(INFO, 5)
172 << "\n\n============================= Generating Data OBUs Iter #"
173 << data_obus_iteration++ << " =============================\n";
174
175 iamf_encoder.BeginTemporalUnit();
176
177 int32_t input_timestamp = 0;
178 RETURN_IF_NOT_OK(iamf_encoder.GetInputTimestamp(input_timestamp));
179
180 // Add audio samples.
181 absl::flat_hash_map<DecodedUleb128, LabelSamplesMap> id_to_labeled_samples;
182 bool no_more_real_samples = false;
183 RETURN_IF_NOT_OK(CollectLabeledSamplesForAudioElements(
184 audio_elements, *wav_sample_provider, id_to_labeled_samples,
185 no_more_real_samples));
186
187 for (const auto& [audio_element_id, labeled_samples] :
188 id_to_labeled_samples) {
189 for (const auto& [channel_label, samples] : labeled_samples) {
190 iamf_encoder.AddSamples(audio_element_id, channel_label, samples);
191 }
192 }
193
194 // In this program we always use up all samples from a WAV file, so we
195 // call `IamfEncoder::FinalizeAddSamples()` only when there is no more
196 // real samples. In other applications, the user may decide to stop adding
197 // audio samples based on other criteria.
198 if (no_more_real_samples) {
199 iamf_encoder.FinalizeAddSamples();
200 }
201
202 // Add parameter block metadata.
203 for (const auto& metadata :
204 time_parameter_block_metadata[input_timestamp]) {
205 RETURN_IF_NOT_OK(iamf_encoder.AddParameterBlockMetadata(metadata));
206 }
207
208 std::list<AudioFrameWithData> temp_audio_frames;
209 std::list<ParameterBlockWithData> temp_parameter_blocks;
210 IdLabeledFrameMap id_to_labeled_frame;
211 RETURN_IF_NOT_OK(iamf_encoder.OutputTemporalUnit(temp_audio_frames,
212 temp_parameter_blocks));
213
214 if (temp_audio_frames.empty()) {
215 // Some audio codec will only output an encoded frame after the next
216 // frame "pushes" the old one out. So we wait till the next iteration to
217 // retrieve it.
218 LOG(INFO) << "No audio frame generated in this iteration; continue.";
219 continue;
220 }
221
222 audio_frames.splice(audio_frames.end(), temp_audio_frames);
223 parameter_blocks.splice(parameter_blocks.end(), temp_parameter_blocks);
224 }
225 LOG(INFO) << "\n============================= END of Generating Data OBUs"
226 << " =============================\n\n";
227 PrintAudioFrames(audio_frames);
228
229 return absl::OkStatus();
230 }
231
232 // TODO(b/390392510): Update control of output wav file bit-depth.
// Translates the requested output wav bit-depth into an optional override.
//
// A request of 0 means "no override" and yields `std::nullopt`. Any other
// request is limited to the inclusive range [16, 32] so that the result is
// something supported by wav files.
std::optional<uint8_t> GetOverrideBitDepth(uint32_t requested_bit_depth) {
  constexpr uint32_t kLowestWavFileBitDepth = 16;
  constexpr uint32_t kHighestWavFileBitDepth = 32;

  if (requested_bit_depth == 0) {
    return std::nullopt;
  }

  uint32_t bit_depth = requested_bit_depth;
  if (bit_depth < kLowestWavFileBitDepth) {
    bit_depth = kLowestWavFileBitDepth;
  } else if (bit_depth > kHighestWavFileBitDepth) {
    bit_depth = kHighestWavFileBitDepth;
  }
  return static_cast<uint8_t>(bit_depth);
}
245
// Hands the complete set of OBUs to every configured OBU sequencer.
//
// The sequencers are obtained from `CreateObuSequencers()` for
// `output_iamf_directory`, with temporal delimiters enabled or disabled
// according to `user_metadata.temporal_delimiter_metadata()`. Each sequencer's
// `PickAndPlace()` is called in turn with all of the descriptor and data OBUs.
//
// Returns the first sequencer error, or `absl::OkStatus()` when all succeed.
absl::Status WriteObus(
    const UserMetadata& user_metadata, const std::string& output_iamf_directory,
    const IASequenceHeaderObu& ia_sequence_header_obu,
    const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
    const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
    const std::list<MixPresentationObu>& mix_presentation_obus,
    const std::list<AudioFrameWithData>& audio_frames,
    const std::list<ParameterBlockWithData>& parameter_blocks,
    const std::list<ArbitraryObu>& arbitrary_obus) {
  const auto& delimiter_metadata = user_metadata.temporal_delimiter_metadata();
  const bool with_temporal_delimiters =
      delimiter_metadata.enable_temporal_delimiters();

  // TODO(b/349271859): Move the OBU sequencer inside `IamfEncoder`.
  auto sequencers = CreateObuSequencers(user_metadata, output_iamf_directory,
                                        with_temporal_delimiters);
  for (auto it = sequencers.begin(); it != sequencers.end(); ++it) {
    RETURN_IF_NOT_OK((*it)->PickAndPlace(
        ia_sequence_header_obu, codec_config_obus, audio_elements,
        mix_presentation_obus, audio_frames, parameter_blocks, arbitrary_obus));
  }

  return absl::OkStatus();
}
269
270 } // namespace
271
TestMain(const UserMetadata & input_user_metadata,const std::string & input_wav_directory,const std::string & output_iamf_directory)272 absl::Status TestMain(const UserMetadata& input_user_metadata,
273 const std::string& input_wav_directory,
274 const std::string& output_iamf_directory) {
275 // Make a copy before modifying.
276 UserMetadata user_metadata(input_user_metadata);
277
278 std::optional<IASequenceHeaderObu> ia_sequence_header_obu;
279 absl::flat_hash_map<uint32_t, CodecConfigObu> codec_config_obus;
280 absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
281 std::list<MixPresentationObu> preliminary_mix_presentation_obus;
282 std::list<AudioFrameWithData> audio_frames;
283 std::list<ParameterBlockWithData> parameter_blocks;
284 std::list<ArbitraryObu> arbitrary_obus;
285
286 // Create output directories.
287 RETURN_IF_NOT_OK(CreateOutputDirectory(output_iamf_directory));
288
289 // Partition parameter block metadata if necessary. This will overwrite
290 // `user_metadata.mutable_parameter_block_metadata()`.
291 if (user_metadata.test_vector_metadata()
292 .partition_mix_gain_parameter_blocks()) {
293 RETURN_IF_NOT_OK(PartitionParameterMetadata(user_metadata));
294 }
295
296 // We want to hold the `IamfEncoder` until all OBUs have been written.
297 // Write the output audio streams which were used to measure loudness to the
298 // same directory as the IAMF file.
299 const std::string output_wav_file_prefix =
300 (std::filesystem::path(output_iamf_directory) /
301 user_metadata.test_vector_metadata().file_name_prefix())
302 .string();
303 const std::optional<uint8_t> override_bit_depth =
304 GetOverrideBitDepth(user_metadata.test_vector_metadata()
305 .output_wav_file_bit_depth_override());
306 LOG(INFO) << "output_wav_file_prefix = " << output_wav_file_prefix;
307 const auto ProduceAllWavWriters =
308 [output_wav_file_prefix, override_bit_depth](
309 DecodedUleb128 mix_presentation_id, int sub_mix_index,
310 int layout_index, const Layout&, int num_channels, int sample_rate,
311 int bit_depth,
312 size_t max_input_samples_per_frame) -> std::unique_ptr<WavWriter> {
313 const auto wav_path = absl::StrCat(
314 output_wav_file_prefix, "_rendered_id_", mix_presentation_id,
315 "_sub_mix_", sub_mix_index, "_layout_", layout_index, ".wav");
316 // Obey the override bit depth. But if it is not set, we can infer a good
317 // bit-depth from the input audio.
318 const uint8_t wav_file_bit_depth = override_bit_depth.value_or(bit_depth);
319 return WavWriter::Create(wav_path, num_channels, sample_rate,
320 wav_file_bit_depth, max_input_samples_per_frame);
321 };
322
323 auto iamf_encoder = IamfEncoder::Create(
324 user_metadata, CreateRendererFactory().get(),
325 CreateLoudnessCalculatorFactory().get(), ProduceAllWavWriters,
326 ia_sequence_header_obu, codec_config_obus, audio_elements,
327 preliminary_mix_presentation_obus, arbitrary_obus);
328 if (!iamf_encoder.ok()) {
329 return iamf_encoder.status();
330 }
331 // Discard the "preliminary" mix presentation OBUs. We only care about the
332 // finalized ones, which are not possible to know until audio encoding is
333 // complete.
334 preliminary_mix_presentation_obus.clear();
335 RETURN_IF_NOT_OK(GenerateTemporalUnitObus(user_metadata, input_wav_directory,
336 *iamf_encoder, audio_elements,
337 audio_frames, parameter_blocks));
338 // Audio encoding is complete. Retrieve the OBUs with have the finalized
339 // loudness information.
340 const auto finalized_mix_presentation_obus =
341 iamf_encoder->GetFinalizedMixPresentationObus();
342 if (!finalized_mix_presentation_obus.ok()) {
343 return finalized_mix_presentation_obus.status();
344 }
345
346 RETURN_IF_NOT_OK(WriteObus(user_metadata, output_iamf_directory,
347 ia_sequence_header_obu.value(), codec_config_obus,
348 audio_elements, *finalized_mix_presentation_obus,
349 audio_frames, parameter_blocks, arbitrary_obus));
350
351 return absl::OkStatus();
352 }
353
354 } // namespace iamf_tools
355