1 /*
2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12 #include "iamf/cli/tests/cli_test_utils.h"
13
14 #include <algorithm>
15 #include <cmath>
16 #include <cstddef>
17 #include <cstdint>
18 #include <filesystem>
19 #include <fstream>
20 #include <ios>
21 #include <limits>
22 #include <list>
23 #include <memory>
24 #include <numbers>
25 #include <numeric>
26 #include <optional>
27 #include <string>
28 #include <system_error>
29 #include <utility>
30 #include <vector>
31
32 #include "absl/container/flat_hash_map.h"
33 #include "absl/log/check.h"
34 #include "absl/log/log.h"
35 #include "absl/status/status.h"
36 #include "absl/status/status_matchers.h"
37 #include "absl/status/statusor.h"
38 #include "absl/strings/str_cat.h"
39 #include "absl/strings/str_replace.h"
40 #include "absl/strings/string_view.h"
41 #include "absl/types/span.h"
42 #include "gmock/gmock.h"
43 #include "gtest/gtest.h"
44 #include "iamf/cli/audio_element_with_data.h"
45 #include "iamf/cli/audio_frame_with_data.h"
46 #include "iamf/cli/demixing_module.h"
47 #include "iamf/cli/obu_processor.h"
48 #include "iamf/cli/obu_with_data_generator.h"
49 #include "iamf/cli/parameter_block_with_data.h"
50 #include "iamf/cli/proto/mix_presentation.pb.h"
51 #include "iamf/cli/proto/user_metadata.pb.h"
52 #include "iamf/cli/proto_conversion/proto_to_obu/audio_element_generator.h"
53 #include "iamf/cli/proto_conversion/proto_to_obu/mix_presentation_generator.h"
54 #include "iamf/cli/renderer/audio_element_renderer_base.h"
55 #include "iamf/cli/user_metadata_builder/audio_element_metadata_builder.h"
56 #include "iamf/cli/user_metadata_builder/iamf_input_layout.h"
57 #include "iamf/cli/wav_reader.h"
58 #include "iamf/common/leb_generator.h"
59 #include "iamf/common/read_bit_buffer.h"
60 #include "iamf/common/utils/macros.h"
61 #include "iamf/common/write_bit_buffer.h"
62 #include "iamf/obu/audio_element.h"
63 #include "iamf/obu/codec_config.h"
64 #include "iamf/obu/decoder_config/aac_decoder_config.h"
65 #include "iamf/obu/decoder_config/flac_decoder_config.h"
66 #include "iamf/obu/decoder_config/lpcm_decoder_config.h"
67 #include "iamf/obu/decoder_config/opus_decoder_config.h"
68 #include "iamf/obu/demixing_info_parameter_data.h"
69 #include "iamf/obu/demixing_param_definition.h"
70 #include "iamf/obu/ia_sequence_header.h"
71 #include "iamf/obu/mix_presentation.h"
72 #include "iamf/obu/obu_base.h"
73 #include "iamf/obu/obu_header.h"
74 #include "iamf/obu/param_definitions.h"
75 #include "iamf/obu/types.h"
76 #include "src/google/protobuf/io/zero_copy_stream_impl.h"
77 #include "src/google/protobuf/repeated_ptr_field.h"
78 #include "src/google/protobuf/text_format.h"
79
80 namespace iamf_tools {
81
82 namespace {
83
// Argument passed to `CodecConfigObu::Initialize()` by the codec config helpers
// in this file.
constexpr bool kOverrideAudioRollDistance{true};
85
SetParamDefinitionCommonFields(DecodedUleb128 parameter_id,DecodedUleb128 parameter_rate,DecodedUleb128 duration,ParamDefinition & param_definition)86 void SetParamDefinitionCommonFields(DecodedUleb128 parameter_id,
87 DecodedUleb128 parameter_rate,
88 DecodedUleb128 duration,
89 ParamDefinition& param_definition) {
90 param_definition.parameter_id_ = parameter_id;
91 param_definition.parameter_rate_ = parameter_rate;
92 param_definition.param_definition_mode_ = 0;
93 param_definition.reserved_ = 0;
94 param_definition.duration_ = duration;
95 param_definition.constant_subblock_duration_ = duration;
96 }
97
98 template <typename ParamDefinitionType>
AddParamDefinition(DecodedUleb128 parameter_id,DecodedUleb128 parameter_rate,DecodedUleb128 duration,AudioElementObu & audio_element_obu,ParamDefinitionType & param_definition)99 void AddParamDefinition(DecodedUleb128 parameter_id,
100 DecodedUleb128 parameter_rate, DecodedUleb128 duration,
101 AudioElementObu& audio_element_obu,
102 ParamDefinitionType& param_definition) {
103 SetParamDefinitionCommonFields(parameter_id, parameter_rate, duration,
104 param_definition);
105
106 // Add to the Audio Element OBU.
107 audio_element_obu.InitializeParams(audio_element_obu.num_parameters_ + 1);
108 audio_element_obu.audio_element_params_.emplace_back(
109 AudioElementParam{param_definition});
110 }
111
112 } // namespace
113
114 using ::absl_testing::IsOk;
115
CollectObusFromIaSequence(ReadBitBuffer & read_bit_buffer,IASequenceHeaderObu & ia_sequence_header,absl::flat_hash_map<DecodedUleb128,CodecConfigObu> & codec_config_obus,absl::flat_hash_map<DecodedUleb128,AudioElementWithData> & audio_elements,std::list<MixPresentationObu> & mix_presentations,std::list<AudioFrameWithData> & audio_frames,std::list<ParameterBlockWithData> & parameter_blocks)116 absl::Status CollectObusFromIaSequence(
117 ReadBitBuffer& read_bit_buffer, IASequenceHeaderObu& ia_sequence_header,
118 absl::flat_hash_map<DecodedUleb128, CodecConfigObu>& codec_config_obus,
119 absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements,
120 std::list<MixPresentationObu>& mix_presentations,
121 std::list<AudioFrameWithData>& audio_frames,
122 std::list<ParameterBlockWithData>& parameter_blocks) {
123 bool insufficient_data = false;
124 auto obu_processor = ObuProcessor::Create(
125 /*is_exhaustive_and_exact=*/false, &read_bit_buffer, insufficient_data);
126 EXPECT_FALSE(insufficient_data);
127
128 bool continue_processing = true;
129 int temporal_unit_count = 0;
130 LOG(INFO) << "Starting Temporal Unit OBU processing";
131 while (continue_processing) {
132 std::optional<ObuProcessor::OutputTemporalUnit> output_temporal_unit;
133 RETURN_IF_NOT_OK(obu_processor->ProcessTemporalUnit(
134 /*eos_is_end_of_sequence=*/true, output_temporal_unit,
135 continue_processing));
136 audio_frames.splice(audio_frames.end(),
137 output_temporal_unit->output_audio_frames);
138 parameter_blocks.splice(parameter_blocks.end(),
139 output_temporal_unit->output_parameter_blocks);
140 temporal_unit_count++;
141 }
142 LOG(INFO) << "Processed " << temporal_unit_count << " Temporal Unit OBUs";
143
144 // Move the processed data to the output.
145 ia_sequence_header = obu_processor->ia_sequence_header_;
146 codec_config_obus.swap(obu_processor->codec_config_obus_);
147 audio_elements.swap(obu_processor->audio_elements_);
148 mix_presentations.swap(obu_processor->mix_presentations_);
149 return absl::OkStatus();
150 }
151
AddLpcmCodecConfig(DecodedUleb128 codec_config_id,uint32_t num_samples_per_frame,uint8_t sample_size,uint32_t sample_rate,absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus)152 void AddLpcmCodecConfig(
153 DecodedUleb128 codec_config_id, uint32_t num_samples_per_frame,
154 uint8_t sample_size, uint32_t sample_rate,
155 absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus) {
156 // Initialize the Codec Config OBU.
157 ASSERT_EQ(codec_config_obus.find(codec_config_id), codec_config_obus.end());
158
159 CodecConfigObu obu(
160 ObuHeader(), codec_config_id,
161 {.codec_id = CodecConfig::kCodecIdLpcm,
162 .num_samples_per_frame = num_samples_per_frame,
163 .decoder_config = LpcmDecoderConfig{
164 .sample_format_flags_bitmask_ = LpcmDecoderConfig::kLpcmLittleEndian,
165 .sample_size_ = sample_size,
166 .sample_rate_ = sample_rate}});
167 EXPECT_THAT(obu.Initialize(kOverrideAudioRollDistance), IsOk());
168 codec_config_obus.emplace(codec_config_id, std::move(obu));
169 }
170
AddLpcmCodecConfigWithIdAndSampleRate(uint32_t codec_config_id,uint32_t sample_rate,absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus)171 void AddLpcmCodecConfigWithIdAndSampleRate(
172 uint32_t codec_config_id, uint32_t sample_rate,
173 absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus) {
174 // Many tests either don't care about the details. Or assumed these "default"
175 // values.
176 constexpr uint32_t kNumSamplesPerFrame = 8;
177 constexpr uint8_t kSampleSize = 16;
178 return AddLpcmCodecConfig(codec_config_id, kNumSamplesPerFrame, kSampleSize,
179 sample_rate, codec_config_obus);
180 }
181
AddOpusCodecConfigWithId(uint32_t codec_config_id,absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus)182 void AddOpusCodecConfigWithId(
183 uint32_t codec_config_id,
184 absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus) {
185 // Initialize the Codec Config OBU.
186 ASSERT_EQ(codec_config_obus.find(codec_config_id), codec_config_obus.end());
187
188 CodecConfigObu obu(
189 ObuHeader(), codec_config_id,
190 {.codec_id = CodecConfig::kCodecIdOpus,
191 .num_samples_per_frame = 8,
192 .decoder_config = OpusDecoderConfig{
193 .version_ = 1, .pre_skip_ = 312, .input_sample_rate_ = 0}});
194 ASSERT_THAT(obu.Initialize(kOverrideAudioRollDistance), IsOk());
195 codec_config_obus.emplace(codec_config_id, std::move(obu));
196 }
197
AddFlacCodecConfigWithId(uint32_t codec_config_id,absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus)198 void AddFlacCodecConfigWithId(
199 uint32_t codec_config_id,
200 absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus) {
201 // Initialize the Codec Config OBU.
202 ASSERT_EQ(codec_config_obus.find(codec_config_id), codec_config_obus.end());
203
204 CodecConfigObu obu(
205 ObuHeader(), codec_config_id,
206 {.codec_id = CodecConfig::kCodecIdFlac,
207 .num_samples_per_frame = 16,
208 .decoder_config = FlacDecoderConfig(
209 {{{.header = {.last_metadata_block_flag = true,
210 .block_type = FlacMetaBlockHeader::kFlacStreamInfo,
211 .metadata_data_block_length = 34},
212 .payload =
213 FlacMetaBlockStreamInfo{.minimum_block_size = 16,
214 .maximum_block_size = 16,
215 .sample_rate = 48000,
216 .bits_per_sample = 15,
217 .total_samples_in_stream = 0}}}})});
218 ASSERT_THAT(obu.Initialize(kOverrideAudioRollDistance), IsOk());
219 codec_config_obus.emplace(codec_config_id, std::move(obu));
220 }
221
AddAacCodecConfigWithId(uint32_t codec_config_id,absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus)222 void AddAacCodecConfigWithId(
223 uint32_t codec_config_id,
224 absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus) {
225 // Initialize the Codec Config OBU.
226 ASSERT_EQ(codec_config_obus.find(codec_config_id), codec_config_obus.end());
227
228 CodecConfigObu obu(ObuHeader(), codec_config_id,
229 {.codec_id = CodecConfig::kCodecIdAacLc,
230 .num_samples_per_frame = 1024,
231 .decoder_config = AacDecoderConfig{}});
232 ASSERT_THAT(obu.Initialize(kOverrideAudioRollDistance), IsOk());
233 codec_config_obus.emplace(codec_config_id, std::move(obu));
234 }
235
AddAmbisonicsMonoAudioElementWithSubstreamIds(DecodedUleb128 audio_element_id,uint32_t codec_config_id,absl::Span<const DecodedUleb128> substream_ids,const absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus,absl::flat_hash_map<DecodedUleb128,AudioElementWithData> & audio_elements)236 void AddAmbisonicsMonoAudioElementWithSubstreamIds(
237 DecodedUleb128 audio_element_id, uint32_t codec_config_id,
238 absl::Span<const DecodedUleb128> substream_ids,
239 const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
240 absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements) {
241 // Check the `codec_config_id` is known and this is a new
242 // `audio_element_id`.
243 auto codec_config_iter = codec_config_obus.find(codec_config_id);
244 ASSERT_NE(codec_config_iter, codec_config_obus.end());
245 ASSERT_EQ(audio_elements.find(audio_element_id), audio_elements.end());
246
247 // Initialize the Audio Element OBU without any parameters.
248 AudioElementObu obu = AudioElementObu(
249 ObuHeader(), audio_element_id, AudioElementObu::kAudioElementSceneBased,
250 0, codec_config_id);
251 obu.InitializeParams(0);
252 obu.InitializeAudioSubstreams(substream_ids.size());
253 obu.audio_substream_ids_.assign(substream_ids.begin(), substream_ids.end());
254
255 // Initialize to n-th order ambisonics. Choose the lowest order that can fit
256 // all `substream_ids`. This may result in mixed-order ambisonics.
257 uint8_t next_valid_output_channel_count;
258 ASSERT_THAT(AmbisonicsConfig::GetNextValidOutputChannelCount(
259 substream_ids.size(), next_valid_output_channel_count),
260 IsOk());
261 EXPECT_THAT(obu.InitializeAmbisonicsMono(next_valid_output_channel_count,
262 substream_ids.size()),
263 IsOk());
264
265 auto& channel_mapping =
266 std::get<AmbisonicsMonoConfig>(
267 std::get<AmbisonicsConfig>(obu.config_).ambisonics_config)
268 .channel_mapping;
269 // Map the first n channels from [0, n] in input order. Leave the rest of
270 // the channels as unmapped.
271 std::fill(channel_mapping.begin(), channel_mapping.end(),
272 AmbisonicsMonoConfig::kInactiveAmbisonicsChannelNumber);
273 std::iota(channel_mapping.begin(),
274 channel_mapping.begin() + substream_ids.size(), 0);
275
276 AudioElementWithData audio_element = {
277 .obu = std::move(obu), .codec_config = &codec_config_iter->second};
278 ASSERT_THAT(ObuWithDataGenerator::FinalizeAmbisonicsConfig(
279 audio_element.obu, audio_element.substream_id_to_labels),
280 IsOk());
281
282 audio_elements.emplace(audio_element_id, std::move(audio_element));
283 }
284
285 // Adds a scalable Audio Element OBU based on the input arguments.
AddScalableAudioElementWithSubstreamIds(IamfInputLayout input_layout,DecodedUleb128 audio_element_id,uint32_t codec_config_id,absl::Span<const DecodedUleb128> substream_ids,const absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus,absl::flat_hash_map<DecodedUleb128,AudioElementWithData> & audio_elements)286 void AddScalableAudioElementWithSubstreamIds(
287 IamfInputLayout input_layout, DecodedUleb128 audio_element_id,
288 uint32_t codec_config_id, absl::Span<const DecodedUleb128> substream_ids,
289 const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
290 absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements) {
291 google::protobuf::RepeatedPtrField<
292 iamf_tools_cli_proto::AudioElementObuMetadata>
293 audio_element_metadatas;
294 AudioElementMetadataBuilder builder;
295
296 auto& new_audio_element_metadata = *audio_element_metadatas.Add();
297 ASSERT_THAT(builder.PopulateAudioElementMetadata(
298 audio_element_id, codec_config_id, input_layout,
299 new_audio_element_metadata),
300 IsOk());
301 // Check that this is a scalable Audio Element, and override the substream
302 // IDs.
303 ASSERT_TRUE(new_audio_element_metadata.has_scalable_channel_layout_config());
304 ASSERT_EQ(new_audio_element_metadata.num_substreams(), substream_ids.size());
305 for (int i = 0; i < substream_ids.size(); ++i) {
306 new_audio_element_metadata.mutable_audio_substream_ids()->Set(
307 i, substream_ids[i]);
308 }
309
310 // Generate the Audio Element OBU.
311 AudioElementGenerator generator(audio_element_metadatas);
312 ASSERT_THAT(generator.Generate(codec_config_obus, audio_elements), IsOk());
313 }
314
AddMixPresentationObuWithAudioElementIds(DecodedUleb128 mix_presentation_id,const std::vector<DecodedUleb128> & audio_element_ids,DecodedUleb128 common_parameter_id,DecodedUleb128 common_parameter_rate,std::list<MixPresentationObu> & mix_presentations)315 void AddMixPresentationObuWithAudioElementIds(
316 DecodedUleb128 mix_presentation_id,
317 const std::vector<DecodedUleb128>& audio_element_ids,
318 DecodedUleb128 common_parameter_id, DecodedUleb128 common_parameter_rate,
319 std::list<MixPresentationObu>& mix_presentations) {
320 // Configure one of the simplest mix presentation. Mix presentations REQUIRE
321 // at least one sub-mix and a stereo layout.
322 AddMixPresentationObuWithConfigurableLayouts(
323 mix_presentation_id, audio_element_ids, common_parameter_id,
324 common_parameter_rate,
325 {LoudspeakersSsConventionLayout::kSoundSystemA_0_2_0}, mix_presentations);
326 }
327
AddMixPresentationObuWithConfigurableLayouts(DecodedUleb128 mix_presentation_id,const std::vector<DecodedUleb128> & audio_element_ids,DecodedUleb128 common_parameter_id,DecodedUleb128 common_parameter_rate,const std::vector<LoudspeakersSsConventionLayout::SoundSystem> & sound_system_layouts,std::list<MixPresentationObu> & mix_presentations)328 void AddMixPresentationObuWithConfigurableLayouts(
329 DecodedUleb128 mix_presentation_id,
330 const std::vector<DecodedUleb128>& audio_element_ids,
331 DecodedUleb128 common_parameter_id, DecodedUleb128 common_parameter_rate,
332 const std::vector<LoudspeakersSsConventionLayout::SoundSystem>&
333 sound_system_layouts,
334 std::list<MixPresentationObu>& mix_presentations) {
335 MixGainParamDefinition common_mix_gain_param_definition;
336 common_mix_gain_param_definition.parameter_id_ = common_parameter_id;
337 common_mix_gain_param_definition.parameter_rate_ = common_parameter_rate;
338 common_mix_gain_param_definition.param_definition_mode_ = true;
339 common_mix_gain_param_definition.default_mix_gain_ = 0;
340 std::vector<MixPresentationLayout> layouts;
341 for (const auto& sound_system : sound_system_layouts) {
342 layouts.push_back(
343 {.loudness_layout = {.layout_type =
344 Layout::kLayoutTypeLoudspeakersSsConvention,
345 .specific_layout =
346 LoudspeakersSsConventionLayout{
347 .sound_system = sound_system,
348 .reserved = 0}},
349 .loudness = {
350 .info_type = 0, .integrated_loudness = 0, .digital_peak = 0}});
351 }
352
353 std::vector<MixPresentationSubMix> sub_mixes = {
354 {.output_mix_gain = common_mix_gain_param_definition,
355 .layouts = layouts}};
356 for (const auto& audio_element_id : audio_element_ids) {
357 sub_mixes[0].audio_elements.push_back({
358 .audio_element_id = audio_element_id,
359 .localized_element_annotations = {},
360 .rendering_config =
361 {.headphones_rendering_mode =
362 RenderingConfig::kHeadphonesRenderingModeStereo,
363 .reserved = 0,
364 .rendering_config_extension_size = 0,
365 .rendering_config_extension_bytes = {}},
366 .element_mix_gain = common_mix_gain_param_definition,
367 });
368 }
369
370 mix_presentations.push_back(
371 MixPresentationObu(ObuHeader(), mix_presentation_id,
372 /*count_label=*/0, {}, {}, sub_mixes));
373 }
374
AddParamDefinitionWithMode0AndOneSubblock(DecodedUleb128 parameter_id,DecodedUleb128 parameter_rate,DecodedUleb128 duration,absl::flat_hash_map<DecodedUleb128,MixGainParamDefinition> & param_definitions)375 void AddParamDefinitionWithMode0AndOneSubblock(
376 DecodedUleb128 parameter_id, DecodedUleb128 parameter_rate,
377 DecodedUleb128 duration,
378 absl::flat_hash_map<DecodedUleb128, MixGainParamDefinition>&
379 param_definitions) {
380 MixGainParamDefinition param_definition;
381 SetParamDefinitionCommonFields(parameter_id, parameter_rate, duration,
382 param_definition);
383 param_definitions.emplace(parameter_id, param_definition);
384 }
385
AddDemixingParamDefinition(DecodedUleb128 parameter_id,DecodedUleb128 parameter_rate,DecodedUleb128 duration,AudioElementObu & audio_element_obu)386 void AddDemixingParamDefinition(DecodedUleb128 parameter_id,
387 DecodedUleb128 parameter_rate,
388 DecodedUleb128 duration,
389 AudioElementObu& audio_element_obu) {
390 DemixingParamDefinition param_definition;
391
392 // Specific fields of demixing param definitions.
393 param_definition.default_demixing_info_parameter_data_.dmixp_mode =
394 DemixingInfoParameterData::kDMixPMode1;
395 param_definition.default_demixing_info_parameter_data_.reserved = 0;
396 param_definition.default_demixing_info_parameter_data_.default_w = 10;
397 param_definition.default_demixing_info_parameter_data_
398 .reserved_for_future_use = 0;
399
400 AddParamDefinition(parameter_id, parameter_rate, duration, audio_element_obu,
401 param_definition);
402 }
403
AddReconGainParamDefinition(DecodedUleb128 parameter_id,DecodedUleb128 parameter_rate,DecodedUleb128 duration,AudioElementObu & audio_element_obu)404 void AddReconGainParamDefinition(DecodedUleb128 parameter_id,
405 DecodedUleb128 parameter_rate,
406 DecodedUleb128 duration,
407 AudioElementObu& audio_element_obu) {
408 ReconGainParamDefinition param_definition(
409 audio_element_obu.GetAudioElementId());
410
411 AddParamDefinition(parameter_id, parameter_rate, duration, audio_element_obu,
412 param_definition);
413 }
414
CreateWavReaderExpectOk(const std::string & filename,int num_samples_per_frame)415 WavReader CreateWavReaderExpectOk(const std::string& filename,
416 int num_samples_per_frame) {
417 auto wav_reader = WavReader::CreateFromFile(filename, num_samples_per_frame);
418 EXPECT_THAT(wav_reader, IsOk());
419 return std::move(*wav_reader);
420 }
421
RenderAndFlushExpectOk(const LabeledFrame & labeled_frame,AudioElementRendererBase * renderer,std::vector<InternalSampleType> & output_samples)422 void RenderAndFlushExpectOk(const LabeledFrame& labeled_frame,
423 AudioElementRendererBase* renderer,
424 std::vector<InternalSampleType>& output_samples) {
425 ASSERT_NE(renderer, nullptr);
426 EXPECT_THAT(renderer->RenderLabeledFrame(labeled_frame), IsOk());
427 EXPECT_THAT(renderer->Finalize(), IsOk());
428 EXPECT_TRUE(renderer->IsFinalized());
429 EXPECT_THAT(renderer->Flush(output_samples), IsOk());
430 }
431
GetAndCleanupOutputFileName(absl::string_view suffix)432 std::string GetAndCleanupOutputFileName(absl::string_view suffix) {
433 const testing::TestInfo* const test_info =
434 testing::UnitTest::GetInstance()->current_test_info();
435 std::string filename = absl::StrCat(test_info->name(), "-",
436 test_info->test_suite_name(), suffix);
437
438 // It is possible that the test suite name contains the '/' character.
439 // Replace it with '-' to form a legal file name.
440 absl::StrReplaceAll({{"/", "-"}}, &filename);
441 const std::filesystem::path test_specific_filename =
442 std::filesystem::path(::testing::TempDir()) / filename;
443
444 std::filesystem::remove(test_specific_filename);
445 return test_specific_filename.string();
446 }
447
GetAndCreateOutputDirectory(absl::string_view suffix)448 std::string GetAndCreateOutputDirectory(absl::string_view suffix) {
449 const std::string output_directory = GetAndCleanupOutputFileName(suffix);
450 std::error_code error_code;
451 EXPECT_TRUE(
452 std::filesystem::create_directories(output_directory, error_code));
453 return output_directory;
454 }
455
SerializeObusExpectOk(const std::list<const ObuBase * > & obus,const LebGenerator & leb_generator)456 std::vector<uint8_t> SerializeObusExpectOk(
457 const std::list<const ObuBase*>& obus, const LebGenerator& leb_generator) {
458 using ::absl_testing::IsOk;
459 WriteBitBuffer serialized_obus(0, leb_generator);
460 for (const auto* obu : obus) {
461 EXPECT_NE(obu, nullptr);
462 EXPECT_THAT(obu->ValidateAndWriteObu(serialized_obus), IsOk());
463 }
464
465 return serialized_obus.bit_buffer();
466 }
467
ParseUserMetadataAssertSuccess(const std::string & textproto_filename,iamf_tools_cli_proto::UserMetadata & user_metadata)468 void ParseUserMetadataAssertSuccess(
469 const std::string& textproto_filename,
470 iamf_tools_cli_proto::UserMetadata& user_metadata) {
471 ASSERT_TRUE(std::filesystem::exists(textproto_filename));
472 std::ifstream user_metadata_file(textproto_filename, std::ios::in);
473 google::protobuf::io::IstreamInputStream input_stream(&user_metadata_file);
474 ASSERT_TRUE(
475 google::protobuf::TextFormat::Parse(&input_stream, &user_metadata));
476 }
477
GetLogSpectralDistance(const absl::Span<const InternalSampleType> & first_log_spectrum,const absl::Span<const InternalSampleType> & second_log_spectrum)478 double GetLogSpectralDistance(
479 const absl::Span<const InternalSampleType>& first_log_spectrum,
480 const absl::Span<const InternalSampleType>& second_log_spectrum) {
481 const int num_samples = first_log_spectrum.size();
482 if (num_samples != second_log_spectrum.size()) {
483 LOG(ERROR) << "Spectrum sizes are not equal.";
484 return false;
485 }
486 double log_spectral_distance = 0.0;
487 for (int i = 0; i < num_samples; ++i) {
488 log_spectral_distance += (first_log_spectrum[i] - second_log_spectrum[i]) *
489 (first_log_spectrum[i] - second_log_spectrum[i]);
490 }
491 return (10 * std::sqrt(log_spectral_distance / num_samples));
492 }
493
GetDecodeSpecifications(const iamf_tools_cli_proto::UserMetadata & user_metadata)494 std::vector<DecodeSpecification> GetDecodeSpecifications(
495 const iamf_tools_cli_proto::UserMetadata& user_metadata) {
496 std::vector<DecodeSpecification> decode_specifications;
497 for (const auto& mix_presentation :
498 user_metadata.mix_presentation_metadata()) {
499 for (int i = 0; i < mix_presentation.sub_mixes_size(); ++i) {
500 for (int j = 0; j < mix_presentation.sub_mixes(i).layouts_size(); ++j) {
501 DecodeSpecification decode_specification;
502 decode_specification.mix_presentation_id =
503 mix_presentation.mix_presentation_id();
504 decode_specification.sub_mix_index = i;
505 if (mix_presentation.sub_mixes(i)
506 .layouts(j)
507 .loudness_layout()
508 .has_ss_layout()) {
509 auto sound_system_status = MixPresentationGenerator::CopySoundSystem(
510 mix_presentation.sub_mixes(i)
511 .layouts(j)
512 .loudness_layout()
513 .ss_layout()
514 .sound_system(),
515 decode_specification.sound_system);
516 if (!sound_system_status.ok()) {
517 LOG(ERROR) << "Failed to copy sound system: "
518 << sound_system_status;
519 continue;
520 }
521 }
522 decode_specification.layout_index = j;
523 decode_specifications.push_back(decode_specification);
524 }
525 }
526 }
527 return decode_specifications;
528 }
529
Int32ToInternalSampleType(absl::Span<const int32_t> samples)530 std::vector<InternalSampleType> Int32ToInternalSampleType(
531 absl::Span<const int32_t> samples) {
532 std::vector<InternalSampleType> result(samples.size());
533 Int32ToInternalSampleType(samples, absl::MakeSpan(result));
534 return result;
535 }
536
GenerateSineWav(uint64_t start_tick,uint32_t num_samples,uint32_t sample_rate_hz,double frequency_hz,double amplitude)537 std::vector<InternalSampleType> GenerateSineWav(uint64_t start_tick,
538 uint32_t num_samples,
539 uint32_t sample_rate_hz,
540 double frequency_hz,
541 double amplitude) {
542 std::vector<InternalSampleType> samples(num_samples, 0.0);
543 constexpr double kPi = std::numbers::pi_v<InternalSampleType>;
544 const double time_base = 1.0 / sample_rate_hz;
545
546 for (int frame_tick = 0; frame_tick < num_samples; ++frame_tick) {
547 const double t = start_tick + frame_tick;
548 samples[frame_tick] =
549 amplitude * sin(2.0 * kPi * frequency_hz * t * time_base);
550 }
551 return samples;
552 }
553
AccumulateZeroCrossings(absl::Span<const std::vector<int32_t>> samples,std::vector<ZeroCrossingState> & zero_crossing_states,std::vector<int> & zero_crossing_counts)554 void AccumulateZeroCrossings(
555 absl::Span<const std::vector<int32_t>> samples,
556 std::vector<ZeroCrossingState>& zero_crossing_states,
557 std::vector<int>& zero_crossing_counts) {
558 using enum ZeroCrossingState;
559 const auto num_channels = samples.empty() ? 0 : samples[0].size();
560 // Seed the data structures, or check they contain the right number of
561 // channels.
562 if (zero_crossing_counts.empty()) {
563 zero_crossing_counts.resize(num_channels, 0);
564 } else {
565 ASSERT_EQ(num_channels, zero_crossing_counts.size());
566 }
567 if (zero_crossing_states.empty()) {
568 zero_crossing_states.resize(num_channels, ZeroCrossingState::kUnknown);
569 } else {
570 ASSERT_EQ(num_channels, zero_crossing_states.size());
571 }
572
573 // Zero crossing threshold determined empirically for -18 dB sine waves to
574 // skip encoding artifacts (e.g. a small ringing artifact < -40 dB after
575 // the sine wave stopped.) Note that -18 dB would correspond to dividing
576 // by 8, while dividing by 100 is -40 dB.
577 constexpr int32_t kThreshold = std::numeric_limits<int32_t>::max() / 100;
578 for (const auto& tick : samples) {
579 ASSERT_EQ(tick.size(), num_channels);
580 for (int i = 0; i < num_channels; ++i) {
581 ZeroCrossingState next_state = (tick[i] > kThreshold) ? kPositive
582 : (tick[i] < -kThreshold) ? kNegative
583 : kUnknown;
584 if (next_state == kUnknown) {
585 // Don't do anything if it's not clearly positive or negative.
586 continue;
587 } else if (zero_crossing_states[i] != next_state) {
588 // If we clearly flipped states, count it as a zero crossing.
589 zero_crossing_counts[i]++;
590 zero_crossing_states[i] = next_state;
591 }
592 }
593 }
594 }
595
ReadFileToBytes(const std::filesystem::path & file_path,std::vector<uint8_t> & buffer)596 absl::Status ReadFileToBytes(const std::filesystem::path& file_path,
597 std::vector<uint8_t>& buffer) {
598 if (!std::filesystem::exists(file_path)) {
599 return absl::NotFoundError("File not found.");
600 }
601 std::ifstream ifs(file_path, std::ios::binary | std::ios::in);
602
603 // Increase the size of the buffer. Write to the original end (before
604 // resizing).
605 const auto file_size = std::filesystem::file_size(file_path);
606 const auto original_buffer_size = buffer.size();
607 buffer.resize(original_buffer_size + file_size);
608 ifs.read(reinterpret_cast<char*>(buffer.data() + original_buffer_size),
609 file_size);
610 return absl::OkStatus();
611 }
612
PushFrameDerived(absl::Span<const std::vector<int32_t>> time_channel_samples)613 absl::Status EverySecondTickResampler::PushFrameDerived(
614 absl::Span<const std::vector<int32_t>> time_channel_samples) {
615 EXPECT_EQ(num_valid_ticks_, 0); // `SampleProcessorBase` should ensure this.
616 for (size_t i = 0; i < time_channel_samples.size(); ++i) {
617 if (i % 2 == 1) {
618 output_time_channel_samples_[num_valid_ticks_] = time_channel_samples[i];
619 ++num_valid_ticks_;
620 }
621 }
622 return absl::OkStatus();
623 }
624
FlushDerived()625 absl::Status EverySecondTickResampler::FlushDerived() {
626 EXPECT_EQ(num_valid_ticks_, 0); // `SampleProcessorBase` should ensure this.
627 return absl::OkStatus();
628 }
629
PushFrameDerived(absl::Span<const std::vector<int32_t>> time_channel_samples)630 absl::Status OneFrameDelayer::PushFrameDerived(
631 absl::Span<const std::vector<int32_t>> time_channel_samples) {
632 // Swap the delayed samples with the output samples from the base class.
633 std::swap(delayed_samples_, output_time_channel_samples_);
634 std::swap(num_delayed_ticks_, num_valid_ticks_);
635
636 // The fact that the input size is less than the output size should have
637 // already been validated in `SampleProcessorBase`, but for safety we can
638 // check it here.
639 EXPECT_LE(time_channel_samples.size(), delayed_samples_.size());
640 // Cache the new samples to delay.
641 std::copy(time_channel_samples.begin(), time_channel_samples.end(),
642 delayed_samples_.begin());
643 num_delayed_ticks_ = time_channel_samples.size();
644
645 return absl::OkStatus();
646 }
647
FlushDerived()648 absl::Status OneFrameDelayer::FlushDerived() {
649 // Pushing in an empty frame will cause the delayed frame to be available.
650 return PushFrameDerived({});
651 }
652
653 } // namespace iamf_tools
654