1 #include "iamf/cli/iamf_encoder.h"
2
3 #include <cstddef>
4 #include <cstdint>
5 #include <filesystem>
6 #include <list>
7 #include <memory>
8 #include <optional>
9 #include <string>
10 #include <utility>
11 #include <vector>
12
13 #include "absl/container/flat_hash_map.h"
14 #include "absl/log/log.h"
15 #include "absl/status/status_matchers.h"
16 #include "absl/strings/string_view.h"
17 #include "gmock/gmock.h"
18 #include "gtest/gtest.h"
19 #include "iamf/cli/audio_element_with_data.h"
20 #include "iamf/cli/audio_frame_with_data.h"
21 #include "iamf/cli/channel_label.h"
22 #include "iamf/cli/demixing_module.h"
23 #include "iamf/cli/iamf_components.h"
24 #include "iamf/cli/iamf_encoder.h"
25 #include "iamf/cli/loudness_calculator_factory_base.h"
26 #include "iamf/cli/parameter_block_with_data.h"
27 #include "iamf/cli/proto/arbitrary_obu.pb.h"
28 #include "iamf/cli/proto/audio_element.pb.h"
29 #include "iamf/cli/proto/codec_config.pb.h"
30 #include "iamf/cli/proto/ia_sequence_header.pb.h"
31 #include "iamf/cli/proto/mix_presentation.pb.h"
32 #include "iamf/cli/proto/test_vector_metadata.pb.h"
33 #include "iamf/cli/proto/user_metadata.pb.h"
34 #include "iamf/cli/renderer_factory.h"
35 #include "iamf/cli/rendering_mix_presentation_finalizer.h"
36 #include "iamf/cli/tests/cli_test_utils.h"
37 #include "iamf/cli/user_metadata_builder/audio_element_metadata_builder.h"
38 #include "iamf/cli/user_metadata_builder/iamf_input_layout.h"
39 #include "iamf/cli/wav_writer.h"
40 #include "iamf/obu/arbitrary_obu.h"
41 #include "iamf/obu/codec_config.h"
42 #include "iamf/obu/ia_sequence_header.h"
43 #include "iamf/obu/mix_presentation.h"
44 #include "iamf/obu/types.h"
45 #include "src/google/protobuf/text_format.h"
46
47 namespace iamf_tools {
48 namespace {
49
50 using ::absl_testing::IsOk;
51 using ::absl_testing::IsOkAndHolds;
52 using ::iamf_tools_cli_proto::UserMetadata;
53 using ::testing::_;
54 using ::testing::Contains;
55 using ::testing::IsEmpty;
56 using ::testing::Not;
57 using ::testing::Return;
58
// IDs and codec settings shared by the metadata helpers and the tests below.
// `kCodecConfigId` must match the Codec Config ID produced by `AddCodecConfig`.
constexpr DecodedUleb128 kCodecConfigId = 200;
// Several text protos in the helpers below hard-code `audio_element_id: 300`;
// keep them in sync with this value.
constexpr DecodedUleb128 kAudioElementId = 300;
// Number of samples fed per channel per temporal unit in the tests; must match
// the frame size configured by `AddCodecConfig`.
constexpr uint32_t kNumSamplesPerFrame = 8;
// Sample size which `AddCodecConfig` sets on the LPCM decoder config.
constexpr int kExpectedPcmBitDepth = 16;

// Default sample processor factory used by the fixture. Judging by its name it
// creates no sample processors (i.e. no output wav files) - confirm against
// `RenderingMixPresentationFinalizer`.
const auto kOmitOutputWavFiles =
    RenderingMixPresentationFinalizer::ProduceNoSampleProcessors;
66
AddIaSequenceHeader(UserMetadata & user_metadata)67 void AddIaSequenceHeader(UserMetadata& user_metadata) {
68 ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(
69 R"pb(
70 primary_profile: PROFILE_VERSION_SIMPLE
71 additional_profile: PROFILE_VERSION_BASE
72 )pb",
73 user_metadata.add_ia_sequence_header_metadata()));
74 }
75
AddCodecConfig(UserMetadata & user_metadata)76 void AddCodecConfig(UserMetadata& user_metadata) {
77 auto* new_codec_config = user_metadata.add_codec_config_metadata();
78 ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(
79 R"pb(
80 codec_config_id: 200
81 codec_config {
82 codec_id: CODEC_ID_LPCM
83 num_samples_per_frame: 8
84 audio_roll_distance: 0
85 decoder_config_lpcm {
86 sample_format_flags: LPCM_LITTLE_ENDIAN
87 sample_rate: 48000
88 }
89 }
90 )pb",
91 new_codec_config));
92 new_codec_config->mutable_codec_config()
93 ->mutable_decoder_config_lpcm()
94 ->set_sample_size(kExpectedPcmBitDepth);
95 }
96
AddAudioElement(UserMetadata & user_metadata)97 void AddAudioElement(UserMetadata& user_metadata) {
98 AudioElementMetadataBuilder builder;
99 ASSERT_THAT(builder.PopulateAudioElementMetadata(
100 kAudioElementId, kCodecConfigId, IamfInputLayout::kStereo,
101 *user_metadata.add_audio_element_metadata()),
102 IsOk());
103 }
104
AddMixPresentation(UserMetadata & user_metadata)105 void AddMixPresentation(UserMetadata& user_metadata) {
106 ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(
107 R"pb(
108 mix_presentation_id: 42
109 count_label: 0
110 sub_mixes {
111 audio_elements {
112 audio_element_id: 300
113 rendering_config {
114 headphones_rendering_mode: HEADPHONES_RENDERING_MODE_STEREO
115 }
116 element_mix_gain {
117 param_definition {
118 parameter_id: 100
119 parameter_rate: 16000
120 param_definition_mode: 1
121 reserved: 0
122 }
123 default_mix_gain: 0
124 }
125 }
126 output_mix_gain {
127 param_definition {
128 parameter_id: 100
129 parameter_rate: 16000
130 param_definition_mode: 1
131 reserved: 0
132 }
133 default_mix_gain: 0
134 }
135 layouts {
136 loudness_layout {
137 layout_type: LAYOUT_TYPE_LOUDSPEAKERS_SS_CONVENTION
138 ss_layout { sound_system: SOUND_SYSTEM_A_0_2_0 reserved: 0 }
139 }
140 loudness {
141 info_type_bit_masks: []
142 integrated_loudness: 0
143 digital_peak: 0
144 }
145 }
146 }
147 )pb",
148 user_metadata.add_mix_presentation_metadata()));
149 }
150
AddArbitraryObu(UserMetadata & user_metadata)151 void AddArbitraryObu(UserMetadata& user_metadata) {
152 ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(
153 R"pb(
154 insertion_hook: INSERTION_HOOK_AFTER_AUDIO_ELEMENTS
155 obu_type: OBU_IA_RESERVED_26
156 payload: "Imaginary descriptor OBU between the audio element and mix presentation."
157 )pb",
158 user_metadata.add_arbitrary_obu_metadata()));
159 }
160
AddAudioFrame(UserMetadata & user_metadata)161 void AddAudioFrame(UserMetadata& user_metadata) {
162 ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(
163 R"pb(
164 samples_to_trim_at_end: 0
165 samples_to_trim_at_start: 0
166 audio_element_id: 300
167 channel_ids: [ 0, 1 ]
168 channel_labels: [ "L2", "R2" ]
169 )pb",
170 user_metadata.add_audio_frame_metadata()));
171 }
172
AddParameterBlockAtTimestamp(InternalTimestamp start_timestamp,UserMetadata & user_metadata)173 void AddParameterBlockAtTimestamp(InternalTimestamp start_timestamp,
174 UserMetadata& user_metadata) {
175 auto* metadata = user_metadata.add_parameter_block_metadata();
176 ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(
177 R"pb(
178 parameter_id: 100
179 duration: 8
180 num_subblocks: 1
181 constant_subblock_duration: 8
182 subblocks:
183 [ {
184 mix_gain_parameter_data {
185 animation_type: ANIMATE_STEP
186 param_data { step { start_point_value: 0 } }
187 }
188 }]
189 )pb",
190 metadata));
191
192 // Overwrite `start_timestamp`.
193 metadata->set_start_timestamp(start_timestamp);
194 }
195
GetFirstSubmixFirstLayoutExpectedPath(absl::string_view output_directory)196 std::string GetFirstSubmixFirstLayoutExpectedPath(
197 absl::string_view output_directory) {
198 return (std::filesystem::path(output_directory) /
199 std::filesystem::path("first_file.wav"))
200 .string();
201 }
202
GetWavWriterFactoryThatProducesFirstSubMixFirstLayout(absl::string_view output_directory)203 auto GetWavWriterFactoryThatProducesFirstSubMixFirstLayout(
204 absl::string_view output_directory) {
205 const std::string output_wav_path =
206 GetFirstSubmixFirstLayoutExpectedPath(output_directory);
207 return [output_wav_path](
208 DecodedUleb128 mix_presentation_id, int sub_mix_index,
209 int layout_index, const Layout&, int num_channels, int sample_rate,
210 int bit_depth,
211 size_t num_samples_per_frame) -> std::unique_ptr<WavWriter> {
212 if (sub_mix_index != 0 || layout_index != 0) {
213 return nullptr;
214 }
215
216 return WavWriter::Create(output_wav_path, num_channels, sample_rate,
217 bit_depth, num_samples_per_frame);
218 };
219 }
220
221 class IamfEncoderTest : public ::testing::Test {
222 protected:
SetupDescriptorObus()223 void SetupDescriptorObus() {
224 AddIaSequenceHeader(user_metadata_);
225 AddCodecConfig(user_metadata_);
226 AddAudioElement(user_metadata_);
227 AddMixPresentation(user_metadata_);
228 }
229
CreateExpectOk()230 IamfEncoder CreateExpectOk() {
231 auto iamf_encoder = IamfEncoder::Create(
232 user_metadata_, renderer_factory_.get(),
233 loudness_calculator_factory_.get(), sample_processor_factory_,
234 ia_sequence_header_obu_, codec_config_obus_, audio_elements_,
235 mix_presentation_obus_, arbitrary_obus_);
236 EXPECT_THAT(iamf_encoder, IsOk());
237 return std::move(*iamf_encoder);
238 }
239
240 UserMetadata user_metadata_;
241 std::optional<IASequenceHeaderObu> ia_sequence_header_obu_;
242 absl::flat_hash_map<uint32_t, CodecConfigObu> codec_config_obus_;
243 absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements_;
244 std::list<MixPresentationObu> mix_presentation_obus_;
245 std::list<ArbitraryObu> arbitrary_obus_;
246 // Default some dependencies to be based on the real `IamfComponents`
247 // implementations. And generally disable wav writing since it is not needed
248 // for most tests.
249 std::unique_ptr<RendererFactoryBase> renderer_factory_ =
250 CreateRendererFactory();
251 std::unique_ptr<LoudnessCalculatorFactoryBase> loudness_calculator_factory_ =
252 CreateLoudnessCalculatorFactory();
253 RenderingMixPresentationFinalizer::SampleProcessorFactory
254 sample_processor_factory_ = kOmitOutputWavFiles;
255 };
256
// Creating an encoder from metadata without any descriptor OBUs must fail.
TEST_F(IamfEncoderTest, CreateFailsOnEmptyUserMetadata) {
  // Make it explicit that the metadata holds nothing at all.
  user_metadata_.Clear();

  const auto encoder_or_status = IamfEncoder::Create(
      user_metadata_, renderer_factory_.get(),
      loudness_calculator_factory_.get(), sample_processor_factory_,
      ia_sequence_header_obu_, codec_config_obus_, audio_elements_,
      mix_presentation_obus_, arbitrary_obus_);

  EXPECT_THAT(encoder_or_status, Not(IsOk()));
}
268
// `Create` fills the output arguments with exactly the descriptor OBUs
// described by the user metadata.
TEST_F(IamfEncoderTest, CreateGeneratesDescriptorObus) {
  SetupDescriptorObus();

  auto encoder = CreateExpectOk();

  EXPECT_TRUE(ia_sequence_header_obu_.has_value());
  EXPECT_EQ(codec_config_obus_.size(), 1);
  EXPECT_EQ(audio_elements_.size(), 1);
  EXPECT_EQ(mix_presentation_obus_.size(), 1);
  // No arbitrary OBU metadata was configured.
  EXPECT_THAT(arbitrary_obus_, IsEmpty());
}
279
// Arbitrary OBU metadata results in an arbitrary OBU being output by `Create`.
TEST_F(IamfEncoderTest, CreateGeneratesArbitraryObus) {
  SetupDescriptorObus();
  AddArbitraryObu(user_metadata_);

  auto encoder = CreateExpectOk();

  const auto num_arbitrary_obus = arbitrary_obus_.size();
  EXPECT_EQ(num_arbitrary_obus, 1);
}
288
// The generated mix presentation carries a build information tag even though
// the user metadata does not ask for one.
TEST_F(IamfEncoderTest, BuildInformationTagIsPresentByDefault) {
  SetupDescriptorObus();

  auto encoder = CreateExpectOk();
  ASSERT_FALSE(mix_presentation_obus_.empty());
  const auto& first_mix_presentation = mix_presentation_obus_.front();

  // The position of the build information tag is irrelevant; it only has to be
  // present by default, to help with debugging.
  const auto& tags = first_mix_presentation.mix_presentation_tags_;
  ASSERT_TRUE(tags.has_value());
  EXPECT_THAT(tags->tags, Contains(TagMatchesBuildInformation()));
}
302
// Feeds two frames of silent stereo audio, each with a matching parameter
// block, and expects each iteration to output exactly one audio frame and one
// parameter block. The encoder must stop generating data OBUs after the second
// iteration, where `FinalizeAddSamples` is called.
TEST_F(IamfEncoderTest, GenerateDataObusTwoIterationsSucceeds) {
  SetupDescriptorObus();
  AddAudioFrame(user_metadata_);
  // One parameter block per frame; the start timestamps are multiples of the
  // frame size.
  AddParameterBlockAtTimestamp(0, user_metadata_);
  AddParameterBlockAtTimestamp(8, user_metadata_);
  auto iamf_encoder = CreateExpectOk();

  // Temporary variables for one iteration.
  const std::vector<InternalSampleType> zero_samples(kNumSamplesPerFrame, 0.0);
  std::list<AudioFrameWithData> temp_audio_frames;
  std::list<ParameterBlockWithData> temp_parameter_blocks;
  int iteration = 0;
  while (iamf_encoder.GeneratingDataObus()) {
    // Only as many parameter blocks as expected iterations were configured.
    // Bail out if the encoder keeps generating, instead of indexing out of
    // range (or looping forever).
    ASSERT_LT(iteration, user_metadata_.parameter_block_metadata_size());

    iamf_encoder.BeginTemporalUnit();
    iamf_encoder.AddSamples(kAudioElementId, ChannelLabel::kL2, zero_samples);
    iamf_encoder.AddSamples(kAudioElementId, ChannelLabel::kR2, zero_samples);

    // Signal stopping adding samples at the second iteration.
    if (iteration == 1) {
      iamf_encoder.FinalizeAddSamples();
    }

    EXPECT_THAT(iamf_encoder.AddParameterBlockMetadata(
                    user_metadata_.parameter_block_metadata(iteration)),
                IsOk());

    // Output.
    EXPECT_THAT(iamf_encoder.OutputTemporalUnit(temp_audio_frames,
                                                temp_parameter_blocks),
                IsOk());
    // Fatal, because `front()` below must not be called on an empty list.
    ASSERT_EQ(temp_audio_frames.size(), 1);
    EXPECT_EQ(temp_parameter_blocks.size(), 1);
    EXPECT_EQ(temp_audio_frames.front().start_timestamp,
              iteration * kNumSamplesPerFrame);

    iteration++;
  }

  EXPECT_EQ(iteration, 2);
}
344
// A move-constructed encoder must be fully usable: it can accept samples and
// parameter blocks and output a temporal unit.
TEST_F(IamfEncoderTest, SafeToUseAfterMove) {
  SetupDescriptorObus();
  AddAudioFrame(user_metadata_);
  AddParameterBlockAtTimestamp(0, user_metadata_);
  AddParameterBlockAtTimestamp(8, user_metadata_);
  auto iamf_encoder_to_move_from = CreateExpectOk();

  // Move the encoder, and use it.
  IamfEncoder iamf_encoder = std::move(iamf_encoder_to_move_from);

  // Use many parts of the API, to make sure the move did not break anything.
  EXPECT_TRUE(iamf_encoder.GeneratingDataObus());
  iamf_encoder.BeginTemporalUnit();
  const std::vector<InternalSampleType> kZeroSamples(kNumSamplesPerFrame, 0.0);
  iamf_encoder.AddSamples(kAudioElementId, ChannelLabel::kL2, kZeroSamples);
  iamf_encoder.AddSamples(kAudioElementId, ChannelLabel::kR2, kZeroSamples);
  EXPECT_THAT(iamf_encoder.AddParameterBlockMetadata(
                  user_metadata_.parameter_block_metadata(0)),
              IsOk());
  iamf_encoder.FinalizeAddSamples();
  std::list<AudioFrameWithData> temp_audio_frames;
  std::list<ParameterBlockWithData> temp_parameter_blocks;
  EXPECT_THAT(
      iamf_encoder.OutputTemporalUnit(temp_audio_frames, temp_parameter_blocks),
      IsOk());
  EXPECT_EQ(temp_audio_frames.size(), 1);
  EXPECT_EQ(temp_parameter_blocks.size(), 1);
}
374
// Once adding samples has been finalized, the finalized mix presentation OBUs
// can be retrieved.
TEST_F(IamfEncoderTest, FinalizeMixPresentationObusSucceeds) {
  SetupDescriptorObus();
  auto encoder = CreateExpectOk();
  encoder.FinalizeAddSamples();

  const auto finalized_obus = encoder.GetFinalizedMixPresentationObus();

  EXPECT_THAT(finalized_obus, IsOk());
}
383
// The finalized mix presentation OBUs can be retrieved only once.
TEST_F(IamfEncoderTest, CallingFinalizeMixPresentationObusTwiceFails) {
  SetupDescriptorObus();
  auto encoder = CreateExpectOk();
  encoder.FinalizeAddSamples();

  // The first retrieval works.
  const auto first_result = encoder.GetFinalizedMixPresentationObus();
  EXPECT_THAT(first_result, IsOk());

  // Any further retrieval is an error.
  const auto second_result = encoder.GetFinalizedMixPresentationObus();
  EXPECT_THAT(second_result, Not(IsOk()));
}
394
// Without a renderer factory and a loudness calculator factory the finalized
// mix presentation keeps the loudness the user provided.
TEST_F(IamfEncoderTest,
       FinalizeMixPresentationObusDefaultsToPreservingUserLoudness) {
  SetupDescriptorObus();
  // Configuring the encoder with null factories is permitted, which disables
  // rendering and loudness measurements.
  renderer_factory_ = nullptr;
  loudness_calculator_factory_ = nullptr;
  auto iamf_encoder = CreateExpectOk();
  // Guard the `front()` access below.
  ASSERT_FALSE(mix_presentation_obus_.empty());
  const auto original_loudness = mix_presentation_obus_.front()
                                     .sub_mixes_.front()
                                     .layouts.front()
                                     .loudness;
  iamf_encoder.FinalizeAddSamples();

  const auto finalized_mix_presentation_obus =
      iamf_encoder.GetFinalizedMixPresentationObus();
  // Also require a non-empty result, since `front()` is used below.
  ASSERT_THAT(finalized_mix_presentation_obus, IsOkAndHolds(Not(IsEmpty())));

  EXPECT_EQ(finalized_mix_presentation_obus->front()
                .sub_mixes_.front()
                .layouts.front()
                .loudness,
            original_loudness);
}
419
// Retrieving the finalized mix presentation OBUs is an error while the encoder
// is still generating data OBUs.
TEST_F(IamfEncoderTest,
       FinalizeMixPresentationObusFailsBeforeGeneratingDataObusIsFinished) {
  SetupDescriptorObus();
  AddAudioFrame(user_metadata_);
  auto encoder = CreateExpectOk();

  // Data OBUs are still being generated, so the final loudness values cannot
  // be known yet.
  ASSERT_TRUE(encoder.GeneratingDataObus());

  const auto finalized_obus = encoder.GetFinalizedMixPresentationObus();
  EXPECT_THAT(finalized_obus, Not(IsOk()));
}
432
// With a real renderer factory and a mock loudness calculator, the finalized
// mix presentation carries the loudness reported by the calculator.
TEST_F(IamfEncoderTest, FinalizeMixPresentationObuFillsInLoudness) {
  SetupDescriptorObus();
  // Loudness measurement is done only when the signal can be rendered, and
  // based on the resultant loudness calculators.
  renderer_factory_ = std::make_unique<RendererFactory>();
  auto mock_loudness_calculator_factory =
      std::make_unique<MockLoudnessCalculatorFactory>();
  auto mock_loudness_calculator = std::make_unique<MockLoudnessCalculator>();
  const LoudnessInfo kArbitraryLoudnessInfo = {
      .info_type = LoudnessInfo::kTruePeak,
      .integrated_loudness = 123,
      .digital_peak = 456,
      .true_peak = 789,
  };
  ON_CALL(*mock_loudness_calculator, QueryLoudness())
      .WillByDefault(Return(kArbitraryLoudnessInfo));
  // The factory is expected to be asked for exactly one calculator, and hands
  // out the mock configured above.
  EXPECT_CALL(*mock_loudness_calculator_factory,
              CreateLoudnessCalculator(_, _, _, _))
      .WillOnce(Return(std::move(mock_loudness_calculator)));
  loudness_calculator_factory_ = std::move(mock_loudness_calculator_factory);
  auto iamf_encoder = CreateExpectOk();
  iamf_encoder.FinalizeAddSamples();

  const auto finalized_mix_presentation_obus =
      iamf_encoder.GetFinalizedMixPresentationObus();
  ASSERT_THAT(finalized_mix_presentation_obus, IsOkAndHolds(Not(IsEmpty())));

  EXPECT_EQ(finalized_mix_presentation_obus->front()
                .sub_mixes_.front()
                .layouts.front()
                .loudness,
            kArbitraryLoudnessInfo);
}
466
// The encoder must call the sample processor factory with the bit-depth of the
// codec config, even when the user metadata holds a bit-depth override.
TEST_F(IamfEncoderTest, OutputSampleProcessorFactoryIgnoresBitDepthOverride) {
  // The encoder itself ignores the override. Applying it, if desired, is left
  // to the `SampleProcessorFactory`.
  SetupDescriptorObus();
  constexpr uint32_t kExpectedSampleProcessorFactoryCalledBitDepth =
      kExpectedPcmBitDepth;
  constexpr uint32_t kIgnoredBitDepthOverride = 255;
  user_metadata_.mutable_test_vector_metadata()
      ->set_output_wav_file_bit_depth_override(kIgnoredBitDepthOverride);
  // Wav file writing is done only when the signal can be rendered, based on the
  // resultant wav writers.
  renderer_factory_ = std::make_unique<RendererFactory>();
  MockSampleProcessorFactory mock_sample_processor_factory;
  EXPECT_CALL(
      mock_sample_processor_factory,
      Call(_, _, _, _, _, _, kExpectedSampleProcessorFactoryCalledBitDepth, _));
  sample_processor_factory_ = mock_sample_processor_factory.AsStdFunction();

  // Creating the encoder is what triggers the expected factory call.
  CreateExpectOk();
}
487
488 // TODO(b/349321277): Add more tests.
489
490 } // namespace
491 } // namespace iamf_tools
492