• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "iamf/cli/iamf_encoder.h"
2 
3 #include <cstddef>
4 #include <cstdint>
5 #include <filesystem>
6 #include <list>
7 #include <memory>
8 #include <optional>
9 #include <string>
10 #include <utility>
11 #include <vector>
12 
13 #include "absl/container/flat_hash_map.h"
14 #include "absl/log/log.h"
15 #include "absl/status/status_matchers.h"
16 #include "absl/strings/string_view.h"
17 #include "gmock/gmock.h"
18 #include "gtest/gtest.h"
19 #include "iamf/cli/audio_element_with_data.h"
20 #include "iamf/cli/audio_frame_with_data.h"
21 #include "iamf/cli/channel_label.h"
22 #include "iamf/cli/demixing_module.h"
23 #include "iamf/cli/iamf_components.h"
24 #include "iamf/cli/iamf_encoder.h"
25 #include "iamf/cli/loudness_calculator_factory_base.h"
26 #include "iamf/cli/parameter_block_with_data.h"
27 #include "iamf/cli/proto/arbitrary_obu.pb.h"
28 #include "iamf/cli/proto/audio_element.pb.h"
29 #include "iamf/cli/proto/codec_config.pb.h"
30 #include "iamf/cli/proto/ia_sequence_header.pb.h"
31 #include "iamf/cli/proto/mix_presentation.pb.h"
32 #include "iamf/cli/proto/test_vector_metadata.pb.h"
33 #include "iamf/cli/proto/user_metadata.pb.h"
34 #include "iamf/cli/renderer_factory.h"
35 #include "iamf/cli/rendering_mix_presentation_finalizer.h"
36 #include "iamf/cli/tests/cli_test_utils.h"
37 #include "iamf/cli/user_metadata_builder/audio_element_metadata_builder.h"
38 #include "iamf/cli/user_metadata_builder/iamf_input_layout.h"
39 #include "iamf/cli/wav_writer.h"
40 #include "iamf/obu/arbitrary_obu.h"
41 #include "iamf/obu/codec_config.h"
42 #include "iamf/obu/ia_sequence_header.h"
43 #include "iamf/obu/mix_presentation.h"
44 #include "iamf/obu/types.h"
45 #include "src/google/protobuf/text_format.h"
46 
47 namespace iamf_tools {
48 namespace {
49 
50 using ::absl_testing::IsOk;
51 using ::absl_testing::IsOkAndHolds;
52 using ::iamf_tools_cli_proto::UserMetadata;
53 using ::testing::_;
54 using ::testing::Contains;
55 using ::testing::IsEmpty;
56 using ::testing::Not;
57 using ::testing::Return;
58 
59 constexpr DecodedUleb128 kCodecConfigId = 200;
60 constexpr DecodedUleb128 kAudioElementId = 300;
61 constexpr uint32_t kNumSamplesPerFrame = 8;
62 constexpr int kExpectedPcmBitDepth = 16;
63 
64 const auto kOmitOutputWavFiles =
65     RenderingMixPresentationFinalizer::ProduceNoSampleProcessors;
66 
AddIaSequenceHeader(UserMetadata & user_metadata)67 void AddIaSequenceHeader(UserMetadata& user_metadata) {
68   ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(
69       R"pb(
70         primary_profile: PROFILE_VERSION_SIMPLE
71         additional_profile: PROFILE_VERSION_BASE
72       )pb",
73       user_metadata.add_ia_sequence_header_metadata()));
74 }
75 
AddCodecConfig(UserMetadata & user_metadata)76 void AddCodecConfig(UserMetadata& user_metadata) {
77   auto* new_codec_config = user_metadata.add_codec_config_metadata();
78   ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(
79       R"pb(
80         codec_config_id: 200
81         codec_config {
82           codec_id: CODEC_ID_LPCM
83           num_samples_per_frame: 8
84           audio_roll_distance: 0
85           decoder_config_lpcm {
86             sample_format_flags: LPCM_LITTLE_ENDIAN
87             sample_rate: 48000
88           }
89         }
90       )pb",
91       new_codec_config));
92   new_codec_config->mutable_codec_config()
93       ->mutable_decoder_config_lpcm()
94       ->set_sample_size(kExpectedPcmBitDepth);
95 }
96 
AddAudioElement(UserMetadata & user_metadata)97 void AddAudioElement(UserMetadata& user_metadata) {
98   AudioElementMetadataBuilder builder;
99   ASSERT_THAT(builder.PopulateAudioElementMetadata(
100                   kAudioElementId, kCodecConfigId, IamfInputLayout::kStereo,
101                   *user_metadata.add_audio_element_metadata()),
102               IsOk());
103 }
104 
AddMixPresentation(UserMetadata & user_metadata)105 void AddMixPresentation(UserMetadata& user_metadata) {
106   ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(
107       R"pb(
108         mix_presentation_id: 42
109         count_label: 0
110         sub_mixes {
111           audio_elements {
112             audio_element_id: 300
113             rendering_config {
114               headphones_rendering_mode: HEADPHONES_RENDERING_MODE_STEREO
115             }
116             element_mix_gain {
117               param_definition {
118                 parameter_id: 100
119                 parameter_rate: 16000
120                 param_definition_mode: 1
121                 reserved: 0
122               }
123               default_mix_gain: 0
124             }
125           }
126           output_mix_gain {
127             param_definition {
128               parameter_id: 100
129               parameter_rate: 16000
130               param_definition_mode: 1
131               reserved: 0
132             }
133             default_mix_gain: 0
134           }
135           layouts {
136             loudness_layout {
137               layout_type: LAYOUT_TYPE_LOUDSPEAKERS_SS_CONVENTION
138               ss_layout { sound_system: SOUND_SYSTEM_A_0_2_0 reserved: 0 }
139             }
140             loudness {
141               info_type_bit_masks: []
142               integrated_loudness: 0
143               digital_peak: 0
144             }
145           }
146         }
147       )pb",
148       user_metadata.add_mix_presentation_metadata()));
149 }
150 
AddArbitraryObu(UserMetadata & user_metadata)151 void AddArbitraryObu(UserMetadata& user_metadata) {
152   ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(
153       R"pb(
154         insertion_hook: INSERTION_HOOK_AFTER_AUDIO_ELEMENTS
155         obu_type: OBU_IA_RESERVED_26
156         payload: "Imaginary descriptor OBU between the audio element and mix presentation."
157       )pb",
158       user_metadata.add_arbitrary_obu_metadata()));
159 }
160 
AddAudioFrame(UserMetadata & user_metadata)161 void AddAudioFrame(UserMetadata& user_metadata) {
162   ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(
163       R"pb(
164         samples_to_trim_at_end: 0
165         samples_to_trim_at_start: 0
166         audio_element_id: 300
167         channel_ids: [ 0, 1 ]
168         channel_labels: [ "L2", "R2" ]
169       )pb",
170       user_metadata.add_audio_frame_metadata()));
171 }
172 
AddParameterBlockAtTimestamp(InternalTimestamp start_timestamp,UserMetadata & user_metadata)173 void AddParameterBlockAtTimestamp(InternalTimestamp start_timestamp,
174                                   UserMetadata& user_metadata) {
175   auto* metadata = user_metadata.add_parameter_block_metadata();
176   ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(
177       R"pb(
178         parameter_id: 100
179         duration: 8
180         num_subblocks: 1
181         constant_subblock_duration: 8
182         subblocks:
183         [ {
184           mix_gain_parameter_data {
185             animation_type: ANIMATE_STEP
186             param_data { step { start_point_value: 0 } }
187           }
188         }]
189       )pb",
190       metadata));
191 
192   // Overwrite `start_timestamp`.
193   metadata->set_start_timestamp(start_timestamp);
194 }
195 
GetFirstSubmixFirstLayoutExpectedPath(absl::string_view output_directory)196 std::string GetFirstSubmixFirstLayoutExpectedPath(
197     absl::string_view output_directory) {
198   return (std::filesystem::path(output_directory) /
199           std::filesystem::path("first_file.wav"))
200       .string();
201 }
202 
GetWavWriterFactoryThatProducesFirstSubMixFirstLayout(absl::string_view output_directory)203 auto GetWavWriterFactoryThatProducesFirstSubMixFirstLayout(
204     absl::string_view output_directory) {
205   const std::string output_wav_path =
206       GetFirstSubmixFirstLayoutExpectedPath(output_directory);
207   return [output_wav_path](
208              DecodedUleb128 mix_presentation_id, int sub_mix_index,
209              int layout_index, const Layout&, int num_channels, int sample_rate,
210              int bit_depth,
211              size_t num_samples_per_frame) -> std::unique_ptr<WavWriter> {
212     if (sub_mix_index != 0 || layout_index != 0) {
213       return nullptr;
214     }
215 
216     return WavWriter::Create(output_wav_path, num_channels, sample_rate,
217                              bit_depth, num_samples_per_frame);
218   };
219 }
220 
221 class IamfEncoderTest : public ::testing::Test {
222  protected:
SetupDescriptorObus()223   void SetupDescriptorObus() {
224     AddIaSequenceHeader(user_metadata_);
225     AddCodecConfig(user_metadata_);
226     AddAudioElement(user_metadata_);
227     AddMixPresentation(user_metadata_);
228   }
229 
CreateExpectOk()230   IamfEncoder CreateExpectOk() {
231     auto iamf_encoder = IamfEncoder::Create(
232         user_metadata_, renderer_factory_.get(),
233         loudness_calculator_factory_.get(), sample_processor_factory_,
234         ia_sequence_header_obu_, codec_config_obus_, audio_elements_,
235         mix_presentation_obus_, arbitrary_obus_);
236     EXPECT_THAT(iamf_encoder, IsOk());
237     return std::move(*iamf_encoder);
238   }
239 
240   UserMetadata user_metadata_;
241   std::optional<IASequenceHeaderObu> ia_sequence_header_obu_;
242   absl::flat_hash_map<uint32_t, CodecConfigObu> codec_config_obus_;
243   absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements_;
244   std::list<MixPresentationObu> mix_presentation_obus_;
245   std::list<ArbitraryObu> arbitrary_obus_;
246   // Default some dependencies to be based on the real `IamfComponents`
247   // implementations. And generally disable wav writing since it is not needed
248   // for most tests.
249   std::unique_ptr<RendererFactoryBase> renderer_factory_ =
250       CreateRendererFactory();
251   std::unique_ptr<LoudnessCalculatorFactoryBase> loudness_calculator_factory_ =
252       CreateLoudnessCalculatorFactory();
253   RenderingMixPresentationFinalizer::SampleProcessorFactory
254       sample_processor_factory_ = kOmitOutputWavFiles;
255 };
256 
// Creating an encoder from metadata with no descriptors must fail.
TEST_F(IamfEncoderTest, CreateFailsOnEmptyUserMetadata) {
  user_metadata_.Clear();

  const auto iamf_encoder = IamfEncoder::Create(
      user_metadata_, renderer_factory_.get(),
      loudness_calculator_factory_.get(), sample_processor_factory_,
      ia_sequence_header_obu_, codec_config_obus_, audio_elements_,
      mix_presentation_obus_, arbitrary_obus_);

  EXPECT_THAT(iamf_encoder, Not(IsOk()));
}
268 
// `Create` must populate one OBU per configured descriptor and leave the
// arbitrary OBU list empty when none were requested.
TEST_F(IamfEncoderTest, CreateGeneratesDescriptorObus) {
  SetupDescriptorObus();

  auto iamf_encoder = CreateExpectOk();

  EXPECT_TRUE(ia_sequence_header_obu_.has_value());
  EXPECT_EQ(codec_config_obus_.size(), 1);
  EXPECT_EQ(audio_elements_.size(), 1);
  EXPECT_EQ(mix_presentation_obus_.size(), 1);
  EXPECT_THAT(arbitrary_obus_, IsEmpty());
}
279 
// An arbitrary OBU in the user metadata must be emitted by `Create`.
TEST_F(IamfEncoderTest, CreateGeneratesArbitraryObus) {
  // Arrange: the usual descriptors plus one arbitrary OBU.
  SetupDescriptorObus();
  AddArbitraryObu(user_metadata_);

  // Act.
  auto iamf_encoder = CreateExpectOk();

  // Assert: exactly the one requested OBU was produced.
  EXPECT_EQ(arbitrary_obus_.size(), 1);
}
288 
// The first mix presentation must carry a build information tag by default.
TEST_F(IamfEncoderTest, BuildInformationTagIsPresentByDefault) {
  SetupDescriptorObus();

  auto iamf_encoder = CreateExpectOk();
  ASSERT_THAT(mix_presentation_obus_, Not(IsEmpty()));

  // The slot holding the build information tag is irrelevant; it only needs to
  // exist by default, because it helps with debugging.
  const auto& tags_of_first_obu =
      mix_presentation_obus_.front().mix_presentation_tags_;
  ASSERT_TRUE(tags_of_first_obu.has_value());
  EXPECT_THAT(tags_of_first_obu->tags, Contains(TagMatchesBuildInformation()));
}
302 
// Drives the encoder through two temporal units and checks that each one
// yields exactly one audio frame and one parameter block, and that data OBU
// generation stops after samples are finalized in the second iteration.
TEST_F(IamfEncoderTest, GenerateDataObusTwoIterationsSucceeds) {
  SetupDescriptorObus();
  AddAudioFrame(user_metadata_);
  AddParameterBlockAtTimestamp(0, user_metadata_);
  AddParameterBlockAtTimestamp(8, user_metadata_);
  auto iamf_encoder = CreateExpectOk();

  // Temporary variables for one iteration.
  const std::vector<InternalSampleType> zero_samples(kNumSamplesPerFrame, 0.0);
  std::list<AudioFrameWithData> temp_audio_frames;
  std::list<ParameterBlockWithData> temp_parameter_blocks;
  int iteration = 0;
  while (iamf_encoder.GeneratingDataObus()) {
    // Only two parameter blocks were configured. If the encoder keeps
    // generating past them, fail cleanly rather than indexing out of range.
    ASSERT_LT(iteration, user_metadata_.parameter_block_metadata_size());

    iamf_encoder.BeginTemporalUnit();
    iamf_encoder.AddSamples(kAudioElementId, ChannelLabel::kL2, zero_samples);
    iamf_encoder.AddSamples(kAudioElementId, ChannelLabel::kR2, zero_samples);

    // Signal stopping adding samples at the second iteration.
    if (iteration == 1) {
      iamf_encoder.FinalizeAddSamples();
    }

    EXPECT_THAT(iamf_encoder.AddParameterBlockMetadata(
                    user_metadata_.parameter_block_metadata(iteration)),
                IsOk());

    // Output.
    EXPECT_THAT(iamf_encoder.OutputTemporalUnit(temp_audio_frames,
                                                temp_parameter_blocks),
                IsOk());
    // Fatal check: `front()` below requires a non-empty list.
    ASSERT_EQ(temp_audio_frames.size(), 1);
    EXPECT_EQ(temp_parameter_blocks.size(), 1);
    EXPECT_EQ(temp_audio_frames.front().start_timestamp,
              iteration * kNumSamplesPerFrame);

    iteration++;
  }

  EXPECT_EQ(iteration, 2);
}
344 
// A move-constructed encoder must remain fully usable: it accepts samples and
// parameter blocks and outputs a complete temporal unit.
TEST_F(IamfEncoderTest, SafeToUseAfterMove) {
  SetupDescriptorObus();
  AddAudioFrame(user_metadata_);
  AddParameterBlockAtTimestamp(0, user_metadata_);
  AddParameterBlockAtTimestamp(8, user_metadata_);
  auto iamf_encoder_to_move_from = CreateExpectOk();

  // Move the encoder, and use it.
  IamfEncoder iamf_encoder = std::move(iamf_encoder_to_move_from);

  // Use many parts of the API, to make sure the move did not break anything.
  EXPECT_TRUE(iamf_encoder.GeneratingDataObus());
  iamf_encoder.BeginTemporalUnit();
  const std::vector<InternalSampleType> kZeroSamples(kNumSamplesPerFrame, 0.0);
  iamf_encoder.AddSamples(kAudioElementId, ChannelLabel::kL2, kZeroSamples);
  iamf_encoder.AddSamples(kAudioElementId, ChannelLabel::kR2, kZeroSamples);
  EXPECT_THAT(iamf_encoder.AddParameterBlockMetadata(
                  user_metadata_.parameter_block_metadata(0)),
              IsOk());
  iamf_encoder.FinalizeAddSamples();
  std::list<AudioFrameWithData> temp_audio_frames;
  std::list<ParameterBlockWithData> temp_parameter_blocks;
  EXPECT_THAT(
      iamf_encoder.OutputTemporalUnit(temp_audio_frames, temp_parameter_blocks),
      IsOk());
  EXPECT_EQ(temp_audio_frames.size(), 1);
  EXPECT_EQ(temp_parameter_blocks.size(), 1);
}
374 
// Once samples are finalized, the finalized mix presentation OBUs can be
// retrieved.
TEST_F(IamfEncoderTest, FinalizeMixPresentationObusSucceeds) {
  SetupDescriptorObus();
  auto iamf_encoder = CreateExpectOk();
  iamf_encoder.FinalizeAddSamples();

  const auto finalized_obus = iamf_encoder.GetFinalizedMixPresentationObus();

  EXPECT_THAT(finalized_obus, IsOk());
}
383 
// The finalized mix presentation OBUs may only be retrieved once.
TEST_F(IamfEncoderTest, CallingFinalizeMixPresentationObusTwiceFails) {
  SetupDescriptorObus();
  auto iamf_encoder = CreateExpectOk();
  iamf_encoder.FinalizeAddSamples();

  // The first retrieval succeeds...
  EXPECT_THAT(iamf_encoder.GetFinalizedMixPresentationObus(), IsOk());

  // ...and any repeated retrieval is rejected.
  EXPECT_THAT(iamf_encoder.GetFinalizedMixPresentationObus(), Not(IsOk()));
}
394 
// With rendering and loudness measurement disabled, finalizing must leave the
// user-provided loudness of the first layout unchanged.
TEST_F(IamfEncoderTest,
       FinalizeMixPresentationObusDefaultsToPreservingUserLoudness) {
  SetupDescriptorObus();
  // Configuring the encoder with null factories is permitted, which disables
  // rendering and loudness measurements.
  renderer_factory_ = nullptr;
  loudness_calculator_factory_ = nullptr;
  auto iamf_encoder = CreateExpectOk();
  // Fatal check: `front()` below requires at least one mix presentation.
  ASSERT_FALSE(mix_presentation_obus_.empty());
  const auto original_loudness = mix_presentation_obus_.front()
                                     .sub_mixes_.front()
                                     .layouts.front()
                                     .loudness;
  iamf_encoder.FinalizeAddSamples();

  const auto finalized_mix_presentation_obus =
      iamf_encoder.GetFinalizedMixPresentationObus();
  // Also require a non-empty result before reading its first element.
  ASSERT_THAT(finalized_mix_presentation_obus, IsOkAndHolds(Not(IsEmpty())));

  EXPECT_EQ(finalized_mix_presentation_obus->front()
                .sub_mixes_.front()
                .layouts.front()
                .loudness,
            original_loudness);
}
419 
// Finalized mix presentation OBUs are unavailable while data OBUs are still
// being generated.
TEST_F(IamfEncoderTest,
       FinalizeMixPresentationObusFailsBeforeGeneratingDataObusIsFinished) {
  SetupDescriptorObus();
  AddAudioFrame(user_metadata_);
  auto iamf_encoder = CreateExpectOk();

  // Precondition: data OBUs are still being produced, so the final loudness
  // values cannot be known yet.
  ASSERT_TRUE(iamf_encoder.GeneratingDataObus());

  EXPECT_THAT(iamf_encoder.GetFinalizedMixPresentationObus(), Not(IsOk()));
}
432 
// With a renderer and a loudness calculator available, the finalized mix
// presentation must carry the loudness reported by the calculator.
TEST_F(IamfEncoderTest, FinalizeMixPresentationObuFillsInLoudness) {
  SetupDescriptorObus();
  // Loudness measurement is done only when the signal can be rendered, and
  // based on the resultant loudness calculators.
  renderer_factory_ = std::make_unique<RendererFactory>();
  auto mock_loudness_calculator_factory =
      std::make_unique<MockLoudnessCalculatorFactory>();
  auto mock_loudness_calculator = std::make_unique<MockLoudnessCalculator>();
  const LoudnessInfo kArbitraryLoudnessInfo = {
      .info_type = LoudnessInfo::kTruePeak,
      .integrated_loudness = 123,
      .digital_peak = 456,
      .true_peak = 789,
  };
  ON_CALL(*mock_loudness_calculator, QueryLoudness())
      .WillByDefault(Return(kArbitraryLoudnessInfo));
  // The factory hands out the single mock calculator configured above.
  EXPECT_CALL(*mock_loudness_calculator_factory,
              CreateLoudnessCalculator(_, _, _, _))
      .WillOnce(Return(std::move(mock_loudness_calculator)));
  loudness_calculator_factory_ = std::move(mock_loudness_calculator_factory);
  auto iamf_encoder = CreateExpectOk();
  iamf_encoder.FinalizeAddSamples();

  const auto finalized_mix_presentation_obus =
      iamf_encoder.GetFinalizedMixPresentationObus();
  ASSERT_THAT(finalized_mix_presentation_obus, IsOkAndHolds(Not(IsEmpty())));

  EXPECT_EQ(finalized_mix_presentation_obus->front()
                .sub_mixes_.front()
                .layouts.front()
                .loudness,
            kArbitraryLoudnessInfo);
}
466 
// The encoder must call the sample processor factory with the codec's PCM
// bit-depth, not with the override from the test vector metadata.
TEST_F(IamfEncoderTest, OutputSampleProcessorFactoryIgnoresBitDepthOverride) {
  // The encoder itself does not apply the bit-depth override; honoring it is
  // left to the `SampleProcessorFactory` level.
  SetupDescriptorObus();
  constexpr uint32_t kExpectedSampleProcessorFactoryCalledBitDepth =
      kExpectedPcmBitDepth;
  constexpr uint32_t kIgnoredBitDepthOverride = 255;
  user_metadata_.mutable_test_vector_metadata()
      ->set_output_wav_file_bit_depth_override(kIgnoredBitDepthOverride);
  // Wav file writing is done only when the signal can be rendered, based on the
  // resultant wav writers.
  renderer_factory_ = std::make_unique<RendererFactory>();
  MockSampleProcessorFactory mock_sample_processor_factory;
  EXPECT_CALL(
      mock_sample_processor_factory,
      Call(_, _, _, _, _, _, kExpectedSampleProcessorFactoryCalledBitDepth, _));
  sample_processor_factory_ = mock_sample_processor_factory.AsStdFunction();

  CreateExpectOk();
}
487 
488 // TODO(b/349321277): Add more tests.
489 
490 }  // namespace
491 }  // namespace iamf_tools
492