/*
 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 3-Clause Clear
 * License and the Alliance for Open Media Patent License 1.0. If the BSD
 * 3-Clause Clear License was not distributed with this source code in the
 * LICENSE file, you can obtain it at
 * www.aomedia.org/license/software-license/bsd-3-c-c. If the Alliance for
 * Open Media Patent License 1.0 was not distributed with this source code
 * in the PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include "iamf/cli/demixing_module.h"

#include <algorithm>
#include <array>
#include <cstdint>
#include <iterator>
#include <list>
#include <optional>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/status/status.h"
#include "absl/status/status_matchers.h"
#include "absl/status/statusor.h"
#include "absl/types/span.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/audio_frame_decoder.h"
#include "iamf/cli/audio_frame_with_data.h"
#include "iamf/cli/channel_label.h"
#include "iamf/cli/proto/user_metadata.pb.h"
#include "iamf/cli/proto_conversion/channel_label_utils.h"
#include "iamf/cli/proto_conversion/downmixing_reconstruction_util.h"
#include "iamf/cli/tests/cli_test_utils.h"
#include "iamf/common/utils/numeric_utils.h"
#include "iamf/obu/audio_element.h"
#include "iamf/obu/audio_frame.h"
#include "iamf/obu/codec_config.h"
#include "iamf/obu/demixing_info_parameter_data.h"
#include "iamf/obu/obu_header.h"
#include "iamf/obu/recon_gain_info_parameter_data.h"
#include "iamf/obu/types.h"

namespace iamf_tools {
namespace {

using ::absl_testing::IsOk;
using ::absl_testing::IsOkAndHolds;
using enum ChannelLabel::Label;
using ::testing::DoubleEq;
using ::testing::DoubleNear;
using ::testing::IsEmpty;
using ::testing::Not;
using ::testing::Pointwise;

constexpr DecodedUleb128 kAudioElementId = 137;
constexpr std::array<uint8_t, 12> kReconGainValues = {
    255, 0, 125, 200, 150, 255, 255, 255, 255, 255, 255, 255};
const uint32_t kZeroSamplesToTrimAtEnd = 0;
const uint32_t kZeroSamplesToTrimAtStart = 0;
const int kStartTimestamp = 0;
const int kEndTimestamp = 4;
const DecodedUleb128 kMonoSubstreamId = 0;
const DecodedUleb128 kL2SubstreamId = 1;

// TODO(b/305927287): Test computation of linear output gains. Test some cases
//                    of erroneous input.
TEST(FindSamplesOrDemixedSamples, FindsMatchingSamples) {
  const std::vector<InternalSampleType> kSamplesToFind = {1, 2, 3};
  const LabelSamplesMap kLabelToSamples = {{kL2, kSamplesToFind}};

  const std::vector<InternalSampleType>* found_samples;
  EXPECT_THAT(DemixingModule::FindSamplesOrDemixedSamples(kL2, kLabelToSamples,
                                                          &found_samples),
              IsOk());

  EXPECT_THAT(*found_samples, Pointwise(DoubleEq(), kSamplesToFind));
}

TEST(FindSamplesOrDemixedSamples, FindsMatchingDemixedSamples) {
  const std::vector<InternalSampleType> kSamplesToFind = {1, 2, 3};
  const LabelSamplesMap kLabelToSamples = {{kDemixedR2, kSamplesToFind}};

  const std::vector<InternalSampleType>* found_samples;
  EXPECT_THAT(DemixingModule::FindSamplesOrDemixedSamples(kR2, kLabelToSamples,
                                                          &found_samples),
              IsOk());

  EXPECT_THAT(*found_samples, Pointwise(DoubleEq(), kSamplesToFind));
}

TEST(FindSamplesOrDemixedSamples, InvalidWhenThereIsNoDemixingLabel) {
  const std::vector<InternalSampleType> kSamplesToFind = {1, 2, 3};
  const LabelSamplesMap kLabelToSamples = {{kDemixedR2, kSamplesToFind}};

  const std::vector<InternalSampleType>* found_samples;
  EXPECT_FALSE(DemixingModule::FindSamplesOrDemixedSamples(kL2, kLabelToSamples,
                                                           &found_samples)
                   .ok());
}

TEST(FindSamplesOrDemixedSamples, RegularSamplesTakePrecedence) {
  const std::vector<InternalSampleType> kSamplesToFind = {1, 2, 3};
  const std::vector<InternalSampleType> kDemixedSamplesToIgnore = {4, 5, 6};
  const LabelSamplesMap kLabelToSamples = {
      {kR2, kSamplesToFind}, {kDemixedR2, kDemixedSamplesToIgnore}};

  const std::vector<InternalSampleType>* found_samples;
  EXPECT_THAT(DemixingModule::FindSamplesOrDemixedSamples(kR2, kLabelToSamples,
                                                          &found_samples),
              IsOk());

  EXPECT_THAT(*found_samples, Pointwise(DoubleEq(), kSamplesToFind));
}

TEST(FindSamplesOrDemixedSamples, ErrorNoMatchingSamples) {
  const std::vector<InternalSampleType> kSamplesToFind = {1, 2, 3};
  const LabelSamplesMap kLabelToSamples = {{kL2, kSamplesToFind}};

  const std::vector<InternalSampleType>* found_samples;
  EXPECT_FALSE(DemixingModule::FindSamplesOrDemixedSamples(kL3, kLabelToSamples,
                                                           &found_samples)
                   .ok());
}

void InitAudioElementWithLabelsAndLayers(
    const SubstreamIdLabelsMap& substream_id_to_labels,
    const std::vector<ChannelAudioLayerConfig::LoudspeakerLayout>&
        loudspeaker_layouts,
    absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements) {
  auto [iter, unused_inserted] = audio_elements.emplace(
      kAudioElementId,
      AudioElementWithData{
          .obu = AudioElementObu(ObuHeader(), kAudioElementId,
                                 AudioElementObu::kAudioElementChannelBased,
                                 /*reserved=*/0,
                                 /*codec_config_id=*/0),
          .substream_id_to_labels = substream_id_to_labels,
      });
  auto& obu = iter->second.obu;
  ASSERT_THAT(
      obu.InitializeScalableChannelLayout(loudspeaker_layouts.size(), 0),
      IsOk());
  auto& config = std::get<ScalableChannelLayoutConfig>(obu.config_);
  for (int i = 0; i < loudspeaker_layouts.size(); ++i) {
    config.channel_audio_layer_configs[i].loudspeaker_layout =
        loudspeaker_layouts[i];
  }
}

TEST(CreateForDownMixingAndReconstruction, EmptyConfigMapIsOk) {
  absl::flat_hash_map<DecodedUleb128,
                      DemixingModule::DownmixingAndReconstructionConfig>
      id_to_config_map;
  const auto demixing_module =
      DemixingModule::CreateForDownMixingAndReconstruction(
          std::move(id_to_config_map));
  EXPECT_THAT(demixing_module, IsOk());
}

TEST(CreateForDownMixingAndReconstruction, ValidWithTwoLayerStereo) {
  DecodedUleb128 id = 137;
  DemixingModule::DownmixingAndReconstructionConfig config = {
      .user_labels = {kL2, kR2},
      .substream_id_to_labels = {{0, {kMono}}, {1, {kL2}}},
      .label_to_output_gain = {}};
  absl::flat_hash_map<DecodedUleb128,
                      DemixingModule::DownmixingAndReconstructionConfig>
      id_to_config_map = {{id, config}};
  const auto demixing_module =
      DemixingModule::CreateForDownMixingAndReconstruction(
          std::move(id_to_config_map));
  EXPECT_THAT(demixing_module, IsOk());
}

TEST(InitializeForReconstruction, NeverCreatesDownMixers) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers({{0, {kMono}}, {1, {kL2}}},
                                      {ChannelAudioLayerConfig::kLayoutMono,
                                       ChannelAudioLayerConfig::kLayoutStereo},
                                      audio_elements);
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  const std::list<Demixer>* down_mixers = nullptr;
  EXPECT_THAT(demixing_module->GetDownMixers(kAudioElementId, down_mixers),
              IsOk());
  EXPECT_TRUE(down_mixers->empty());
}

TEST(CreateForReconstruction, CreatesOneDemixerForTwoLayerStereo) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers({{0, {kMono}}, {1, {kL2}}},
                                      {ChannelAudioLayerConfig::kLayoutMono,
                                       ChannelAudioLayerConfig::kLayoutStereo},
                                      audio_elements);
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  const std::list<Demixer>* demixer = nullptr;
  EXPECT_THAT(demixing_module->GetDemixers(kAudioElementId, demixer), IsOk());
  EXPECT_EQ(demixer->size(), 1);
}

TEST(CreateForReconstruction, FailsForReservedLayout14) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers(
      {{0, {kOmitted}}}, {ChannelAudioLayerConfig::kLayoutReserved14},
      audio_elements);

  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);

  EXPECT_FALSE(demixing_module.ok());
}

TEST(CreateForReconstruction, ValidForExpandedLayoutLFE) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers(
      {{0, {kLFE}}}, {ChannelAudioLayerConfig::kLayoutExpanded},
      audio_elements);
  std::get<ScalableChannelLayoutConfig>(
      audio_elements.at(kAudioElementId).obu.config_)
      .channel_audio_layer_configs[0]
      .expanded_loudspeaker_layout =
      ChannelAudioLayerConfig::kExpandedLayoutLFE;

  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);

  EXPECT_THAT(demixing_module, IsOk());
}

TEST(CreateForReconstruction, CreatesNoDemixersForSingleLayerChannelBased) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers({{0, {kL2, kR2}}},
                                      {ChannelAudioLayerConfig::kLayoutStereo},
                                      audio_elements);
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  const std::list<Demixer>* demixer = nullptr;
  EXPECT_THAT(demixing_module->GetDemixers(kAudioElementId, demixer), IsOk());
  EXPECT_TRUE(demixer->empty());
}

TEST(CreateForReconstruction, CreatesNoDemixersForAmbisonics) {
  const DecodedUleb128 kCodecConfigId = 0;
  constexpr std::array<DecodedUleb128, 4> kAmbisonicsSubstreamIds{0, 1, 2, 3};
  absl::flat_hash_map<DecodedUleb128, CodecConfigObu> codec_configs;
  AddLpcmCodecConfigWithIdAndSampleRate(kCodecConfigId, 48000, codec_configs);
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  AddAmbisonicsMonoAudioElementWithSubstreamIds(kAudioElementId, kCodecConfigId,
                                                kAmbisonicsSubstreamIds,
                                                codec_configs, audio_elements);
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  const std::list<Demixer>* demixer = nullptr;
  EXPECT_THAT(demixing_module->GetDemixers(kAudioElementId, demixer), IsOk());
  EXPECT_TRUE(demixer->empty());
}

TEST(DemixOriginalAudioSamples, ReturnsErrorAfterCreateForReconstruction) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers(
      {{kMonoSubstreamId, {kMono}}, {kL2SubstreamId, {kL2}}},
      {ChannelAudioLayerConfig::kLayoutMono,
       ChannelAudioLayerConfig::kLayoutStereo},
      audio_elements);
  auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  EXPECT_THAT(demixing_module->DemixOriginalAudioSamples({}), Not(IsOk()));
}

TEST(DemixDecodedAudioSamples, OutputContainsOriginalAndDemixedSamples) {
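  // Set up a two-layer (mono + stereo) audio element; the demixed output is
  // expected to carry the original kMono and kL2 channels plus the
  // reconstructed kDemixedR2 channel.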
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers(
      {{kMonoSubstreamId, {kMono}}, {kL2SubstreamId, {kL2}}},
      {ChannelAudioLayerConfig::kLayoutMono,
       ChannelAudioLayerConfig::kLayoutStereo},
      audio_elements);
  std::list<DecodedAudioFrame> decoded_audio_frames;
  decoded_audio_frames.push_back(
      DecodedAudioFrame{.substream_id = kMonoSubstreamId,
                        .start_timestamp = kStartTimestamp,
                        .end_timestamp = kEndTimestamp,
                        .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
                        .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
                        .decoded_samples = {{0}},
                        .down_mixing_params = DownMixingParams()});
  decoded_audio_frames.push_back(
      DecodedAudioFrame{.substream_id = kL2SubstreamId,
                        .start_timestamp = kStartTimestamp,
                        .end_timestamp = kEndTimestamp,
                        .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
                        .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
                        .decoded_samples = {{0}},
                        .down_mixing_params = DownMixingParams()});
  auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  const auto id_to_labeled_decoded_frame =
      demixing_module->DemixDecodedAudioSamples(decoded_audio_frames);
  ASSERT_THAT(id_to_labeled_decoded_frame, IsOk());

  ASSERT_TRUE(id_to_labeled_decoded_frame->contains(kAudioElementId));
  const auto& labeled_frame = id_to_labeled_decoded_frame->at(kAudioElementId);
  EXPECT_TRUE(labeled_frame.label_to_samples.contains(kL2));
  EXPECT_TRUE(labeled_frame.label_to_samples.contains(kMono));
  EXPECT_TRUE(labeled_frame.label_to_samples.contains(kDemixedR2));
}

TEST(DemixDecodedAudioSamples, OutputEchoesTimingInformation) {
  // These values are not very sensible, but as long as they are consistent
  // between related frames it is OK.
  const DecodedUleb128 kExpectedStartTimestamp = 99;
  const DecodedUleb128 kExpectedEndTimestamp = 123;
  const DecodedUleb128 kExpectedNumSamplesToTrimAtEnd = 999;
  const DecodedUleb128 kExpectedNumSamplesToTrimAtStart = 9999;
  const DecodedUleb128 kL2SubstreamId = 1;
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers(
      {{kMonoSubstreamId, {kMono}}, {kL2SubstreamId, {kL2}}},
      {ChannelAudioLayerConfig::kLayoutMono,
       ChannelAudioLayerConfig::kLayoutStereo},
      audio_elements);
  std::list<DecodedAudioFrame> decoded_audio_frames;
  decoded_audio_frames.push_back(DecodedAudioFrame{
      .substream_id = kMonoSubstreamId,
      .start_timestamp = kExpectedStartTimestamp,
      .end_timestamp = kExpectedEndTimestamp,
      .samples_to_trim_at_end = kExpectedNumSamplesToTrimAtEnd,
      .samples_to_trim_at_start = kExpectedNumSamplesToTrimAtStart,
      .decoded_samples = {{0}},
      .down_mixing_params = DownMixingParams()});
  decoded_audio_frames.push_back(DecodedAudioFrame{
      .substream_id = kL2SubstreamId,
      .start_timestamp = kExpectedStartTimestamp,
      .end_timestamp = kExpectedEndTimestamp,
      .samples_to_trim_at_end = kExpectedNumSamplesToTrimAtEnd,
      .samples_to_trim_at_start = kExpectedNumSamplesToTrimAtStart,
      .decoded_samples = {{0}},
      .down_mixing_params = DownMixingParams()});
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  const auto id_to_labeled_decoded_frame =
      demixing_module->DemixDecodedAudioSamples(decoded_audio_frames);
  ASSERT_THAT(id_to_labeled_decoded_frame, IsOk());

  ASSERT_TRUE(id_to_labeled_decoded_frame->contains(kAudioElementId));
  const auto& labeled_frame = id_to_labeled_decoded_frame->at(kAudioElementId);
  EXPECT_EQ(labeled_frame.end_timestamp, kExpectedEndTimestamp);
  EXPECT_EQ(labeled_frame.samples_to_trim_at_end,
            kExpectedNumSamplesToTrimAtEnd);
  EXPECT_EQ(labeled_frame.samples_to_trim_at_start,
            kExpectedNumSamplesToTrimAtStart);
}

TEST(DemixDecodedAudioSamples, OutputEchoesOriginalLabels) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers(
      {{kMonoSubstreamId, {kMono}}, {kL2SubstreamId, {kL2}}},
      {ChannelAudioLayerConfig::kLayoutMono,
       ChannelAudioLayerConfig::kLayoutStereo},
      audio_elements);
  std::list<DecodedAudioFrame> decoded_audio_frames;
  decoded_audio_frames.push_back(
      DecodedAudioFrame{.substream_id = kMonoSubstreamId,
                        .start_timestamp = kStartTimestamp,
                        .end_timestamp = kEndTimestamp,
                        .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
                        .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
                        .decoded_samples = {{1}, {2}, {3}},
                        .down_mixing_params = DownMixingParams()});
  decoded_audio_frames.push_back(
      DecodedAudioFrame{.substream_id = kL2SubstreamId,
                        .start_timestamp = kStartTimestamp,
                        .end_timestamp = kEndTimestamp,
                        .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
                        .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
                        .decoded_samples = {{9}, {10}, {11}},
                        .down_mixing_params = DownMixingParams()});
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  IdLabeledFrameMap unused_id_labeled_frame;
  const auto id_to_labeled_decoded_frame =
      demixing_module->DemixDecodedAudioSamples(decoded_audio_frames);
  ASSERT_THAT(id_to_labeled_decoded_frame, IsOk());

  ASSERT_TRUE(id_to_labeled_decoded_frame->contains(kAudioElementId));
  // Examine the demixed frame.
  const auto& labeled_frame = id_to_labeled_decoded_frame->at(kAudioElementId);
  constexpr std::array kExpectedMonoSamples = {1, 2, 3};
  constexpr std::array kExpectedL2Samples = {9, 10, 11};
  EXPECT_THAT(
      labeled_frame.label_to_samples.at(kMono),
      Pointwise(InternalSampleMatchesIntegralSample(), kExpectedMonoSamples));
  EXPECT_THAT(
      labeled_frame.label_to_samples.at(kL2),
      Pointwise(InternalSampleMatchesIntegralSample(), kExpectedL2Samples));
}

TEST(DemixDecodedAudioSamples, OutputHasReconstructedLayers) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers(
      {{kMonoSubstreamId, {kMono}}, {kL2SubstreamId, {kL2}}},
      {ChannelAudioLayerConfig::kLayoutMono,
       ChannelAudioLayerConfig::kLayoutStereo},
      audio_elements);
  std::list<DecodedAudioFrame> decoded_audio_frames;
  decoded_audio_frames.push_back(
      DecodedAudioFrame{.substream_id = kMonoSubstreamId,
                        .start_timestamp = kStartTimestamp,
                        .end_timestamp = kEndTimestamp,
                        .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
                        .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
                        .decoded_samples = {{750}},
                        .down_mixing_params = DownMixingParams()});
  decoded_audio_frames.push_back(
      DecodedAudioFrame{.substream_id = kL2SubstreamId,
                        .start_timestamp = kStartTimestamp,
                        .end_timestamp = kEndTimestamp,
                        .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
                        .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
                        .decoded_samples = {{1000}},
                        .down_mixing_params = DownMixingParams()});
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  const auto id_to_labeled_decoded_frame =
      demixing_module->DemixDecodedAudioSamples(decoded_audio_frames);
  ASSERT_THAT(id_to_labeled_decoded_frame, IsOk());

  ASSERT_TRUE(id_to_labeled_decoded_frame->contains(kAudioElementId));
  // Examine the demixed frame.
  const auto& labeled_frame = id_to_labeled_decoded_frame->at(kAudioElementId);
  // D_R2 = M - (L2 - 6 dB) + 6 dB.
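  // A worked example of the expected value, treating -6 dB as a factor of 0.5:
  // D_R2 = 2 * (M - 0.5 * L2) = 2 * (750 - 500) = 500.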
  EXPECT_THAT(labeled_frame.label_to_samples.at(kDemixedR2),
              Pointwise(InternalSampleMatchesIntegralSample(), {500}));
}

TEST(DemixDecodedAudioSamples, OutputContainsReconGainAndLayerInfo) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers(
      {{kMonoSubstreamId, {kMono}}, {kL2SubstreamId, {kL2}}},
      {ChannelAudioLayerConfig::kLayoutMono,
       ChannelAudioLayerConfig::kLayoutStereo},
      audio_elements);
  std::list<DecodedAudioFrame> decoded_audio_frames;
  ReconGainInfoParameterData recon_gain_info_parameter_data;
  recon_gain_info_parameter_data.recon_gain_elements.push_back(
      ReconGainElement{.recon_gain_flag = DecodedUleb128(1),
                       .recon_gain = kReconGainValues});
  decoded_audio_frames.push_back(DecodedAudioFrame{
      .substream_id = kMonoSubstreamId,
      .start_timestamp = kStartTimestamp,
      .end_timestamp = kEndTimestamp,
      .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
      .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
      .decoded_samples = {{0}},
      .down_mixing_params = DownMixingParams(),
      .recon_gain_info_parameter_data = recon_gain_info_parameter_data,
      .audio_element_with_data = &audio_elements.at(kAudioElementId)});
  decoded_audio_frames.push_back(DecodedAudioFrame{
      .substream_id = kL2SubstreamId,
      .start_timestamp = kStartTimestamp,
      .end_timestamp = kEndTimestamp,
      .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
      .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
      .decoded_samples = {{0}},
      .down_mixing_params = DownMixingParams(),
      .recon_gain_info_parameter_data = recon_gain_info_parameter_data,
      .audio_element_with_data = &audio_elements.at(kAudioElementId)});
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  const auto id_to_labeled_decoded_frame =
      demixing_module->DemixDecodedAudioSamples(decoded_audio_frames);
  ASSERT_THAT(id_to_labeled_decoded_frame, IsOk());

  ASSERT_TRUE(id_to_labeled_decoded_frame->contains(kAudioElementId));
  const auto& labeled_frame = id_to_labeled_decoded_frame->at(kAudioElementId);
  EXPECT_TRUE(labeled_frame.label_to_samples.contains(kL2));
  EXPECT_TRUE(labeled_frame.label_to_samples.contains(kMono));
  EXPECT_TRUE(labeled_frame.label_to_samples.contains(kDemixedR2));
  EXPECT_EQ(
      labeled_frame.recon_gain_info_parameter_data.recon_gain_elements.size(),
      1);
  const auto& recon_gain_element =
      labeled_frame.recon_gain_info_parameter_data.recon_gain_elements.at(0);
  ASSERT_TRUE(recon_gain_element.has_value());
  EXPECT_EQ(recon_gain_element->recon_gain_flag, DecodedUleb128(1));
  EXPECT_THAT(recon_gain_element->recon_gain,
              testing::ElementsAreArray(kReconGainValues));
  EXPECT_EQ(labeled_frame.loudspeaker_layout_per_layer.size(), 2);
  EXPECT_THAT(labeled_frame.loudspeaker_layout_per_layer,
              testing::ElementsAre(ChannelAudioLayerConfig::kLayoutMono,
                                   ChannelAudioLayerConfig::kLayoutStereo));
}

class DemixingModuleTestBase {
 public:
  DemixingModuleTestBase() {
    audio_frame_metadata_.set_audio_element_id(kAudioElementId);
  }

  void CreateDemixingModuleExpectOk() {
    iamf_tools_cli_proto::UserMetadata user_metadata;
    *user_metadata.add_audio_frame_metadata() = audio_frame_metadata_;
    audio_elements_.emplace(
        kAudioElementId,
        AudioElementWithData{
            .obu = AudioElementObu(ObuHeader(), kAudioElementId,
                                   AudioElementObu::kAudioElementChannelBased,
                                   /*reserved=*/0,
                                   /*codec_config_id=*/0),
            .substream_id_to_labels = substream_id_to_labels_,
        });
    const absl::StatusOr<absl::flat_hash_map<
        DecodedUleb128, DemixingModule::DownmixingAndReconstructionConfig>>
        audio_element_id_to_demixing_metadata =
            CreateAudioElementIdToDemixingMetadata(user_metadata,
                                                   audio_elements_);
    ASSERT_THAT(audio_element_id_to_demixing_metadata.status(), IsOk());
    auto demixing_module = DemixingModule::CreateForDownMixingAndReconstruction(
        std::move(audio_element_id_to_demixing_metadata.value()));
    ASSERT_THAT(demixing_module, IsOk());
    demixing_module_.emplace(*std::move(demixing_module));
  }

  void TestCreateDemixingModule(int expected_number_of_down_mixers) {
    CreateDemixingModuleExpectOk();
    const std::list<Demixer>* down_mixers = nullptr;
    const std::list<Demixer>* demixers = nullptr;

    ASSERT_THAT(demixing_module_->GetDownMixers(kAudioElementId, down_mixers),
                IsOk());
    ASSERT_THAT(demixing_module_->GetDemixers(kAudioElementId, demixers),
                IsOk());
    EXPECT_EQ(down_mixers->size(), expected_number_of_down_mixers);
    EXPECT_EQ(demixers->size(), expected_number_of_down_mixers);
  }

 protected:
  void ConfigureAudioFrameMetadata(
      absl::Span<const ChannelLabel::Label> labels) {
    for (const auto& label : labels) {
      auto proto_label = ChannelLabelUtils::LabelToProto(label);
      ASSERT_TRUE(proto_label.ok());
      audio_frame_metadata_.add_channel_metadatas()->set_channel_label(
          *proto_label);
    }
  }

  iamf_tools_cli_proto::AudioFrameObuMetadata audio_frame_metadata_;
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements_;
  SubstreamIdLabelsMap substream_id_to_labels_;

  // Held in `std::optional` for delayed construction.
  std::optional<DemixingModule> demixing_module_;
};

class DownMixingModuleTest : public DemixingModuleTestBase,
                             public ::testing::Test {
 protected:
  void TestDownMixing(const DownMixingParams& down_mixing_params,
                      int expected_number_of_down_mixers) {
    TestCreateDemixingModule(expected_number_of_down_mixers);

    EXPECT_THAT(demixing_module_->DownMixSamplesToSubstreams(
                    kAudioElementId, down_mixing_params,
                    input_label_to_samples_, substream_id_to_substream_data_),
                IsOk());

    for (const auto& [substream_id, substream_data] :
         substream_id_to_substream_data_) {
      // Copy the output queue to a vector for comparison.
      std::vector<std::vector<int32_t>> output_samples;
      std::copy(substream_data.samples_obu.begin(),
                substream_data.samples_obu.end(),
                std::back_inserter(output_samples));
      EXPECT_EQ(output_samples,
                substream_id_to_expected_samples_[substream_id]);
    }
  }

  void ConfigureInputChannel(ChannelLabel::Label label,
                             absl::Span<const int32_t> input_samples) {
    ConfigureAudioFrameMetadata({label});

    auto [iter, inserted] = input_label_to_samples_.emplace(
        label, std::vector<InternalSampleType>(input_samples.size(), 0));
    Int32ToInternalSampleType(input_samples, absl::MakeSpan(iter->second));

    // This function should not be called with the same label twice.
    ASSERT_TRUE(inserted);
  }

  void ConfigureOutputChannel(
      const std::list<ChannelLabel::Label>& requested_output_labels,
      const std::vector<std::vector<int32_t>>& expected_output_samples) {
    // The substream ID itself does not matter. Generate a unique one.
    const uint32_t substream_id = substream_id_to_labels_.size();

    substream_id_to_labels_[substream_id] = requested_output_labels;
    substream_id_to_substream_data_[substream_id] = {.substream_id =
                                                         substream_id};
    substream_id_to_expected_samples_[substream_id] = expected_output_samples;
  }

  LabelSamplesMap input_label_to_samples_;
  absl::flat_hash_map<uint32_t, SubstreamData> substream_id_to_substream_data_;

  absl::flat_hash_map<uint32_t, std::vector<std::vector<int32_t>>>
      substream_id_to_expected_samples_;
};

TEST_F(DownMixingModuleTest, OneLayerStereoHasNoDownMixers) {
  ConfigureInputChannel(kL2, {});
  ConfigureInputChannel(kR2, {});

  ConfigureOutputChannel({kL2, kR2}, {{}});

  TestCreateDemixingModule(0);
}

TEST_F(DownMixingModuleTest, OneLayer7_1_4HasNoDownMixers) {
  // Initialize arguments for single layer 7.1.4.
  ConfigureInputChannel(kL7, {});
  ConfigureInputChannel(kR7, {});
  ConfigureInputChannel(kCentre, {});
  ConfigureInputChannel(kLFE, {});
  ConfigureInputChannel(kLss7, {});
  ConfigureInputChannel(kRss7, {});
  ConfigureInputChannel(kLrs7, {});
  ConfigureInputChannel(kRrs7, {});
  ConfigureInputChannel(kLtf4, {});
  ConfigureInputChannel(kRtf4, {});
  ConfigureInputChannel(kLtb4, {});
  ConfigureInputChannel(kRtb4, {});

  ConfigureOutputChannel({kCentre}, {{}});
  ConfigureOutputChannel({kL7, kR7}, {});
  ConfigureOutputChannel({kLss7, kRss7}, {});
  ConfigureOutputChannel({kLrs7, kRrs7}, {});
  ConfigureOutputChannel({kLtf4, kRtf4}, {});
  ConfigureOutputChannel({kLtb4, kRtb4}, {});
  ConfigureOutputChannel({kLFE}, {});

  TestCreateDemixingModule(0);
}

TEST_F(DownMixingModuleTest, AmbisonicsHasNoDownMixers) {
  ConfigureInputChannel(kA0, {});
  ConfigureInputChannel(kA1, {});
  ConfigureInputChannel(kA2, {});
  ConfigureInputChannel(kA3, {});

  ConfigureOutputChannel({kA0}, {{}});
  ConfigureOutputChannel({kA1}, {{}});
  ConfigureOutputChannel({kA2}, {{}});
  ConfigureOutputChannel({kA3}, {{}});

  TestCreateDemixingModule(0);
}

TEST_F(DownMixingModuleTest, OneLayerStereo) {
  ConfigureInputChannel(kL2, {0, 1, 2, 3});
  ConfigureInputChannel(kR2, {100, 101, 102, 103});

  // Down-mix to stereo as the highest layer. The highest layer always matches
  // the original input.
  ConfigureOutputChannel({kL2, kR2}, {{0, 100}, {1, 101}, {2, 102}, {3, 103}});

  TestDownMixing({}, 0);
}

TEST_F(DownMixingModuleTest, S2ToS1DownMixer) {
  ConfigureInputChannel(kL2, {0, 100, 500, 1000});
  ConfigureInputChannel(kR2, {100, 0, 500, 500});

  // Down-mix to stereo as the highest layer. The highest layer always matches
  // the original input.
  ConfigureOutputChannel({kL2}, {{0}, {100}, {500}, {1000}});

  // Down-mix to mono as the lowest layer.
  // M = (L2 - 6 dB) + (R2 - 6 dB).
  ConfigureOutputChannel({kMono}, {{50}, {50}, {500}, {750}});

  TestDownMixing({}, 1);
}

TEST_F(DownMixingModuleTest, S3ToS2DownMixer) {
  ConfigureInputChannel(kL3, {0, 100});
  ConfigureInputChannel(kR3, {0, 100});
  ConfigureInputChannel(kCentre, {100, 100});
  ConfigureInputChannel(kLtf3, {99999, 99999});
  ConfigureInputChannel(kRtf3, {99998, 99998});

  // Down-mix to 3.1.2 as the highest layer. The highest layer always matches
  // the original input.
  ConfigureOutputChannel({kCentre}, {{100}, {100}});
  ConfigureOutputChannel({kLtf3, kRtf3}, {{99999, 99998}, {99999, 99998}});

  // Down-mix to stereo as the lowest layer.
  // L2 = L3 + (C - 3 dB).
  // R2 = R3 + (C - 3 dB).
  ConfigureOutputChannel({kL2, kR2}, {{70, 70}, {170, 170}});

  TestDownMixing({}, 1);
}

TEST_F(DownMixingModuleTest, S5ToS3ToS2DownMixer) {
  ConfigureInputChannel(kL5, {100});
  ConfigureInputChannel(kR5, {200});
  ConfigureInputChannel(kCentre, {1000});
  ConfigureInputChannel(kLs5, {2000});
  ConfigureInputChannel(kRs5, {3000});
  ConfigureInputChannel(kLFE, {6});

  // Down-mix to 5.1 as the highest layer. The highest layer always matches the
  // original input.
  ConfigureOutputChannel({kCentre}, {{1000}});
  ConfigureOutputChannel({kLs5, kRs5}, {{2000, 3000}});
  ConfigureOutputChannel({kLFE}, {{6}});

  // Down-mix to stereo as the lowest layer.
  // L3 = L5 + Ls5 * delta.
  // L2 = L3 + (C - 3 dB).
  ConfigureOutputChannel({kL2, kR2}, {{2221, 3028}});

  // Internally there is a down-mixer to L3/R3 then another for L2/R2.
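  // For example, with delta = 0.707 and -3 dB treated as a factor of 0.707:
  // L2 = (100 + 2000 * 0.707) + 1000 * 0.707 ~= 2221 and
  // R2 = (200 + 3000 * 0.707) + 1000 * 0.707 ~= 3028.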
  TestDownMixing({.delta = .707}, 2);
}

TEST_F(DownMixingModuleTest, S5ToS3ToDownMixer) {
  ConfigureInputChannel(kL5, {1000});
  ConfigureInputChannel(kR5, {2000});
  ConfigureInputChannel(kCentre, {3});
  ConfigureInputChannel(kLs5, {4000});
  ConfigureInputChannel(kRs5, {8000});
  ConfigureInputChannel(kLtf2, {1000});
  ConfigureInputChannel(kRtf2, {2000});
  ConfigureInputChannel(kLFE, {8});

  // Down-mix to 5.1.2 as the highest layer. The highest layer always matches
  // the original input.
  ConfigureOutputChannel({kLs5, kRs5}, {{4000, 8000}});

  // Down-mix to 3.1.2 as the lowest layer.
  // L3 = L5 + Ls5 * delta.
  ConfigureOutputChannel({kL3, kR3}, {{3828, 7656}});
  ConfigureOutputChannel({kCentre}, {{3}});
  // Ltf3 = Ltf2 + Ls5 * w * delta.
  ConfigureOutputChannel({kLtf3, kRtf3}, {{1707, 3414}});
  ConfigureOutputChannel({kLFE}, {{8}});

  // Internally there is a down-mixer for the height and another for the
  // surround.
  TestDownMixing({.delta = .707, .w = 0.25}, 2);
}

TEST_F(DownMixingModuleTest, T4ToT2DownMixer) {
  ConfigureInputChannel(kL5, {1});
  ConfigureInputChannel(kR5, {2});
  ConfigureInputChannel(kCentre, {3});
  ConfigureInputChannel(kLs5, {4});
  ConfigureInputChannel(kRs5, {5});
  ConfigureInputChannel(kLtf4, {1000});
  ConfigureInputChannel(kRtf4, {2000});
  ConfigureInputChannel(kLtb4, {1000});
  ConfigureInputChannel(kRtb4, {2000});
  ConfigureInputChannel(kLFE, {10});

  // Down-mix to 5.1.4 as the highest layer. The highest layer always matches
  // the original input.
  ConfigureOutputChannel({kLtb4, kRtb4}, {{1000, 2000}});

  // Down-mix to 5.1.2 as the lowest layer.
  ConfigureOutputChannel({kL5, kR5}, {{1, 2}});
  ConfigureOutputChannel({kCentre}, {{3}});
  ConfigureOutputChannel({kLs5, kRs5}, {{4, 5}});
  // Ltf2 = Ltf4 + Ltb4 * gamma.
  ConfigureOutputChannel({kLtf2, kRtf2}, {{1707, 3414}});
  ConfigureOutputChannel({kLFE}, {{10}});

  TestDownMixing({.gamma = .707}, 1);
}

TEST_F(DownMixingModuleTest, S7ToS5DownMixerWithoutT0) {
  ConfigureInputChannel(kL7, {1});
  ConfigureInputChannel(kR7, {2});
  ConfigureInputChannel(kCentre, {3});
  ConfigureInputChannel(kLss7, {1000});
  ConfigureInputChannel(kRss7, {2000});
  ConfigureInputChannel(kLrs7, {3000});
  ConfigureInputChannel(kRrs7, {4000});
  ConfigureInputChannel(kLFE, {8});

  // Down-mix to 7.1.0 as the highest layer. The highest layer always matches
  // the original input.
  ConfigureOutputChannel({kLrs7, kRrs7}, {{3000, 4000}});

  // Down-mix to 5.1.0 as the lowest layer.
  ConfigureOutputChannel({kL5, kR5}, {{1, 2}});
  ConfigureOutputChannel({kCentre}, {{3}});
  // Ls5 = Lss7 * alpha + Lrs7 * beta.
  ConfigureOutputChannel({kLs5, kRs5}, {{3598, 5464}});
  ConfigureOutputChannel({kLFE}, {{8}});

  TestDownMixing({.alpha = 1, .beta = .866}, 1);
}

TEST_F(DownMixingModuleTest, S7ToS5DownMixerWithT2) {
  ConfigureInputChannel(kL7, {1});
  ConfigureInputChannel(kR7, {2});
  ConfigureInputChannel(kCentre, {3});
  ConfigureInputChannel(kLss7, {1000});
  ConfigureInputChannel(kRss7, {2000});
  ConfigureInputChannel(kLrs7, {3000});
  ConfigureInputChannel(kRrs7, {4000});
  ConfigureInputChannel(kLtf2, {8});
  ConfigureInputChannel(kRtf2, {9});
  ConfigureInputChannel(kLFE, {10});

  // Down-mix to 7.1.2 as the highest layer. The highest layer always matches
  // the original input.
  ConfigureOutputChannel({kLrs7, kRrs7}, {{3000, 4000}});

  // Down-mix to 5.1.2 as the lowest layer.
  ConfigureOutputChannel({kL5, kR5}, {{1, 2}});
  ConfigureOutputChannel({kCentre}, {{3}});
  // Ls5 = Lss7 * alpha + Lrs7 * beta.
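  // For example, with alpha = 1 and beta = 0.866:
  // Ls5 = 1000 * 1 + 3000 * 0.866 = 3598 and
  // Rs5 = 2000 * 1 + 4000 * 0.866 = 5464.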
  ConfigureOutputChannel({kLs5, kRs5}, {{3598, 5464}});
  ConfigureOutputChannel({kLtf2, kRtf2}, {{8, 9}});
  ConfigureOutputChannel({kLFE}, {{10}});

  TestDownMixing({.alpha = 1, .beta = .866}, 1);
}

TEST_F(DownMixingModuleTest, S7ToS5DownMixerWithT4) {
  ConfigureInputChannel(kL7, {1});
  ConfigureInputChannel(kR7, {2});
  ConfigureInputChannel(kCentre, {3});
  ConfigureInputChannel(kLss7, {1000});
  ConfigureInputChannel(kRss7, {2000});
  ConfigureInputChannel(kLrs7, {3000});
  ConfigureInputChannel(kRrs7, {4000});
  ConfigureInputChannel(kLtf4, {8});
  ConfigureInputChannel(kRtf4, {9});
  ConfigureInputChannel(kLtb4, {10});
  ConfigureInputChannel(kRtb4, {11});
  ConfigureInputChannel(kLFE, {12});

  // Down-mix to 7.1.4 as the highest layer. The highest layer always matches
  // the original input.
  ConfigureOutputChannel({kLrs7, kRrs7}, {{3000, 4000}});

  // Down-mix to 5.1.4 as the lowest layer.
  ConfigureOutputChannel({kL5, kR5}, {{1, 2}});
  ConfigureOutputChannel({kCentre}, {{3}});
  // Ls5 = Lss7 * alpha + Lrs7 * beta.
  ConfigureOutputChannel({kLs5, kRs5}, {{3598, 5464}});
  ConfigureOutputChannel({kLtf4, kRtf4}, {{8, 9}});
  ConfigureOutputChannel({kLtb4, kRtb4}, {{10, 11}});
  ConfigureOutputChannel({kLFE}, {{12}});

  TestDownMixing({.alpha = 1, .beta = .866}, 1);
}

TEST_F(DownMixingModuleTest, SixLayer7_1_4) {
  ConfigureInputChannel(kL7, {1000});
  ConfigureInputChannel(kR7, {2000});
  ConfigureInputChannel(kCentre, {1000});
  ConfigureInputChannel(kLss7, {1000});
  ConfigureInputChannel(kRss7, {2000});
  ConfigureInputChannel(kLrs7, {3000});
  ConfigureInputChannel(kRrs7, {4000});
  ConfigureInputChannel(kLtf4, {1000});
  ConfigureInputChannel(kRtf4, {2000});
  ConfigureInputChannel(kLtb4, {1000});
  ConfigureInputChannel(kRtb4, {2000});
  ConfigureInputChannel(kLFE, {12});

  // There are different paths to have six layers; choose 7.1.2, 5.1.2, 3.1.2,
  // stereo, mono to avoid dropping the height channels for as many steps as
  // possible.

  // Down-mix to 7.1.4 as the sixth layer.
  ConfigureOutputChannel({kLtb4, kRtb4}, {{1000, 2000}});

  // Down-mix to 7.1.2 as the fifth layer.
  ConfigureOutputChannel({kLrs7, kRrs7}, {{3000, 4000}});

  // Down-mix to 5.1.2 as the fourth layer.
  // Ls5 = Lss7 * alpha + Lrs7 * beta.
  ConfigureOutputChannel({kLs5, kRs5}, {{3598, 5464}});

  // Down-mix to 3.1.2 as the third layer.
  ConfigureOutputChannel({kCentre}, {{1000}});
  // Ltf2 = Ltf4 + Ltb4 * gamma.
  // Ltf3 = Ltf2 + Ls5 * w * delta.
  ConfigureOutputChannel({kLtf3, kRtf3}, {{2644, 4914}});
  ConfigureOutputChannel({kLFE}, {{12}});

  // Down-mix to stereo as the second layer.
  // L5 = L7.
  // L3 = L5 + Ls5 * delta.
  // L2 = L3 + (C - 3 dB).
  ConfigureOutputChannel({kL2}, {{4822}});

  // Down-mix to mono as the first layer.
  // R5 = R7.
  // R3 = R5 + Rs5 * delta.
  // R2 = R3 + (C - 3 dB).
  // M = (L2 - 6 dB) + (R2 - 6 dB).
  ConfigureOutputChannel({kMono}, {{6130}});

  TestDownMixing(
      {.alpha = 1, .beta = .866, .gamma = .866, .delta = .866, .w = 0.25}, 6);
}

class DemixingModuleTest : public DemixingModuleTestBase,
                           public ::testing::Test {
 public:
  void ConfigureLosslessAudioFrameAndDecodedAudioFrame(
      const std::list<ChannelLabel::Label>& labels,
      const std::vector<std::vector<int32_t>>& pcm_samples,
      DownMixingParams down_mixing_params = {
          .alpha = 1, .beta = .866, .gamma = .866, .delta = .866, .w = 0.25}) {
    // The substream ID itself does not matter. Generate a unique one.
    const DecodedUleb128 substream_id = substream_id_to_labels_.size();
    substream_id_to_labels_[substream_id] = labels;

    // Configure a pair of audio frames and decoded audio frames. They share a
    // lot of the same information for a lossless codec.
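    // In particular, for a lossless codec the original samples and the decoder
    // output are identical, so the same `pcm_samples` argument fills both the
    // `AudioFrameWithData` and the `DecodedAudioFrame` below.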
    audio_frames_.push_back(AudioFrameWithData{
        .obu = AudioFrameObu(ObuHeader(), substream_id, {}),
        .start_timestamp = kStartTimestamp,
        .end_timestamp = kEndTimestamp,
        .pcm_samples = pcm_samples,
        .down_mixing_params = down_mixing_params,
    });
    decoded_audio_frames_.push_back(
        DecodedAudioFrame{.substream_id = substream_id,
                          .start_timestamp = kStartTimestamp,
                          .end_timestamp = kEndTimestamp,
                          .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
                          .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
                          .decoded_samples = pcm_samples,
                          .down_mixing_params = down_mixing_params});

    auto& expected_label_to_samples =
        expected_id_to_labeled_decoded_frame_[kAudioElementId].label_to_samples;

    // `pcm_samples` is arranged in (time, channel) axes. Arrange the samples
    // associated with each channel by time. The demixing process never changes
    // data for the input labels.
    auto labels_iter = labels.begin();
    for (int channel = 0; channel < labels.size(); ++channel) {
      auto& samples_for_channel = expected_label_to_samples[*labels_iter];

      samples_for_channel.reserve(pcm_samples.size());
      for (auto tick : pcm_samples) {
        samples_for_channel.push_back(
            Int32ToNormalizedFloatingPoint(tick[channel]));
      }
      labels_iter++;
    }
  }

  void ConfiguredExpectedDemixingChannelFrame(
      ChannelLabel::Label label,
      const std::vector<int32_t>& expected_demixed_samples) {
    std::vector<InternalSampleType> expected_demixed_samples_as_internal_type;
    expected_demixed_samples_as_internal_type.reserve(
        expected_demixed_samples.size());
    for (int32_t sample : expected_demixed_samples) {
      expected_demixed_samples_as_internal_type.push_back(
          Int32ToNormalizedFloatingPoint(sample));
    }
    // Configure the expected demixed channels. Typically the input `label`
    // should have a "D_" prefix.
    expected_id_to_labeled_decoded_frame_[kAudioElementId]
        .label_to_samples[label] = expected_demixed_samples_as_internal_type;
  }

  void TestLosslessDemixing(int expected_number_of_down_mixers) {
    TestCreateDemixingModule(expected_number_of_down_mixers);

    const auto id_to_labeled_decoded_frame =
        demixing_module_->DemixDecodedAudioSamples(decoded_audio_frames_);
    ASSERT_THAT(id_to_labeled_decoded_frame, IsOk());
    ASSERT_TRUE(id_to_labeled_decoded_frame->contains(kAudioElementId));

    // Check that the demixed samples have the correct values.
    const auto& actual_label_to_samples =
        id_to_labeled_decoded_frame->at(kAudioElementId).label_to_samples;
    const auto& expected_label_to_samples =
        expected_id_to_labeled_decoded_frame_[kAudioElementId].label_to_samples;
    EXPECT_EQ(actual_label_to_samples.size(), expected_label_to_samples.size());
    for (const auto& [label, samples] : actual_label_to_samples) {
      // Use `DoubleNear` with a tolerance because floating-point arithmetic
      // introduces errors larger than allowed by `DoubleEq`.
      constexpr double kErrorTolerance = 1e-14;
      EXPECT_THAT(samples, Pointwise(DoubleNear(kErrorTolerance),
                                     expected_label_to_samples.at(label)));
    }

    // Also, since this is lossless, we expect that demixing the original
    // samples gives the same result.
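    // `DemixOriginalAudioSamples` consumes the `pcm_samples` stored on the
    // audio frames, whereas `DemixDecodedAudioSamples` above consumed the
    // `decoded_samples`; for this lossless setup both inputs are identical.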
    const auto id_to_labeled_frame =
        demixing_module_->DemixOriginalAudioSamples(audio_frames_);
    ASSERT_THAT(id_to_labeled_frame, IsOk());
    ASSERT_TRUE(id_to_labeled_frame->contains(kAudioElementId));
    EXPECT_EQ(id_to_labeled_frame->at(kAudioElementId).label_to_samples,
              actual_label_to_samples);
  }

 protected:
  std::list<AudioFrameWithData> audio_frames_;
  std::list<DecodedAudioFrame> decoded_audio_frames_;

  IdLabeledFrameMap expected_id_to_labeled_decoded_frame_;
};

TEST(DemixingModule, DemixingOriginalAudioSamplesSucceedsWithEmptyInputs) {
  const auto demixing_module =
      DemixingModule::CreateForDownMixingAndReconstruction({});
  ASSERT_THAT(demixing_module, IsOk());

  EXPECT_THAT(demixing_module->DemixOriginalAudioSamples({}),
              IsOkAndHolds(IsEmpty()));
}

TEST(DemixingModule, DemixingDecodedAudioSamplesSucceedsWithEmptyInputs) {
  const auto demixing_module =
      DemixingModule::CreateForDownMixingAndReconstruction({});
  ASSERT_THAT(demixing_module, IsOk());

  EXPECT_THAT(demixing_module->DemixDecodedAudioSamples({}),
              IsOkAndHolds(IsEmpty()));
}

TEST_F(DemixingModuleTest, AmbisonicsHasNoDemixers) {
  ConfigureAudioFrameMetadata({kA0, kA1, kA2, kA3});

  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kA0}, {{1}});
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kA1}, {{1}});
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kA2}, {{1}});
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kA3}, {{1}});

  TestLosslessDemixing(0);
}

TEST_F(DemixingModuleTest, S1ToS2Demixer) {
  // The highest layer is stereo.
  ConfigureAudioFrameMetadata({kL2, kR2});

  // Mono is the lowest layer.
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kMono}, {{750}, {1500}});
  // Stereo is the next layer.
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL2}, {{1000}, {2000}});

  // Demixing recovers kDemixedR2.
  // D_R2 = M - (L2 - 6 dB) + 6 dB.
  ConfiguredExpectedDemixingChannelFrame(kDemixedR2, {500, 1000});

  TestLosslessDemixing(1);
}

TEST_F(DemixingModuleTest,
       DemixOriginalAudioSamplesReturnsErrorIfAudioFrameIsMissingPcmSamples) {
  ConfigureAudioFrameMetadata({kL2, kR2});
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kMono}, {{750}, {1500}});
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL2}, {{1000}, {2000}});
  IdLabeledFrameMap unused_id_to_labeled_frame, id_to_labeled_decoded_frame;
  TestCreateDemixingModule(1);

  // Destroy the raw samples.
  audio_frames_.back().pcm_samples = std::nullopt;

  EXPECT_THAT(demixing_module_->DemixOriginalAudioSamples(audio_frames_),
              Not(IsOk()));
}

TEST_F(DemixingModuleTest, S2ToS3Demixer) {
  // The highest layer is 3.1.2.
  ConfigureAudioFrameMetadata({kL3, kR3, kCentre, kLtf3, kRtf3});

  // Stereo is the lowest layer.
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL2, kR2},
                                                  {{70, 70}, {1700, 1700}});
  // 3.1.2 as the next layer.
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kCentre}, {{2000}, {1000}});
  ConfigureLosslessAudioFrameAndDecodedAudioFrame(
      {kLtf3, kRtf3}, {{99999, 99998}, {99999, 99998}});

  // L3/R3 get demixed from the lower layers.
  // L3 = L2 - (C - 3 dB).
  // R3 = R2 - (C - 3 dB).
  ConfiguredExpectedDemixingChannelFrame(kDemixedL3, {-1344, 993});
  ConfiguredExpectedDemixingChannelFrame(kDemixedR3, {-1344, 993});

  TestLosslessDemixing(1);
}

TEST_F(DemixingModuleTest, S3ToS5AndTf2ToT2Demixers) {
  // Adding a (valid) layer on top of 3.1.2 will always result in both S3ToS5
  // and Tf2ToT2 demixers.
  // The highest layer is 5.1.2.
  ConfigureAudioFrameMetadata({kL5, kR5, kCentre, kLtf2, kRtf2});

  const DownMixingParams kDownMixingParams = {.delta = .866, .w = 0.25};

  // 3.1.2 is the lowest layer.
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL3, kR3}, {{18660, 28660}},
                                                  kDownMixingParams);
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kCentre}, {{100}},
                                                  kDownMixingParams);
  ConfigureLosslessAudioFrameAndDecodedAudioFrame(
      {kLtf3, kRtf3}, {{1000, 2000}}, kDownMixingParams);

  // 5.1.2 as the next layer.
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL5, kR5}, {{10000, 20000}},
                                                  kDownMixingParams);

  // S3ToS5: Ls5/Rs5 get demixed from the lower layers.
  // Ls5 = (1 / delta) * (L3 - L5).
  // Rs5 = (1 / delta) * (R3 - R5).
  ConfiguredExpectedDemixingChannelFrame(kDemixedLs5, {10000});
  ConfiguredExpectedDemixingChannelFrame(kDemixedRs5, {10000});

  // Tf2ToT2: Ltf2/Rtf2 get demixed from the lower layers.
  // Ltf2 = Ltf3 - w * (L3 - L5).
  // Rtf2 = Rtf3 - w * (R3 - R5).
  ConfiguredExpectedDemixingChannelFrame(kDemixedLtf2, {-1165});
  ConfiguredExpectedDemixingChannelFrame(kDemixedRtf2, {-165});

  TestLosslessDemixing(2);
}

TEST_F(DemixingModuleTest, S5ToS7Demixer) {
  // The highest layer is 7.1.0.
  ConfigureAudioFrameMetadata({kL7, kR7, kCentre, kLss7, kRss7, kLrs7, kRrs7});

  const DownMixingParams kDownMixingParams = {.alpha = 0.866, .beta = .866};

  // 5.1.0 is the lowest layer.
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL5, kR5}, {{100, 100}},
                                                  kDownMixingParams);
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kLs5, kRs5}, {{7794, 7794}},
                                                  kDownMixingParams);
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kCentre}, {{100}},
                                                  kDownMixingParams);

  // 7.1.0 as the next layer.
  ConfigureLosslessAudioFrameAndDecodedAudioFrame(
      {kLss7, kRss7}, {{1000, 2000}}, kDownMixingParams);

  // L7/R7 get demixed from the lower layers.
  // L7 = L5.
  // R7 = R5.
  ConfiguredExpectedDemixingChannelFrame(kDemixedL7, {100});
  ConfiguredExpectedDemixingChannelFrame(kDemixedR7, {100});

  // Lrs7/Rrs7 get demixed from the lower layers.
  // Lrs7 = (1 / beta) * (Ls5 - alpha * Lss7).
  // Rrs7 = (1 / beta) * (Rs5 - alpha * Rss7).
  ConfiguredExpectedDemixingChannelFrame(kDemixedLrs7, {8000});
  ConfiguredExpectedDemixingChannelFrame(kDemixedRrs7, {7000});

  TestLosslessDemixing(1);
}

TEST_F(DemixingModuleTest, T2ToT4Demixer) {
  // The highest layer is 5.1.4.
  ConfigureAudioFrameMetadata({kL5, kR5, kCentre, kLtf4, kRtf4});

  const DownMixingParams kDownMixingParams = {.gamma = .866};

  // 5.1.2 is the lowest layer.
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL5, kR5}, {{100, 100}},
                                                  kDownMixingParams);
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kLs5, kRs5}, {{100, 100}},
                                                  kDownMixingParams);
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kCentre}, {{100}},
                                                  kDownMixingParams);
  ConfigureLosslessAudioFrameAndDecodedAudioFrame(
      {kLtf2, kRtf2}, {{8660, 17320}}, kDownMixingParams);

  // 5.1.4 as the next layer.
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kLtf4, kRtf4}, {{866, 1732}},
                                                  kDownMixingParams);

  // Ltb4/Rtb4 get demixed from the lower layers.
  // Ltb4 = (1 / gamma) * (Ltf2 - Ltf4).
  // Rtb4 = (1 / gamma) * (Rtf2 - Rtf4).
  ConfiguredExpectedDemixingChannelFrame(kDemixedLtb4, {9000});
  ConfiguredExpectedDemixingChannelFrame(kDemixedRtb4, {18000});

  TestLosslessDemixing(1);
}

}  // namespace
}  // namespace iamf_tools