/*
 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 3-Clause Clear License
 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
 * License was not distributed with this source code in the LICENSE file, you
 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
 * Alliance for Open Media Patent License 1.0 was not distributed with this
 * source code in the PATENTS file, you can obtain it at
 * www.aomedia.org/license/patent.
 */
#include "iamf/cli/audio_frame_decoder.h"

#include <array>
#include <cstdint>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/status/status_matchers.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/audio_frame_with_data.h"
#include "iamf/cli/channel_label.h"
#include "iamf/cli/tests/cli_test_utils.h"
#include "iamf/obu/audio_frame.h"
#include "iamf/obu/codec_config.h"
#include "iamf/obu/demixing_info_parameter_data.h"
#include "iamf/obu/obu_header.h"
#include "iamf/obu/types.h"

namespace iamf_tools {
namespace {

using ::absl_testing::IsOk;

// Identifiers and parameters shared by every test in this file.
constexpr DecodedUleb128 kCodecConfigId = 44;
constexpr uint32_t kSampleRate = 16000;
constexpr DecodedUleb128 kAudioElementId = 13;
constexpr DecodedUleb128 kSubstreamId = 0;
constexpr DownMixingParams kDownMixingParams = {.alpha = 0.5, .beta = 0.5};
constexpr int kNumChannels = 1;
constexpr int kNumSamplesPerFrame = 8;
constexpr int kBytesPerSample = 2;

// A pre-encoded FLAC frame fed to the FLAC tests. The samples it is expected
// to decode to are pinned in `Decode.DecodesFlacFrame`.
constexpr std::array<uint8_t, 22> kFlacEncodedFrame = {
    0xff, 0xf8, 0x6a, 0xa8, 0x00, 0x0f, 0x42, 0x00, 0x00, 0x00, 0x13,
    0x80, 0x00, 0x80, 0x04, 0x92, 0x49, 0x00, 0x01, 0xfe, 0x81, 0xee};

// Builds the prerequisites for one encoded audio frame and returns that frame.
//
// Side effects on the output maps:
//   - `codec_config_obus` gains an LPCM or FLAC codec config under
//     `kCodecConfigId`, depending on `codec_id_type`. No codec config is added
//     for any other codec ID.
//   - `audio_elements` gains a mono ambisonics audio element under
//     `kAudioElementId` whose only substream is `kSubstreamId`.
//
// When `encoded_audio_frame_payload` is empty it is replaced by
// `kNumSamplesPerFrame * kBytesPerSample` zero bytes.
//
// The returned frame stores a raw pointer to an entry of `audio_elements`, so
// that map must outlive the frame.
//
// NOTE(review): the mapped types (`CodecConfigObu`, `AudioElementWithData`)
// were inferred from the included headers and the members accessed in this
// file; confirm them against the helpers in `cli_test_utils.h`.
AudioFrameWithData PrepareEncodedAudioFrame(
    absl::flat_hash_map<DecodedUleb128, CodecConfigObu>& codec_config_obus,
    absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements,
    CodecConfig::CodecId codec_id_type = CodecConfig::kCodecIdLpcm,
    std::vector<uint8_t> encoded_audio_frame_payload = {}) {
  if (codec_id_type == CodecConfig::kCodecIdLpcm) {
    AddLpcmCodecConfigWithIdAndSampleRate(kCodecConfigId, kSampleRate,
                                          codec_config_obus);
  } else if (codec_id_type == CodecConfig::kCodecIdFlac) {
    AddFlacCodecConfigWithId(kCodecConfigId, codec_config_obus);
  }
  AddAmbisonicsMonoAudioElementWithSubstreamIds(
      kAudioElementId, kCodecConfigId, {kSubstreamId}, codec_config_obus,
      audio_elements);

  if (encoded_audio_frame_payload.empty()) {
    // Default payload: one frame of all-zero bytes.
    encoded_audio_frame_payload =
        std::vector<uint8_t>(kNumSamplesPerFrame * kBytesPerSample, 0);
  }

  return AudioFrameWithData({
      .obu = AudioFrameObu(ObuHeader(), kSubstreamId,
                           /*audio_frame=*/encoded_audio_frame_payload),
      .start_timestamp = 0,
      .end_timestamp = kNumSamplesPerFrame,
      .down_mixing_params = kDownMixingParams,
      .audio_element_with_data = &audio_elements.at(kAudioElementId),
  });
}

// `Decode` must fail until `InitDecodersForSubstreams` has been called for the
// frame's substream, and succeed afterwards.
TEST(Decode, RequiresSubstreamsAreInitialized) {
  AudioFrameDecoder decoder;

  // Encoded frames.
  absl::flat_hash_map<DecodedUleb128, CodecConfigObu> codec_config_obus;
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  const AudioFrameWithData encoded_audio_frame =
      PrepareEncodedAudioFrame(codec_config_obus, audio_elements);

  // Decoding fails before substreams are initialized.
  EXPECT_FALSE(decoder.Decode(encoded_audio_frame).ok());

  const auto& audio_element = audio_elements.at(kAudioElementId);

  // Decoding succeeds after substreams are initialized.
  EXPECT_THAT(
      decoder.InitDecodersForSubstreams(audio_element.substream_id_to_labels,
                                        *audio_element.codec_config),
      IsOk());
  EXPECT_THAT(decoder.Decode(encoded_audio_frame), IsOk());
}

// With an Opus codec config, initializing the same substream ID a second time
// is rejected, while initializing a different substream ID is still accepted.
TEST(InitDecodersForSubstreams,
     ShouldNotBeCalledTwiceWithTheSameSubstreamIdForStatefulEncoders) {
  absl::flat_hash_map<DecodedUleb128, CodecConfigObu> codec_config_obus;
  AddOpusCodecConfigWithId(kCodecConfigId, codec_config_obus);
  const auto& codec_config = codec_config_obus.at(kCodecConfigId);

  AudioFrameDecoder decoder;

  // First initialization of substream zero succeeds; a repeat fails.
  const SubstreamIdLabelsMap kLabelsForSubstreamZero = {
      {kSubstreamId, {ChannelLabel::kMono}}};
  EXPECT_THAT(
      decoder.InitDecodersForSubstreams(kLabelsForSubstreamZero, codec_config),
      IsOk());
  EXPECT_FALSE(
      decoder.InitDecodersForSubstreams(kLabelsForSubstreamZero, codec_config)
          .ok());

  // A substream ID that has not been seen before can still be initialized.
  const SubstreamIdLabelsMap kLabelsForSubstreamOne = {
      {kSubstreamId + 1, {ChannelLabel::kMono}}};
  EXPECT_THAT(
      decoder.InitDecodersForSubstreams(kLabelsForSubstreamOne, codec_config),
      IsOk());
}

// Initializes `decoder` for the substreams of every element in
// `audio_elements`, expecting each initialization to succeed.
void InitAllAudioElements(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements,
    AudioFrameDecoder& decoder) {
  for (const auto& [audio_element_id, audio_element_with_data] :
       audio_elements) {
    EXPECT_THAT(decoder.InitDecodersForSubstreams(
                    audio_element_with_data.substream_id_to_labels,
                    *audio_element_with_data.codec_config),
                IsOk());
  }
}

// Decodes an all-zero LPCM frame and checks both the metadata carried over
// from the encoded frame and the shape/content of the decoded samples.
TEST(Decode, DecodesLpcmFrame) {
  AudioFrameDecoder decoder;

  // Encoded frames.
  absl::flat_hash_map<DecodedUleb128, CodecConfigObu> codec_config_obus;
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  const AudioFrameWithData encoded_audio_frame =
      PrepareEncodedAudioFrame(codec_config_obus, audio_elements);
  InitAllAudioElements(audio_elements, decoder);

  // Decode.
  const auto decoded_audio_frame = decoder.Decode(encoded_audio_frame);

  // Validate.
  ASSERT_THAT(decoded_audio_frame, IsOk());
  EXPECT_EQ(decoded_audio_frame->substream_id, kSubstreamId);
  EXPECT_EQ(decoded_audio_frame->start_timestamp, 0);
  EXPECT_EQ(decoded_audio_frame->end_timestamp, kNumSamplesPerFrame);
  EXPECT_EQ(decoded_audio_frame->down_mixing_params, kDownMixingParams);
  EXPECT_EQ(decoded_audio_frame->audio_element_with_data,
            &audio_elements.at(kAudioElementId));

  // For LPCM, the input bytes are all zeros, but we expect the decoder to
  // combine kBytesPerSample bytes each into one int32_t sample.
  // There are kNumSamplesPerFrame samples in the frame.
  EXPECT_EQ(decoded_audio_frame->decoded_samples.size(), kNumSamplesPerFrame);
  for (const std::vector<int32_t>& samples_for_one_time_tick :
       decoded_audio_frame->decoded_samples) {
    EXPECT_EQ(samples_for_one_time_tick.size(), kNumChannels);
    for (int32_t sample : samples_for_one_time_tick) {
      EXPECT_EQ(sample, 0);
    }
  }
}

// Decodes `kFlacEncodedFrame` and compares the result against the exact
// expected sample values.
TEST(Decode, DecodesFlacFrame) {
  AudioFrameDecoder decoder;

  // Encoded frames.
  absl::flat_hash_map<DecodedUleb128, CodecConfigObu> codec_config_obus;
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  std::vector<uint8_t> encoded_audio_frame_payload = {
      kFlacEncodedFrame.begin(), kFlacEncodedFrame.end()};
  const AudioFrameWithData encoded_audio_frame = PrepareEncodedAudioFrame(
      codec_config_obus, audio_elements, CodecConfig::kCodecIdFlac,
      encoded_audio_frame_payload);
  InitAllAudioElements(audio_elements, decoder);

  // Decode.
  const auto decoded_audio_frame = decoder.Decode(encoded_audio_frame);

  // Validate.
  ASSERT_THAT(decoded_audio_frame, IsOk());
  EXPECT_EQ(decoded_audio_frame->substream_id, kSubstreamId);
  EXPECT_EQ(decoded_audio_frame->start_timestamp, 0);
  EXPECT_EQ(decoded_audio_frame->end_timestamp, kNumSamplesPerFrame);
  EXPECT_EQ(decoded_audio_frame->down_mixing_params, kDownMixingParams);
  EXPECT_EQ(decoded_audio_frame->audio_element_with_data,
            &audio_elements.at(kAudioElementId));

  // The casts keep the literals above INT32_MAX from being narrowing
  // conversions inside the braced initializer.
  const std::vector<std::vector<int32_t>> kExpectedDecodedSamples = {
      {0x00010000, static_cast<int32_t>(0xffff0000)},
      {0x00020000, static_cast<int32_t>(0xfffe0000)},
      {0x00030000, static_cast<int32_t>(0xfffd0000)},
      {0x00040000, static_cast<int32_t>(0xfffc0000)},
      {0x00050000, static_cast<int32_t>(0xfffb0000)},
      {0x00060000, static_cast<int32_t>(0xfffa0000)},
      {0x00070000, static_cast<int32_t>(0xfff90000)},
      {0x00080000, static_cast<int32_t>(0xfff80000)},
      {0x00000000, 0x00000000},
      {0x00000000, 0x00000000},
      {0x00000000, 0x00000000},
      {0x00000000, 0x00000000},
      {0x00000000, 0x00000000},
      {0x00000000, 0x00000000},
      {0x00000000, 0x00000000},
      {0x00000000, 0x00000000}};
  EXPECT_EQ(decoded_audio_frame->decoded_samples, kExpectedDecodedSamples);
}

// The same decoder instance must be able to decode more than one FLAC frame.
TEST(Decode, DecodesMultipleFlacFrames) {
  AudioFrameDecoder decoder;

  // Encoded frames.
  absl::flat_hash_map<DecodedUleb128, CodecConfigObu> codec_config_obus;
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  std::vector<uint8_t> encoded_audio_frame_payload = {
      kFlacEncodedFrame.begin(), kFlacEncodedFrame.end()};
  const AudioFrameWithData encoded_audio_frame = PrepareEncodedAudioFrame(
      codec_config_obus, audio_elements, CodecConfig::kCodecIdFlac,
      encoded_audio_frame_payload);
  InitAllAudioElements(audio_elements, decoder);

  // Decode the same frame twice.
  EXPECT_THAT(decoder.Decode(encoded_audio_frame), IsOk());
  EXPECT_THAT(decoder.Decode(encoded_audio_frame), IsOk());
}

// TODO(b/308073716): Add tests for more kinds of decoders.

}  // namespace
}  // namespace iamf_tools