1 /*
2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12 #include "iamf/obu/decoder_config/aac_decoder_config.h"
13
14 #include <cstdint>
15 #include <vector>
16
17 #include "absl/base/no_destructor.h"
18 #include "absl/container/flat_hash_map.h"
19 #include "absl/log/check.h"
20 #include "absl/log/log.h"
21 #include "absl/status/status.h"
22 #include "absl/strings/str_cat.h"
23 #include "absl/strings/string_view.h"
24 #include "absl/types/span.h"
25 #include "iamf/common/read_bit_buffer.h"
26 #include "iamf/common/utils/macros.h"
27 #include "iamf/common/utils/map_utils.h"
28 #include "iamf/common/utils/validation_utils.h"
29 #include "iamf/common/write_bit_buffer.h"
30
// These defines are not part of an official API and are likely to change or be
// removed. Please do not depend on them.
// TODO(b/401063570): Remove these statements when no longer disabling FLAC/AAC.
#if !defined(IAMF_TOOLS_DISABLE_AAC_DECODER)
#include "libSYS/include/machine_type.h"
#else
// Stand-in for the PCM sample type that `machine_type.h` would otherwise
// provide. In this file `INT_PCM` is only ever used as the operand of `sizeof`
// to derive a bit-depth, so it must name a 16-bit *type*. An integer literal
// such as `16` has type `int`, which makes `sizeof(INT_PCM) * 8` report the
// width of `int` rather than 16.
#define INT_PCM int16_t
#endif
39
40 namespace iamf_tools {
41
42 namespace {
43
// File-local shorthand for the nested sample-frequency-index enum.
using SampleFrequencyIndex = AudioSpecificConfig::SampleFrequencyIndex;

// ISO 14496-1 limits the max size of `DecoderConfigDescriptor` and
// `DecoderSpecificInfo` to 2^28 - 1.
// NOTE(review): the original comment gave the unit as "bits", but within this
// file the constant is compared against a byte-buffer length and the size read
// with it is multiplied by 8 before being added to a buffer position, i.e. it
// is used as a byte count. Confirm the intended unit against the spec.
constexpr int32_t kMaxClassSize = (1 << 28) - 1;

// We typically expect the classes in this file to be very small (except when
// extensions are present). This value is passed to the constructor of the
// temporary `WriteBitBuffer`s used below; presumably it is an initial capacity
// rather than a hard limit -- TODO confirm against `WriteBitBuffer`.
constexpr int kInternalBufferSize = 32;
53
ValidateAudioRollDistance(int16_t audio_roll_distance)54 absl::Status ValidateAudioRollDistance(int16_t audio_roll_distance) {
55 return ValidateEqual(audio_roll_distance,
56 AacDecoderConfig::GetRequiredAudioRollDistance(),
57 "audio_roll_distance");
58 }
59
60 // Copies all data from `original_wb` to `output_wb` with the corresponding ISO
61 // 14496-1:2010 expandable size field prepended.
PrependWithIso14496_1Expanded(const WriteBitBuffer & original_wb,WriteBitBuffer & output_wb)62 absl::Status PrependWithIso14496_1Expanded(const WriteBitBuffer& original_wb,
63 WriteBitBuffer& output_wb) {
64 CHECK(original_wb.IsByteAligned());
65 if (original_wb.bit_buffer().size() > kMaxClassSize) {
66 return absl::ResourceExhaustedError(
67 absl::StrCat("Buffer size ", original_wb.bit_buffer().size(),
68 " exceeds the maximum expected size."));
69 }
70 RETURN_IF_NOT_OK(
71 output_wb.WriteIso14496_1Expanded(original_wb.bit_buffer().size()));
72 RETURN_IF_NOT_OK(
73 output_wb.WriteUint8Span(absl::MakeConstSpan(original_wb.bit_buffer())));
74 return absl::OkStatus();
75 }
76
WriteDecoderSpecificInfo(const AacDecoderConfig::DecoderSpecificInfo & decoder_specific_info,WriteBitBuffer & wb)77 absl::Status WriteDecoderSpecificInfo(
78 const AacDecoderConfig::DecoderSpecificInfo& decoder_specific_info,
79 WriteBitBuffer& wb) {
80 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(
81 decoder_specific_info.decoder_specific_info_tag, 8));
82 // Determine the size by writing the remaining `DecoderSpecificInfo`, then
83 // prepend the size and write it to the output buffer.
84 {
85 WriteBitBuffer wb_internal(kInternalBufferSize);
86 // Write nested `audio_specific_config`.
87 RETURN_IF_NOT_OK(
88 decoder_specific_info.audio_specific_config.ValidateAndWrite(
89 wb_internal));
90 // Write the `DecoderSpecificInfo` extension.
91 RETURN_IF_NOT_OK(wb_internal.WriteUint8Span(absl::MakeConstSpan(
92 decoder_specific_info.decoder_specific_info_extension)));
93 RETURN_IF_NOT_OK(PrependWithIso14496_1Expanded(wb_internal, wb));
94 }
95 return absl::OkStatus();
96 }
97
GetExpectedPositionFromIso14496_1Expanded(ReadBitBuffer & rb,int64_t & expected_position)98 absl::Status GetExpectedPositionFromIso14496_1Expanded(
99 ReadBitBuffer& rb, int64_t& expected_position) {
100 uint32_t size;
101 RETURN_IF_NOT_OK(rb.ReadIso14496_1Expanded(kMaxClassSize, size));
102 expected_position = rb.Tell() + (static_cast<int64_t>(size) * 8);
103 return absl::OkStatus();
104 }
105
106 // Advances the buffer to the position. Dumps all skipped bytes to `extension`.
107 // OK if the buffer is already at the position. Fails if the buffer would need
108 // to go backwards.
AdvanceBufferToPosition(absl::string_view debugging_context,ReadBitBuffer & rb,const int64_t expected_position,std::vector<uint8_t> & extension)109 absl::Status AdvanceBufferToPosition(absl::string_view debugging_context,
110 ReadBitBuffer& rb,
111 const int64_t expected_position,
112 std::vector<uint8_t>& extension) {
113 const int64_t actual_position = rb.Tell();
114 if (actual_position == expected_position) {
115 // Ok no extension is present.
116 return absl::OkStatus();
117 } else if (actual_position < expected_position) {
118 // Advance and consume the extension.
119 extension.resize((expected_position - actual_position) / 8);
120 return rb.ReadUint8Span(absl::MakeSpan(extension));
121 } else {
122 // The buffer is already past the position.
123 return absl::OutOfRangeError(
124 absl::StrCat("Not enough bytes to parse ", debugging_context, "."));
125 }
126 }
127
128 } // namespace
129
Validate() const130 absl::Status AacDecoderConfig::Validate() const {
131 RETURN_IF_NOT_OK(ValidateEqual(decoder_config_descriptor_tag_,
132 AacDecoderConfig::kDecoderConfigDescriptorTag,
133 "decoder_config_descriptor_tag"));
134 // IAMF restricts several fields.
135 RETURN_IF_NOT_OK(ValidateEqual(object_type_indication_,
136 AacDecoderConfig::kObjectTypeIndication,
137 "object_type_indication"));
138 RETURN_IF_NOT_OK(ValidateEqual(stream_type_, AacDecoderConfig::kStreamType,
139 "stream_type"));
140 RETURN_IF_NOT_OK(
141 ValidateEqual(upstream_, AacDecoderConfig::kUpstream, "upstream"));
142 RETURN_IF_NOT_OK(
143 ValidateEqual(reserved_, AacDecoderConfig::kReserved, "reserved"));
144 RETURN_IF_NOT_OK(ValidateEqual(
145 decoder_specific_info_.decoder_specific_info_tag,
146 AacDecoderConfig::DecoderSpecificInfo::kDecoderSpecificInfoTag,
147 "decoder_specific_info_tag"));
148
149 const AudioSpecificConfig& audio_specific_config =
150 decoder_specific_info_.audio_specific_config;
151
152 RETURN_IF_NOT_OK(ValidateEqual(audio_specific_config.audio_object_type_,
153 AudioSpecificConfig::kAudioObjectType,
154 "audio_object_type"));
155 RETURN_IF_NOT_OK(ValidateEqual(audio_specific_config.channel_configuration_,
156 AudioSpecificConfig::kChannelConfiguration,
157 "channel_configuration"));
158 RETURN_IF_NOT_OK(
159 ValidateEqual(audio_specific_config.ga_specific_config_.frame_length_flag,
160 AudioSpecificConfig::GaSpecificConfig::kFrameLengthFlag,
161 "frame_length_flag"));
162 RETURN_IF_NOT_OK(ValidateEqual(
163 audio_specific_config.ga_specific_config_.depends_on_core_coder,
164 AudioSpecificConfig::GaSpecificConfig::kDependsOnCoreCoder,
165 "depends_on_core_coder"));
166 RETURN_IF_NOT_OK(ValidateEqual(
167 audio_specific_config.ga_specific_config_.extension_flag,
168 AudioSpecificConfig::GaSpecificConfig::kExtensionFlag, "extension_flag"));
169 return absl::OkStatus();
170 }
171
ValidateAndWrite(WriteBitBuffer & wb) const172 absl::Status AudioSpecificConfig::ValidateAndWrite(WriteBitBuffer& wb) const {
173 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(audio_object_type_, 5));
174 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(
175 static_cast<uint32_t>(sample_frequency_index_), 4));
176 if (sample_frequency_index_ == SampleFrequencyIndex::kEscapeValue) {
177 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(sampling_frequency_, 24));
178 }
179 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(channel_configuration_, 4));
180
181 // Write nested `ga_specific_config`.
182 RETURN_IF_NOT_OK(
183 wb.WriteUnsignedLiteral(ga_specific_config_.frame_length_flag, 1));
184 RETURN_IF_NOT_OK(
185 wb.WriteUnsignedLiteral(ga_specific_config_.depends_on_core_coder, 1));
186 RETURN_IF_NOT_OK(
187 wb.WriteUnsignedLiteral(ga_specific_config_.extension_flag, 1));
188
189 return absl::OkStatus();
190 }
191
Read(ReadBitBuffer & rb)192 absl::Status AudioSpecificConfig::Read(ReadBitBuffer& rb) {
193 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(5, audio_object_type_));
194 uint8_t sample_frequency_index_uint8;
195 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(4, sample_frequency_index_uint8));
196 sample_frequency_index_ =
197 static_cast<SampleFrequencyIndex>(sample_frequency_index_uint8);
198 if (sample_frequency_index_ == SampleFrequencyIndex::kEscapeValue) {
199 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(24, sampling_frequency_));
200 }
201 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(4, channel_configuration_));
202
203 // Write nested `ga_specific_config`.
204 RETURN_IF_NOT_OK(rb.ReadBoolean(ga_specific_config_.frame_length_flag));
205 RETURN_IF_NOT_OK(rb.ReadBoolean(ga_specific_config_.depends_on_core_coder));
206 RETURN_IF_NOT_OK(rb.ReadBoolean(ga_specific_config_.extension_flag));
207
208 return absl::OkStatus();
209 }
210
ValidateAndWrite(int16_t audio_roll_distance,WriteBitBuffer & wb) const211 absl::Status AacDecoderConfig::ValidateAndWrite(int16_t audio_roll_distance,
212 WriteBitBuffer& wb) const {
213 MAYBE_RETURN_IF_NOT_OK(ValidateAudioRollDistance(audio_roll_distance));
214 RETURN_IF_NOT_OK(Validate());
215
216 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(decoder_config_descriptor_tag_, 8));
217 // Write the remaining `DecoderConfigDescriptor`, then once we know the size,
218 // prepend it with the expandable size field.
219 {
220 WriteBitBuffer wb_internal(kInternalBufferSize);
221 RETURN_IF_NOT_OK(
222 wb_internal.WriteUnsignedLiteral(object_type_indication_, 8));
223 RETURN_IF_NOT_OK(wb_internal.WriteUnsignedLiteral(stream_type_, 6));
224 RETURN_IF_NOT_OK(wb_internal.WriteUnsignedLiteral(upstream_, 1));
225 RETURN_IF_NOT_OK(wb_internal.WriteUnsignedLiteral(reserved_, 1));
226 RETURN_IF_NOT_OK(wb_internal.WriteUnsignedLiteral(buffer_size_db_, 24));
227 RETURN_IF_NOT_OK(wb_internal.WriteUnsignedLiteral(max_bitrate_, 32));
228 RETURN_IF_NOT_OK(wb_internal.WriteUnsignedLiteral(average_bit_rate_, 32));
229
230 // Write nested `decoder_specific_info`.
231 RETURN_IF_NOT_OK(
232 WriteDecoderSpecificInfo(decoder_specific_info_, wb_internal));
233
234 RETURN_IF_NOT_OK(wb_internal.WriteUint8Span(
235 absl::MakeConstSpan(decoder_config_extension_)));
236
237 RETURN_IF_NOT_OK(PrependWithIso14496_1Expanded(wb_internal, wb));
238 }
239
240 return absl::OkStatus();
241 }
242
ReadAndValidate(int16_t audio_roll_distance,ReadBitBuffer & rb)243 absl::Status AacDecoderConfig::ReadAndValidate(int16_t audio_roll_distance,
244 ReadBitBuffer& rb) {
245 // Read top-level fields.
246 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(8, decoder_config_descriptor_tag_));
247 int64_t end_of_decoder_config_position;
248 RETURN_IF_NOT_OK(GetExpectedPositionFromIso14496_1Expanded(
249 rb, end_of_decoder_config_position));
250
251 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(8, object_type_indication_));
252 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(6, stream_type_));
253 RETURN_IF_NOT_OK(rb.ReadBoolean(upstream_));
254 RETURN_IF_NOT_OK(rb.ReadBoolean(reserved_));
255 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(24, buffer_size_db_));
256 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(32, max_bitrate_));
257 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(32, average_bit_rate_));
258
259 // Read nested `decoder_specific_info` the advance past its nested extension.
260 {
261 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(
262 8, decoder_specific_info_.decoder_specific_info_tag));
263 int64_t end_of_decoder_specific_info_position;
264 RETURN_IF_NOT_OK(GetExpectedPositionFromIso14496_1Expanded(
265 rb, end_of_decoder_specific_info_position));
266 // Read nested `audio_specific_config`.
267 RETURN_IF_NOT_OK(decoder_specific_info_.audio_specific_config.Read(rb));
268 RETURN_IF_NOT_OK(AdvanceBufferToPosition(
269 "decoder_specific_info", rb, end_of_decoder_specific_info_position,
270 decoder_specific_info_.decoder_specific_info_extension));
271 }
272 // Advance past the top-level extension.
273 RETURN_IF_NOT_OK(AdvanceBufferToPosition("decoder_config_descriptor", rb,
274 end_of_decoder_config_position,
275 decoder_config_extension_));
276
277 RETURN_IF_NOT_OK(ValidateAudioRollDistance(audio_roll_distance));
278 RETURN_IF_NOT_OK(Validate());
279 return absl::OkStatus();
280 }
281
GetOutputSampleRate(uint32_t & output_sample_rate) const282 absl::Status AacDecoderConfig::GetOutputSampleRate(
283 uint32_t& output_sample_rate) const {
284 using enum AudioSpecificConfig::SampleFrequencyIndex;
285 static const absl::NoDestructor<
286 absl::flat_hash_map<AudioSpecificConfig::SampleFrequencyIndex, uint32_t>>
287 kSampleFrequencyIndexToSampleFrequency({{k96000, 96000},
288 {k88200, 88200},
289 {k64000, 64000},
290 {k48000, 48000},
291 {k44100, 44100},
292 {k32000, 32000},
293 {k24000, 24000},
294 {k22050, 22050},
295 {k16000, 16000},
296 {k12000, 12000},
297 {k11025, 11025},
298 {k8000, 8000},
299 {k7350, 7350}});
300
301 const auto sample_frequency_index =
302 decoder_specific_info_.audio_specific_config.sample_frequency_index_;
303
304 if (sample_frequency_index == SampleFrequencyIndex::kEscapeValue) {
305 // Accept the value directly from the bitstream.
306 output_sample_rate =
307 decoder_specific_info_.audio_specific_config.sampling_frequency_;
308 return absl::OkStatus();
309 }
310
311 if (sample_frequency_index == SampleFrequencyIndex::kReservedA ||
312 sample_frequency_index == SampleFrequencyIndex::kReservedB) {
313 // Reject values reserved by the AAC spec.
314 return absl::UnimplementedError(absl::StrCat(
315 "Reserved sample_frequency_index= ", sample_frequency_index));
316 }
317
318 return CopyFromMap(
319 *kSampleFrequencyIndexToSampleFrequency, sample_frequency_index,
320 "Sample rate for AAC Sampling Frequency Index", output_sample_rate);
321 }
322
GetBitDepthToMeasureLoudness()323 uint8_t AacDecoderConfig::GetBitDepthToMeasureLoudness() {
324 // The input/output bit-depth depends on how `fdk_aac` was compiled. Measure
325 // loudness based on that.
326 return sizeof(INT_PCM) * 8;
327 }
328
Print() const329 void AudioSpecificConfig::Print() const {
330 LOG(INFO) << " audio_object_type= "
331 << absl::StrCat(audio_object_type_);
332 LOG(INFO) << " sample_frequency_index= "
333 << absl::StrCat(sample_frequency_index_);
334 if (sample_frequency_index_ == SampleFrequencyIndex::kEscapeValue) {
335 LOG(INFO) << " sampling_frequency= " << sampling_frequency_;
336 }
337 LOG(INFO) << " channel_configuration= "
338 << absl::StrCat(channel_configuration_);
339 LOG(INFO) << " ga_specific_info(aac):";
340 LOG(INFO) << " frame_length_flag= "
341 << ga_specific_config_.frame_length_flag;
342 LOG(INFO) << " depends_on_core_coder= "
343 << ga_specific_config_.depends_on_core_coder;
344 LOG(INFO) << " extension_flag= " << ga_specific_config_.extension_flag;
345 }
346
Print() const347 void AacDecoderConfig::Print() const {
348 LOG(INFO) << " decoder_config(aac):";
349 LOG(INFO) << " object_type_indication= "
350 << absl::StrCat(object_type_indication_);
351 LOG(INFO) << " stream_type= " << absl::StrCat(stream_type_);
352 LOG(INFO) << " upstream= " << upstream_;
353 LOG(INFO) << " reserved= " << reserved_;
354 LOG(INFO) << " buffer_size_db= " << buffer_size_db_;
355 LOG(INFO) << " max_bitrate= " << max_bitrate_;
356 LOG(INFO) << " average_bit_rate= " << average_bit_rate_;
357 LOG(INFO) << " decoder_specific_info(aac):";
358
359 decoder_specific_info_.audio_specific_config.Print();
360 LOG(INFO) << " // decoder_specific_info_extension omitted.";
361 LOG(INFO) << " // decoder_config_extension omitted.";
362 }
363
364 } // namespace iamf_tools
365