1 /* 2 * Copyright 2020 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.google.android.exoplayer2.audio; 17 18 import androidx.annotation.IntDef; 19 import com.google.android.exoplayer2.C; 20 import com.google.android.exoplayer2.ParserException; 21 import com.google.android.exoplayer2.util.Assertions; 22 import com.google.android.exoplayer2.util.Log; 23 import com.google.android.exoplayer2.util.ParsableBitArray; 24 import java.lang.annotation.Documented; 25 import java.lang.annotation.Retention; 26 import java.lang.annotation.RetentionPolicy; 27 28 /** Utility methods for handling AAC audio streams. */ 29 public final class AacUtil { 30 31 private static final String TAG = "AacUtil"; 32 33 /** Holds sample format information for AAC audio. */ 34 public static final class Config { 35 36 /** The sample rate in Hertz. */ 37 public final int sampleRateHz; 38 /** The number of channels. */ 39 public final int channelCount; 40 /** The RFC 6381 codecs string. */ 41 public final String codecs; 42 Config(int sampleRateHz, int channelCount, String codecs)43 private Config(int sampleRateHz, int channelCount, String codecs) { 44 this.sampleRateHz = sampleRateHz; 45 this.channelCount = channelCount; 46 this.codecs = codecs; 47 } 48 } 49 50 // Audio sample count constants assume the frameLengthFlag in the access unit is 0. 51 /** 52 * Number of raw audio samples that are produced per channel when decoding an AAC LC access unit. 53 */ 54 public static final int AAC_LC_AUDIO_SAMPLE_COUNT = 1024; 55 /** 56 * Number of raw audio samples that are produced per channel when decoding an AAC XHE access unit. 57 */ 58 public static final int AAC_XHE_AUDIO_SAMPLE_COUNT = AAC_LC_AUDIO_SAMPLE_COUNT; 59 /** 60 * Number of raw audio samples that are produced per channel when decoding an AAC HE access unit. 61 */ 62 public static final int AAC_HE_AUDIO_SAMPLE_COUNT = 2048; 63 /** 64 * Number of raw audio samples that are produced per channel when decoding an AAC LD access unit. 65 */ 66 public static final int AAC_LD_AUDIO_SAMPLE_COUNT = 512; 67 68 // Maximum bitrates for AAC profiles from the Fraunhofer FDK AAC encoder documentation: 69 // https://cs.android.com/android/platform/superproject/+/android-9.0.0_r8:external/aac/libAACenc/include/aacenc_lib.h;l=718 70 /** Maximum rate for an AAC LC audio stream, in bytes per second. */ 71 public static final int AAC_LC_MAX_RATE_BYTES_PER_SECOND = 800 * 1000 / 8; 72 /** Maximum rate for an AAC HE V1 audio stream, in bytes per second. */ 73 public static final int AAC_HE_V1_MAX_RATE_BYTES_PER_SECOND = 128 * 1000 / 8; 74 /** Maximum rate for an AAC HE V2 audio stream, in bytes per second. */ 75 public static final int AAC_HE_V2_MAX_RATE_BYTES_PER_SECOND = 56 * 1000 / 8; 76 /** 77 * Maximum rate for an AAC XHE audio stream, in bytes per second. 78 * 79 * <p>Fraunhofer documentation says "500 kbit/s and above" for stereo, so we use a rate generously 80 * above the 500 kbit/s level. 81 */ 82 public static final int AAC_XHE_MAX_RATE_BYTES_PER_SECOND = 2048 * 1000 / 8; 83 /** 84 * Maximum rate for an AAC ELD audio stream, in bytes per second. 85 * 86 * <p>Fraunhofer documentation shows AAC-ELD as useful for up to ~ 64 kbit/s so we use this value. 87 */ 88 public static final int AAC_ELD_MAX_RATE_BYTES_PER_SECOND = 64 * 1000 / 8; 89 90 private static final int AUDIO_SPECIFIC_CONFIG_FREQUENCY_INDEX_ARBITRARY = 0xF; 91 private static final int[] AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE = 92 new int[] { 93 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350 94 }; 95 private static final int AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID = -1; 96 /** 97 * In the channel configurations below, <A> indicates a single channel element; (A, B) 98 * indicates a channel pair element; and [A] indicates a low-frequency effects element. The 99 * speaker mapping short forms used are: 100 * 101 * <ul> 102 * <li>FC: front center 103 * <li>BC: back center 104 * <li>FL/FR: front left/right 105 * <li>FCL/FCR: front center left/right 106 * <li>FTL/FTR: front top left/right 107 * <li>SL/SR: back surround left/right 108 * <li>BL/BR: back left/right 109 * <li>LFE: low frequency effects 110 * </ul> 111 */ 112 private static final int[] AUDIO_SPECIFIC_CONFIG_CHANNEL_COUNT_TABLE = 113 new int[] { 114 0, 115 1, /* mono: <FC> */ 116 2, /* stereo: (FL, FR) */ 117 3, /* 3.0: <FC>, (FL, FR) */ 118 4, /* 4.0: <FC>, (FL, FR), <BC> */ 119 5, /* 5.0 back: <FC>, (FL, FR), (SL, SR) */ 120 6, /* 5.1 back: <FC>, (FL, FR), (SL, SR), <BC>, [LFE] */ 121 8, /* 7.1 wide back: <FC>, (FCL, FCR), (FL, FR), (SL, SR), [LFE] */ 122 AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID, 123 AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID, 124 AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID, 125 7, /* 6.1: <FC>, (FL, FR), (SL, SR), <RC>, [LFE] */ 126 8, /* 7.1: <FC>, (FL, FR), (SL, SR), (BL, BR), [LFE] */ 127 AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID, 128 8, /* 7.1 top: <FC>, (FL, FR), (SL, SR), [LFE], (FTL, FTR) */ 129 AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID 130 }; 131 132 /** 133 * Prefix for the RFC 6381 codecs string for AAC formats. To form a full codecs string, suffix the 134 * decimal AudioObjectType. 135 */ 136 private static final String CODECS_STRING_PREFIX = "mp4a.40."; 137 138 // Advanced Audio Coding Low-Complexity profile. 139 public static final int AUDIO_OBJECT_TYPE_AAC_LC = 2; 140 // Spectral Band Replication. 141 public static final int AUDIO_OBJECT_TYPE_AAC_SBR = 5; 142 // Error Resilient Bit-Sliced Arithmetic Coding. 143 public static final int AUDIO_OBJECT_TYPE_AAC_ER_BSAC = 22; 144 // Enhanced low delay. 145 public static final int AUDIO_OBJECT_TYPE_AAC_ELD = 23; 146 // Parametric Stereo. 147 public static final int AUDIO_OBJECT_TYPE_AAC_PS = 29; 148 // Escape code for extended audio object types. 149 private static final int AUDIO_OBJECT_TYPE_ESCAPE = 31; 150 // Extended high efficiency. 151 public static final int AUDIO_OBJECT_TYPE_AAC_XHE = 42; 152 153 /** 154 * Valid AAC Audio object types. One of {@link #AUDIO_OBJECT_TYPE_AAC_LC}, {@link 155 * #AUDIO_OBJECT_TYPE_AAC_SBR}, {@link #AUDIO_OBJECT_TYPE_AAC_ER_BSAC}, {@link 156 * #AUDIO_OBJECT_TYPE_AAC_ELD}, {@link #AUDIO_OBJECT_TYPE_AAC_PS} or {@link 157 * #AUDIO_OBJECT_TYPE_AAC_XHE}. 158 */ 159 @Documented 160 @Retention(RetentionPolicy.SOURCE) 161 @IntDef({ 162 AUDIO_OBJECT_TYPE_AAC_LC, 163 AUDIO_OBJECT_TYPE_AAC_SBR, 164 AUDIO_OBJECT_TYPE_AAC_ER_BSAC, 165 AUDIO_OBJECT_TYPE_AAC_ELD, 166 AUDIO_OBJECT_TYPE_AAC_PS, 167 AUDIO_OBJECT_TYPE_AAC_XHE 168 }) 169 public @interface AacAudioObjectType {} 170 171 /** 172 * Parses an AAC AudioSpecificConfig, as defined in ISO 14496-3 1.6.2.1 173 * 174 * @param audioSpecificConfig A byte array containing the AudioSpecificConfig to parse. 175 * @return The parsed configuration. 176 * @throws ParserException If the AudioSpecificConfig cannot be parsed as it's not supported. 177 */ parseAudioSpecificConfig(byte[] audioSpecificConfig)178 public static Config parseAudioSpecificConfig(byte[] audioSpecificConfig) throws ParserException { 179 return parseAudioSpecificConfig( 180 new ParsableBitArray(audioSpecificConfig), /* forceReadToEnd= */ false); 181 } 182 183 /** 184 * Parses an AAC AudioSpecificConfig, as defined in ISO 14496-3 1.6.2.1 185 * 186 * @param bitArray A {@link ParsableBitArray} containing the AudioSpecificConfig to parse. The 187 * position is advanced to the end of the AudioSpecificConfig. 188 * @param forceReadToEnd Whether the entire AudioSpecificConfig should be read. Required for 189 * knowing the length of the configuration payload. 190 * @return The parsed configuration. 191 * @throws ParserException If the AudioSpecificConfig cannot be parsed as it's not supported. 192 */ parseAudioSpecificConfig(ParsableBitArray bitArray, boolean forceReadToEnd)193 public static Config parseAudioSpecificConfig(ParsableBitArray bitArray, boolean forceReadToEnd) 194 throws ParserException { 195 int audioObjectType = getAudioObjectType(bitArray); 196 int sampleRateHz = getSamplingFrequency(bitArray); 197 int channelConfiguration = bitArray.readBits(4); 198 String codecs = CODECS_STRING_PREFIX + audioObjectType; 199 if (audioObjectType == AUDIO_OBJECT_TYPE_AAC_SBR 200 || audioObjectType == AUDIO_OBJECT_TYPE_AAC_PS) { 201 // For an AAC bitstream using spectral band replication (SBR) or parametric stereo (PS) with 202 // explicit signaling, we return the extension sampling frequency as the sample rate of the 203 // content; this is identical to the sample rate of the decoded output but may differ from 204 // the sample rate set above. 205 // Use the extensionSamplingFrequencyIndex. 206 sampleRateHz = getSamplingFrequency(bitArray); 207 audioObjectType = getAudioObjectType(bitArray); 208 if (audioObjectType == AUDIO_OBJECT_TYPE_AAC_ER_BSAC) { 209 // Use the extensionChannelConfiguration. 210 channelConfiguration = bitArray.readBits(4); 211 } 212 } 213 214 if (forceReadToEnd) { 215 switch (audioObjectType) { 216 case 1: 217 case 2: 218 case 3: 219 case 4: 220 case 6: 221 case 7: 222 case 17: 223 case 19: 224 case 20: 225 case 21: 226 case 22: 227 case 23: 228 parseGaSpecificConfig(bitArray, audioObjectType, channelConfiguration); 229 break; 230 default: 231 throw new ParserException("Unsupported audio object type: " + audioObjectType); 232 } 233 switch (audioObjectType) { 234 case 17: 235 case 19: 236 case 20: 237 case 21: 238 case 22: 239 case 23: 240 int epConfig = bitArray.readBits(2); 241 if (epConfig == 2 || epConfig == 3) { 242 throw new ParserException("Unsupported epConfig: " + epConfig); 243 } 244 break; 245 default: 246 break; 247 } 248 } 249 // For supported containers, bits_to_decode() is always 0. 250 int channelCount = AUDIO_SPECIFIC_CONFIG_CHANNEL_COUNT_TABLE[channelConfiguration]; 251 Assertions.checkArgument(channelCount != AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID); 252 return new Config(sampleRateHz, channelCount, codecs); 253 } 254 255 /** 256 * Builds a simple AAC LC AudioSpecificConfig, as defined in ISO 14496-3 1.6.2.1 257 * 258 * @param sampleRate The sample rate in Hz. 259 * @param channelCount The channel count. 260 * @return The AudioSpecificConfig. 261 */ buildAacLcAudioSpecificConfig(int sampleRate, int channelCount)262 public static byte[] buildAacLcAudioSpecificConfig(int sampleRate, int channelCount) { 263 int sampleRateIndex = C.INDEX_UNSET; 264 for (int i = 0; i < AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE.length; ++i) { 265 if (sampleRate == AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE[i]) { 266 sampleRateIndex = i; 267 } 268 } 269 int channelConfig = C.INDEX_UNSET; 270 for (int i = 0; i < AUDIO_SPECIFIC_CONFIG_CHANNEL_COUNT_TABLE.length; ++i) { 271 if (channelCount == AUDIO_SPECIFIC_CONFIG_CHANNEL_COUNT_TABLE[i]) { 272 channelConfig = i; 273 } 274 } 275 if (sampleRate == C.INDEX_UNSET || channelConfig == C.INDEX_UNSET) { 276 throw new IllegalArgumentException( 277 "Invalid sample rate or number of channels: " + sampleRate + ", " + channelCount); 278 } 279 return buildAudioSpecificConfig(AUDIO_OBJECT_TYPE_AAC_LC, sampleRateIndex, channelConfig); 280 } 281 282 /** 283 * Builds a simple AudioSpecificConfig, as defined in ISO 14496-3 1.6.2.1 284 * 285 * @param audioObjectType The audio object type. 286 * @param sampleRateIndex The sample rate index. 287 * @param channelConfig The channel configuration. 288 * @return The AudioSpecificConfig. 289 */ buildAudioSpecificConfig( int audioObjectType, int sampleRateIndex, int channelConfig)290 public static byte[] buildAudioSpecificConfig( 291 int audioObjectType, int sampleRateIndex, int channelConfig) { 292 byte[] specificConfig = new byte[2]; 293 specificConfig[0] = (byte) (((audioObjectType << 3) & 0xF8) | ((sampleRateIndex >> 1) & 0x07)); 294 specificConfig[1] = (byte) (((sampleRateIndex << 7) & 0x80) | ((channelConfig << 3) & 0x78)); 295 return specificConfig; 296 } 297 298 /** Returns the encoding for a given AAC audio object type. */ 299 @C.Encoding getEncodingForAudioObjectType(@acAudioObjectType int audioObjectType)300 public static int getEncodingForAudioObjectType(@AacAudioObjectType int audioObjectType) { 301 switch (audioObjectType) { 302 case AUDIO_OBJECT_TYPE_AAC_LC: 303 return C.ENCODING_AAC_LC; 304 case AUDIO_OBJECT_TYPE_AAC_SBR: 305 return C.ENCODING_AAC_HE_V1; 306 case AUDIO_OBJECT_TYPE_AAC_PS: 307 return C.ENCODING_AAC_HE_V2; 308 case AUDIO_OBJECT_TYPE_AAC_XHE: 309 return C.ENCODING_AAC_XHE; 310 case AUDIO_OBJECT_TYPE_AAC_ELD: 311 return C.ENCODING_AAC_ELD; 312 default: 313 return C.ENCODING_INVALID; 314 } 315 } 316 317 /** 318 * Returns the AAC audio object type as specified in 14496-3 (2005) Table 1.14. 319 * 320 * @param bitArray The bit array containing the audio specific configuration. 321 * @return The audio object type. 322 */ getAudioObjectType(ParsableBitArray bitArray)323 private static int getAudioObjectType(ParsableBitArray bitArray) { 324 int audioObjectType = bitArray.readBits(5); 325 if (audioObjectType == AUDIO_OBJECT_TYPE_ESCAPE) { 326 audioObjectType = 32 + bitArray.readBits(6); 327 } 328 return audioObjectType; 329 } 330 331 /** 332 * Returns the AAC sampling frequency (or extension sampling frequency) as specified in 14496-3 333 * (2005) Table 1.13. 334 * 335 * @param bitArray The bit array containing the audio specific configuration. 336 * @return The sampling frequency. 337 */ getSamplingFrequency(ParsableBitArray bitArray)338 private static int getSamplingFrequency(ParsableBitArray bitArray) { 339 int samplingFrequency; 340 int frequencyIndex = bitArray.readBits(4); 341 if (frequencyIndex == AUDIO_SPECIFIC_CONFIG_FREQUENCY_INDEX_ARBITRARY) { 342 samplingFrequency = bitArray.readBits(24); 343 } else { 344 Assertions.checkArgument(frequencyIndex < 13); 345 samplingFrequency = AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE[frequencyIndex]; 346 } 347 return samplingFrequency; 348 } 349 350 private static void parseGaSpecificConfig( 351 ParsableBitArray bitArray, int audioObjectType, int channelConfiguration) { 352 boolean frameLengthFlag = bitArray.readBit(); 353 if (frameLengthFlag) { 354 Log.w(TAG, "Unexpected frameLengthFlag = 1"); 355 } 356 boolean dependsOnCoreDecoder = bitArray.readBit(); 357 if (dependsOnCoreDecoder) { 358 bitArray.skipBits(14); // coreCoderDelay. 359 } 360 boolean extensionFlag = bitArray.readBit(); 361 if (channelConfiguration == 0) { 362 throw new UnsupportedOperationException(); // TODO: Implement programConfigElement(); 363 } 364 if (audioObjectType == 6 || audioObjectType == 20) { 365 bitArray.skipBits(3); // layerNr. 366 } 367 if (extensionFlag) { 368 if (audioObjectType == 22) { 369 bitArray.skipBits(16); // numOfSubFrame (5), layer_length(11). 370 } 371 if (audioObjectType == 17 372 || audioObjectType == 19 373 || audioObjectType == 20 374 || audioObjectType == 23) { 375 // aacSectionDataResilienceFlag, aacScalefactorDataResilienceFlag, 376 // aacSpectralDataResilienceFlag. 377 bitArray.skipBits(3); 378 } 379 bitArray.skipBits(1); // extensionFlag3. 380 } 381 } 382 383 private AacUtil() {} 384 } 385