• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.google.android.exoplayer2.audio;
17 
18 import androidx.annotation.IntDef;
19 import com.google.android.exoplayer2.C;
20 import com.google.android.exoplayer2.ParserException;
21 import com.google.android.exoplayer2.util.Assertions;
22 import com.google.android.exoplayer2.util.Log;
23 import com.google.android.exoplayer2.util.ParsableBitArray;
24 import java.lang.annotation.Documented;
25 import java.lang.annotation.Retention;
26 import java.lang.annotation.RetentionPolicy;
27 
28 /** Utility methods for handling AAC audio streams. */
29 public final class AacUtil {
30 
  /** Tag passed to {@link Log} for messages emitted by this class. */
  private static final String TAG = "AacUtil";
32 
33   /** Holds sample format information for AAC audio. */
34   public static final class Config {
35 
36     /** The sample rate in Hertz. */
37     public final int sampleRateHz;
38     /** The number of channels. */
39     public final int channelCount;
40     /** The RFC 6381 codecs string. */
41     public final String codecs;
42 
Config(int sampleRateHz, int channelCount, String codecs)43     private Config(int sampleRateHz, int channelCount, String codecs) {
44       this.sampleRateHz = sampleRateHz;
45       this.channelCount = channelCount;
46       this.codecs = codecs;
47     }
48   }
49 
  // Audio sample count constants assume the frameLengthFlag in the access unit is 0.
  /**
   * Number of raw audio samples that are produced per channel when decoding an AAC LC access unit.
   */
  public static final int AAC_LC_AUDIO_SAMPLE_COUNT = 1024;
  /**
   * Number of raw audio samples that are produced per channel when decoding an AAC XHE access unit.
   * Defined to be the same value as {@link #AAC_LC_AUDIO_SAMPLE_COUNT}.
   */
  public static final int AAC_XHE_AUDIO_SAMPLE_COUNT = AAC_LC_AUDIO_SAMPLE_COUNT;
  /**
   * Number of raw audio samples that are produced per channel when decoding an AAC HE access unit.
   */
  public static final int AAC_HE_AUDIO_SAMPLE_COUNT = 2048;
  /**
   * Number of raw audio samples that are produced per channel when decoding an AAC LD access unit.
   */
  public static final int AAC_LD_AUDIO_SAMPLE_COUNT = 512;
67 
  // Maximum bitrates for AAC profiles from the Fraunhofer FDK AAC encoder documentation:
  // https://cs.android.com/android/platform/superproject/+/android-9.0.0_r8:external/aac/libAACenc/include/aacenc_lib.h;l=718
  // Each constant is written as (rate in kbit/s) * 1000 / 8 to convert it to bytes per second.
  /** Maximum rate for an AAC LC audio stream, in bytes per second. */
  public static final int AAC_LC_MAX_RATE_BYTES_PER_SECOND = 800 * 1000 / 8;
  /** Maximum rate for an AAC HE V1 audio stream, in bytes per second. */
  public static final int AAC_HE_V1_MAX_RATE_BYTES_PER_SECOND = 128 * 1000 / 8;
  /** Maximum rate for an AAC HE V2 audio stream, in bytes per second. */
  public static final int AAC_HE_V2_MAX_RATE_BYTES_PER_SECOND = 56 * 1000 / 8;
  /**
   * Maximum rate for an AAC XHE audio stream, in bytes per second.
   *
   * <p>Fraunhofer documentation says "500 kbit/s and above" for stereo, so we use a rate generously
   * above the 500 kbit/s level.
   */
  public static final int AAC_XHE_MAX_RATE_BYTES_PER_SECOND = 2048 * 1000 / 8;
  /**
   * Maximum rate for an AAC ELD audio stream, in bytes per second.
   *
   * <p>Fraunhofer documentation shows AAC-ELD as useful for up to ~ 64 kbit/s so we use this value.
   */
  public static final int AAC_ELD_MAX_RATE_BYTES_PER_SECOND = 64 * 1000 / 8;
89 
  /**
   * Value of the 4-bit sampling frequency index signaling that the sampling frequency is written
   * explicitly in the following 24 bits rather than looked up in the table.
   */
  private static final int AUDIO_SPECIFIC_CONFIG_FREQUENCY_INDEX_ARBITRARY = 0xF;
  /** Sampling rates in Hertz, indexed by the sampling frequency index read from the bitstream. */
  private static final int[] AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE =
      new int[] {
        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350
      };
  /** Marker stored in the channel count table for channel configurations that are rejected. */
  private static final int AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID = -1;
  /**
   * Channel counts indexed by the 4-bit channel configuration read from the bitstream.
   *
   * <p>In the channel configurations below, &lt;A&gt; indicates a single channel element; (A, B)
   * indicates a channel pair element; and [A] indicates a low-frequency effects element. The
   * speaker mapping short forms used are:
   *
   * <ul>
   *   <li>FC: front center
   *   <li>BC: back center
   *   <li>FL/FR: front left/right
   *   <li>FCL/FCR: front center left/right
   *   <li>FTL/FTR: front top left/right
   *   <li>SL/SR: back surround left/right
   *   <li>BL/BR: back left/right
   *   <li>LFE: low frequency effects
   * </ul>
   */
  private static final int[] AUDIO_SPECIFIC_CONFIG_CHANNEL_COUNT_TABLE =
      new int[] {
        0,
        1, /* mono: <FC> */
        2, /* stereo: (FL, FR) */
        3, /* 3.0: <FC>, (FL, FR) */
        4, /* 4.0: <FC>, (FL, FR), <BC> */
        5, /* 5.0 back: <FC>, (FL, FR), (SL, SR) */
        6, /* 5.1 back: <FC>, (FL, FR), (SL, SR), [LFE] */
        8, /* 7.1 wide back: <FC>, (FCL, FCR), (FL, FR), (SL, SR), [LFE] */
        AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID,
        AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID,
        AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID,
        7, /* 6.1: <FC>, (FL, FR), (SL, SR), <BC>, [LFE] */
        8, /* 7.1: <FC>, (FL, FR), (SL, SR), (BL, BR), [LFE] */
        AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID,
        8, /* 7.1 top: <FC>, (FL, FR), (SL, SR), [LFE], (FTL, FTR) */
        AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID
      };
131 
  /**
   * Prefix for the RFC 6381 codecs string for AAC formats. To form a full codecs string, suffix the
   * decimal AudioObjectType.
   */
  private static final String CODECS_STRING_PREFIX = "mp4a.40.";

  /** Audio object type for the Advanced Audio Coding Low-Complexity profile. */
  public static final int AUDIO_OBJECT_TYPE_AAC_LC = 2;
  /** Audio object type for Spectral Band Replication. */
  public static final int AUDIO_OBJECT_TYPE_AAC_SBR = 5;
  /** Audio object type for Error Resilient Bit-Sliced Arithmetic Coding. */
  public static final int AUDIO_OBJECT_TYPE_AAC_ER_BSAC = 22;
  /** Audio object type for Enhanced low delay. */
  public static final int AUDIO_OBJECT_TYPE_AAC_ELD = 23;
  /** Audio object type for Parametric Stereo. */
  public static final int AUDIO_OBJECT_TYPE_AAC_PS = 29;
  /**
   * Escape code for extended audio object types: when the 5-bit type field holds this value, the
   * actual type is 32 plus the following 6-bit field.
   */
  private static final int AUDIO_OBJECT_TYPE_ESCAPE = 31;
  /** Audio object type for Extended high efficiency. */
  public static final int AUDIO_OBJECT_TYPE_AAC_XHE = 42;
152 
  /**
   * Valid AAC Audio object types. One of {@link #AUDIO_OBJECT_TYPE_AAC_LC}, {@link
   * #AUDIO_OBJECT_TYPE_AAC_SBR}, {@link #AUDIO_OBJECT_TYPE_AAC_ER_BSAC}, {@link
   * #AUDIO_OBJECT_TYPE_AAC_ELD}, {@link #AUDIO_OBJECT_TYPE_AAC_PS} or {@link
   * #AUDIO_OBJECT_TYPE_AAC_XHE}.
   *
   * <p>The annotation has source-level retention only.
   */
  @Documented
  @Retention(RetentionPolicy.SOURCE)
  @IntDef({
    AUDIO_OBJECT_TYPE_AAC_LC,
    AUDIO_OBJECT_TYPE_AAC_SBR,
    AUDIO_OBJECT_TYPE_AAC_ER_BSAC,
    AUDIO_OBJECT_TYPE_AAC_ELD,
    AUDIO_OBJECT_TYPE_AAC_PS,
    AUDIO_OBJECT_TYPE_AAC_XHE
  })
  public @interface AacAudioObjectType {}
170 
171   /**
172    * Parses an AAC AudioSpecificConfig, as defined in ISO 14496-3 1.6.2.1
173    *
174    * @param audioSpecificConfig A byte array containing the AudioSpecificConfig to parse.
175    * @return The parsed configuration.
176    * @throws ParserException If the AudioSpecificConfig cannot be parsed as it's not supported.
177    */
parseAudioSpecificConfig(byte[] audioSpecificConfig)178   public static Config parseAudioSpecificConfig(byte[] audioSpecificConfig) throws ParserException {
179     return parseAudioSpecificConfig(
180         new ParsableBitArray(audioSpecificConfig), /* forceReadToEnd= */ false);
181   }
182 
183   /**
184    * Parses an AAC AudioSpecificConfig, as defined in ISO 14496-3 1.6.2.1
185    *
186    * @param bitArray A {@link ParsableBitArray} containing the AudioSpecificConfig to parse. The
187    *     position is advanced to the end of the AudioSpecificConfig.
188    * @param forceReadToEnd Whether the entire AudioSpecificConfig should be read. Required for
189    *     knowing the length of the configuration payload.
190    * @return The parsed configuration.
191    * @throws ParserException If the AudioSpecificConfig cannot be parsed as it's not supported.
192    */
parseAudioSpecificConfig(ParsableBitArray bitArray, boolean forceReadToEnd)193   public static Config parseAudioSpecificConfig(ParsableBitArray bitArray, boolean forceReadToEnd)
194       throws ParserException {
195     int audioObjectType = getAudioObjectType(bitArray);
196     int sampleRateHz = getSamplingFrequency(bitArray);
197     int channelConfiguration = bitArray.readBits(4);
198     String codecs = CODECS_STRING_PREFIX + audioObjectType;
199     if (audioObjectType == AUDIO_OBJECT_TYPE_AAC_SBR
200         || audioObjectType == AUDIO_OBJECT_TYPE_AAC_PS) {
201       // For an AAC bitstream using spectral band replication (SBR) or parametric stereo (PS) with
202       // explicit signaling, we return the extension sampling frequency as the sample rate of the
203       // content; this is identical to the sample rate of the decoded output but may differ from
204       // the sample rate set above.
205       // Use the extensionSamplingFrequencyIndex.
206       sampleRateHz = getSamplingFrequency(bitArray);
207       audioObjectType = getAudioObjectType(bitArray);
208       if (audioObjectType == AUDIO_OBJECT_TYPE_AAC_ER_BSAC) {
209         // Use the extensionChannelConfiguration.
210         channelConfiguration = bitArray.readBits(4);
211       }
212     }
213 
214     if (forceReadToEnd) {
215       switch (audioObjectType) {
216         case 1:
217         case 2:
218         case 3:
219         case 4:
220         case 6:
221         case 7:
222         case 17:
223         case 19:
224         case 20:
225         case 21:
226         case 22:
227         case 23:
228           parseGaSpecificConfig(bitArray, audioObjectType, channelConfiguration);
229           break;
230         default:
231           throw new ParserException("Unsupported audio object type: " + audioObjectType);
232       }
233       switch (audioObjectType) {
234         case 17:
235         case 19:
236         case 20:
237         case 21:
238         case 22:
239         case 23:
240           int epConfig = bitArray.readBits(2);
241           if (epConfig == 2 || epConfig == 3) {
242             throw new ParserException("Unsupported epConfig: " + epConfig);
243           }
244           break;
245         default:
246           break;
247       }
248     }
249     // For supported containers, bits_to_decode() is always 0.
250     int channelCount = AUDIO_SPECIFIC_CONFIG_CHANNEL_COUNT_TABLE[channelConfiguration];
251     Assertions.checkArgument(channelCount != AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID);
252     return new Config(sampleRateHz, channelCount, codecs);
253   }
254 
255   /**
256    * Builds a simple AAC LC AudioSpecificConfig, as defined in ISO 14496-3 1.6.2.1
257    *
258    * @param sampleRate The sample rate in Hz.
259    * @param channelCount The channel count.
260    * @return The AudioSpecificConfig.
261    */
buildAacLcAudioSpecificConfig(int sampleRate, int channelCount)262   public static byte[] buildAacLcAudioSpecificConfig(int sampleRate, int channelCount) {
263     int sampleRateIndex = C.INDEX_UNSET;
264     for (int i = 0; i < AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE.length; ++i) {
265       if (sampleRate == AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE[i]) {
266         sampleRateIndex = i;
267       }
268     }
269     int channelConfig = C.INDEX_UNSET;
270     for (int i = 0; i < AUDIO_SPECIFIC_CONFIG_CHANNEL_COUNT_TABLE.length; ++i) {
271       if (channelCount == AUDIO_SPECIFIC_CONFIG_CHANNEL_COUNT_TABLE[i]) {
272         channelConfig = i;
273       }
274     }
275     if (sampleRate == C.INDEX_UNSET || channelConfig == C.INDEX_UNSET) {
276       throw new IllegalArgumentException(
277           "Invalid sample rate or number of channels: " + sampleRate + ", " + channelCount);
278     }
279     return buildAudioSpecificConfig(AUDIO_OBJECT_TYPE_AAC_LC, sampleRateIndex, channelConfig);
280   }
281 
282   /**
283    * Builds a simple AudioSpecificConfig, as defined in ISO 14496-3 1.6.2.1
284    *
285    * @param audioObjectType The audio object type.
286    * @param sampleRateIndex The sample rate index.
287    * @param channelConfig The channel configuration.
288    * @return The AudioSpecificConfig.
289    */
buildAudioSpecificConfig( int audioObjectType, int sampleRateIndex, int channelConfig)290   public static byte[] buildAudioSpecificConfig(
291       int audioObjectType, int sampleRateIndex, int channelConfig) {
292     byte[] specificConfig = new byte[2];
293     specificConfig[0] = (byte) (((audioObjectType << 3) & 0xF8) | ((sampleRateIndex >> 1) & 0x07));
294     specificConfig[1] = (byte) (((sampleRateIndex << 7) & 0x80) | ((channelConfig << 3) & 0x78));
295     return specificConfig;
296   }
297 
298   /** Returns the encoding for a given AAC audio object type. */
299   @C.Encoding
getEncodingForAudioObjectType(@acAudioObjectType int audioObjectType)300   public static int getEncodingForAudioObjectType(@AacAudioObjectType int audioObjectType) {
301     switch (audioObjectType) {
302       case AUDIO_OBJECT_TYPE_AAC_LC:
303         return C.ENCODING_AAC_LC;
304       case AUDIO_OBJECT_TYPE_AAC_SBR:
305         return C.ENCODING_AAC_HE_V1;
306       case AUDIO_OBJECT_TYPE_AAC_PS:
307         return C.ENCODING_AAC_HE_V2;
308       case AUDIO_OBJECT_TYPE_AAC_XHE:
309         return C.ENCODING_AAC_XHE;
310       case AUDIO_OBJECT_TYPE_AAC_ELD:
311         return C.ENCODING_AAC_ELD;
312       default:
313         return C.ENCODING_INVALID;
314     }
315   }
316 
317   /**
318    * Returns the AAC audio object type as specified in 14496-3 (2005) Table 1.14.
319    *
320    * @param bitArray The bit array containing the audio specific configuration.
321    * @return The audio object type.
322    */
getAudioObjectType(ParsableBitArray bitArray)323   private static int getAudioObjectType(ParsableBitArray bitArray) {
324     int audioObjectType = bitArray.readBits(5);
325     if (audioObjectType == AUDIO_OBJECT_TYPE_ESCAPE) {
326       audioObjectType = 32 + bitArray.readBits(6);
327     }
328     return audioObjectType;
329   }
330 
331   /**
332    * Returns the AAC sampling frequency (or extension sampling frequency) as specified in 14496-3
333    * (2005) Table 1.13.
334    *
335    * @param bitArray The bit array containing the audio specific configuration.
336    * @return The sampling frequency.
337    */
getSamplingFrequency(ParsableBitArray bitArray)338   private static int getSamplingFrequency(ParsableBitArray bitArray) {
339     int samplingFrequency;
340     int frequencyIndex = bitArray.readBits(4);
341     if (frequencyIndex == AUDIO_SPECIFIC_CONFIG_FREQUENCY_INDEX_ARBITRARY) {
342       samplingFrequency = bitArray.readBits(24);
343     } else {
344       Assertions.checkArgument(frequencyIndex < 13);
345       samplingFrequency = AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE[frequencyIndex];
346     }
347     return samplingFrequency;
348   }
349 
350   private static void parseGaSpecificConfig(
351       ParsableBitArray bitArray, int audioObjectType, int channelConfiguration) {
352     boolean frameLengthFlag = bitArray.readBit();
353     if (frameLengthFlag) {
354       Log.w(TAG, "Unexpected frameLengthFlag = 1");
355     }
356     boolean dependsOnCoreDecoder = bitArray.readBit();
357     if (dependsOnCoreDecoder) {
358       bitArray.skipBits(14); // coreCoderDelay.
359     }
360     boolean extensionFlag = bitArray.readBit();
361     if (channelConfiguration == 0) {
362       throw new UnsupportedOperationException(); // TODO: Implement programConfigElement();
363     }
364     if (audioObjectType == 6 || audioObjectType == 20) {
365       bitArray.skipBits(3); // layerNr.
366     }
367     if (extensionFlag) {
368       if (audioObjectType == 22) {
369         bitArray.skipBits(16); // numOfSubFrame (5), layer_length(11).
370       }
371       if (audioObjectType == 17
372           || audioObjectType == 19
373           || audioObjectType == 20
374           || audioObjectType == 23) {
375         // aacSectionDataResilienceFlag, aacScalefactorDataResilienceFlag,
376         // aacSpectralDataResilienceFlag.
377         bitArray.skipBits(3);
378       }
379       bitArray.skipBits(1); // extensionFlag3.
380     }
381   }
382 
  /** Private constructor: the class is not meant to be instantiated from outside. */
  private AacUtil() {}
384 }
385