• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 3-Clause Clear License
5  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6  * License was not distributed with this source code in the LICENSE file, you
7  * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8  * Alliance for Open Media Patent License 1.0 was not distributed with this
9  * source code in the PATENTS file, you can obtain it at
10  * www.aomedia.org/license/patent.
11  */
12 
13 #ifndef API_DECODER_IAMF_DECODER_H_
14 #define API_DECODER_IAMF_DECODER_H_
15 
16 #include <cstddef>
17 #include <cstdint>
18 #include <memory>
19 #include <vector>
20 
21 #include "absl/status/status.h"
22 #include "absl/status/statusor.h"
23 #include "absl/types/span.h"
24 #include "iamf/api/types.h"
25 
26 namespace iamf_tools {
27 namespace api {
28 
29 /*!brief The class and entrypoint for decoding IAMF bitstreams. */
30 class IamfDecoder {
31  public:
32   /* WARNING: API is currently in flux and will change.
33    *
34    * The functions below constitute our IAMF Iterative Decoder API. Below is a
35    * sample usage of the API.
36    *
37    * Reconfigurable Standalone IAMF Usage
38    * IamfDecoder streaming_decoder = IamfDecoder::Create();
39    * for chunk of data in iamf stream:
40    *    Decode()
41    *    if (IsDescriptorProcessingComplete()) {
42    *      GetMixPresentations(output_mix_presentation_ids)
43    *      ConfigureMixPresentationId(mix_presentation_id)
44    *      ConfigureOutputLayout(output_layout)
45    *      ConfigureBitDepth(bit_depth)
46    *    }
47    * for chunk of data in iamf stream:
48    *    Decode()
49    *    while (IsTemporalUnitAvailable()) {
50    *      GetOutputTemporalUnit(output_temporal_unit)
51    *      Playback(output_temporal_unit)
52    *    }
53    * while (IsTemporalUnitAvailable()) {
54    *      Flush(output_temporal_unit)
55    *      Playback(output_temporal_unit)
56    *  }
57    * Close();
58    */
59 
60   // Dtor cannot be inline (so it must be declared and defined in the source
61   // file) because this class holds a (unique) pointer to the partial class,
62   // DecoderState.  Moves must be declared and defined because dtor is defined.
63   ~IamfDecoder();
64   IamfDecoder(IamfDecoder&&);
65   IamfDecoder& operator=(IamfDecoder&&);
66 
67   /*!\brief Creates an IamfDecoder.
68    *
69    * This function should be used for pure streaming applications in which the
70    * descriptor OBUs are not known in advance.
71    *
72    * \param requested_layout Specifies the desired output layout. This layout
73    *        will be used so long as it is present in the Descriptor OBUs that
74    *        are later provided to Decode(). If not, a default layout will be
75    *        selected.
76    *
77    * \return IamfDecoder upon success. Other specific statuses on
78    *         failure.
79    */
80   static absl::StatusOr<IamfDecoder> Create(
81       const OutputLayout& requested_layout);
82 
83   /*!\brief Creates an IamfDecoder from a known set of descriptor OBUs.
84    *
85    * This function should be used for applications in which the descriptor OBUs
86    * are known in advance.
87    *
88    * \param requested_layout Specifies the desired output layout. This layout
89    *        will be used so long as it is present in the Descriptor OBUs that
90    *        are provided. If not, a default layout will be selected.
91    * \param descriptor_obus Bitstream containing all the descriptor OBUs and
92    *        only descriptor OBUs.
93    * \return IamfDecoder upon success. Other specific statuses on
94    *         failure.
95    */
96   static absl::StatusOr<IamfDecoder> CreateFromDescriptors(
97       const OutputLayout& requested_layout,
98       absl::Span<const uint8_t> descriptor_obus);
99 
100   /*!\brief Configures the decoder with the desired mix presentation.
101    *
102    * \param mix_presentation_id Specifies the desired mix presentation.
103    * \return `absl::OkStatus()` upon success. Other specific statuses on
104    *         failure.
105    */
106   absl::Status ConfigureMixPresentationId(
107       MixPresentationId mix_presentation_id);
108 
109   /*!\brief Configures the decoder with the desired bit depth.
110    *
111    * Call this method to specify a specific output sample type.  If it is not
112    * called, the output samples will be a default value, retrievable by
113    * `GetOutputSampleType`.
114    */
115   void ConfigureOutputSampleType(OutputSampleType output_sample_type);
116 
117   /*!\brief Decodes the bitstream provided.
118    *
119    * Supports both descriptor OBUs, temporal units, and partial versions of
120    * both. User can provide as much data as they would like. To receive decoded
121    * temporal units, GetOutputTemporalUnit() should be called. If
122    * GetOutputTemporalUnit() has not been called, this function guarantees that
123    * any temporal units received thus far have not been lost. If descriptors are
124    * processed for the first time, function will exit before processing any
125    * temporal units. This provides the user a chance to configure the decoder as
126    * they see fit. See sample usages for more details.
127    *
128    * \param bitstream Bitstream to decode.
129    * \return `absl::OkStatus()` upon success. Other specific statuses on
130    *         failure.
131    */
132   absl::Status Decode(absl::Span<const uint8_t> bitstream);
133 
134   /*!\brief Outputs the next temporal unit of decoded audio.
135    *
136    * If no decoded data is available, output_decoded_temporal_unit will be
137    * empty. The user can continue calling until the output is empty, as there
138    * may be more than one temporal unit available. When this returns empty, the
139    * user should call Decode() again with more data.
140    *
141    * \param output_bytes Output buffer to receive bytes.  Must be large enough
142    *        to receive bytes.  Maximum necessary size can be determined by
143    *        GetFrameSize and GetOutputSampleType.
144    * \param bytes_written Number of bytes written to the output_bytes.
145    * \return `absl::OkStatus()` upon success. Other specific statuses on
146    *         failure.
147    */
148   absl::Status GetOutputTemporalUnit(absl::Span<uint8_t> output_bytes,
149                                      size_t& bytes_written);
150 
151   /*!\brief Returns true iff a decoded temporal unit is available.
152    *
153    * This function can be used to determine when the user should call
154    * GetOutputTemporalUnit().
155    *
156    * \return true iff a decoded temporal unit is available.
157    */
158   bool IsTemporalUnitAvailable() const;
159 
160   /*!\brief Returns true iff the descriptor OBUs have been parsed.
161    *
162    * This function can be used for determining when configuration setters that
163    * rely on Descriptor OBU parsing can be called.
164    *
165    * \return true iff the Descriptor OBUs have been parsed.
166    */
167   bool IsDescriptorProcessingComplete() const;
168 
169   /*!\brief Gets the layout that will be used to render the audio.
170    *
171    * The actual Layout used for rendering may not the same as requested when
172    * creating the IamfDecoder, if the requested Layout could not be used.
173    * This function allows verifying the actual Layout used after Descriptor OBU
174    * parsing is complete.
175    *
176    * This function can only be used after all Descriptor OBUs have been parsed,
177    * i.e. IsDescriptorProcessingComplete() returns true.
178    *
179    * \return OutputLayout or error statuses on failure.
180    */
181   absl::StatusOr<OutputLayout> GetOutputLayout() const;
182 
183   /*!\brief Gets the number of output channels.
184    *
185    * This function can only be used after all Descriptor OBUs have been parsed,
186    * i.e. IsDescriptorProcessingComplete() returns true.
187    *
188    * \return
189    */
190   absl::StatusOr<int> GetNumberOfOutputChannels() const;
191 
192   /*!\brief Provides mix presentation information from the descriptor OBUs.
193    *
194    * This function can be used to determine which mix presentation the user
195    * would like to configure the decoder with.
196    *
197    * This function can only be used after all Descriptor OBUs have been parsed,
198    * i.e. IsDescriptorProcessingComplete() returns true.
199    *
200    * \param output_mix_presentation_metadatas Output parameter for the mix
201    *        presentation metadata.
202    * \return `absl::OkStatus()` upon success. Other specific statuses on
203    *         failure.
204    */
205   absl::Status GetMixPresentations(std::vector<MixPresentationMetadata>&
206                                        output_mix_presentation_metadatas) const;
207 
208   /*!\brief Returns the current OutputSampleType.
209    *
210    * The value is either the value set by ConfigureOutputSampleType or a default
211    * which may vary based on content.
212    *
213    * This function can only be used after all Descriptor OBUs have been parsed,
214    * i.e. IsDescriptorProcessingComplete() returns true.
215    */
216   OutputSampleType GetOutputSampleType() const;
217 
218   /*!\brief Gets the sample rate.
219    *
220    * This function can only be used after all Descriptor OBUs have been parsed,
221    * i.e. IsDescriptorProcessingComplete() returns true.
222    *
223    * \return `absl::OkStatus()` upon success. Other specific statuses on
224    *         failure.
225    */
226   absl::StatusOr<uint32_t> GetSampleRate() const;
227 
228   /*!\brief Gets the number of samples per frame.
229    *
230    * This function can only be used after all Descriptor OBUs have been parsed,
231    * i.e. IsDescriptorProcessingComplete() returns true.
232    *
233    * Returns the number of samples per frame of the output audio. The total
234    * number of samples in a time tick is the number of channels times the number
235    * of samples per frame.
236    *
237    * \return Number of samples per frame upon success. Other specific statuses
238    *         on failure.
239    */
240   absl::StatusOr<uint32_t> GetFrameSize() const;
241 
242   /*!\brief Outputs the last temporal unit(s) of decoded audio.
243    *
244    * Signals to the decoder that no more data will be provided; therefore it
245    * should only be called once the user has finished providing data to
246    * Decode(). Temporal units are output one at a time, so this function should
247    * be called until output_is_done is true.
248    *
249    * \param output_decoded_temporal_unit Output parameter for the next temporal
250    *        unit of decoded audio.
251    * \param output_is_done Output parameter for whether there are more temporal
252    *        units to be output.
253    * \return `absl::OkStatus()` upon success. Other specific statuses on
254    *         failure.
255    */
256   absl::Status Flush(absl::Span<uint8_t> output_bytes, size_t& bytes_written,
257                      bool& output_is_done);
258 
259   /*!\brief Closes the decoder.
260    *
261    * This should be called once the user has finished providing data into
262    * Decode() and has called Flush() until output_is_done is true. Will close
263    * all underlying decoders.
264    *
265    * \return `absl::OkStatus()` upon success. Other specific statuses on
266    *         failure.
267    */
268   absl::Status Close();
269 
270  private:
271   // Forward declaration of the internal state of the decoder.
272   struct DecoderState;
273 
274   // Private constructor only used by Create functions.
275   IamfDecoder(std::unique_ptr<DecoderState> state);
276 
277   // Internal state of the decoder.
278   std::unique_ptr<DecoderState> state_;
279 };
280 }  // namespace api
281 }  // namespace iamf_tools
282 
283 #endif  // API_DECODER_IAMF_DECODER_H_
284