• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 3-Clause Clear License
5  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6  * License was not distributed with this source code in the LICENSE file, you
7  * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8  * Alliance for Open Media Patent License 1.0 was not distributed with this
9  * source code in the PATENTS file, you can obtain it at
10  * www.aomedia.org/license/patent.
11  */
12 
13 #ifndef CLI_OBU_PROCESSOR_H_
14 #define CLI_OBU_PROCESSOR_H_
15 
#include <cstdint>
#include <list>
#include <memory>
#include <optional>
#include <type_traits>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/types/span.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/audio_frame_decoder.h"
#include "iamf/cli/audio_frame_with_data.h"
#include "iamf/cli/demixing_module.h"
#include "iamf/cli/global_timing_module.h"
#include "iamf/cli/parameter_block_with_data.h"
#include "iamf/cli/parameters_manager.h"
#include "iamf/cli/rendering_mix_presentation_finalizer.h"
#include "iamf/common/read_bit_buffer.h"
#include "iamf/obu/codec_config.h"
#include "iamf/obu/ia_sequence_header.h"
#include "iamf/obu/mix_presentation.h"
#include "iamf/obu/param_definition_variant.h"
#include "iamf/obu/temporal_delimiter.h"
#include "iamf/obu/types.h"
41 
42 namespace iamf_tools {
43 
44 class ObuProcessor {
45  public:
46   /*!\brief Processes the Descriptor OBUs of an IA Sequence.
47    *
48    * If insufficient data to process all descriptor OBUs is provided, a failing
49    * status will be returned. `insufficient_data` will be set to true, the
50    * read_bit_buffer will not be consumed, and the output parameters will not be
51    * populated. A user should call this function again after providing more
52    * data within the read_bit_buffer.
53    *
54    * \param is_exhaustive_and_exact Whether the bitstream provided is meant to
55    *        include all descriptor OBUs and no other data. This should only be
56    *        set to true if the user knows the exact boundaries of their set of
57    *        descriptor OBUs.
58    * \param read_bit_buffer Buffer containing a portion of an iamf bitstream
59    *        containing a sequence of OBUs. The buffer will be consumed up to the
60    *        end of the descriptor OBUs if processing is successful.
61    * \param output_sequence_header IA sequence header processed from the
62    *        bitstream.
63    * \param output_codec_config_obus Map of Codec Config OBUs processed from the
64    *        bitstream.
65    * \param output_audio_elements_with_data Map of Audio Elements and metadata
66    *        processed from the bitstream.
67    * \param output_mix_presentation_obus List of Mix Presentation OBUs processed
68    *        from the bitstream.
69    * \param insufficient_data Whether the bitstream provided is insufficient to
70    *        process all descriptor OBUs.
71    * \return `absl::OkStatus()` if the process is successful. A specific status
72    *         on failure.
73    */
74   [[deprecated(
75       "Remove when all tests are ported. Use the non-static version instead.")]]
76   static absl::Status ProcessDescriptorObus(
77       bool is_exhaustive_and_exact, ReadBitBuffer& read_bit_buffer,
78       IASequenceHeaderObu& output_sequence_header,
79       absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
80           output_codec_config_obus,
81       absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
82           output_audio_elements_with_data,
83       std::list<MixPresentationObu>& output_mix_presentation_obus,
84       bool& insufficient_data);
85 
86   // TODO(b/330732117): Remove this function and use the non-static version.
87   /*!\brief Processes one Temporal Unit OBU of an IA Sequence.
88    *
89    * This function should only be called after successfully calling
90    * ProcessDescriptorObus. Output audio frames and parameter blocks are
91    * ordered by timestamps first and then by IDs.
92    *
93    * \param audio_elements_with_data Map containing the audio elements that
94    *        were present in the descriptor OBUs, keyed by audio element ID.
95    * \param codec_config_obus Map containing the codec configs that were
96    *        present in the descriptor OBUs, keyed by codec config ID.
97    * \param substream_id_to_audio_element Mapping from substream IDs to the
98    *        audio elements that they belong to.
99    * \param param_definition_variants Map containing the param definitions that
100    *        were present in the descriptor OBUs, keyed by parameter ID.
101    * \param parameters_manager Manager of parameters.
102    * \param read_bit_buffer Buffer reader that reads the IAMF bitstream.
103    * \param global_timing_module Module to keep track of the timing of audio
104    *        frames and parameters.
105    * \param output_audio_frame_with_data Output Audio Frame with the requisite
106    *        data.
107    * \param output_parameter_block_with_data Output parameter Block with the
108    *        requisite data.
109    * \param output_temporal_delimiter Output temporal deilimiter OBU.
110    * \param continue_processing Whether the processing should be continued.
111    * \return `absl::OkStatus()` if the process is successful. A specific status
112    *         on failure.
113    */
114   static absl::Status ProcessTemporalUnitObu(
115       const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
116           audio_elements_with_data,
117       const absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
118           codec_config_obus,
119       const absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*>&
120           substream_id_to_audio_element,
121       const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>&
122           param_definition_variants,
123       ParametersManager& parameters_manager, ReadBitBuffer& read_bit_buffer,
124       GlobalTimingModule& global_timing_module,
125       std::optional<AudioFrameWithData>& output_audio_frame_with_data,
126       std::optional<ParameterBlockWithData>& output_parameter_block_with_data,
127       std::optional<TemporalDelimiterObu>& output_temporal_delimiter,
128       bool& continue_processing);
129 
130   /*!\brief Creates the OBU processor.
131    *
132    * Creation succeeds only if the descriptor OBUs are successfully processed.
133    *
134    * \param is_exhaustive_and_exact Whether the bitstream provided is meant to
135    *        include all descriptor OBUs and no other data. This should only be
136    *        set to true if the user knows the exact boundaries of their set of
137    *        descriptor OBUs.
138    * \param read_bit_buffer Pointer to the read bit buffer that reads the IAMF
139    *        bitstream.
140    * \param output_insufficient_data True iff the bitstream provided is
141    *        insufficient to process all descriptor OBUs and there is no other
142    *        error.
143    * \return std::unique_ptr<ObuProcessor> on success. `nullptr` on failure.
144    */
145   static std::unique_ptr<ObuProcessor> Create(bool is_exhaustive_and_exact,
146                                               ReadBitBuffer* read_bit_buffer,
147                                               bool& output_insufficient_data);
148 
149   /*!\brief Move constructor. */
150   ObuProcessor(ObuProcessor&& obu_processor) = delete;
151 
152   /*!\brief Creates the OBU processor for rendering.
153    *
154    * Creation succeeds only if the descriptor OBUs are successfully processed
155    * and all rendering modules are successfully initialized.
156    *
157    * \param desired_layout Specifies the desired layout that will be used to
158    *        render the audio, if available in the mix presentations. If not
159    *        available, the first layout in the first mix presentation will be
160    *        used.
161    * \param sample_processor_factory Factory to create post processors.
162    * \param is_exhaustive_and_exact Whether the bitstream provided is meant to
163    *        include all descriptor OBUs and no other data. This should only be
164    *        set to true if the user knows the exact boundaries of their set of
165    *        descriptor OBUs.
166    * \param read_bit_buffer Pointer to the read bit buffer that reads the IAMF
167    *        bitstream.
168    * \param output_layout The layout that will be used to render the audio. This
169    *        is the same as `desired_layout` if it is available in the mix
170    *        presentations, otherwise a default layout is used.
171    * \param output_insufficient_data True iff the bitstream provided is
172    *        insufficient to process all descriptor OBUs and there is no other
173    *        error.
174    * \return Pointer to an ObuProcessor on success. `nullptr` on failure.
175    */
176   static std::unique_ptr<ObuProcessor> CreateForRendering(
177       const Layout& desired_layout,
178       const RenderingMixPresentationFinalizer::SampleProcessorFactory&
179           sample_processor_factory,
180       bool is_exhaustive_and_exact, ReadBitBuffer* read_bit_buffer,
181       Layout& output_layout, bool& output_insufficient_data);
182 
183   /*!\brief Gets the sample rate of the output audio.
184    *
185    * \return Sample rate of the output audio, or a specific error code on
186    *         failure.
187    */
188   absl::StatusOr<uint32_t> GetOutputSampleRate() const;
189 
190   /*!\brief Gets the frame size of the output audio.
191    *
192    * Useful to determine the maximum number of samples per
193    * `RenderTemporalUnitAndMeasureLoudness` call.
194    *
195    * \return Number of samples in per frame of the output audio, or a specific
196    *         specific error code on failure.
197    */
198   absl::StatusOr<uint32_t> GetOutputFrameSize() const;
199 
200   // TODO(b/381072155): Consider removing this one in favor of
201   //                    `ProcessTemporalUnit()`, which outputs all OBUs
202   //                    belonging the whole temporal unit.
203   /*!\brief Processes one Temporal Unit OBU from the stored IA Sequence.
204    *
205    * `Initialize()` must be called first to ready the input bitstream.
206    *
207    * \param output_audio_frame_with_data Output Audio Frame with the requisite
208    *        data.
209    * \param output_parameter_block_with_data Output Parameter Block with the
210    *        requisite data.
211    * \param output_temporal_delimiter Output temporal deilimiter OBU.
212    * \param continue_processing Whether the processing should be continued.
213    * \return `absl::OkStatus()` if the process is successful. A specific status
214    *         on failure.
215    */
216   absl::Status ProcessTemporalUnitObu(
217       std::optional<AudioFrameWithData>& output_audio_frame_with_data,
218       std::optional<ParameterBlockWithData>& output_parameter_block_with_data,
219       std::optional<TemporalDelimiterObu>& output_temporal_delimiter,
220       bool& continue_processing);
221 
222   struct OutputTemporalUnit {
223     std::list<AudioFrameWithData> output_audio_frames;
224     std::list<ParameterBlockWithData> output_parameter_blocks;
225     InternalTimestamp output_timestamp;
226   };
227 
228   // TODO(b/379819959): Also handle Temporal Delimiter OBUs.
229   /*!\brief Processes all OBUs from a Temporal Unit from the stored IA Sequence.
230    *
231    * \param eos_is_end_of_sequence Whether reaching the end of the stream
232    *        should be considered as the end of the sequence, and therefore the
233    *        end of the temporal unit.
234    * \param output_temporal_unit Contains the data from the temporal unit that
235    *        is processed.
236    * \param continue_processing Whether the processing should be continued.
237    * \return `absl::OkStatus()` if the process is successful. A specific status
238    *         on failure.
239    */
240   absl::Status ProcessTemporalUnit(
241       bool eos_is_end_of_sequence,
242       std::optional<OutputTemporalUnit>& output_temporal_unit,
243       bool& continue_processing);
244 
245   /*!\brief Renders a temporal unit and measures loudness.
246    *
247    * `InitializeForRendering()` must be called before calling this.
248    *
249    * \param timestamp Timestamp of this temporal unit. Used to verify that
250    *        the input OBUs actually belong to the same temporal unit.
251    * \param audio_frames_with_data Audio Frames with the requisite data.
252    * \param parameter_blocks_with_data Parameter Blocks with the requisite data.
253    * \param output_rendered_pcm_samples Output rendered PCM samples. These
254    *        should be used immediately after this function is called; they will
255    *        be invalidated after the next call to
256    *        `RenderTemporalUnitAndMeasureLoudness()`, as well as after the
257    *        `ObuProcessor` is destroyed.
258    * \return `absl::OkStatus()` if the process is successful. A specific status
259    *         on failure.
260    */
261   absl::Status RenderTemporalUnitAndMeasureLoudness(
262       InternalTimestamp timestamp,
263       const std::list<AudioFrameWithData>& audio_frames,
264       const std::list<ParameterBlockWithData>& parameter_blocks,
265       absl::Span<const std::vector<int32_t>>& output_rendered_pcm_samples);
266 
267   IASequenceHeaderObu ia_sequence_header_;
268   absl::flat_hash_map<DecodedUleb128, CodecConfigObu> codec_config_obus_ = {};
269   absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements_ =
270       {};
271   std::list<MixPresentationObu> mix_presentations_ = {};
272 
273  private:
274   /*!\brief Private constructor used only by Create() and CreateForRendering().
275    *
276    * \param read_bit_buffer Pointer to the read bit buffer that reads the IAMF
277    *        bitstream.
278    * \return ObuProcessor instance.
279    */
ObuProcessor(ReadBitBuffer * buffer)280   explicit ObuProcessor(ReadBitBuffer* buffer) : read_bit_buffer_(buffer) {}
281 
282   /*!\brief Performs internal initialization of the OBU processor.
283    *
284    * Only used by Create() and CreateForRendering().
285    *
286    * \param is_exhaustive_and_exact Whether the bitstream provided is meant to
287    *        include all descriptor OBUs and no other data. This should only be
288    *        set to true if the user knows the exact boundaries of their set of
289    *        descriptor OBUs.
290     \param output_insufficient_data True iff the bitstream provided is
291    *        insufficient to process all descriptor OBUs and there is no other
292    *        error.
293    * \return `absl::OkStatus()` if initialization is successful. A specific
294    *        status on failure.
295    */
296   absl::Status InitializeInternal(bool is_exhaustive_and_exact,
297                                   bool& output_insufficient_data);
298 
299   /*!\brief Initializes the OBU processor for rendering.
300    *
301    * Must be called after `Initialize()` is called.
302    *
303    * \param desired_layout Specifies the layout that will be used to render the
304    *        audio, if available.
305    * \param sample_processor_factory Factory to create post processors.
306    * \param output_layout The layout that will be used to render the audio. This
307    *        is the same as `desired_layout` if it is available, otherwise a
308    *        default layout is used.
309    * \return `absl::OkStatus()` if the process is successful. A specific status
310    *         on failure.
311    */
312   absl::Status InitializeForRendering(
313       const Layout& desired_layout,
314       const RenderingMixPresentationFinalizer::SampleProcessorFactory&
315           sample_processor_factory,
316       Layout& output_layout);
317 
318   struct DecodingLayoutInfo {
319     DecodedUleb128 mix_presentation_id;
320     int sub_mix_index;
321     int layout_index;
322   };
323 
324   struct TemporalUnitData {
325     std::list<ParameterBlockWithData> parameter_blocks;
326     std::list<AudioFrameWithData> audio_frames;
327 
328     std::optional<TemporalDelimiterObu> temporal_delimiter;
329     std::optional<int32_t> timestamp;
330 
EmptyTemporalUnitData331     bool Empty() const {
332       return parameter_blocks.empty() && audio_frames.empty();
333     }
334 
ClearTemporalUnitData335     void Clear() {
336       audio_frames.clear();
337       parameter_blocks.clear();
338       temporal_delimiter.reset();
339       timestamp.reset();
340     }
341 
342     template <class T>
AddDataToCorrectTemporalUnitTemporalUnitData343     static void AddDataToCorrectTemporalUnit(
344         TemporalUnitData& current_temporal_unit,
345         TemporalUnitData& next_temporal_unit, T&& obu_with_data) {
346       const auto new_timestamp = obu_with_data.start_timestamp;
347       if (!current_temporal_unit.timestamp.has_value()) {
348         current_temporal_unit.timestamp = new_timestamp;
349       }
350       if (*current_temporal_unit.timestamp == new_timestamp) {
351         current_temporal_unit.GetList<T>().push_back(
352             std::forward<T>(obu_with_data));
353       } else {
354         next_temporal_unit.GetList<T>().push_back(
355             std::forward<T>(obu_with_data));
356         next_temporal_unit.timestamp = new_timestamp;
357       }
358     }
359 
360    private:
361     template <class T>
GetListTemporalUnitData362     std::list<T>& GetList() {
363       if constexpr (std::is_same_v<T, ParameterBlockWithData>) {
364         return parameter_blocks;
365       } else if constexpr (std::is_same_v<T, AudioFrameWithData>) {
366         return audio_frames;
367       }
368     };
369   };
370 
371   std::optional<uint32_t> output_sample_rate_;
372   std::optional<uint32_t> output_frame_size_;
373 
374   absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>
375       param_definition_variants_;
376   absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*>
377       substream_id_to_audio_element_;
378   std::unique_ptr<GlobalTimingModule> global_timing_module_;
379   std::optional<ParametersManager> parameters_manager_;
380   ReadBitBuffer* read_bit_buffer_;
381 
382   // Contains target layout information for rendering.
383   DecodingLayoutInfo decoding_layout_info_;
384 
385   // Cached data when processing temporal units.
386   TemporalUnitData current_temporal_unit_;
387   TemporalUnitData next_temporal_unit_;
388   std::list<DecodedAudioFrame> decoded_frames_for_temporal_unit_;
389 
390   // Modules used for rendering.
391   std::optional<AudioFrameDecoder> audio_frame_decoder_;
392   std::optional<DemixingModule> demixing_module_;
393   std::optional<RenderingMixPresentationFinalizer> mix_presentation_finalizer_;
394 };
395 }  // namespace iamf_tools
396 #endif  // CLI_OBU_PROCESSOR_H_
397