• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 3-Clause Clear License
5  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6  * License was not distributed with this source code in the LICENSE file, you
7  * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8  * Alliance for Open Media Patent License 1.0 was not distributed with this
9  * source code in the PATENTS file, you can obtain it at
10  * www.aomedia.org/license/patent.
11  */
12 
13 #ifndef CLI_RENDERING_MIX_PRESENTATION_FINALIZER_H_
14 #define CLI_RENDERING_MIX_PRESENTATION_FINALIZER_H_
15 
16 #include <cstddef>
17 #include <cstdint>
18 #include <list>
19 #include <memory>
20 #include <utility>
21 #include <vector>
22 
23 #include "absl/base/nullability.h"
24 #include "absl/container/flat_hash_map.h"
25 #include "absl/functional/any_invocable.h"
26 #include "absl/status/status.h"
27 #include "absl/status/statusor.h"
28 #include "absl/types/span.h"
29 #include "iamf/cli/audio_element_with_data.h"
30 #include "iamf/cli/demixing_module.h"
31 #include "iamf/cli/loudness_calculator_base.h"
32 #include "iamf/cli/loudness_calculator_factory_base.h"
33 #include "iamf/cli/parameter_block_with_data.h"
34 #include "iamf/cli/renderer/audio_element_renderer_base.h"
35 #include "iamf/cli/renderer_factory.h"
36 #include "iamf/cli/sample_processor_base.h"
37 #include "iamf/obu/audio_element.h"
38 #include "iamf/obu/codec_config.h"
39 #include "iamf/obu/mix_presentation.h"
40 #include "iamf/obu/param_definitions.h"
41 #include "iamf/obu/types.h"
42 
43 namespace iamf_tools {
44 
45 /*!\brief A class that renders and finalizes IAMF mixes.
46  *
47  * The use pattern of this class is:
48  *   // Call the factory function and handle any errors.
49  *   auto finalizer = RenderingMixPresentationFinalizer::Create(...);
50  *   if(!finalizer.ok()) {
51  *     // Handle error.
52  *   }
53  *
54  *   while (source has temporal units) {
55  *     // Push the next temporal unit.
56  *     RETURN_IF_NOT_OK(finalizer->PushTemporalUnit(...));
57  *     // Get the post-processed samples for each relevant layout. Relevant
58  *     // layouts depend on use-case.
59  *     RETURN_IF_NOT_OK(finalizer->GetPostProcessedSamplesAsSpan(...));
60  *   }
61  *   RETURN_IF_NOT_OK(finalizer->FinalizePushingTemporalUnits());
62  *   // Get the post-processed samples for each relevant layout. Relevant
63  *   // layouts depend on use-case.
64  *   RETURN_IF_NOT_OK(finalizer->GetPostProcessedSamplesAsSpan(...));
65  *   // Get the final OBUs, with measured loudness information.
66  *   absl::StatusOr<...> mix_presentation_obus =
67  *     finalizer->GetFinalizedMixPresentationObus(...);
68  *   // Handle any errors, or use the output mix presentation OBUs.
69  */
70 class RenderingMixPresentationFinalizer {
71  public:
72   // -- Rendering Metadata struct definitions --
73 
74   // Common metadata for rendering an audio element and independent of
75   // each frame.
76   struct AudioElementRenderingMetadata {
77     std::unique_ptr<AudioElementRendererBase> renderer;
78 
79     // Pointers to the audio element and the associated codec config. They
80     // contain useful information for rendering.
81     const AudioElementObu* audio_element;  // NOTE(review): presumably non-owning; confirm lifetime.
82     const CodecConfigObu* codec_config;  // NOTE(review): presumably non-owning; confirm lifetime.
83   };
84 
85   // Contains rendering metadata for all audio elements in a given layout.
86   struct LayoutRenderingMetadata {
87     bool can_render;  // No in-class initializer.
88     // Controlled by the `SampleProcessorFactory`; may be `nullptr` if the user
89     // does not want post-processing for this layout.
90     std::unique_ptr<SampleProcessorBase> sample_processor;
91     // Controlled by the `LoudnessCalculatorFactory`; may be `nullptr` if the
92     // user does not want loudness calculated for this layout.
93     std::unique_ptr<LoudnessCalculatorBase> loudness_calculator;
94     std::vector<AudioElementRenderingMetadata> audio_element_rendering_metadata;
95     // The number of channels in this layout. No in-class initializer.
96     int32_t num_channels;
97     // The start time stamp of the current frames to be rendered within this
98     // layout. No in-class initializer.
99     InternalTimestamp start_timestamp;
100 
101     // Reusable buffer for storing rendered samples.
102     std::vector<std::vector<int32_t>> rendered_samples;
103     // A non-owning view into the valid portion of `rendered_samples`.
104     absl::Span<const std::vector<int32_t>> valid_rendered_samples;
105   };
106 
107   // We need to store rendering metadata for each submix, layout, and audio
108   // element. This metadata will then be used to render the audio frames at each
109   // timestamp. Some metadata is common to all audio elements and all layouts
110   // within a submix. We also want to optionally support writing to a wav file
111   // and/or calculating loudness based on the rendered output.
112   struct SubmixRenderingMetadata {
113     uint32_t common_sample_rate;  // No in-class initializer.
114     std::vector<SubMixAudioElement> audio_elements_in_sub_mix;
115     // Mix gain applied to the entire submix.
116     std::unique_ptr<MixGainParamDefinition> mix_gain;
117     // This vector will contain one LayoutRenderingMetadata per layout in the
118     // submix.
119     std::vector<LayoutRenderingMetadata> layout_rendering_metadata;
120   };
121 
122   /*!\brief Factory for a sample processor.
123    *
124    * Used to create a sample processor for use in post-processing the rendering.
125    *
126    * For example, if the user only wants a particular layout (e.g. stereo), or a
127    * particular mix presentation to be saved to a wav file, then a factory could
128    * select relevant layouts and mix presentations to create a `WavWriter` for.
129    *
130    * \param mix_presentation_id Mix presentation ID.
131    * \param sub_mix_index Index of the sub mix within the mix presentation.
132    * \param layout_index Index of the layout within the sub mix.
133    * \param layout Associated layout.
135    * \param num_channels Number of channels.
136    * \param sample_rate Sample rate of the input audio.
137    * \param bit_depth Bit depth of the input audio.
138    * \param num_samples_per_frame Number of samples per frame.
139    * \return Unique pointer to a sample processor or `nullptr` if none is
140    *         desired.
141    */
142   typedef absl::AnyInvocable<std::unique_ptr<SampleProcessorBase>(
143       DecodedUleb128 mix_presentation_id, int sub_mix_index, int layout_index,
144       const Layout& layout, int num_channels, int sample_rate, int bit_depth,
145       size_t num_samples_per_frame) const>
146       SampleProcessorFactory;
147 
148   /*!\brief Factory that never returns a sample processor.
149    *
150    * For convenience to use with `Create`. Its parameter list mirrors
151    * `SampleProcessorFactory`; every argument is ignored. \return `nullptr`. */
ProduceNoSampleProcessors(DecodedUleb128,int,int,const Layout &,int,int,int,size_t)152   static std::unique_ptr<SampleProcessorBase> ProduceNoSampleProcessors(
153       DecodedUleb128 /*mix_presentation_id*/, int /*sub_mix_index*/,
154       int /*layout_index*/, const Layout& /*layout*/, int /*num_channels*/,
155       int /*sample_rate*/, int /*bit_depth*/,
156       size_t /*num_samples_per_frame*/) {
157     return nullptr;
158   }
159 
160   /*!\brief Creates a rendering mix presentation finalizer.
161    *
162    * Rendering metadata is extracted from the mix presentation OBUs, which will
163    * be used to render the mix presentations in PushTemporalUnit.
164    *
165    * \param renderer_factory Factory to create renderers, or `nullptr` to
166    *        disable rendering.
167    * \param loudness_calculator_factory Factory to create loudness calculators
168    *        or `nullptr` to disable loudness calculation.
169    * \param audio_elements Audio elements with data.
170    * \param sample_processor_factory Factory to create sample processors for use
171    *        after rendering.
172    * \param mix_presentation_obus OBUs to render and measure the loudness of.
173    * \return The created finalizer on success. A specific status on failure.
174    */
175   static absl::StatusOr<RenderingMixPresentationFinalizer> Create(
176       absl::Nullable<const RendererFactoryBase*> renderer_factory,
177       absl::Nullable<const LoudnessCalculatorFactoryBase*>
178           loudness_calculator_factory,
179       const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
180       const SampleProcessorFactory& sample_processor_factory,
181       const std::list<MixPresentationObu>& mix_presentation_obus);
182 
183   /*!\brief Move constructor. */
184   RenderingMixPresentationFinalizer(RenderingMixPresentationFinalizer&&) =
185       default;
186   /*!\brief Destructor. */
187   ~RenderingMixPresentationFinalizer() = default;
188 
189   /*!\brief Renders and writes a single temporal unit.
190    *
191    * Renders a single temporal unit for all mix presentations. It also
192    * accumulates the loudness of the rendered samples which will be finalized
193    * once FinalizePushingTemporalUnits() is called. This function must not be
194    * called after FinalizePushingTemporalUnits() has been called.
195    *
196    * \param id_to_labeled_frame Data structure of samples for a given timestamp,
197    *        keyed by audio element ID and channel label.
198    * \param start_timestamp Start timestamp of this temporal unit.
199    * \param end_timestamp End timestamp of this temporal unit.
200    * \param parameter_blocks Parameter Block OBUs associated with this temporal
201    *        unit.
204    * \return `absl::OkStatus()` on success. A specific status on failure.
205    */
206   absl::Status PushTemporalUnit(
207       const IdLabeledFrameMap& id_to_labeled_frame,
208       InternalTimestamp start_timestamp, InternalTimestamp end_timestamp,
209       const std::list<ParameterBlockWithData>& parameter_blocks);
210 
211   /*!\brief Retrieves cached post-processed samples.
212    *
213    * Retrieves the post-processed samples for a given mix presentation, submix,
214    * and layout. Or the rendered samples if no post-processor is available. New
215    * data is available after each call to `PushTemporalUnit` or
216    * `FinalizePushingTemporalUnits`. The output span is invalidated by any
217    * further calls to `PushTemporalUnit` or `FinalizePushingTemporalUnits` and
218    * typically should be consumed or copied immediately.
219    *
220    * Simple use pattern:
221    *   - Call based on the same layout each time. E.g. to always render the
222    *     same stereo layout.
223    *
224    * More complex use pattern:
225    *   - Call multiple times based on a small set of layouts. (E.g. to back a
226    *     buffer to support seamless transitions when a GUI element is clicked to
227    *     toggle between mixes, language, or loudspeaker layout).
228    *   - Call for each layout, to cache and save all possible rendered layouts
229    *     to a file.
230    *
231    * \param mix_presentation_id Mix presentation ID.
232    * \param sub_mix_index Index of the sub mix to retrieve.
233    * \param layout_index Index of the layout to retrieve.
234    * \return Post-processed samples, or rendered samples if no post-processor is
235    *         available. A specific status on failure.
236    */
237   absl::StatusOr<absl::Span<const std::vector<int32_t>>>
238   GetPostProcessedSamplesAsSpan(DecodedUleb128 mix_presentation_id,
239                                 size_t sub_mix_index,
240                                 size_t layout_index) const;
241 
242   /*!\brief Signals that `PushTemporalUnit` will no longer be called.
243    *
244    * \return `absl::OkStatus()` on success. `absl::FailedPreconditionError` if
245    *         this function has already been called.
246    */
247   absl::Status FinalizePushingTemporalUnits();
248 
249   /*!\brief Retrieves the finalized mix presentation OBUs.
250    *
251    * Will return mix presentation OBUs with updated loudness information. Should
252    * only be called after `FinalizePushingTemporalUnits` has been called.
253    *
254    * \param validate_loudness If true, validate the computed loudness matches
255    *        the original user-provided loudness.
256    * \return List of finalized OBUs with calculated loudness information. A
257    *         specific status on failure.
258    */
259   absl::StatusOr<std::list<MixPresentationObu>> GetFinalizedMixPresentationObus(
260       bool validate_loudness);
261 
262  private:
263   enum State {  // `state_` starts at `kAcceptingTemporalUnits`; transitions are defined out of line.
264     kAcceptingTemporalUnits,
265     kFinalizePushTemporalUnitCalled,
266     kFlushedFinalizedMixPresentationObus
267   };
268 
269   /*!\brief Private constructor.
270    *
271    * Used only by the factory method. Takes ownership of both arguments by
272    * moving them into the corresponding members.
273    * \param mix_presentation_id_to_sub_mix_rendering_metadata Mix presentation
274    *        ID to rendering metadata for each sub mix.
275    * \param mix_presentation_obus Mix presentation OBUs to render and measure
276    *        the loudness of.
277    */
RenderingMixPresentationFinalizer(absl::flat_hash_map<DecodedUleb128,std::vector<SubmixRenderingMetadata>> && mix_presentation_id_to_sub_mix_rendering_metadata,std::list<MixPresentationObu> && mix_presentation_obus)278   RenderingMixPresentationFinalizer(
279       absl::flat_hash_map<DecodedUleb128,
280                           std::vector<SubmixRenderingMetadata>>&&
281           mix_presentation_id_to_sub_mix_rendering_metadata,
282       std::list<MixPresentationObu>&& mix_presentation_obus)
283       : mix_presentation_id_to_sub_mix_rendering_metadata_(
284             std::move(mix_presentation_id_to_sub_mix_rendering_metadata)),
285         mix_presentation_obus_(std::move(mix_presentation_obus)) {}
286 
287   State state_ = kAcceptingTemporalUnits;
288 
289   // Mapping from Mix Presentation ID to rendering metadata. Slots are absent
290   // for Mix Presentations that have no layouts which can be rendered.
291   absl::flat_hash_map<DecodedUleb128, std::vector<SubmixRenderingMetadata>>
292       mix_presentation_id_to_sub_mix_rendering_metadata_;
293 
294   // Mix Presentation OBUs to render and measure the loudness of.
295   std::list<MixPresentationObu> mix_presentation_obus_;
296 };
297 
298 }  // namespace iamf_tools
299 
300 #endif  // CLI_RENDERING_MIX_PRESENTATION_FINALIZER_H_
301