1 /* 2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 3-Clause Clear License 5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear 6 * License was not distributed with this source code in the LICENSE file, you 7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the 8 * Alliance for Open Media Patent License 1.0 was not distributed with this 9 * source code in the PATENTS file, you can obtain it at 10 * www.aomedia.org/license/patent. 11 */ 12 13 #ifndef CLI_RENDERING_MIX_PRESENTATION_FINALIZER_H_ 14 #define CLI_RENDERING_MIX_PRESENTATION_FINALIZER_H_ 15 16 #include <cstddef> 17 #include <cstdint> 18 #include <list> 19 #include <memory> 20 #include <utility> 21 #include <vector> 22 23 #include "absl/base/nullability.h" 24 #include "absl/container/flat_hash_map.h" 25 #include "absl/functional/any_invocable.h" 26 #include "absl/status/status.h" 27 #include "absl/status/statusor.h" 28 #include "absl/types/span.h" 29 #include "iamf/cli/audio_element_with_data.h" 30 #include "iamf/cli/demixing_module.h" 31 #include "iamf/cli/loudness_calculator_base.h" 32 #include "iamf/cli/loudness_calculator_factory_base.h" 33 #include "iamf/cli/parameter_block_with_data.h" 34 #include "iamf/cli/renderer/audio_element_renderer_base.h" 35 #include "iamf/cli/renderer_factory.h" 36 #include "iamf/cli/sample_processor_base.h" 37 #include "iamf/obu/audio_element.h" 38 #include "iamf/obu/codec_config.h" 39 #include "iamf/obu/mix_presentation.h" 40 #include "iamf/obu/param_definitions.h" 41 #include "iamf/obu/types.h" 42 43 namespace iamf_tools { 44 45 /*!\brief A class that renders and finalizes IAMF mixes. 46 * 47 * The use pattern of this class is: 48 * // Call the factory function and handle any errors. 49 * auto finalizer = RenderingMixPresentationFinalizer::Create(...); 50 * if(!finalizer.ok()) { 51 * // Handle error. 52 * } 53 * 54 * while (source has temporal units) { 55 * // Push the next temporal unit. 56 * RETURN_IF_NOT_OK(finalizer->PushTemporalUnit(...)); 57 * // Get the post-processed samples for each relevant layout. Relevant 58 * // layouts depend on use-case. 59 * RETURN_IF_NOT_OK(finalizer->GetPostProcessedSamplesAsSpan(...)); 60 * } 61 * RETURN_IF_NOT_OK(finalizer->FinalizePushingTemporalUnits()); 62 * // Get the post-processed samples for each relevant layout. Relevant 63 * // layouts depend on use-case. 64 * RETURN_IF_NOT_OK(finalizer->GetPostProcessedSamplesAsSpan(...)); 65 * // Get the final OBUs, with measured loudness information. 66 * absl::StatusOr<...> mix_presentation_obus = 67 * finalizer->GetFinalizedMixPresentationOBUs(); 68 * // Handle any errors, or use the output mix presentation OBUs. 69 */ 70 class RenderingMixPresentationFinalizer { 71 public: 72 // -- Rendering Metadata struct definitions -- 73 74 // Common metadata for rendering an audio element and independent of 75 // each frame. 76 struct AudioElementRenderingMetadata { 77 std::unique_ptr<AudioElementRendererBase> renderer; 78 79 // Pointers to the audio element and the associated codec config. They 80 // contain useful information for rendering. 81 const AudioElementObu* audio_element; 82 const CodecConfigObu* codec_config; 83 }; 84 85 // Contains rendering metadata for all audio elements in a given layout. 86 struct LayoutRenderingMetadata { 87 bool can_render; 88 // Controlled by the `SampleProcessorFactory`; may be `nullptr` if the user 89 // does not want post-processing this layout. 90 std::unique_ptr<SampleProcessorBase> sample_processor; 91 // Controlled by the `LoudnessCalculatorFactory`; may be `nullptr` if the 92 // user does not want loudness calculated for this layout. 93 std::unique_ptr<LoudnessCalculatorBase> loudness_calculator; 94 std::vector<AudioElementRenderingMetadata> audio_element_rendering_metadata; 95 // The number of channels in this layout. 96 int32_t num_channels; 97 // The start time stamp of the current frames to be rendered within this 98 // layout. 99 InternalTimestamp start_timestamp; 100 101 // Reusable buffer for storing rendered samples. 102 std::vector<std::vector<int32_t>> rendered_samples; 103 // A view into the valid portion of `rendered_samples`. 104 absl::Span<const std::vector<int32_t>> valid_rendered_samples; 105 }; 106 107 // We need to store rendering metadata for each submix, layout, and audio 108 // element. This metadata will then be used to render the audio frames at each 109 // timestamp. Some metadata is common to all audio elements and all layouts 110 // within a submix. We also want to optionally support writing to a wav file 111 // and/or calculating loudness based on the rendered output. 112 struct SubmixRenderingMetadata { 113 uint32_t common_sample_rate; 114 std::vector<SubMixAudioElement> audio_elements_in_sub_mix; 115 // Mix gain applied to the entire submix. 116 std::unique_ptr<MixGainParamDefinition> mix_gain; 117 // This vector will contain one LayoutRenderingMetadata per layout in the 118 // submix. 119 std::vector<LayoutRenderingMetadata> layout_rendering_metadata; 120 }; 121 122 /*!\brief Factory for a sample processor. 123 * 124 * Used to create a sample processor for use in post-processing the rendering. 125 * 126 * For example, if the user only wants a particular layout (e.g. stereo), or a 127 * particular mix presentation to be saved to a wav file, then a factory could 128 * select relevant layouts and mix presentations to create a `WavWriter` for. 129 * 130 * \param mix_presentation_id Mix presentation ID. 131 * \param sub_mix_index Index of the sub mix within the mix presentation. 132 * \param layout_index Index of the layout within the sub mix. 133 * \param layout Associated layout. 134 * \param prefix Prefix for the output file. 135 * \param num_channels Number of channels. 136 * \param sample_rate Sample rate of the input audio. 137 * \param bit_depth Bit depth of the input audio. 138 * \param num_samples_per_frame Number of samples per frame. 139 * \return Unique pointer to a sample processor or `nullptr` if none is 140 * desired. 141 */ 142 typedef absl::AnyInvocable<std::unique_ptr<SampleProcessorBase>( 143 DecodedUleb128 mix_presentation_id, int sub_mix_index, int layout_index, 144 const Layout& layout, int num_channels, int sample_rate, int bit_depth, 145 size_t num_samples_per_frame) const> 146 SampleProcessorFactory; 147 148 /*!\brief Factory that never returns a sample processor. 149 * 150 * For convenience to use with `Create`. 151 */ ProduceNoSampleProcessors(DecodedUleb128,int,int,const Layout &,int,int,int,size_t)152 static std::unique_ptr<SampleProcessorBase> ProduceNoSampleProcessors( 153 DecodedUleb128 /*mix_presentation_id*/, int /*sub_mix_index*/, 154 int /*layout_index*/, const Layout& /*layout*/, int /*num_channels*/, 155 int /*sample_rate*/, int /*bit_depth*/, 156 size_t /*num_samples_per_frame*/) { 157 return nullptr; 158 } 159 160 /*!\brief Creates a rendering mix presentation finalizer. 161 * 162 * Rendering metadata is extracted from the mix presentation OBUs, which will 163 * be used to render the mix presentations in PushTemporalUnit. 164 * 165 * \param renderer_factory Factory to create renderers, or `nullptr` to 166 * disable rendering. 167 * \param loudness_calculator_factory Factory to create loudness calculators 168 * or `nullptr` to disable loudness calculation. 169 * \param audio_elements Audio elements with data. 170 * \param sample_processor_factory Factory to create sample processors for use 171 * after rendering. 172 * \param mix_presentation_obus OBUs to render and measure the loudness of. 173 * \return `absl::OkStatus()` on success. A specific status on failure. 174 */ 175 static absl::StatusOr<RenderingMixPresentationFinalizer> Create( 176 absl::Nullable<const RendererFactoryBase*> renderer_factory, 177 absl::Nullable<const LoudnessCalculatorFactoryBase*> 178 loudness_calculator_factory, 179 const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements, 180 const SampleProcessorFactory& sample_processor_factory, 181 const std::list<MixPresentationObu>& mix_presentation_obus); 182 183 /*!\brief Move constructor. */ 184 RenderingMixPresentationFinalizer(RenderingMixPresentationFinalizer&&) = 185 default; 186 /*!\brief Destructor. */ 187 ~RenderingMixPresentationFinalizer() = default; 188 189 /*!\brief Renders and writes a single temporal unit. 190 * 191 * Renders a single temporal unit for all mix presentations. It also 192 * accumulates the loudness of the rendered samples which will be finalized 193 * once FinalizePushingTemporalUnits() is called. This function must not be 194 * called after FinalizePushingTemporalUnits() has been called. 195 * 196 * \param id_to_labeled_frame Data structure of samples for a given timestamp, 197 * keyed by audio element ID and channel label. 198 * \param start_timestamp Start timestamp of this temporal unit. 199 * \param end_timestamp End timestamp of this temporal unit. 200 * \param parameter_blocks Parameter Block OBUs associated with this temporal 201 * unit. 202 * \param mix_presentation_obus Output list of OBUs to finalize with initial 203 * user-provided loudness information. 204 * \return `absl::OkStatus()` on success. A specific status on failure. 205 */ 206 absl::Status PushTemporalUnit( 207 const IdLabeledFrameMap& id_to_labeled_frame, 208 InternalTimestamp start_timestamp, InternalTimestamp end_timestamp, 209 const std::list<ParameterBlockWithData>& parameter_blocks); 210 211 /*!\brief Retrieves cached post-processed samples. 212 * 213 * Retrieves the post-processed samples for a given mix presentation, submix, 214 * and layout. Or the rendered samples if no post-processor is available. New 215 * data is available after each call to `PushTemporalUnit` or 216 * `FinalizePushingTemporalUnits`. The output span is invalidated by any 217 * further calls to `PushTemporalUnit` or `FinalizePushingTemporalUnits` and 218 * typically should be consumed or copied immediately. 219 * 220 * Simple use pattern: 221 * - Call based on the same layout each time. E.g. to always render the 222 * same stereo layout. 223 * 224 * More complex use pattern: 225 * - Call multiple times based on a small set of layouts. (E.g. to back a 226 * buffer to support seamless transitions when a GUI element is clicked to 227 * toggle between mixes, language, or loudnspeaker layout). 228 * - Call for each layout, to cache and save all possible rendered layouts 229 * to a file. 230 * 231 * \param mix_presentation_id Mix presentation ID 232 * \param submix_index Index of the sub mix to retrieve. 233 * \param layout_index Index of the layout to retrieve. 234 * \param Post-processed samples, or rendered samples if no post-processor is 235 * available. A specific status on failure. 236 */ 237 absl::StatusOr<absl::Span<const std::vector<int32_t>>> 238 GetPostProcessedSamplesAsSpan(DecodedUleb128 mix_presentation_id, 239 size_t sub_mix_index, 240 size_t layout_index) const; 241 242 /*!\brief Signals that `PushTemporalUnit` will no longer be called. 243 * 244 * \return `absl::OkStatus()` on success. `absl::FailedPreconditionError` if 245 * this function has already been called. 246 */ 247 absl::Status FinalizePushingTemporalUnits(); 248 249 /*!\brief Retrieves the finalized mix presentation OBUs. 250 * 251 * Will return mix presentation OBUs with updated loudness information. Should 252 * only be called after `FinalizePushingTemporalUnits` has been called. 253 * 254 * \param validate_loudness If true, validate the computed loudness matches 255 * the original user-provided provided loudness. 256 * \return List of finalized OBUs with calculated loudness information. A 257 * specific status on failure. 258 */ 259 absl::StatusOr<std::list<MixPresentationObu>> GetFinalizedMixPresentationObus( 260 bool validate_loudness); 261 262 private: 263 enum State { 264 kAcceptingTemporalUnits, 265 kFinalizePushTemporalUnitCalled, 266 kFlushedFinalizedMixPresentationObus 267 }; 268 269 /*!\brief Private constructor. 270 * 271 * Used only by the factory method. 272 * 273 * \param mix_presentation_id_to_sub_mix_rendering_metadata Mix presentation 274 * ID to rendering metadata for each sub mix. 275 * \param mix_presentation_obus Mix presentation OBUs to render and measure 276 * the loudness of. 277 */ RenderingMixPresentationFinalizer(absl::flat_hash_map<DecodedUleb128,std::vector<SubmixRenderingMetadata>> && mix_presentation_id_to_sub_mix_rendering_metadata,std::list<MixPresentationObu> && mix_presentation_obus)278 RenderingMixPresentationFinalizer( 279 absl::flat_hash_map<DecodedUleb128, 280 std::vector<SubmixRenderingMetadata>>&& 281 mix_presentation_id_to_sub_mix_rendering_metadata, 282 std::list<MixPresentationObu>&& mix_presentation_obus) 283 : mix_presentation_id_to_sub_mix_rendering_metadata_( 284 std::move(mix_presentation_id_to_sub_mix_rendering_metadata)), 285 mix_presentation_obus_(std::move(mix_presentation_obus)) {} 286 287 State state_ = kAcceptingTemporalUnits; 288 289 // Mapping from Mix Presentation ID to rendering metadata. Slots are absent 290 // for Mix Presentations that have no layouts which can be rendered. 291 absl::flat_hash_map<DecodedUleb128, std::vector<SubmixRenderingMetadata>> 292 mix_presentation_id_to_sub_mix_rendering_metadata_; 293 294 // Mix Presentation OBUs to render and measure the loudness of. 295 std::list<MixPresentationObu> mix_presentation_obus_; 296 }; 297 298 } // namespace iamf_tools 299 300 #endif // CLI_RENDERING_MIX_PRESENTATION_FINALIZER_H_ 301