/*
 * Copyright (c) 2024, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 3-Clause Clear License
 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
 * License was not distributed with this source code in the LICENSE file, you
 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
 * Alliance for Open Media Patent License 1.0 was not distributed with this
 * source code in the PATENTS file, you can obtain it at
 * www.aomedia.org/license/patent.
 */
12 #ifndef CLI_IAMF_ENCODER_H_
13 #define CLI_IAMF_ENCODER_H_
14 
#include <cstdint>
#include <list>
#include <memory>
#include <optional>
#include <utility>
#include <vector>

#include "absl/base/nullability.h"
#include "absl/container/flat_hash_map.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/audio_frame_decoder.h"
#include "iamf/cli/audio_frame_with_data.h"
#include "iamf/cli/channel_label.h"
#include "iamf/cli/demixing_module.h"
#include "iamf/cli/global_timing_module.h"
#include "iamf/cli/loudness_calculator_factory_base.h"
#include "iamf/cli/parameter_block_with_data.h"
#include "iamf/cli/parameters_manager.h"
#include "iamf/cli/proto/test_vector_metadata.pb.h"
#include "iamf/cli/proto/user_metadata.pb.h"
#include "iamf/cli/proto_conversion/proto_to_obu/audio_frame_generator.h"
#include "iamf/cli/proto_conversion/proto_to_obu/parameter_block_generator.h"
#include "iamf/cli/renderer_factory.h"
#include "iamf/cli/rendering_mix_presentation_finalizer.h"
#include "iamf/obu/arbitrary_obu.h"
#include "iamf/obu/codec_config.h"
#include "iamf/obu/ia_sequence_header.h"
#include "iamf/obu/mix_presentation.h"
#include "iamf/obu/param_definition_variant.h"
#include "iamf/obu/types.h"
47 
48 namespace iamf_tools {
49 
50 /*!\brief A class that encodes an IA Sequence and generates OBUs.
51  *
52  * Descriptor OBUs are generated once at the beginning, and data OBUs are
53  * generated iteratively for each temporal unit (TU). The use pattern of this
54  * class is:
55  *   // Call factory function.
56  *   absl::StatusOr<IamfEncoder> encoder = IamfEncoder::Create(...);
57  *   if(!encoder.ok()) {
58  *     // Handle error.
59  *   }
60  *
61  *   while (encoder->GeneratingDataObus()) {
62  *     // Prepare for the next temporal unit; clear state of the previous TU.
63  *     encoder->BeginTemporalUnit();
64  *
65  *     // For all audio elements and labels corresponding to this temporal unit:
66  *     for each audio element: {
67  *       for each channel label from the current element {
68  *         encoder->AddSamples(audio_element_id, label, samples);
69  *       }
70  *     }
71  *
72  *     // When all samples (for all temporal units) are added:
73  *     if (done_receiving_all_audio) {
74  *       encoder->FinalizeAddSamples();
75  *     }
76  *
77  *     // For all parameter block metadata corresponding to this temporal unit:
78  *     encoder->AddParameterBlockMetadata(...);
79  *
80  *     // Get OBUs for next encoded temporal unit.
81  *     encoder->OutputTemporalUnit(...);
82  *   }
83  *   // Get the final mix presentation OBUs, with measured loudness information.
84  *   auto mix_presentation_obus = encoder->GetFinalizedMixPresentationObus();
85  *
86  * Note the timestamps corresponding to `AddSamples()` and
87  * `AddParameterBlockMetadata()` might be different from that of the output
88  * OBUs obtained in `OutputTemporalUnit()`, because some codecs introduce a
89  * frame of delay. We thus distinguish the concepts of input and output
90  * timestamps (`input_timestamp` and `output_timestamp`) in the code below.
91  */
92 class IamfEncoder {
93  public:
94   /*!\brief Factory function to create an `IamfEncoder`.
95    *
96    * \param user_metadata Input user metadata describing the IAMF stream.
97    * \param renderer_factory Factory to create renderers for use in measuring
98    *        the loudness.
99    * \param loudness_calculator_factory Factory to create loudness calculators
100    *        to measure the loudness of the output layouts.
101    * \param sample_processor_factory Factory to create processors for use after
102    *        rendering.
103    * \param ia_sequence_header_obu Generated IA Sequence Header OBU.
104    * \param codec_config_obus Map of Codec Config ID to generated Codec Config
105    *        OBUs.
106    * \param audio_elements Map of Audio Element IDs to generated OBUs with data.
107    * \param preliminary_mix_presentation_obus List of preliminary Mix
108    *        Presentation OBUs. Using these directly almost certainly results in
109    *        incorrect loudness metadata. It is best practice to replace these
110    *        with the result of `GetFinalizedMixPresentationObus()` after all
111    *        data OBUs are generated.
112    * \param arbitrary_obus List of generated Arbitrary OBUs.
113    * \return `absl::OkStatus()` if successful. A specific status on failure.
114    */
115   static absl::StatusOr<IamfEncoder> Create(
116       const iamf_tools_cli_proto::UserMetadata& user_metadata,
117       absl::Nullable<const RendererFactoryBase*> renderer_factory,
118       absl::Nullable<const LoudnessCalculatorFactoryBase*>
119           loudness_calculator_factory,
120       const RenderingMixPresentationFinalizer::SampleProcessorFactory&
121           sample_processor_factory,
122       std::optional<IASequenceHeaderObu>& ia_sequence_header_obu,
123       absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
124       absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements,
125       std::list<MixPresentationObu>& preliminary_mix_presentation_obus,
126       std::list<ArbitraryObu>& arbitrary_obus);
127 
128   /*!\brief Returns whether this encoder is generating data OBUs.
129    *
130    * \return True if still generating data OBUs.
131    */
132   bool GeneratingDataObus() const;
133 
134   /*!\brief Clears the state, e.g. accumulated samples for next temporal unit.
135    */
136   void BeginTemporalUnit();
137 
138   /*!\brief Gets the input timestamp of the data OBU generation iteration.
139    *
140    * \param input_timestamp Result of input timestamp.
141    * \return `absl::OkStatus()` if successful. A specific status on failure.
142    */
143   absl::Status GetInputTimestamp(int32_t& input_timestamp);
144 
145   /*!\brief Adds audio samples belonging to the same temporal unit.
146    *
147    * The best practice is to not call this function after
148    * `FinalizeAddSamples()`. But it is OK if you do -- just that the added
149    * samples will be ignored and not encoded.
150    *
151    * \param audio_element_id ID of the audio element to add samples to.
152    * \param label Channel label to add samples to.
153    * \param samples Audio samples to add.
154    */
155   void AddSamples(DecodedUleb128 audio_element_id, ChannelLabel::Label label,
156                   const std::vector<InternalSampleType>& samples);
157 
158   /*!\brief Finalizes the process of adding samples.
159    *
160    * This will signal the underlying codecs to flush all remaining samples,
161    * as well as trim samples from the end.
162    */
163   void FinalizeAddSamples();
164 
165   /*!\brief Adds parameter block metadata belonging to the same temporal unit.
166    *
167    * \param parameter_block_metadata Parameter block metadata to add.
168    * \return `absl::OkStatus()` if successful. A specific status on failure.
169    */
170   absl::Status AddParameterBlockMetadata(
171       const iamf_tools_cli_proto::ParameterBlockObuMetadata&
172           parameter_block_metadata);
173 
174   /*!\brief Outputs data OBUs corresponding to one temporal unit.
175    *
176    * \param audio_frames List of generated audio frames corresponding to this
177    *        temporal unit.
178    * \param parameter_blocks List of generated parameter block corresponding
179    *        to this temporal unit.
180    * \return `absl::OkStatus()` if successful. A specific status on failure.
181    */
182   absl::Status OutputTemporalUnit(
183       std::list<AudioFrameWithData>& audio_frames,
184       std::list<ParameterBlockWithData>& parameter_blocks);
185 
186   /*!\brief Gets the finalized mix presentation OBUs.
187    *
188    * Mix Presentation OBUs contain loudness information, which is only possible
189    * to know after all data OBUs are generated.
190    *
191    * Must only be called only once and after all data OBUs are generated, i.e.
192    * after `GeneratingDataObus()` returns false.
193    *
194    * \return Finalized Mix Presentation OBUs. A specific status on failure.
195    */
196   absl::StatusOr<std::list<MixPresentationObu>>
197   GetFinalizedMixPresentationObus();
198 
199  private:
200   /*!\brief Private constructor.
201    *
202    * Moves from the input arguments Some arguments are wrapped in unique
203    * pointers to ensure pointer or reference stability after move.
204    *
205    * \param validate_user_loudness Whether to validate the user-provided
206    *        loudness.
207    * \param parameter_id_to_metadata Mapping from parameter IDs to per-ID
208    *        parameter metadata.
209    * \param param_definition_variants Parameter definitions for the IA Sequence.
210    * \param parameters_manager Manager to support internal querying
211    *        of parameters.
212    * \param demixing_module Module to demix audio elements.
213    * \param audio_frame_generator Audio frame generator.
214    * \param audio_frame_decoder Decodes the original audio frames, to facilitate
215    *        recon gain computation.
216    * \param global_timing_module Manages global timing information.
217    */
IamfEncoder(bool validate_user_loudness,std::unique_ptr<absl::flat_hash_map<DecodedUleb128,ParamDefinitionVariant>> param_definition_variants,ParameterBlockGenerator && parameter_block_generator,std::unique_ptr<ParametersManager> parameters_manager,const DemixingModule & demixing_module,std::unique_ptr<AudioFrameGenerator> audio_frame_generator,AudioFrameDecoder && audio_frame_decoder,std::unique_ptr<GlobalTimingModule> global_timing_module,RenderingMixPresentationFinalizer && mix_presentation_finalizer)218   IamfEncoder(bool validate_user_loudness,
219               std::unique_ptr<
220                   absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>>
221                   param_definition_variants,
222               ParameterBlockGenerator&& parameter_block_generator,
223               std::unique_ptr<ParametersManager> parameters_manager,
224               const DemixingModule& demixing_module,
225               std::unique_ptr<AudioFrameGenerator> audio_frame_generator,
226               AudioFrameDecoder&& audio_frame_decoder,
227               std::unique_ptr<GlobalTimingModule> global_timing_module,
228               RenderingMixPresentationFinalizer&& mix_presentation_finalizer)
229       : validate_user_loudness_(validate_user_loudness),
230         param_definition_variants_(std::move(param_definition_variants)),
231         parameter_block_generator_(std::move(parameter_block_generator)),
232         parameters_manager_(std::move(parameters_manager)),
233         demixing_module_(demixing_module),
234         audio_frame_generator_(std::move(audio_frame_generator)),
235         audio_frame_decoder_(std::move(audio_frame_decoder)),
236         global_timing_module_(std::move(global_timing_module)),
237         mix_presentation_finalizer_(std::move(mix_presentation_finalizer)) {}
238 
239   const bool validate_user_loudness_;
240 
241   // Mapping from parameter IDs to parameter definitions.
242   // Parameter block generator owns a reference to this map. Wrapped in
243   // `std::unique_ptr` for reference stability after move.
244   absl::Nonnull<std::unique_ptr<
245       const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>>>
246       param_definition_variants_;
247 
248   // Saved parameter blocks generated in one iteration.
249   std::list<ParameterBlockWithData> temp_mix_gain_parameter_blocks_;
250   std::list<ParameterBlockWithData> temp_demixing_parameter_blocks_;
251   std::list<ParameterBlockWithData> temp_recon_gain_parameter_blocks_;
252 
253   // Cached mapping from Audio Element ID to labeled samples added in the same
254   // iteration.
255   absl::flat_hash_map<DecodedUleb128, LabelSamplesMap> id_to_labeled_samples_;
256 
257   // Whether the `FinalizeAddSamples()` has been called.
258   bool add_samples_finalized_ = false;
259 
260   // Various generators and modules used when generating data OBUs iteratively.
261   // Some are held in `unique_ptr` for reference stability after move.
262   ParameterBlockGenerator parameter_block_generator_;
263   absl::Nonnull<std::unique_ptr<ParametersManager>> parameters_manager_;
264   const DemixingModule demixing_module_;
265   absl::Nonnull<std::unique_ptr<AudioFrameGenerator>> audio_frame_generator_;
266   AudioFrameDecoder audio_frame_decoder_;
267   absl::Nonnull<std::unique_ptr<GlobalTimingModule>> global_timing_module_;
268 
269   // Modules to render the output layouts and measure their loudness.
270   RenderingMixPresentationFinalizer mix_presentation_finalizer_;
271 };
272 
273 }  // namespace iamf_tools
274 
275 #endif  // CLI_IAMF_ENCODER_H_
276