• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 3-Clause Clear License
5  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6  * License was not distributed with this source code in the LICENSE file, you
7  * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8  * Alliance for Open Media Patent License 1.0 was not distributed with this
9  * source code in the PATENTS file, you can obtain it at
10  * www.aomedia.org/license/patent.
11  */
12 
13 #ifndef CLI_DEMIXING_MODULE_H_
14 #define CLI_DEMIXING_MODULE_H_
15 
16 #include <cstdint>
17 #include <deque>
18 #include <list>
19 #include <utility>
20 #include <vector>
21 
22 #include "absl/container/flat_hash_map.h"
23 #include "absl/container/flat_hash_set.h"
24 #include "absl/container/node_hash_map.h"
25 #include "absl/status/status.h"
26 #include "absl/status/statusor.h"
27 #include "iamf/cli/audio_element_with_data.h"
28 #include "iamf/cli/audio_frame_decoder.h"
29 #include "iamf/cli/audio_frame_with_data.h"
30 #include "iamf/cli/channel_label.h"
31 #include "iamf/obu/audio_element.h"
32 #include "iamf/obu/demixing_info_parameter_data.h"
33 #include "iamf/obu/recon_gain_info_parameter_data.h"
34 #include "iamf/obu/types.h"
35 
36 namespace iamf_tools {
37 
/*!\brief Sample queues and trimming information for a single substream.
 *
 * Scalar members are zero-initialized so that a default-constructed instance
 * never exposes indeterminate values. The struct remains an aggregate.
 */
struct SubstreamData {
  uint32_t substream_id = 0;

  // Samples arranged in a FIFO queue with a vector of channels. There can only
  // be one or two channels. Includes "virtual" samples that are output from the
  // encoder, but are not passed to the encoder.
  std::deque<std::vector<int32_t>> samples_obu;
  // Samples to pass to encoder.
  std::deque<std::vector<int32_t>> samples_encode;
  // One or two elements; corresponding to the output gain to be applied to
  // each channel.
  std::vector<double> output_gains_linear;
  // Number of samples to trim at the end and at the start, respectively.
  uint32_t num_samples_to_trim_at_end = 0;
  uint32_t num_samples_to_trim_at_start = 0;
};
53 
54 // Mapping from channel label to a frame of samples.
55 typedef absl::node_hash_map<ChannelLabel::Label,
56                             std::vector<InternalSampleType>>
57     LabelSamplesMap;
58 
59 struct LabeledFrame {
60   int32_t end_timestamp;
61   uint32_t samples_to_trim_at_end;
62   uint32_t samples_to_trim_at_start;
63   LabelSamplesMap label_to_samples;
64   DownMixingParams demixing_params;
65   ReconGainInfoParameterData recon_gain_info_parameter_data;
66   // Vector of length `num_layers`. Only populated for scalable channel audio.
67   std::vector<ChannelAudioLayerConfig::LoudspeakerLayout>
68       loudspeaker_layout_per_layer;
69 };
70 
71 // Mapping from audio element ids to `LabeledFrame`s.
72 typedef absl::flat_hash_map<DecodedUleb128, LabeledFrame> IdLabeledFrameMap;
73 
74 typedef absl::Status (*Demixer)(const DownMixingParams&, LabelSamplesMap&);
75 
76 /*!\brief Manages data and processing to down-mix and demix audio elements.
77  *
78  * This class relates to the "Element Reconstructor" as used in the IAMF
79  * specifications. "An Element Reconstructor re-assembles the Audio Elements by
80  * combining the Channel Group(s) guided by Descriptors and Parameter
81  * Substream(s)." This class does not apply the reconstruction gain, so
82  * additional post processing is needed to finish audio element reconstruction.
83  *
84  * Down-mixers are used to down-mix the input channels to the substream
85  * channels. Typically there are down-mixers for scalable channel audio
86  * elements with more than one layer. Down-mixers are created according to
87  * https://aomediacodec.github.io/iamf/#iamfgeneration-scalablechannelaudio-downmixmechanism
88  *
89  * Demixers are used to recreate the original audio from the substreams.
90  * Demixers are created according to
91  * https://aomediacodec.github.io/iamf/#processing-scalablechannelaudio.
92  */
93 class DemixingModule {
94  public:
95   struct DemixingMetadataForAudioElementId {
96     std::list<Demixer> demixers;
97     std::list<Demixer> down_mixers;
98     SubstreamIdLabelsMap substream_id_to_labels;
99     LabelGainMap label_to_output_gain;
100   };
101 
102   struct DownmixingAndReconstructionConfig {
103     absl::flat_hash_set<ChannelLabel::Label> user_labels;
104     SubstreamIdLabelsMap substream_id_to_labels;
105     LabelGainMap label_to_output_gain;
106   };
107 
108   /*!\brief Creates a `DemixingModule` for down-mixing and reconstruction.
109    *
110    * This is most useful from the context of an encoder. For example, to encode
111    * a scalable channel audio element with two layers, the input channels are
112    * down-mixed according to various rules in the spec.
113    *
114    * Initializes metadata for each input audio element ID. The metadata includes
115    * information about the channels and the specific down-mixers and demixers
116    * needed for that audio element.
117    *
118    * \param id_to_config_map Map of Audio Element IDs to
119    *        `DownmixingAndReconstructionConfig`, which contains the
120    *        user-provided labels and the `substream_id_to_labels` and
121    *        `label_to_output_gain` from the corresponding
122    *        `AudioElementWithData`.
123    * \return `absl::OkStatus()` on success. A specific status on failure.
124    */
125   static absl::StatusOr<DemixingModule> CreateForDownMixingAndReconstruction(
126       const absl::flat_hash_map<DecodedUleb128,
127                                 DownmixingAndReconstructionConfig>&&
128           id_to_config_map);
129 
130   /*!\brief Initializes for reconstruction (demixing) the input audio elements.
131    *
132    * This is most useful from the context of a decoder. For example, to decode
133    * a scalable channel audio element with two layers, the substreams are
134    * demixed according to various rules in the spec.
135    *
136    * Initializes metadata for each input audio element ID. The metadata includes
137    * information about the channels and the specific down-mixers and demixers
138    * needed for that audio element.
139    *
140    * \param audio_elements Audio elements.
141    * \return `absl::OkStatus()` on success. A specific status on failure.
142    */
143   static absl::StatusOr<DemixingModule> CreateForReconstruction(
144       const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
145           audio_elements);
146 
147   /*!\brief Searches the input map for the target samples or demixed samples.
148    *
149    * \param label Label of the channel (or its demixed version) to search for.
150    * \param label_to_samples Map of label to samples to search.
151    * \param samples Output argument for the samples if found.
152    * \return `absl::OkStatus()` on success. `absl::UnknownError()` if the search
153    *         failed.
154    */
155   static absl::Status FindSamplesOrDemixedSamples(
156       ChannelLabel::Label label, const LabelSamplesMap& label_to_samples,
157       const std::vector<InternalSampleType>** samples);
158 
159   /*!\brief Down-mixes samples of input channels to substreams.
160    *
161    * \param audio_element_id Audio Element ID of these substreams.
162    * \param down_mixing_params Down mixing parameters to use. Ignored when
163    *        there is no associated down-mixer.
164    * \param input_label_to_samples Samples in input channels organized by the
165    *        channel labels.
166    * \param substream_id_to_substream_data Mapping from substream IDs to
167    *        substream data.
168    * \return `absl::OkStatus()` on success. A specific status on failure.
169    */
170   absl::Status DownMixSamplesToSubstreams(
171       DecodedUleb128 audio_element_id,
172       const DownMixingParams& down_mixing_params,
173       LabelSamplesMap& input_label_to_samples,
174       absl::flat_hash_map<uint32_t, SubstreamData>&
175           substream_id_to_substream_data) const;
176 
177   /*!\brief Demix original audio samples.
178    *
179    * This is most useful when the original (before lossy codec) samples are
180    * known, such as when encoding original audio.
181    *
182    * \param audio_frames Audio Frames.
183    * \return Output data structure for samples, or a specific status on failure.
184    */
185   absl::StatusOr<IdLabeledFrameMap> DemixOriginalAudioSamples(
186       const std::list<AudioFrameWithData>& audio_frames) const;
187 
188   /*!\brief Demix decoded audio samples.
189    *
190    * This is most useful when the decoded (after lossy codec) samples are
191    * known, such as when decoding an IA Sequence, or when analyzing the effect
192    * of a lossy codec to determine appropriate recon gain values.
193    *
194    * \param decoded_audio_frames Decoded Audio Frames.
195    * \return Output data structure for samples, or a specific status on failure.
196    */
197   absl::StatusOr<IdLabeledFrameMap> DemixDecodedAudioSamples(
198       const std::list<DecodedAudioFrame>& decoded_audio_frame) const;
199 
200   /*!\brief Gets the down-mixers associated with an Audio Element ID.
201    *
202    * \param audio_element_id Audio Element ID
203    * \param down_mixers Output pointer to the list of down-mixers.
204    * \return `absl::OkStatus()` on success. A specific status on failure.
205    */
206   absl::Status GetDownMixers(DecodedUleb128 audio_element_id,
207                              const std::list<Demixer>*& down_mixers) const;
208 
209   /*!\brief Gets the demixers associated with an Audio Element ID.
210    *
211    * \param audio_element_id Audio Element ID
212    * \param demixers Output pointer to the list of demixers.
213    * \return `absl::OkStatus()` on success. A specific status on failure.
214    */
215   absl::Status GetDemixers(DecodedUleb128 audio_element_id,
216                            const std::list<Demixer>*& demixers) const;
217 
218  private:
219   enum class DemixingMode { kDownMixingAndReconstruction, kReconstruction };
220 
221   /*!\brief Private constructor.
222    *
223    * For use with `CreateForDownMixingAndReconstruction` and
224    * `CreateForReconstruction`.
225    *
226    * \param demixing_mode Mode of the class.
227    * \param audio_element_id_to_demixing_metadata Mapping from audio element ID
228    *        to demixing metadata.
229    */
DemixingModule(DemixingMode demixing_mode,absl::flat_hash_map<DecodedUleb128,DemixingMetadataForAudioElementId> && audio_element_id_to_demixing_metadata)230   DemixingModule(
231       DemixingMode demixing_mode,
232       absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>&&
233           audio_element_id_to_demixing_metadata)
234       : demixing_mode_(demixing_mode),
235         audio_element_id_to_demixing_metadata_(
236             std::move(audio_element_id_to_demixing_metadata)) {}
237 
238   DemixingMode demixing_mode_;
239 
240   const absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>
241       audio_element_id_to_demixing_metadata_;
242 };
243 
244 }  // namespace iamf_tools
245 
246 #endif  // CLI_DEMIXING_MODULE_H_
247