1 /* 2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 3-Clause Clear License 5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear 6 * License was not distributed with this source code in the LICENSE file, you 7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the 8 * Alliance for Open Media Patent License 1.0 was not distributed with this 9 * source code in the PATENTS file, you can obtain it at 10 * www.aomedia.org/license/patent. 11 */ 12 13 #ifndef CLI_DEMIXING_MODULE_H_ 14 #define CLI_DEMIXING_MODULE_H_ 15 16 #include <cstdint> 17 #include <deque> 18 #include <list> 19 #include <utility> 20 #include <vector> 21 22 #include "absl/container/flat_hash_map.h" 23 #include "absl/container/flat_hash_set.h" 24 #include "absl/container/node_hash_map.h" 25 #include "absl/status/status.h" 26 #include "absl/status/statusor.h" 27 #include "iamf/cli/audio_element_with_data.h" 28 #include "iamf/cli/audio_frame_decoder.h" 29 #include "iamf/cli/audio_frame_with_data.h" 30 #include "iamf/cli/channel_label.h" 31 #include "iamf/obu/audio_element.h" 32 #include "iamf/obu/demixing_info_parameter_data.h" 33 #include "iamf/obu/recon_gain_info_parameter_data.h" 34 #include "iamf/obu/types.h" 35 36 namespace iamf_tools { 37 38 struct SubstreamData { 39 uint32_t substream_id; 40 41 // Samples arranged in a FIFO queue with a vector of channels. There can only 42 // be one or two channels. Includes "virtual" samples that are output from the 43 // encoder, but are not passed to the encoder. 44 std::deque<std::vector<int32_t>> samples_obu; 45 // Samples to pass to encoder. 46 std::deque<std::vector<int32_t>> samples_encode; 47 // One or two elements; corresponding to the output gain to be applied to 48 // each channel. 49 std::vector<double> output_gains_linear; 50 uint32_t num_samples_to_trim_at_end; 51 uint32_t num_samples_to_trim_at_start; 52 }; 53 54 // Mapping from channel label to a frame of samples. 55 typedef absl::node_hash_map<ChannelLabel::Label, 56 std::vector<InternalSampleType>> 57 LabelSamplesMap; 58 59 struct LabeledFrame { 60 int32_t end_timestamp; 61 uint32_t samples_to_trim_at_end; 62 uint32_t samples_to_trim_at_start; 63 LabelSamplesMap label_to_samples; 64 DownMixingParams demixing_params; 65 ReconGainInfoParameterData recon_gain_info_parameter_data; 66 // Vector of length `num_layers`. Only populated for scalable channel audio. 67 std::vector<ChannelAudioLayerConfig::LoudspeakerLayout> 68 loudspeaker_layout_per_layer; 69 }; 70 71 // Mapping from audio element ids to `LabeledFrame`s. 72 typedef absl::flat_hash_map<DecodedUleb128, LabeledFrame> IdLabeledFrameMap; 73 74 typedef absl::Status (*Demixer)(const DownMixingParams&, LabelSamplesMap&); 75 76 /*!\brief Manages data and processing to down-mix and demix audio elements. 77 * 78 * This class relates to the "Element Reconstructor" as used in the IAMF 79 * specifications. "An Element Reconstructor re-assembles the Audio Elements by 80 * combining the Channel Group(s) guided by Descriptors and Parameter 81 * Substream(s)." This class does not apply the reconstruction gain, so 82 * additional post processing is needed to finish audio element reconstruction. 83 * 84 * Down-mixers are used to down-mix the input channels to the substream 85 * channels. Typically there are down-mixers for scalable channel audio 86 * elements with more than one layer. Down-mixers are created according to 87 * https://aomediacodec.github.io/iamf/#iamfgeneration-scalablechannelaudio-downmixmechanism 88 * 89 * Demixers are used to recreate the original audio from the substreams. 90 * Demixers are created according to 91 * https://aomediacodec.github.io/iamf/#processing-scalablechannelaudio. 92 */ 93 class DemixingModule { 94 public: 95 struct DemixingMetadataForAudioElementId { 96 std::list<Demixer> demixers; 97 std::list<Demixer> down_mixers; 98 SubstreamIdLabelsMap substream_id_to_labels; 99 LabelGainMap label_to_output_gain; 100 }; 101 102 struct DownmixingAndReconstructionConfig { 103 absl::flat_hash_set<ChannelLabel::Label> user_labels; 104 SubstreamIdLabelsMap substream_id_to_labels; 105 LabelGainMap label_to_output_gain; 106 }; 107 108 /*!\brief Creates a `DemixingModule` for down-mixing and reconstruction. 109 * 110 * This is most useful from the context of an encoder. For example, to encode 111 * a scalable channel audio element with two layers, the input channels are 112 * down-mixed according to various rules in the spec. 113 * 114 * Initializes metadata for each input audio element ID. The metadata includes 115 * information about the channels and the specific down-mixers and demixers 116 * needed for that audio element. 117 * 118 * \param id_to_config_map Map of Audio Element IDs to 119 * `DownmixingAndReconstructionConfig`, which contains the 120 * user-provided labels and the `substream_id_to_labels` and 121 * `label_to_output_gain` from the corresponding 122 * `AudioElementWithData`. 123 * \return `absl::OkStatus()` on success. A specific status on failure. 124 */ 125 static absl::StatusOr<DemixingModule> CreateForDownMixingAndReconstruction( 126 const absl::flat_hash_map<DecodedUleb128, 127 DownmixingAndReconstructionConfig>&& 128 id_to_config_map); 129 130 /*!\brief Initializes for reconstruction (demixing) the input audio elements. 131 * 132 * This is most useful from the context of a decoder. For example, to decode 133 * a scalable channel audio element with two layers, the substreams are 134 * demixed according to various rules in the spec. 135 * 136 * Initializes metadata for each input audio element ID. The metadata includes 137 * information about the channels and the specific down-mixers and demixers 138 * needed for that audio element. 139 * 140 * \param audio_elements Audio elements. 141 * \return `absl::OkStatus()` on success. A specific status on failure. 142 */ 143 static absl::StatusOr<DemixingModule> CreateForReconstruction( 144 const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& 145 audio_elements); 146 147 /*!\brief Searches the input map for the target samples or demixed samples. 148 * 149 * \param label Label of the channel (or its demixed version) to search for. 150 * \param label_to_samples Map of label to samples to search. 151 * \param samples Output argument for the samples if found. 152 * \return `absl::OkStatus()` on success. `absl::UnknownError()` if the search 153 * failed. 154 */ 155 static absl::Status FindSamplesOrDemixedSamples( 156 ChannelLabel::Label label, const LabelSamplesMap& label_to_samples, 157 const std::vector<InternalSampleType>** samples); 158 159 /*!\brief Down-mixes samples of input channels to substreams. 160 * 161 * \param audio_element_id Audio Element ID of these substreams. 162 * \param down_mixing_params Down mixing parameters to use. Ignored when 163 * there is no associated down-mixer. 164 * \param input_label_to_samples Samples in input channels organized by the 165 * channel labels. 166 * \param substream_id_to_substream_data Mapping from substream IDs to 167 * substream data. 168 * \return `absl::OkStatus()` on success. A specific status on failure. 169 */ 170 absl::Status DownMixSamplesToSubstreams( 171 DecodedUleb128 audio_element_id, 172 const DownMixingParams& down_mixing_params, 173 LabelSamplesMap& input_label_to_samples, 174 absl::flat_hash_map<uint32_t, SubstreamData>& 175 substream_id_to_substream_data) const; 176 177 /*!\brief Demix original audio samples. 178 * 179 * This is most useful when the original (before lossy codec) samples are 180 * known, such as when encoding original audio. 181 * 182 * \param audio_frames Audio Frames. 183 * \return Output data structure for samples, or a specific status on failure. 184 */ 185 absl::StatusOr<IdLabeledFrameMap> DemixOriginalAudioSamples( 186 const std::list<AudioFrameWithData>& audio_frames) const; 187 188 /*!\brief Demix decoded audio samples. 189 * 190 * This is most useful when the decoded (after lossy codec) samples are 191 * known, such as when decoding an IA Sequence, or when analyzing the effect 192 * of a lossy codec to determine appropriate recon gain values. 193 * 194 * \param decoded_audio_frames Decoded Audio Frames. 195 * \return Output data structure for samples, or a specific status on failure. 196 */ 197 absl::StatusOr<IdLabeledFrameMap> DemixDecodedAudioSamples( 198 const std::list<DecodedAudioFrame>& decoded_audio_frame) const; 199 200 /*!\brief Gets the down-mixers associated with an Audio Element ID. 201 * 202 * \param audio_element_id Audio Element ID 203 * \param down_mixers Output pointer to the list of down-mixers. 204 * \return `absl::OkStatus()` on success. A specific status on failure. 205 */ 206 absl::Status GetDownMixers(DecodedUleb128 audio_element_id, 207 const std::list<Demixer>*& down_mixers) const; 208 209 /*!\brief Gets the demixers associated with an Audio Element ID. 210 * 211 * \param audio_element_id Audio Element ID 212 * \param demixers Output pointer to the list of demixers. 213 * \return `absl::OkStatus()` on success. A specific status on failure. 214 */ 215 absl::Status GetDemixers(DecodedUleb128 audio_element_id, 216 const std::list<Demixer>*& demixers) const; 217 218 private: 219 enum class DemixingMode { kDownMixingAndReconstruction, kReconstruction }; 220 221 /*!\brief Private constructor. 222 * 223 * For use with `CreateForDownMixingAndReconstruction` and 224 * `CreateForReconstruction`. 225 * 226 * \param demixing_mode Mode of the class. 227 * \param audio_element_id_to_demixing_metadata Mapping from audio element ID 228 * to demixing metadata. 229 */ DemixingModule(DemixingMode demixing_mode,absl::flat_hash_map<DecodedUleb128,DemixingMetadataForAudioElementId> && audio_element_id_to_demixing_metadata)230 DemixingModule( 231 DemixingMode demixing_mode, 232 absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>&& 233 audio_element_id_to_demixing_metadata) 234 : demixing_mode_(demixing_mode), 235 audio_element_id_to_demixing_metadata_( 236 std::move(audio_element_id_to_demixing_metadata)) {} 237 238 DemixingMode demixing_mode_; 239 240 const absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId> 241 audio_element_id_to_demixing_metadata_; 242 }; 243 244 } // namespace iamf_tools 245 246 #endif // CLI_DEMIXING_MODULE_H_ 247