1 /* 2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 3-Clause Clear License 5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear 6 * License was not distributed with this source code in the LICENSE file, you 7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the 8 * Alliance for Open Media Patent License 1.0 was not distributed with this 9 * source code in the PATENTS file, you can obtain it at 10 * www.aomedia.org/license/patent. 11 */ 12 13 #ifndef CLI_PROTO_CONVERSION_PROTO_TO_OBU_AUDIO_FRAME_GENERATOR_H_ 14 #define CLI_PROTO_CONVERSION_PROTO_TO_OBU_AUDIO_FRAME_GENERATOR_H_ 15 16 #include <cstdint> 17 #include <list> 18 #include <memory> 19 20 #include "absl/base/thread_annotations.h" 21 #include "absl/container/flat_hash_map.h" 22 #include "absl/container/flat_hash_set.h" 23 #include "absl/status/status.h" 24 #include "absl/status/statusor.h" 25 #include "absl/synchronization/mutex.h" 26 #include "absl/types/span.h" 27 #include "iamf/cli/audio_element_with_data.h" 28 #include "iamf/cli/audio_frame_with_data.h" 29 #include "iamf/cli/channel_label.h" 30 #include "iamf/cli/codec/encoder_base.h" 31 #include "iamf/cli/demixing_module.h" 32 #include "iamf/cli/global_timing_module.h" 33 #include "iamf/cli/parameters_manager.h" 34 #include "iamf/cli/proto/audio_frame.pb.h" 35 #include "iamf/cli/proto/codec_config.pb.h" 36 #include "iamf/obu/codec_config.h" 37 #include "iamf/obu/types.h" 38 #include "src/google/protobuf/repeated_ptr_field.h" 39 40 namespace iamf_tools { 41 42 /*!\brief Generator of audio frames. 43 * 44 * The generation of audio frames can be done asynchronously, where 45 * samples are added on one thread and completed frames are consumed on another. 46 * 47 * Under the hood, the generator can be in three states: 48 * 1. `kTakingSamples`: The generator is expecting audio substreams and taking 49 * samples. 50 * 2. `kFinalizeCalled`: `Finalize()` has been called; no more "real samples" 51 * are coming, and the generator will soon (starting in 52 * the next iteration) be flusing the remaining samples. 53 * 3. `kFlushingRemaining`: The generator is flushing the remaining samples 54 * that are still in the underlying encoders. 55 * 56 * The use pattern of this class is: 57 * 58 * - Initialize (`Initialize()`). 59 * - (This puts the generator in the `kTakingSamples` state.) 60 * 61 * Thread 1: 62 * - Repeat until no new sample to add (by checking `TakingSamples()`): 63 * - Add samples for each audio element (`AddSamples()`). 64 * - Finalize the sample-adding process (`Finalize()`). 65 * - (This puts the generator in the `kFinalizeCalled` state.) 66 * 67 * Thread 2: 68 * - Repeat until no frame to generate (by checking `GeneratingFrames()`): 69 * - Output generated frames (`OutputFrames()`). 70 * - If the generator is in the `kFlushingRemaining` state, the frames 71 * might come from remaining samples in the underlying encoders. 72 * - If the output is empty, wait. 73 * - Otherwise, add the output of this round to the final result. 74 * 75 */ 76 class AudioFrameGenerator { 77 public: 78 /*!\brief Data structure to track the user requested trimming. 79 */ 80 struct TrimmingState { 81 bool increment_samples_to_trim_at_end_by_padding; 82 83 int64_t user_samples_left_to_trim_at_end; 84 int64_t user_samples_left_to_trim_at_start; 85 }; 86 87 /*!\brief Constructor. 88 * 89 * \param audio_frame_metadata Input audio frame metadata. 90 * \param codec_config_metadata Input codec config metadata. 91 * \param audio_elements Input Audio Element OBUs with data. 92 * \param demixing_module Demixng module. 93 * \param parameters_manager Manager of parameters. 94 * \param global_timing_module Global Timing Module. 95 */ 96 AudioFrameGenerator( 97 const ::google::protobuf::RepeatedPtrField< 98 iamf_tools_cli_proto::AudioFrameObuMetadata>& audio_frame_metadata, 99 const ::google::protobuf::RepeatedPtrField< 100 iamf_tools_cli_proto::CodecConfigObuMetadata>& codec_config_metadata, 101 const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& 102 audio_elements, 103 const DemixingModule& demixing_module, 104 ParametersManager& parameters_manager, 105 GlobalTimingModule& global_timing_module); 106 107 /*!\brief Deleted move constructor. */ 108 AudioFrameGenerator(AudioFrameGenerator&&) = delete; 109 110 /*!\brief Returns the number of samples to delay based on the codec config. 111 * 112 * \param codec_config_metadata Codec config metadata. 113 * \param codec_config Codec config. 114 * \return Number of samples to delay at start on success. A specific status 115 * on failure. 116 */ 117 static absl::StatusOr<uint32_t> GetNumberOfSamplesToDelayAtStart( 118 const iamf_tools_cli_proto::CodecConfig& codec_config_metadata, 119 const CodecConfigObu& codec_config); 120 121 /*!\brief Initializes encoders and relevant data structures. 122 * 123 * \return `absl::OkStatus()` on success. A specific status on failure. 124 */ 125 absl::Status Initialize(); 126 127 /*!\brief Returns whether the generator is still taking audio samples. 128 * 129 * \return True if the generator is still taking audio samples. 130 */ 131 bool TakingSamples() const; 132 133 /*!\brief Adds samples for an Audio Element and a channel label. 134 * 135 * No effect if the generator is not in the `kTakingSamples` state. 136 * 137 * \param audio_element_id Audio Element ID that the added samples belong to. 138 * \param label Channel label of the added samples. 139 * \param samples Samples to add. Should not be of zero length before 140 * `Finalize()` is called. 141 * \return `absl::OkStatus()` on success. A specific status on failure. 142 */ 143 absl::Status AddSamples(DecodedUleb128 audio_element_id, 144 ChannelLabel::Label label, 145 absl::Span<const InternalSampleType> samples); 146 147 /*!\brief Finalizes the sample-adding process. 148 * 149 * This puts the generator in the `kFinalizedCalled` state if it is in the 150 * `kTakingSamples` state. No effect if the generator is in other states. 151 * 152 * \return `absl::OkStatus()` on success. A specific status on failure. 153 */ 154 absl::Status Finalize(); 155 156 /*!\brief Returns whether there still are audio frames being generated. 157 * 158 * \return True until all underlying encoders have finished encoding, and 159 * all audio frames have been generated. 160 */ 161 bool GeneratingFrames() const; 162 163 /*!\brief Outputs a list of generated Audio Frame OBUs (and associated data). 164 * 165 * The output frames all belong to the same temporal unit, sharing the same 166 * start and end timestamps. 167 * 168 * After `Finalize()` is called, all underlying encoders will be signalled 169 * to encode the remaining samples. Eventually when all substreams are 170 * are ended, encoders will be deleted and `GeneratingFrames()` will return 171 * false. 172 * 173 * \param audio_frames Output list of audio frames. 174 * \return `absl::OkStatus()` on success. A specific status on failure. 175 */ 176 absl::Status OutputFrames(std::list<AudioFrameWithData>& audio_frames); 177 178 private: 179 // State of an audio frame generator. 180 enum GeneratorState { 181 kTakingSamples, 182 kFinalizedCalled, 183 kFlushingRemaining, 184 }; 185 186 // Mapping from Audio Element ID to audio frame metadata. 187 absl::flat_hash_map<DecodedUleb128, 188 iamf_tools_cli_proto::AudioFrameObuMetadata> 189 audio_frame_metadata_; 190 191 // Mapping from Audio Element ID to labels. 192 absl::flat_hash_map<DecodedUleb128, absl::flat_hash_set<ChannelLabel::Label>> 193 audio_element_id_to_labels_; 194 195 // Mapping from Audio Element ID to audio element data. 196 const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& 197 audio_elements_; 198 199 // Mapping from Codec Config ID to additional codec config metadata used 200 // to configure encoders. 201 absl::flat_hash_map<DecodedUleb128, iamf_tools_cli_proto::CodecConfig> 202 codec_config_metadata_; 203 204 // Mapping from audio substream IDs to encoders. 205 absl::flat_hash_map<uint32_t, std::unique_ptr<EncoderBase>> 206 substream_id_to_encoder_ ABSL_GUARDED_BY(mutex_); 207 208 // Mapping from Audio Element ID to labeled samples. 209 absl::flat_hash_map<DecodedUleb128, LabelSamplesMap> id_to_labeled_samples_; 210 211 // Mapping from substream IDs to substream data. 212 absl::flat_hash_map<uint32_t, SubstreamData> substream_id_to_substream_data_; 213 214 // Mapping from substream IDs to trimming states. 215 absl::flat_hash_map<uint32_t, TrimmingState> substream_id_to_trimming_state_ 216 ABSL_GUARDED_BY(mutex_); 217 218 const DemixingModule demixing_module_; 219 // TODO(b/390150766): Be more careful about the lifetime of the 220 // `parameters_manager_` and `global_timing_module_`, as 221 // they are not owned by this class. 222 ParametersManager& parameters_manager_; 223 GlobalTimingModule& global_timing_module_; 224 GeneratorState state_ ABSL_GUARDED_BY(mutex_); 225 226 // Mutex to protect data accessed in different threads. 227 mutable absl::Mutex mutex_; 228 }; 229 230 } // namespace iamf_tools 231 232 #endif // CLI_PROTO_CONVERSION_PROTO_TO_OBU_AUDIO_FRAME_GENERATOR_H_ 233