/*
 * Copyright (c) 2024, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 3-Clause Clear License
 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
 * License was not distributed with this source code in the LICENSE file, you
 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
 * Alliance for Open Media Patent License 1.0 was not distributed with this
 * source code in the PATENTS file, you can obtain it at
 * www.aomedia.org/license/patent.
 */
12 
13 #include "iamf/cli/obu_processor.h"
14 
15 #include <cstddef>
16 #include <cstdint>
17 #include <list>
18 #include <memory>
19 #include <optional>
20 #include <string>
21 #include <utility>
22 #include <vector>
23 
24 #include "absl/container/flat_hash_map.h"
25 #include "absl/container/flat_hash_set.h"
26 #include "absl/log/check.h"
27 #include "absl/log/log.h"
28 #include "absl/memory/memory.h"
29 #include "absl/status/status.h"
30 #include "absl/status/statusor.h"
31 #include "absl/strings/str_cat.h"
32 #include "absl/strings/string_view.h"
33 #include "absl/types/span.h"
34 #include "iamf/cli/audio_element_with_data.h"
35 #include "iamf/cli/audio_frame_with_data.h"
36 #include "iamf/cli/cli_util.h"
37 #include "iamf/cli/demixing_module.h"
38 #include "iamf/cli/global_timing_module.h"
39 #include "iamf/cli/obu_with_data_generator.h"
40 #include "iamf/cli/parameter_block_with_data.h"
41 #include "iamf/cli/parameters_manager.h"
42 #include "iamf/cli/profile_filter.h"
43 #include "iamf/cli/renderer_factory.h"
44 #include "iamf/cli/rendering_mix_presentation_finalizer.h"
45 #include "iamf/cli/sample_processor_base.h"
46 #include "iamf/common/read_bit_buffer.h"
47 #include "iamf/common/utils/macros.h"
48 #include "iamf/common/utils/validation_utils.h"
49 #include "iamf/obu/audio_element.h"
50 #include "iamf/obu/audio_frame.h"
51 #include "iamf/obu/codec_config.h"
52 #include "iamf/obu/ia_sequence_header.h"
53 #include "iamf/obu/mix_presentation.h"
54 #include "iamf/obu/obu_header.h"
55 #include "iamf/obu/param_definition_variant.h"
56 #include "iamf/obu/parameter_block.h"
57 #include "iamf/obu/temporal_delimiter.h"
58 #include "iamf/obu/types.h"
59 
60 namespace iamf_tools {
61 
62 namespace {
63 
64 // Gets a CodecConfigObu from `read_bit_buffer` and stores it into
65 // `codec_config_obu_map`, using the `codec_config_id` as the unique key.
GetAndStoreCodecConfigObu(const ObuHeader & header,int64_t payload_size,absl::flat_hash_map<DecodedUleb128,CodecConfigObu> & codec_config_obu_map,ReadBitBuffer & read_bit_buffer)66 absl::Status GetAndStoreCodecConfigObu(
67     const ObuHeader& header, int64_t payload_size,
68     absl::flat_hash_map<DecodedUleb128, CodecConfigObu>& codec_config_obu_map,
69     ReadBitBuffer& read_bit_buffer) {
70   absl::StatusOr<CodecConfigObu> codec_config_obu =
71       CodecConfigObu::CreateFromBuffer(header, payload_size, read_bit_buffer);
72   if (!codec_config_obu.ok()) {
73     return codec_config_obu.status();
74   }
75   codec_config_obu->PrintObu();
76   codec_config_obu_map.insert(
77       {codec_config_obu->GetCodecConfigId(), *std::move(codec_config_obu)});
78   return absl::OkStatus();
79 }
80 
GetAndStoreAudioElementObu(const ObuHeader & header,int64_t payload_size,absl::flat_hash_map<DecodedUleb128,AudioElementObu> & audio_element_obu_map,ReadBitBuffer & read_bit_buffer)81 absl::Status GetAndStoreAudioElementObu(
82     const ObuHeader& header, int64_t payload_size,
83     absl::flat_hash_map<DecodedUleb128, AudioElementObu>& audio_element_obu_map,
84     ReadBitBuffer& read_bit_buffer) {
85   absl::StatusOr<AudioElementObu> audio_element_obu =
86       AudioElementObu::CreateFromBuffer(header, payload_size, read_bit_buffer);
87   if (!audio_element_obu.ok()) {
88     return audio_element_obu.status();
89   }
90   audio_element_obu->PrintObu();
91   audio_element_obu_map.insert(
92       {audio_element_obu->GetAudioElementId(), *std::move(audio_element_obu)});
93   return absl::OkStatus();
94 }
95 
GetAndStoreMixPresentationObu(const ObuHeader & header,int64_t payload_size,std::list<MixPresentationObu> & mix_presentation_obus,ReadBitBuffer & read_bit_buffer)96 absl::Status GetAndStoreMixPresentationObu(
97     const ObuHeader& header, int64_t payload_size,
98     std::list<MixPresentationObu>& mix_presentation_obus,
99     ReadBitBuffer& read_bit_buffer) {
100   absl::StatusOr<MixPresentationObu> mix_presentation_obu =
101       MixPresentationObu::CreateFromBuffer(header, payload_size,
102                                            read_bit_buffer);
103   if (!mix_presentation_obu.ok()) {
104     return mix_presentation_obu.status();
105   }
106   LOG(INFO) << "Mix Presentation OBU successfully parsed.";
107   mix_presentation_obu->PrintObu();
108   mix_presentation_obus.push_back(*std::move(mix_presentation_obu));
109   return absl::OkStatus();
110 }
111 
// Updates the demixing and recon gain states of every audio element once the
// global timing module reports a global audio frame timestamp.
//
// Succeeds without doing anything while no such timestamp is available.
absl::Status UpdateParameterStatesIfNeeded(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements_with_data,
    const GlobalTimingModule& global_timing_module,
    ParametersManager& parameters_manager) {
  std::optional<InternalTimestamp> timestamp;
  RETURN_IF_NOT_OK(
      global_timing_module.GetGlobalAudioFrameTimestamp(timestamp));

  if (timestamp.has_value()) {
    // The audio frames for all audio elements are finished, so the parameters
    // manager can be advanced for each of them.
    for (const auto& id_and_element : audio_elements_with_data) {
      const DecodedUleb128 audio_element_id = id_and_element.first;
      RETURN_IF_NOT_OK(
          parameters_manager.UpdateDemixingState(audio_element_id, *timestamp));
      RETURN_IF_NOT_OK(parameters_manager.UpdateReconGainState(audio_element_id,
                                                               *timestamp));
    }
  }
  return absl::OkStatus();
}
136 
// Parses one Audio Frame OBU from `read_bit_buffer` and generates the
// corresponding `AudioFrameWithData`.
//
// `output_audio_frame_with_data` is cleared on entry and is only populated
// once both parsing and generation succeed. After the frame is stored, the
// parameter states are updated via `UpdateParameterStatesIfNeeded`.
//
// Returns:
//   - the parse error if the OBU cannot be created,
//   - `InvalidArgumentError` if the frame's substream ID has no entry in
//     `substream_id_to_audio_element`,
//   - the generation error or the parameter-state update error otherwise.
absl::Status GetAndStoreAudioFrameWithData(
    const ObuHeader& header, const int64_t payload_size,
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements_with_data,
    const absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*>&
        substream_id_to_audio_element,
    ReadBitBuffer& read_bit_buffer, GlobalTimingModule& global_timing_module,
    ParametersManager& parameters_manager,
    std::optional<AudioFrameWithData>& output_audio_frame_with_data) {
  output_audio_frame_with_data.reset();
  auto audio_frame_obu =
      AudioFrameObu::CreateFromBuffer(header, payload_size, read_bit_buffer);
  if (!audio_frame_obu.ok()) {
    return audio_frame_obu.status();
  }

  // Find the audio element that owns this frame's substream.
  const auto substream_id = audio_frame_obu->GetSubstreamId();
  const auto audio_element_iter =
      substream_id_to_audio_element.find(substream_id);
  if (audio_element_iter == substream_id_to_audio_element.end()) {
    return absl::InvalidArgumentError(absl::StrCat(
        "No audio element found having substream ID: ", substream_id));
  }
  const auto& audio_element_with_data = *audio_element_iter->second;

  auto audio_frame_with_data = ObuWithDataGenerator::GenerateAudioFrameWithData(
      audio_element_with_data, *audio_frame_obu, global_timing_module,
      parameters_manager);
  if (!audio_frame_with_data.ok()) {
    return audio_frame_with_data.status();
  }
  // The `StatusOr` is not used again, so move its payload out instead of
  // copying the whole frame.
  output_audio_frame_with_data = *std::move(audio_frame_with_data);

  RETURN_IF_NOT_OK(UpdateParameterStatesIfNeeded(
      audio_elements_with_data, global_timing_module, parameters_manager));

  return absl::OkStatus();
}
173 
// Parses one Parameter Block OBU from `read_bit_buffer` and generates the
// corresponding `ParameterBlockWithData`.
//
// `output_parameter_block_with_data` is cleared on entry (matching
// `GetAndStoreAudioFrameWithData`) and is only populated on success, so a
// stale value never survives an error.
//
// Returns:
//   - the parse error if the OBU cannot be created,
//   - `InvalidArgumentError` if the global timing module has no global audio
//     frame timestamp yet,
//   - the generation error otherwise.
absl::Status GetAndStoreParameterBlockWithData(
    const ObuHeader& header, const int64_t payload_size,
    const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>&
        param_definition_variants,
    ReadBitBuffer& read_bit_buffer, GlobalTimingModule& global_timing_module,
    std::optional<ParameterBlockWithData>& output_parameter_block_with_data) {
  output_parameter_block_with_data.reset();
  auto parameter_block_obu = ParameterBlockObu::CreateFromBuffer(
      header, payload_size, param_definition_variants, read_bit_buffer);
  if (!parameter_block_obu.ok()) {
    return parameter_block_obu.status();
  }

  std::optional<InternalTimestamp> global_timestamp;
  RETURN_IF_NOT_OK(
      global_timing_module.GetGlobalAudioFrameTimestamp(global_timestamp));
  if (!global_timestamp.has_value()) {
    return absl::InvalidArgumentError(
        "Global timestamp has no value while generating a parameter "
        "block");
  }

  // Process the newly parsed parameter block OBU.
  auto parameter_block_with_data =
      ObuWithDataGenerator::GenerateParameterBlockWithData(
          *global_timestamp, global_timing_module,
          std::move(*parameter_block_obu));
  if (!parameter_block_with_data.ok()) {
    return parameter_block_with_data.status();
  }
  output_parameter_block_with_data = std::move(*parameter_block_with_data);

  return absl::OkStatus();
}
207 
208 // Returns a list of pointers to the supported mix presentations. Empty if none
209 // are supported.
GetSupportedMixPresentations(const absl::flat_hash_map<uint32_t,AudioElementWithData> & audio_elements,std::list<MixPresentationObu> & mix_presentation_obus)210 std::list<MixPresentationObu*> GetSupportedMixPresentations(
211     const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
212     std::list<MixPresentationObu>& mix_presentation_obus) {
213   // TODO(b/377554944): Support `ProfileVersion::kIamfBaseEnhancedProfile`.
214   // Only permit certain profiles to be used.
215   const absl::flat_hash_set<ProfileVersion> kSupportedProfiles = {
216       ProfileVersion::kIamfSimpleProfile, ProfileVersion::kIamfBaseProfile};
217 
218   std::list<MixPresentationObu*> supported_mix_presentations;
219   std::string cumulative_error_message;
220   for (auto iter = mix_presentation_obus.begin();
221        iter != mix_presentation_obus.end(); ++iter) {
222     auto profiles = kSupportedProfiles;
223     const auto status = ProfileFilter::FilterProfilesForMixPresentation(
224         audio_elements, *iter, profiles);
225     if (status.ok()) {
226       supported_mix_presentations.push_back(&*iter);
227     }
228     absl::StrAppend(&cumulative_error_message, status.message(), "\n");
229   }
230   LOG(INFO) << "Filtered mix presentations: " << cumulative_error_message;
231   return supported_mix_presentations;
232 }
233 
234 // Searches for the desired layout in the supported mix presentations. If found,
235 // the output_playback_layout is the same as the desired_layout. Otherwise, we
236 // default to the first layout in the first unsupported mix presentation.
GetPlaybackLayoutAndMixPresentation(const std::list<MixPresentationObu * > & supported_mix_presentations,const Layout & desired_layout,Layout & output_playback_layout)237 absl::StatusOr<MixPresentationObu*> GetPlaybackLayoutAndMixPresentation(
238     const std::list<MixPresentationObu*>& supported_mix_presentations,
239     const Layout& desired_layout, Layout& output_playback_layout) {
240   for (const auto& mix_presentation : supported_mix_presentations) {
241     for (const auto& sub_mix : mix_presentation->sub_mixes_) {
242       for (const auto& layout : sub_mix.layouts) {
243         if (layout.loudness_layout == desired_layout) {
244           output_playback_layout = layout.loudness_layout;
245           return mix_presentation;
246         }
247       }
248     }
249   }
250   // If we get here, we didn't find the desired layout in any of the supported
251   // mix presentations. We default to the first layout in the first mix
252   // presentation.
253   MixPresentationObu* output_mix_presentation =
254       supported_mix_presentations.front();
255   if (output_mix_presentation->sub_mixes_.empty()) {
256     return absl::InvalidArgumentError(
257         "No submixes found in the first mix presentation.");
258   }
259   if (output_mix_presentation->sub_mixes_.front().layouts.empty()) {
260     return absl::InvalidArgumentError(
261         "No layouts found in the first submix of the first mix presentation.");
262   }
263   output_playback_layout = output_mix_presentation->sub_mixes_.front()
264                                .layouts.front()
265                                .loudness_layout;
266   return output_mix_presentation;
267 }
268 
269 // Resets the buffer to `start_position` and sets the `insufficient_data`
270 // flag to `true`. Clears the output maps.
InsufficientDataReset(ReadBitBuffer & read_bit_buffer,const int64_t start_position,bool & insufficient_data,absl::flat_hash_map<DecodedUleb128,CodecConfigObu> & output_codec_config_obus,absl::flat_hash_map<DecodedUleb128,AudioElementWithData> & output_audio_elements_with_data,std::list<MixPresentationObu> & output_mix_presentation_obus)271 absl::Status InsufficientDataReset(
272     ReadBitBuffer& read_bit_buffer, const int64_t start_position,
273     bool& insufficient_data,
274     absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
275         output_codec_config_obus,
276     absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
277         output_audio_elements_with_data,
278     std::list<MixPresentationObu>& output_mix_presentation_obus) {
279   LOG(INFO) << "Insufficient data to process all descriptor OBUs.";
280   insufficient_data = true;
281   output_codec_config_obus.clear();
282   output_audio_elements_with_data.clear();
283   output_mix_presentation_obus.clear();
284   RETURN_IF_NOT_OK(read_bit_buffer.Seek(start_position));
285   LOG(INFO) << "Reset the buffer to the beginning.";
286   return absl::ResourceExhaustedError(
287       "Insufficient data to process all descriptor OBUs. Please provide "
288       "more data and try again.");
289 }
290 
GetSampleRateAndFrameSize(const absl::flat_hash_map<DecodedUleb128,CodecConfigObu> & output_codec_config_obus,std::optional<uint32_t> & output_sample_rate,std::optional<uint32_t> & output_frame_size)291 void GetSampleRateAndFrameSize(
292     const absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
293         output_codec_config_obus,
294     std::optional<uint32_t>& output_sample_rate,
295     std::optional<uint32_t>& output_frame_size) {
296   if (output_codec_config_obus.size() != 1) {
297     LOG(WARNING) << "Expected exactly one codec config OBUs, but found "
298                  << output_codec_config_obus.size();
299     return;
300   }
301   const auto& first_codec_config_obu = output_codec_config_obus.begin()->second;
302   output_sample_rate = first_codec_config_obu.GetOutputSampleRate();
303   output_frame_size = first_codec_config_obu.GetNumSamplesPerFrame();
304 }
305 
306 }  // namespace
307 
InitializeInternal(bool is_exhaustive_and_exact,bool & output_insufficient_data)308 absl::Status ObuProcessor::InitializeInternal(bool is_exhaustive_and_exact,
309                                               bool& output_insufficient_data) {
310   // Process the descriptor OBUs.
311   LOG(INFO) << "Starting Descriptor OBU processing";
312   RETURN_IF_NOT_OK(ObuProcessor::ProcessDescriptorObus(
313       is_exhaustive_and_exact, *read_bit_buffer_, ia_sequence_header_,
314       codec_config_obus_, audio_elements_, mix_presentations_,
315       output_insufficient_data));
316   LOG(INFO) << "Processed Descriptor OBUs";
317   RETURN_IF_NOT_OK(CollectAndValidateParamDefinitions(
318       audio_elements_, mix_presentations_, param_definition_variants_));
319   GetSampleRateAndFrameSize(codec_config_obus_, output_sample_rate_,
320                             output_frame_size_);
321   // Mapping from substream IDs to pointers to audio element with data.
322   for (const auto& [audio_element_id, audio_element_with_data] :
323        audio_elements_) {
324     for (const auto& [substream_id, unused_labels] :
325          audio_element_with_data.substream_id_to_labels) {
326       auto [unused_iter, inserted] = substream_id_to_audio_element_.insert(
327           {substream_id, &audio_element_with_data});
328       if (!inserted) {
329         return absl::InvalidArgumentError(absl::StrCat(
330             "Duplicated substream ID: ", substream_id,
331             " associated with audio element ID: ", audio_element_id));
332       }
333     }
334   }
335   global_timing_module_ =
336       GlobalTimingModule::Create(audio_elements_, param_definition_variants_);
337   if (global_timing_module_ == nullptr) {
338     return absl::InvalidArgumentError(
339         "Failed to initialize the global timing module");
340   }
341   parameters_manager_.emplace(audio_elements_);
342   RETURN_IF_NOT_OK(parameters_manager_->Initialize());
343   return absl::OkStatus();
344 }
345 
ProcessDescriptorObus(bool is_exhaustive_and_exact,ReadBitBuffer & read_bit_buffer,IASequenceHeaderObu & output_sequence_header,absl::flat_hash_map<DecodedUleb128,CodecConfigObu> & output_codec_config_obus,absl::flat_hash_map<DecodedUleb128,AudioElementWithData> & output_audio_elements_with_data,std::list<MixPresentationObu> & output_mix_presentation_obus,bool & output_insufficient_data)346 absl::Status ObuProcessor::ProcessDescriptorObus(
347     bool is_exhaustive_and_exact, ReadBitBuffer& read_bit_buffer,
348     IASequenceHeaderObu& output_sequence_header,
349     absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
350         output_codec_config_obus,
351     absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
352         output_audio_elements_with_data,
353     std::list<MixPresentationObu>& output_mix_presentation_obus,
354     bool& output_insufficient_data) {
355   // `output_insufficient_data` indicates a specific error condition and so is
356   // true iff we've received valid data but need more of it.
357   output_insufficient_data = false;
358   auto audio_element_obu_map =
359       absl::flat_hash_map<DecodedUleb128, AudioElementObu>();
360   const int64_t global_position_before_all_obus = read_bit_buffer.Tell();
361   bool processed_ia_header = false;
362   bool continue_processing = true;
363   while (continue_processing) {
364     auto header_metadata =
365         ObuHeader::PeekObuTypeAndTotalObuSize(read_bit_buffer);
366     if (!header_metadata.ok()) {
367       if (header_metadata.status().code() ==
368           absl::StatusCode::kResourceExhausted) {
369         // Can't read header because there is not enough data.
370         return InsufficientDataReset(
371             read_bit_buffer, global_position_before_all_obus,
372             output_insufficient_data, output_codec_config_obus,
373             output_audio_elements_with_data, output_mix_presentation_obus);
374       } else {
375         // Some other error occurred, propagate it.
376         return header_metadata.status();
377       }
378     }
379 
380     // Now, we know we were at least able to read obu_type and the total size of
381     // the obu.
382     if (ObuHeader::IsTemporalUnitObuType(header_metadata->obu_type)) {
383       if (is_exhaustive_and_exact) {
384         auto error_status = absl::InvalidArgumentError(
385             "Descriptor OBUs must not contain a temporal unit OBU when "
386             "is_exhaustive_and_exact is true.");
387         LOG(ERROR) << error_status;
388         RETURN_IF_NOT_OK(read_bit_buffer.Seek(global_position_before_all_obus));
389         return error_status;
390       }
391       // Since it's a temporal unit, we know we are done reading descriptor
392       // OBUs. Since we've only peeked on this iteration of the loop, no need to
393       // rewind the buffer.
394       // Check that we've processed an IA header to ensure it's a valid IA
395       // Sequence.
396       if (!processed_ia_header) {
397         return absl::InvalidArgumentError(
398             "An IA Sequence and/or descriptor OBUs must always start with an "
399             "IA Header.");
400       }
401       // Break out of the while loop since we've reached the end of the
402       // descriptor OBUs; should not seek back to the beginning of the buffer
403       // since this is a successful termination.
404       break;
405     }
406 
407     // Now, we know that this is not a temporal unit OBU.
408     if (!read_bit_buffer.CanReadBytes(header_metadata->total_obu_size)) {
409       // This is a descriptor OBU for which we don't have enough data.
410       return InsufficientDataReset(
411           read_bit_buffer, global_position_before_all_obus,
412           output_insufficient_data, output_codec_config_obus,
413           output_audio_elements_with_data, output_mix_presentation_obus);
414     }
415     // Now we know we can read the entire obu.
416     const int64_t position_before_header = read_bit_buffer.Tell();
417     ObuHeader header;
418     // Note that `payload_size` is different from the total obu size calculated
419     // by `PeekObuTypeAndTotalObuSize`.
420     int64_t payload_size;
421     RETURN_IF_NOT_OK(header.ReadAndValidate(read_bit_buffer, payload_size));
422     switch (header.obu_type) {
423       case kObuIaSequenceHeader: {
424         if (processed_ia_header && !header.obu_redundant_copy) {
425           LOG(WARNING) << "Detected an IA Sequence without temporal units.";
426           continue_processing = false;
427           break;
428         }
429         auto ia_sequence_header_obu = IASequenceHeaderObu::CreateFromBuffer(
430             header, payload_size, read_bit_buffer);
431         if (!ia_sequence_header_obu.ok()) {
432           return ia_sequence_header_obu.status();
433         }
434         output_sequence_header = *std::move(ia_sequence_header_obu);
435         output_sequence_header.PrintObu();
436         processed_ia_header = true;
437         break;
438       }
439       case kObuIaCodecConfig: {
440         RETURN_IF_NOT_OK(GetAndStoreCodecConfigObu(
441             header, payload_size, output_codec_config_obus, read_bit_buffer));
442         break;
443       }
444       case kObuIaAudioElement: {
445         RETURN_IF_NOT_OK(GetAndStoreAudioElementObu(
446             header, payload_size, audio_element_obu_map, read_bit_buffer));
447         break;
448       }
449       case kObuIaMixPresentation: {
450         RETURN_IF_NOT_OK(GetAndStoreMixPresentationObu(
451             header, payload_size, output_mix_presentation_obus,
452             read_bit_buffer));
453         break;
454       }
455       case kObuIaReserved24:
456       case kObuIaReserved25:
457       case kObuIaReserved26:
458       case kObuIaReserved27:
459       case kObuIaReserved28:
460       case kObuIaReserved29:
461       case kObuIaReserved30: {
462         // Reserved OBUs may occur in the sequence of Descriptor OBUs. For
463         // now, ignore any reserved OBUs by skipping over their bits in the
464         // buffer.
465         continue_processing = true;
466         LOG(INFO) << "Detected a reserved OBU while parsing Descriptor OBUs. "
467                   << "Safely ignoring it.";
468         std::vector<uint8_t> buffer_to_discard(payload_size);
469         RETURN_IF_NOT_OK(
470             read_bit_buffer.ReadUint8Span(absl::MakeSpan(buffer_to_discard)));
471         break;
472       }
473       default:
474         /// TODO(b/387550488): Handle reserved OBUs.
475         continue_processing = false;
476         break;
477     }
478     if (!continue_processing) {
479       // Rewind the position to before the last header was read.
480       LOG(INFO) << "position_before_header: " << position_before_header;
481       RETURN_IF_NOT_OK(read_bit_buffer.Seek(position_before_header));
482     }
483     if (!processed_ia_header) {
484       return absl::InvalidArgumentError(
485           "An IA Sequence and/or descriptor OBUs must always start with an IA "
486           "Header.");
487     }
488     if (is_exhaustive_and_exact && !read_bit_buffer.IsDataAvailable()) {
489       // We've reached the end of the bitstream and we've processed all
490       // descriptor OBUs.
491       break;
492     }
493   }
494   if (!audio_element_obu_map.empty()) {
495     auto audio_elements_with_data =
496         ObuWithDataGenerator::GenerateAudioElementsWithData(
497             output_codec_config_obus, audio_element_obu_map);
498     if (!audio_elements_with_data.ok()) {
499       return audio_elements_with_data.status();
500     }
501     output_audio_elements_with_data = std::move(*audio_elements_with_data);
502   }
503   return absl::OkStatus();
504 }
505 
ProcessTemporalUnitObu(const absl::flat_hash_map<DecodedUleb128,AudioElementWithData> & audio_elements_with_data,const absl::flat_hash_map<DecodedUleb128,CodecConfigObu> & codec_config_obus,const absl::flat_hash_map<DecodedUleb128,const AudioElementWithData * > & substream_id_to_audio_element,const absl::flat_hash_map<DecodedUleb128,ParamDefinitionVariant> & param_definition_variants,ParametersManager & parameters_manager,ReadBitBuffer & read_bit_buffer,GlobalTimingModule & global_timing_module,std::optional<AudioFrameWithData> & output_audio_frame_with_data,std::optional<ParameterBlockWithData> & output_parameter_block_with_data,std::optional<TemporalDelimiterObu> & output_temporal_delimiter,bool & continue_processing)506 absl::Status ObuProcessor::ProcessTemporalUnitObu(
507     const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
508         audio_elements_with_data,
509     const absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
510         codec_config_obus,
511     const absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*>&
512         substream_id_to_audio_element,
513     const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>&
514         param_definition_variants,
515     ParametersManager& parameters_manager, ReadBitBuffer& read_bit_buffer,
516     GlobalTimingModule& global_timing_module,
517     std::optional<AudioFrameWithData>& output_audio_frame_with_data,
518     std::optional<ParameterBlockWithData>& output_parameter_block_with_data,
519     std::optional<TemporalDelimiterObu>& output_temporal_delimiter,
520     bool& continue_processing) {
521   continue_processing = true;
522   output_audio_frame_with_data.reset();
523   output_parameter_block_with_data.reset();
524   output_temporal_delimiter.reset();
525 
526   auto header_metadata = ObuHeader::PeekObuTypeAndTotalObuSize(read_bit_buffer);
527   if (!header_metadata.ok()) {
528     if (header_metadata.status().code() ==
529         absl::StatusCode::kResourceExhausted) {
530       // Can't read header because there is not enough data. This is not an
531       // error, but we're done processing for now.
532       continue_processing = false;
533       return absl::OkStatus();
534     } else {
535       // Some other error occurred, propagate it.
536       return header_metadata.status();
537     }
538   }
539 
540   if (!read_bit_buffer.CanReadBytes(header_metadata->total_obu_size)) {
541     // This is a temporal unit OBU for which we don't have enough data. This is
542     // not an error, but we're done processing for now.
543     continue_processing = false;
544     return absl::OkStatus();
545   }
546 
547   const int64_t position_before_header = read_bit_buffer.Tell();
548 
549   // Read in the header and determines the size of the payload in bytes.
550   ObuHeader header;
551   int64_t payload_size;
552   RETURN_IF_NOT_OK(header.ReadAndValidate(read_bit_buffer, payload_size));
553 
554   // Typically we should expect {`kObuIaAudioFrameX`,`kObuIaParameterBlock`,
555   // `kObuIaTemporalDelimiter`}. We also want to detect an `kIaSequenceHeader`
556   // which would signal the start of a new IA Sequence, and to gracefully
557   // handle "reserved" OBUs.
558   switch (header.obu_type) {
559     case kObuIaAudioFrame:
560     case kObuIaAudioFrameId0:
561     case kObuIaAudioFrameId1:
562     case kObuIaAudioFrameId2:
563     case kObuIaAudioFrameId3:
564     case kObuIaAudioFrameId4:
565     case kObuIaAudioFrameId5:
566     case kObuIaAudioFrameId6:
567     case kObuIaAudioFrameId7:
568     case kObuIaAudioFrameId8:
569     case kObuIaAudioFrameId9:
570     case kObuIaAudioFrameId10:
571     case kObuIaAudioFrameId11:
572     case kObuIaAudioFrameId12:
573     case kObuIaAudioFrameId13:
574     case kObuIaAudioFrameId14:
575     case kObuIaAudioFrameId15:
576     case kObuIaAudioFrameId16:
577     case kObuIaAudioFrameId17: {
578       RETURN_IF_NOT_OK(GetAndStoreAudioFrameWithData(
579           header, payload_size, audio_elements_with_data,
580           substream_id_to_audio_element, read_bit_buffer, global_timing_module,
581           parameters_manager, output_audio_frame_with_data));
582       break;
583     }
584     case kObuIaParameterBlock: {
585       RETURN_IF_NOT_OK(GetAndStoreParameterBlockWithData(
586           header, payload_size, param_definition_variants, read_bit_buffer,
587           global_timing_module, output_parameter_block_with_data));
588       break;
589     }
590     case kObuIaTemporalDelimiter: {
591       // This implementation does not process by temporal unit. Safely ignore
592       // it.
593       const auto& temporal_delimiter = TemporalDelimiterObu::CreateFromBuffer(
594           header, payload_size, read_bit_buffer);
595       if (!temporal_delimiter.ok()) {
596         return temporal_delimiter.status();
597       }
598       output_temporal_delimiter = *temporal_delimiter;
599       break;
600     }
601     case kObuIaSequenceHeader:
602       if (!header.obu_redundant_copy) {
603         // OK. The user of this function will need to reconfigure its state to
604         // process the next IA sequence.
605         LOG(INFO) << "Detected the start of the next IA Sequence.";
606         continue_processing = false;
607         break;
608       }
609       // Ok for any IAMF v1.1.0 descriptor OBUs we can skip over redundant
610       // copies.
611       [[fallthrough]];
612     case kObuIaCodecConfig:
613     case kObuIaAudioElement:
614     case kObuIaMixPresentation:
615       if (!header.obu_redundant_copy) {
616         return absl::InvalidArgumentError(absl::StrCat(
617             "Unexpected non-reserved OBU obu_type= ", header.obu_type));
618       }
619       // Consume and discard the OBU. IAMF allows us to ignore it (even if the
620       // redundant flag is misleading).
621       [[fallthrough]];
622     default:
623       // TODO(b/329705373): Read in the data as an `ArbitraryOBU` and output
624       //                    it from this function.
625       LOG(INFO) << "Detected a reserved or redundant OBU. Safely ignoring it.";
626       std::vector<uint8_t> buffer_to_discard(payload_size);
627       RETURN_IF_NOT_OK(
628           read_bit_buffer.ReadUint8Span(absl::MakeSpan(buffer_to_discard)));
629       break;
630   }
631 
632   if (!continue_processing) {
633     // Rewind the position to before the last header was read.
634     LOG(INFO) << "position_before_header: " << position_before_header;
635     RETURN_IF_NOT_OK(read_bit_buffer.Seek(position_before_header));
636   }
637 
638   return absl::OkStatus();
639 }
640 
Create(bool is_exhaustive_and_exact,ReadBitBuffer * read_bit_buffer,bool & output_insufficient_data)641 std::unique_ptr<ObuProcessor> ObuProcessor::Create(
642     bool is_exhaustive_and_exact, ReadBitBuffer* read_bit_buffer,
643     bool& output_insufficient_data) {
644   // `output_insufficient_data` indicates a specific error condition and so is
645   // true iff we've received valid data but need more of it.
646   output_insufficient_data = false;
647   if (read_bit_buffer == nullptr) {
648     return nullptr;
649   }
650   std::unique_ptr<ObuProcessor> obu_processor =
651       absl::WrapUnique(new ObuProcessor(read_bit_buffer));
652   if (const auto status = obu_processor->InitializeInternal(
653           is_exhaustive_and_exact, output_insufficient_data);
654       !status.ok()) {
655     LOG(ERROR) << status;
656     return nullptr;
657   }
658   return obu_processor;
659 }
660 
CreateForRendering(const Layout & desired_layout,const RenderingMixPresentationFinalizer::SampleProcessorFactory & sample_processor_factory,bool is_exhaustive_and_exact,ReadBitBuffer * read_bit_buffer,Layout & output_layout,bool & output_insufficient_data)661 std::unique_ptr<ObuProcessor> ObuProcessor::CreateForRendering(
662     const Layout& desired_layout,
663     const RenderingMixPresentationFinalizer::SampleProcessorFactory&
664         sample_processor_factory,
665     bool is_exhaustive_and_exact, ReadBitBuffer* read_bit_buffer,
666     Layout& output_layout, bool& output_insufficient_data) {
667   // `output_insufficient_data` indicates a specific error condition and so is
668   // true iff we've received valid data but need more of it.
669   output_insufficient_data = false;
670   if (read_bit_buffer == nullptr) {
671     return nullptr;
672   }
673   std::unique_ptr<ObuProcessor> obu_processor =
674       absl::WrapUnique(new ObuProcessor(read_bit_buffer));
675   if (const auto status = obu_processor->InitializeInternal(
676           is_exhaustive_and_exact, output_insufficient_data);
677       !status.ok()) {
678     LOG(ERROR) << status;
679     return nullptr;
680   }
681 
682   if (const auto status = obu_processor->InitializeForRendering(
683           desired_layout, sample_processor_factory, output_layout);
684       !status.ok()) {
685     LOG(ERROR) << status;
686     return nullptr;
687   }
688   return obu_processor;
689 }
690 
GetOutputSampleRate() const691 absl::StatusOr<uint32_t> ObuProcessor::GetOutputSampleRate() const {
692   RETURN_IF_NOT_OK(
693       ValidateHasValue(output_sample_rate_,
694                        "Output sample rate, was this a trivial IA Sequence?"));
695   return *output_sample_rate_;
696 }
697 
GetOutputFrameSize() const698 absl::StatusOr<uint32_t> ObuProcessor::GetOutputFrameSize() const {
699   RETURN_IF_NOT_OK(
700       ValidateHasValue(output_frame_size_,
701                        "Output frame size, was this a trivial IA Sequence?"));
702   return *output_frame_size_;
703 }
704 
InitializeForRendering(const Layout & desired_layout,const RenderingMixPresentationFinalizer::SampleProcessorFactory & sample_processor_factory,Layout & output_layout)705 absl::Status ObuProcessor::InitializeForRendering(
706     const Layout& desired_layout,
707     const RenderingMixPresentationFinalizer::SampleProcessorFactory&
708         sample_processor_factory,
709     Layout& output_layout) {
710   if (mix_presentations_.empty()) {
711     return absl::InvalidArgumentError("No mix presentation OBUs found.");
712   }
713   if (audio_elements_.empty()) {
714     return absl::InvalidArgumentError("No audio element OBUs found.");
715   }
716 
717   // TODO(b/377747704): Decode only the frames selected for the playback
718   //                    layout.
719   audio_frame_decoder_.emplace();
720   for (const auto& [unused_id, audio_element_with_data] : audio_elements_) {
721     RETURN_IF_NOT_OK(audio_frame_decoder_->InitDecodersForSubstreams(
722         audio_element_with_data.substream_id_to_labels,
723         *audio_element_with_data.codec_config));
724   }
725   {
726     auto temp_demixing_module =
727         DemixingModule::CreateForReconstruction(audio_elements_);
728     if (!temp_demixing_module.ok()) {
729       return temp_demixing_module.status();
730     }
731     demixing_module_.emplace(*std::move(temp_demixing_module));
732   }
733 
734   // TODO(b/340289717): Add a way to select the mix presentation if multiple
735   //                    are supported.
736   const std::list<MixPresentationObu*> supported_mix_presentations =
737       GetSupportedMixPresentations(audio_elements_, mix_presentations_);
738   if (supported_mix_presentations.empty()) {
739     return absl::NotFoundError("No supported mix presentation OBUs found.");
740   }
741   Layout playback_layout;
742   auto mix_presentation_to_render = GetPlaybackLayoutAndMixPresentation(
743       supported_mix_presentations, desired_layout, output_layout);
744   if (!mix_presentation_to_render.ok()) {
745     return mix_presentation_to_render.status();
746   }
747   int playback_sub_mix_index;
748   int playback_layout_index;
749   RETURN_IF_NOT_OK(GetIndicesForLayout(
750       (*mix_presentation_to_render)->sub_mixes_, output_layout,
751       playback_sub_mix_index, playback_layout_index));
752   decoding_layout_info_ = {
753       .mix_presentation_id =
754           (*mix_presentation_to_render)->GetMixPresentationId(),
755       .sub_mix_index = playback_sub_mix_index,
756       .layout_index = playback_layout_index,
757   };
758   auto forward_on_desired_layout =
759       [&sample_processor_factory, mix_presentation_to_render,
760        playback_sub_mix_index, playback_layout_index](
761           DecodedUleb128 mix_presentation_id, int sub_mix_index,
762           int layout_index, const Layout& layout, int num_channels,
763           int sample_rate, int bit_depth, size_t max_input_samples_per_frame)
764       -> std::unique_ptr<SampleProcessorBase> {
765     if (mix_presentation_id ==
766             (*mix_presentation_to_render)->GetMixPresentationId() &&
767         playback_sub_mix_index == sub_mix_index &&
768         playback_layout_index == layout_index) {
769       return sample_processor_factory(
770           mix_presentation_id, sub_mix_index, layout_index, layout,
771           num_channels, sample_rate, bit_depth, max_input_samples_per_frame);
772     }
773     return nullptr;
774   };
775 
776   // Create the mix presentation finalizer which is used to render the output
777   // files. We neither trust the user-provided loudness, nor care about the
778   // calculated loudness.
779   const RendererFactory renderer_factory;
780   absl::StatusOr<RenderingMixPresentationFinalizer> mix_presentation_finalizer =
781       RenderingMixPresentationFinalizer::Create(
782           /*renderer_factory=*/&renderer_factory,
783           /*loudness_calculator_factory=*/nullptr, audio_elements_,
784           forward_on_desired_layout, mix_presentations_);
785   if (!mix_presentation_finalizer.ok()) {
786     return mix_presentation_finalizer.status();
787   }
788   mix_presentation_finalizer_.emplace(*std::move(mix_presentation_finalizer));
789 
790   return absl::OkStatus();
791 }
792 
ProcessTemporalUnitObu(std::optional<AudioFrameWithData> & output_audio_frame_with_data,std::optional<ParameterBlockWithData> & output_parameter_block_with_data,std::optional<TemporalDelimiterObu> & output_temporal_delimiter,bool & continue_processing)793 absl::Status ObuProcessor::ProcessTemporalUnitObu(
794     std::optional<AudioFrameWithData>& output_audio_frame_with_data,
795     std::optional<ParameterBlockWithData>& output_parameter_block_with_data,
796     std::optional<TemporalDelimiterObu>& output_temporal_delimiter,
797     bool& continue_processing) {
798   if (!parameters_manager_.has_value()) {
799     return absl::InvalidArgumentError(
800         "Parameters manager is not constructed; "
801         "remember to call `Initialize()` first.");
802   }
803   if (global_timing_module_ == nullptr) {
804     return absl::InvalidArgumentError(
805         "Global timing module is not constructed; "
806         "remember to call `Initialize()` first.");
807   }
808   if (read_bit_buffer_ == nullptr) {
809     return absl::InvalidArgumentError(
810         "Read bit buffer is not constructed; "
811         "remember to call `Initialize()` first.");
812   }
813 
814   return ObuProcessor::ProcessTemporalUnitObu(
815       audio_elements_, codec_config_obus_, substream_id_to_audio_element_,
816       param_definition_variants_, *parameters_manager_, *read_bit_buffer_,
817       *global_timing_module_, output_audio_frame_with_data,
818       output_parameter_block_with_data, output_temporal_delimiter,
819       continue_processing);
820 }
821 
ProcessTemporalUnit(bool eos_is_end_of_sequence,std::optional<OutputTemporalUnit> & output_temporal_unit,bool & continue_processing)822 absl::Status ObuProcessor::ProcessTemporalUnit(
823     bool eos_is_end_of_sequence,
824     std::optional<OutputTemporalUnit>& output_temporal_unit,
825     bool& continue_processing) {
826   continue_processing = true;
827   while (continue_processing) {
828     std::optional<AudioFrameWithData> audio_frame_with_data;
829     std::optional<ParameterBlockWithData> parameter_block_with_data;
830     std::optional<TemporalDelimiterObu> temporal_delimiter;
831     RETURN_IF_NOT_OK(
832         ProcessTemporalUnitObu(audio_frame_with_data, parameter_block_with_data,
833                                temporal_delimiter, continue_processing));
834 
835     // Collect OBUs into a temporal unit.
836     if (audio_frame_with_data.has_value()) {
837       TemporalUnitData::AddDataToCorrectTemporalUnit(
838           current_temporal_unit_, next_temporal_unit_,
839           *std::move(audio_frame_with_data));
840     } else if (parameter_block_with_data.has_value()) {
841       TemporalUnitData::AddDataToCorrectTemporalUnit(
842           current_temporal_unit_, next_temporal_unit_,
843           *std::move(parameter_block_with_data));
844     } else if (temporal_delimiter.has_value()) {
845       current_temporal_unit_.temporal_delimiter = *temporal_delimiter;
846     }
847 
848     // The current temporal unit is considered finished if any of the
849     // following conditions is met:
850     // - The end of sequence is reached.
851     // - The timestamp has advanced (i.e. when the next temporal unit gets its
852     //   timestamp).
853     // - A temporal delimiter is encountered.
854     if ((!continue_processing && eos_is_end_of_sequence) ||
855         next_temporal_unit_.timestamp.has_value() ||
856         current_temporal_unit_.temporal_delimiter.has_value()) {
857       output_temporal_unit = OutputTemporalUnit();
858       output_temporal_unit->output_audio_frames =
859           std::move(current_temporal_unit_.audio_frames);
860       output_temporal_unit->output_parameter_blocks =
861           std::move(current_temporal_unit_.parameter_blocks);
862       if (current_temporal_unit_.timestamp.has_value()) {
863         output_temporal_unit->output_timestamp =
864             current_temporal_unit_.timestamp.value();
865       }
866       current_temporal_unit_ = std::move(next_temporal_unit_);
867       next_temporal_unit_ = TemporalUnitData();
868       break;
869     }
870   }
871 
872   return absl::OkStatus();
873 }
874 
RenderTemporalUnitAndMeasureLoudness(InternalTimestamp start_timestamp,const std::list<AudioFrameWithData> & audio_frames,const std::list<ParameterBlockWithData> & parameter_blocks,absl::Span<const std::vector<int32_t>> & output_rendered_pcm_samples)875 absl::Status ObuProcessor::RenderTemporalUnitAndMeasureLoudness(
876     InternalTimestamp start_timestamp,
877     const std::list<AudioFrameWithData>& audio_frames,
878     const std::list<ParameterBlockWithData>& parameter_blocks,
879     absl::Span<const std::vector<int32_t>>& output_rendered_pcm_samples) {
880   if (audio_frames.empty()) {
881     // Nothing to decode, render, or measure loudness of.
882     return absl::OkStatus();
883   }
884 
885   if (!audio_frame_decoder_.has_value()) {
886     return absl::InvalidArgumentError(
887         "Audio frame decoder is not constructed; "
888         "remember to call `InitializeForRendering()` first.");
889   }
890   if (!demixing_module_.has_value()) {
891     return absl::InvalidArgumentError(
892         "Demxing module is not constructed; "
893         "remember to call `InitializeForRendering()` first.");
894   }
895   if (!mix_presentation_finalizer_.has_value()) {
896     return absl::InvalidArgumentError(
897         "Mix presentation finalizer is not constructed; "
898         "remember to call `InitializeForRendering()` first.");
899   }
900 
901   // Decode the temporal unit.
902   std::optional<InternalTimestamp> end_timestamp;
903 
904   // This resizing should happen only once per IA sequence, since all the
905   // temporal units should contain the same number of audio frames.
906   decoded_frames_for_temporal_unit_.resize(audio_frames.size());
907   auto decoded_frames_iter = decoded_frames_for_temporal_unit_.begin();
908   for (const auto& audio_frame : audio_frames) {
909     if (!end_timestamp.has_value()) {
910       end_timestamp = audio_frame.end_timestamp;
911     }
912     RETURN_IF_NOT_OK(
913         CompareTimestamps(start_timestamp, audio_frame.start_timestamp,
914                           "Audio frame has a different start timestamp than "
915                           "the temporal unit: "));
916     RETURN_IF_NOT_OK(CompareTimestamps(*end_timestamp,
917                                        audio_frame.end_timestamp,
918                                        "Audio frame has a different end "
919                                        "timestamp than the temporal unit: "));
920     auto decoded_frame = audio_frame_decoder_->Decode(audio_frame);
921     if (!decoded_frame.ok()) {
922       return decoded_frame.status();
923     }
924     *decoded_frames_iter = std::move(*decoded_frame);
925     decoded_frames_iter++;
926   }
927 
928   // Reconstruct the temporal unit and store the result in the output map.
929   const auto decoded_labeled_frames_for_temporal_unit =
930       demixing_module_->DemixDecodedAudioSamples(
931           decoded_frames_for_temporal_unit_);
932   if (!decoded_labeled_frames_for_temporal_unit.ok()) {
933     return decoded_labeled_frames_for_temporal_unit.status();
934   }
935 
936   RETURN_IF_NOT_OK(mix_presentation_finalizer_->PushTemporalUnit(
937       *decoded_labeled_frames_for_temporal_unit, start_timestamp,
938       *end_timestamp, parameter_blocks));
939 
940   auto rendered_samples =
941       mix_presentation_finalizer_->GetPostProcessedSamplesAsSpan(
942           decoding_layout_info_.mix_presentation_id,
943           decoding_layout_info_.sub_mix_index,
944           decoding_layout_info_.layout_index);
945   if (!rendered_samples.ok()) {
946     return rendered_samples.status();
947   }
948   output_rendered_pcm_samples = *rendered_samples;
949 
950   // TODO(b/379122580): Add a call to `FinalizePushingTemporalUnits`, then a
951   //                    final call to `GetPostProcessedSamplesAsSpan` when there
952   //                    are no more temporal units to push. Those calls may
953   //                    belong elsewhere in the class depending on the
954   //                    interface.
955 
956   return absl::OkStatus();
957 }
958 
959 }  // namespace iamf_tools
960