1 /*
2 * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12
13 #include "iamf/cli/obu_processor.h"
14
15 #include <cstddef>
16 #include <cstdint>
17 #include <list>
18 #include <memory>
19 #include <optional>
20 #include <string>
21 #include <utility>
22 #include <vector>
23
24 #include "absl/container/flat_hash_map.h"
25 #include "absl/container/flat_hash_set.h"
26 #include "absl/log/check.h"
27 #include "absl/log/log.h"
28 #include "absl/memory/memory.h"
29 #include "absl/status/status.h"
30 #include "absl/status/statusor.h"
31 #include "absl/strings/str_cat.h"
32 #include "absl/strings/string_view.h"
33 #include "absl/types/span.h"
34 #include "iamf/cli/audio_element_with_data.h"
35 #include "iamf/cli/audio_frame_with_data.h"
36 #include "iamf/cli/cli_util.h"
37 #include "iamf/cli/demixing_module.h"
38 #include "iamf/cli/global_timing_module.h"
39 #include "iamf/cli/obu_with_data_generator.h"
40 #include "iamf/cli/parameter_block_with_data.h"
41 #include "iamf/cli/parameters_manager.h"
42 #include "iamf/cli/profile_filter.h"
43 #include "iamf/cli/renderer_factory.h"
44 #include "iamf/cli/rendering_mix_presentation_finalizer.h"
45 #include "iamf/cli/sample_processor_base.h"
46 #include "iamf/common/read_bit_buffer.h"
47 #include "iamf/common/utils/macros.h"
48 #include "iamf/common/utils/validation_utils.h"
49 #include "iamf/obu/audio_element.h"
50 #include "iamf/obu/audio_frame.h"
51 #include "iamf/obu/codec_config.h"
52 #include "iamf/obu/ia_sequence_header.h"
53 #include "iamf/obu/mix_presentation.h"
54 #include "iamf/obu/obu_header.h"
55 #include "iamf/obu/param_definition_variant.h"
56 #include "iamf/obu/parameter_block.h"
57 #include "iamf/obu/temporal_delimiter.h"
58 #include "iamf/obu/types.h"
59
60 namespace iamf_tools {
61
62 namespace {
63
64 // Gets a CodecConfigObu from `read_bit_buffer` and stores it into
65 // `codec_config_obu_map`, using the `codec_config_id` as the unique key.
GetAndStoreCodecConfigObu(const ObuHeader & header,int64_t payload_size,absl::flat_hash_map<DecodedUleb128,CodecConfigObu> & codec_config_obu_map,ReadBitBuffer & read_bit_buffer)66 absl::Status GetAndStoreCodecConfigObu(
67 const ObuHeader& header, int64_t payload_size,
68 absl::flat_hash_map<DecodedUleb128, CodecConfigObu>& codec_config_obu_map,
69 ReadBitBuffer& read_bit_buffer) {
70 absl::StatusOr<CodecConfigObu> codec_config_obu =
71 CodecConfigObu::CreateFromBuffer(header, payload_size, read_bit_buffer);
72 if (!codec_config_obu.ok()) {
73 return codec_config_obu.status();
74 }
75 codec_config_obu->PrintObu();
76 codec_config_obu_map.insert(
77 {codec_config_obu->GetCodecConfigId(), *std::move(codec_config_obu)});
78 return absl::OkStatus();
79 }
80
GetAndStoreAudioElementObu(const ObuHeader & header,int64_t payload_size,absl::flat_hash_map<DecodedUleb128,AudioElementObu> & audio_element_obu_map,ReadBitBuffer & read_bit_buffer)81 absl::Status GetAndStoreAudioElementObu(
82 const ObuHeader& header, int64_t payload_size,
83 absl::flat_hash_map<DecodedUleb128, AudioElementObu>& audio_element_obu_map,
84 ReadBitBuffer& read_bit_buffer) {
85 absl::StatusOr<AudioElementObu> audio_element_obu =
86 AudioElementObu::CreateFromBuffer(header, payload_size, read_bit_buffer);
87 if (!audio_element_obu.ok()) {
88 return audio_element_obu.status();
89 }
90 audio_element_obu->PrintObu();
91 audio_element_obu_map.insert(
92 {audio_element_obu->GetAudioElementId(), *std::move(audio_element_obu)});
93 return absl::OkStatus();
94 }
95
GetAndStoreMixPresentationObu(const ObuHeader & header,int64_t payload_size,std::list<MixPresentationObu> & mix_presentation_obus,ReadBitBuffer & read_bit_buffer)96 absl::Status GetAndStoreMixPresentationObu(
97 const ObuHeader& header, int64_t payload_size,
98 std::list<MixPresentationObu>& mix_presentation_obus,
99 ReadBitBuffer& read_bit_buffer) {
100 absl::StatusOr<MixPresentationObu> mix_presentation_obu =
101 MixPresentationObu::CreateFromBuffer(header, payload_size,
102 read_bit_buffer);
103 if (!mix_presentation_obu.ok()) {
104 return mix_presentation_obu.status();
105 }
106 LOG(INFO) << "Mix Presentation OBU successfully parsed.";
107 mix_presentation_obu->PrintObu();
108 mix_presentation_obus.push_back(*std::move(mix_presentation_obu));
109 return absl::OkStatus();
110 }
111
// Advances the demixing and recon gain states of every audio element once the
// global timing module reports a global audio frame timestamp.
//
// When the timing module has no global timestamp yet, nothing is updated and
// OK is returned. Any error from the timing module or the parameters manager
// is propagated through `RETURN_IF_NOT_OK`.
absl::Status UpdateParameterStatesIfNeeded(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements_with_data,
    const GlobalTimingModule& global_timing_module,
    ParametersManager& parameters_manager) {
  std::optional<InternalTimestamp> global_timestamp;
  RETURN_IF_NOT_OK(
      global_timing_module.GetGlobalAudioFrameTimestamp(global_timestamp));

  if (global_timestamp.has_value()) {
    // A global timestamp is available, so bring the parameters manager up to
    // date for each audio element.
    for (const auto& [element_id, unused_element] : audio_elements_with_data) {
      RETURN_IF_NOT_OK(
          parameters_manager.UpdateDemixingState(element_id, *global_timestamp));
      RETURN_IF_NOT_OK(parameters_manager.UpdateReconGainState(
          element_id, *global_timestamp));
    }
  }
  return absl::OkStatus();
}
136
// Parses an Audio Frame OBU from `read_bit_buffer` and converts it into an
// `AudioFrameWithData`.
//
// `output_audio_frame_with_data` is reset first, so it only holds a value when
// the frame was parsed and generated successfully. The owning audio element is
// looked up through `substream_id_to_audio_element` using the frame's
// substream ID. After the frame is stored, the parameter states are updated
// via `UpdateParameterStatesIfNeeded`.
//
// Returns:
//   - The parsing error if the OBU cannot be created.
//   - `InvalidArgumentError` if no audio element owns the substream ID.
//   - Any error from generating the frame or updating the parameter states.
absl::Status GetAndStoreAudioFrameWithData(
    const ObuHeader& header, const int64_t payload_size,
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements_with_data,
    const absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*>&
        substream_id_to_audio_element,
    ReadBitBuffer& read_bit_buffer, GlobalTimingModule& global_timing_module,
    ParametersManager& parameters_manager,
    std::optional<AudioFrameWithData>& output_audio_frame_with_data) {
  output_audio_frame_with_data.reset();
  auto audio_frame_obu =
      AudioFrameObu::CreateFromBuffer(header, payload_size, read_bit_buffer);
  if (!audio_frame_obu.ok()) {
    return audio_frame_obu.status();
  }
  const auto substream_id = audio_frame_obu->GetSubstreamId();
  const auto audio_element_iter =
      substream_id_to_audio_element.find(substream_id);
  if (audio_element_iter == substream_id_to_audio_element.end()) {
    return absl::InvalidArgumentError(absl::StrCat(
        "No audio element found having substream ID: ", substream_id));
  }
  const auto& audio_element_with_data = *audio_element_iter->second;
  auto audio_frame_with_data = ObuWithDataGenerator::GenerateAudioFrameWithData(
      audio_element_with_data, *audio_frame_obu, global_timing_module,
      parameters_manager);
  if (!audio_frame_with_data.ok()) {
    return audio_frame_with_data.status();
  }
  // Move the generated frame out of the `StatusOr` instead of copying it; the
  // temporary is not used again.
  output_audio_frame_with_data = *std::move(audio_frame_with_data);

  RETURN_IF_NOT_OK(UpdateParameterStatesIfNeeded(
      audio_elements_with_data, global_timing_module, parameters_manager));

  return absl::OkStatus();
}
173
// Parses a Parameter Block OBU from `read_bit_buffer` and converts it into a
// `ParameterBlockWithData`, stamped relative to the current global audio frame
// timestamp.
//
// `output_parameter_block_with_data` is only assigned on success.
//
// Returns:
//   - The parsing error if the OBU cannot be created.
//   - `InvalidArgumentError` if the timing module has no global timestamp.
//   - Any error from the timing module or from generating the block.
absl::Status GetAndStoreParameterBlockWithData(
    const ObuHeader& header, const int64_t payload_size,
    const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>&
        param_definition_variants,
    ReadBitBuffer& read_bit_buffer, GlobalTimingModule& global_timing_module,
    std::optional<ParameterBlockWithData>& output_parameter_block_with_data) {
  auto parsed_obu = ParameterBlockObu::CreateFromBuffer(
      header, payload_size, param_definition_variants, read_bit_buffer);
  if (!parsed_obu.ok()) {
    return parsed_obu.status();
  }

  // A parameter block can only be placed in time once a global timestamp
  // exists.
  std::optional<InternalTimestamp> current_timestamp;
  RETURN_IF_NOT_OK(
      global_timing_module.GetGlobalAudioFrameTimestamp(current_timestamp));
  if (!current_timestamp.has_value()) {
    return absl::InvalidArgumentError(
        "Global timestamp has no value while generating a parameter "
        "block");
  }

  // Hand the parsed OBU over to the generator.
  auto generated_block = ObuWithDataGenerator::GenerateParameterBlockWithData(
      *current_timestamp, global_timing_module, std::move(*parsed_obu));
  if (!generated_block.ok()) {
    return generated_block.status();
  }

  output_parameter_block_with_data = std::move(*generated_block);
  return absl::OkStatus();
}
207
208 // Returns a list of pointers to the supported mix presentations. Empty if none
209 // are supported.
GetSupportedMixPresentations(const absl::flat_hash_map<uint32_t,AudioElementWithData> & audio_elements,std::list<MixPresentationObu> & mix_presentation_obus)210 std::list<MixPresentationObu*> GetSupportedMixPresentations(
211 const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
212 std::list<MixPresentationObu>& mix_presentation_obus) {
213 // TODO(b/377554944): Support `ProfileVersion::kIamfBaseEnhancedProfile`.
214 // Only permit certain profiles to be used.
215 const absl::flat_hash_set<ProfileVersion> kSupportedProfiles = {
216 ProfileVersion::kIamfSimpleProfile, ProfileVersion::kIamfBaseProfile};
217
218 std::list<MixPresentationObu*> supported_mix_presentations;
219 std::string cumulative_error_message;
220 for (auto iter = mix_presentation_obus.begin();
221 iter != mix_presentation_obus.end(); ++iter) {
222 auto profiles = kSupportedProfiles;
223 const auto status = ProfileFilter::FilterProfilesForMixPresentation(
224 audio_elements, *iter, profiles);
225 if (status.ok()) {
226 supported_mix_presentations.push_back(&*iter);
227 }
228 absl::StrAppend(&cumulative_error_message, status.message(), "\n");
229 }
230 LOG(INFO) << "Filtered mix presentations: " << cumulative_error_message;
231 return supported_mix_presentations;
232 }
233
234 // Searches for the desired layout in the supported mix presentations. If found,
235 // the output_playback_layout is the same as the desired_layout. Otherwise, we
236 // default to the first layout in the first unsupported mix presentation.
GetPlaybackLayoutAndMixPresentation(const std::list<MixPresentationObu * > & supported_mix_presentations,const Layout & desired_layout,Layout & output_playback_layout)237 absl::StatusOr<MixPresentationObu*> GetPlaybackLayoutAndMixPresentation(
238 const std::list<MixPresentationObu*>& supported_mix_presentations,
239 const Layout& desired_layout, Layout& output_playback_layout) {
240 for (const auto& mix_presentation : supported_mix_presentations) {
241 for (const auto& sub_mix : mix_presentation->sub_mixes_) {
242 for (const auto& layout : sub_mix.layouts) {
243 if (layout.loudness_layout == desired_layout) {
244 output_playback_layout = layout.loudness_layout;
245 return mix_presentation;
246 }
247 }
248 }
249 }
250 // If we get here, we didn't find the desired layout in any of the supported
251 // mix presentations. We default to the first layout in the first mix
252 // presentation.
253 MixPresentationObu* output_mix_presentation =
254 supported_mix_presentations.front();
255 if (output_mix_presentation->sub_mixes_.empty()) {
256 return absl::InvalidArgumentError(
257 "No submixes found in the first mix presentation.");
258 }
259 if (output_mix_presentation->sub_mixes_.front().layouts.empty()) {
260 return absl::InvalidArgumentError(
261 "No layouts found in the first submix of the first mix presentation.");
262 }
263 output_playback_layout = output_mix_presentation->sub_mixes_.front()
264 .layouts.front()
265 .loudness_layout;
266 return output_mix_presentation;
267 }
268
269 // Resets the buffer to `start_position` and sets the `insufficient_data`
270 // flag to `true`. Clears the output maps.
InsufficientDataReset(ReadBitBuffer & read_bit_buffer,const int64_t start_position,bool & insufficient_data,absl::flat_hash_map<DecodedUleb128,CodecConfigObu> & output_codec_config_obus,absl::flat_hash_map<DecodedUleb128,AudioElementWithData> & output_audio_elements_with_data,std::list<MixPresentationObu> & output_mix_presentation_obus)271 absl::Status InsufficientDataReset(
272 ReadBitBuffer& read_bit_buffer, const int64_t start_position,
273 bool& insufficient_data,
274 absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
275 output_codec_config_obus,
276 absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
277 output_audio_elements_with_data,
278 std::list<MixPresentationObu>& output_mix_presentation_obus) {
279 LOG(INFO) << "Insufficient data to process all descriptor OBUs.";
280 insufficient_data = true;
281 output_codec_config_obus.clear();
282 output_audio_elements_with_data.clear();
283 output_mix_presentation_obus.clear();
284 RETURN_IF_NOT_OK(read_bit_buffer.Seek(start_position));
285 LOG(INFO) << "Reset the buffer to the beginning.";
286 return absl::ResourceExhaustedError(
287 "Insufficient data to process all descriptor OBUs. Please provide "
288 "more data and try again.");
289 }
290
GetSampleRateAndFrameSize(const absl::flat_hash_map<DecodedUleb128,CodecConfigObu> & output_codec_config_obus,std::optional<uint32_t> & output_sample_rate,std::optional<uint32_t> & output_frame_size)291 void GetSampleRateAndFrameSize(
292 const absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
293 output_codec_config_obus,
294 std::optional<uint32_t>& output_sample_rate,
295 std::optional<uint32_t>& output_frame_size) {
296 if (output_codec_config_obus.size() != 1) {
297 LOG(WARNING) << "Expected exactly one codec config OBUs, but found "
298 << output_codec_config_obus.size();
299 return;
300 }
301 const auto& first_codec_config_obu = output_codec_config_obus.begin()->second;
302 output_sample_rate = first_codec_config_obu.GetOutputSampleRate();
303 output_frame_size = first_codec_config_obu.GetNumSamplesPerFrame();
304 }
305
306 } // namespace
307
InitializeInternal(bool is_exhaustive_and_exact,bool & output_insufficient_data)308 absl::Status ObuProcessor::InitializeInternal(bool is_exhaustive_and_exact,
309 bool& output_insufficient_data) {
310 // Process the descriptor OBUs.
311 LOG(INFO) << "Starting Descriptor OBU processing";
312 RETURN_IF_NOT_OK(ObuProcessor::ProcessDescriptorObus(
313 is_exhaustive_and_exact, *read_bit_buffer_, ia_sequence_header_,
314 codec_config_obus_, audio_elements_, mix_presentations_,
315 output_insufficient_data));
316 LOG(INFO) << "Processed Descriptor OBUs";
317 RETURN_IF_NOT_OK(CollectAndValidateParamDefinitions(
318 audio_elements_, mix_presentations_, param_definition_variants_));
319 GetSampleRateAndFrameSize(codec_config_obus_, output_sample_rate_,
320 output_frame_size_);
321 // Mapping from substream IDs to pointers to audio element with data.
322 for (const auto& [audio_element_id, audio_element_with_data] :
323 audio_elements_) {
324 for (const auto& [substream_id, unused_labels] :
325 audio_element_with_data.substream_id_to_labels) {
326 auto [unused_iter, inserted] = substream_id_to_audio_element_.insert(
327 {substream_id, &audio_element_with_data});
328 if (!inserted) {
329 return absl::InvalidArgumentError(absl::StrCat(
330 "Duplicated substream ID: ", substream_id,
331 " associated with audio element ID: ", audio_element_id));
332 }
333 }
334 }
335 global_timing_module_ =
336 GlobalTimingModule::Create(audio_elements_, param_definition_variants_);
337 if (global_timing_module_ == nullptr) {
338 return absl::InvalidArgumentError(
339 "Failed to initialize the global timing module");
340 }
341 parameters_manager_.emplace(audio_elements_);
342 RETURN_IF_NOT_OK(parameters_manager_->Initialize());
343 return absl::OkStatus();
344 }
345
ProcessDescriptorObus(bool is_exhaustive_and_exact,ReadBitBuffer & read_bit_buffer,IASequenceHeaderObu & output_sequence_header,absl::flat_hash_map<DecodedUleb128,CodecConfigObu> & output_codec_config_obus,absl::flat_hash_map<DecodedUleb128,AudioElementWithData> & output_audio_elements_with_data,std::list<MixPresentationObu> & output_mix_presentation_obus,bool & output_insufficient_data)346 absl::Status ObuProcessor::ProcessDescriptorObus(
347 bool is_exhaustive_and_exact, ReadBitBuffer& read_bit_buffer,
348 IASequenceHeaderObu& output_sequence_header,
349 absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
350 output_codec_config_obus,
351 absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
352 output_audio_elements_with_data,
353 std::list<MixPresentationObu>& output_mix_presentation_obus,
354 bool& output_insufficient_data) {
355 // `output_insufficient_data` indicates a specific error condition and so is
356 // true iff we've received valid data but need more of it.
357 output_insufficient_data = false;
358 auto audio_element_obu_map =
359 absl::flat_hash_map<DecodedUleb128, AudioElementObu>();
360 const int64_t global_position_before_all_obus = read_bit_buffer.Tell();
361 bool processed_ia_header = false;
362 bool continue_processing = true;
363 while (continue_processing) {
364 auto header_metadata =
365 ObuHeader::PeekObuTypeAndTotalObuSize(read_bit_buffer);
366 if (!header_metadata.ok()) {
367 if (header_metadata.status().code() ==
368 absl::StatusCode::kResourceExhausted) {
369 // Can't read header because there is not enough data.
370 return InsufficientDataReset(
371 read_bit_buffer, global_position_before_all_obus,
372 output_insufficient_data, output_codec_config_obus,
373 output_audio_elements_with_data, output_mix_presentation_obus);
374 } else {
375 // Some other error occurred, propagate it.
376 return header_metadata.status();
377 }
378 }
379
380 // Now, we know we were at least able to read obu_type and the total size of
381 // the obu.
382 if (ObuHeader::IsTemporalUnitObuType(header_metadata->obu_type)) {
383 if (is_exhaustive_and_exact) {
384 auto error_status = absl::InvalidArgumentError(
385 "Descriptor OBUs must not contain a temporal unit OBU when "
386 "is_exhaustive_and_exact is true.");
387 LOG(ERROR) << error_status;
388 RETURN_IF_NOT_OK(read_bit_buffer.Seek(global_position_before_all_obus));
389 return error_status;
390 }
391 // Since it's a temporal unit, we know we are done reading descriptor
392 // OBUs. Since we've only peeked on this iteration of the loop, no need to
393 // rewind the buffer.
394 // Check that we've processed an IA header to ensure it's a valid IA
395 // Sequence.
396 if (!processed_ia_header) {
397 return absl::InvalidArgumentError(
398 "An IA Sequence and/or descriptor OBUs must always start with an "
399 "IA Header.");
400 }
401 // Break out of the while loop since we've reached the end of the
402 // descriptor OBUs; should not seek back to the beginning of the buffer
403 // since this is a successful termination.
404 break;
405 }
406
407 // Now, we know that this is not a temporal unit OBU.
408 if (!read_bit_buffer.CanReadBytes(header_metadata->total_obu_size)) {
409 // This is a descriptor OBU for which we don't have enough data.
410 return InsufficientDataReset(
411 read_bit_buffer, global_position_before_all_obus,
412 output_insufficient_data, output_codec_config_obus,
413 output_audio_elements_with_data, output_mix_presentation_obus);
414 }
415 // Now we know we can read the entire obu.
416 const int64_t position_before_header = read_bit_buffer.Tell();
417 ObuHeader header;
418 // Note that `payload_size` is different from the total obu size calculated
419 // by `PeekObuTypeAndTotalObuSize`.
420 int64_t payload_size;
421 RETURN_IF_NOT_OK(header.ReadAndValidate(read_bit_buffer, payload_size));
422 switch (header.obu_type) {
423 case kObuIaSequenceHeader: {
424 if (processed_ia_header && !header.obu_redundant_copy) {
425 LOG(WARNING) << "Detected an IA Sequence without temporal units.";
426 continue_processing = false;
427 break;
428 }
429 auto ia_sequence_header_obu = IASequenceHeaderObu::CreateFromBuffer(
430 header, payload_size, read_bit_buffer);
431 if (!ia_sequence_header_obu.ok()) {
432 return ia_sequence_header_obu.status();
433 }
434 output_sequence_header = *std::move(ia_sequence_header_obu);
435 output_sequence_header.PrintObu();
436 processed_ia_header = true;
437 break;
438 }
439 case kObuIaCodecConfig: {
440 RETURN_IF_NOT_OK(GetAndStoreCodecConfigObu(
441 header, payload_size, output_codec_config_obus, read_bit_buffer));
442 break;
443 }
444 case kObuIaAudioElement: {
445 RETURN_IF_NOT_OK(GetAndStoreAudioElementObu(
446 header, payload_size, audio_element_obu_map, read_bit_buffer));
447 break;
448 }
449 case kObuIaMixPresentation: {
450 RETURN_IF_NOT_OK(GetAndStoreMixPresentationObu(
451 header, payload_size, output_mix_presentation_obus,
452 read_bit_buffer));
453 break;
454 }
455 case kObuIaReserved24:
456 case kObuIaReserved25:
457 case kObuIaReserved26:
458 case kObuIaReserved27:
459 case kObuIaReserved28:
460 case kObuIaReserved29:
461 case kObuIaReserved30: {
462 // Reserved OBUs may occur in the sequence of Descriptor OBUs. For
463 // now, ignore any reserved OBUs by skipping over their bits in the
464 // buffer.
465 continue_processing = true;
466 LOG(INFO) << "Detected a reserved OBU while parsing Descriptor OBUs. "
467 << "Safely ignoring it.";
468 std::vector<uint8_t> buffer_to_discard(payload_size);
469 RETURN_IF_NOT_OK(
470 read_bit_buffer.ReadUint8Span(absl::MakeSpan(buffer_to_discard)));
471 break;
472 }
473 default:
474 /// TODO(b/387550488): Handle reserved OBUs.
475 continue_processing = false;
476 break;
477 }
478 if (!continue_processing) {
479 // Rewind the position to before the last header was read.
480 LOG(INFO) << "position_before_header: " << position_before_header;
481 RETURN_IF_NOT_OK(read_bit_buffer.Seek(position_before_header));
482 }
483 if (!processed_ia_header) {
484 return absl::InvalidArgumentError(
485 "An IA Sequence and/or descriptor OBUs must always start with an IA "
486 "Header.");
487 }
488 if (is_exhaustive_and_exact && !read_bit_buffer.IsDataAvailable()) {
489 // We've reached the end of the bitstream and we've processed all
490 // descriptor OBUs.
491 break;
492 }
493 }
494 if (!audio_element_obu_map.empty()) {
495 auto audio_elements_with_data =
496 ObuWithDataGenerator::GenerateAudioElementsWithData(
497 output_codec_config_obus, audio_element_obu_map);
498 if (!audio_elements_with_data.ok()) {
499 return audio_elements_with_data.status();
500 }
501 output_audio_elements_with_data = std::move(*audio_elements_with_data);
502 }
503 return absl::OkStatus();
504 }
505
// Processes at most one OBU from `read_bit_buffer` as part of a temporal unit.
//
// All three optional outputs are reset on entry; at most one of them is
// populated, depending on the type of the OBU that was read:
//   - audio frame OBUs    -> `output_audio_frame_with_data`
//   - parameter block OBU -> `output_parameter_block_with_data`
//   - temporal delimiter  -> `output_temporal_delimiter`
// Redundant copies of descriptor OBUs and all remaining OBU types are consumed
// and discarded.
//
// `continue_processing` is set to false, with an OK status, when
//   - the header or the full OBU is not available in the buffer yet, or
//   - a non-redundant IA Sequence Header is found (start of the next IA
//     Sequence); the buffer is rewound to the start of that OBU.
//
// Returns `InvalidArgumentError` for a non-redundant Codec Config, Audio
// Element or Mix Presentation OBU. Other parsing errors are propagated.
absl::Status ObuProcessor::ProcessTemporalUnitObu(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements_with_data,
    const absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
        codec_config_obus,
    const absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*>&
        substream_id_to_audio_element,
    const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>&
        param_definition_variants,
    ParametersManager& parameters_manager, ReadBitBuffer& read_bit_buffer,
    GlobalTimingModule& global_timing_module,
    std::optional<AudioFrameWithData>& output_audio_frame_with_data,
    std::optional<ParameterBlockWithData>& output_parameter_block_with_data,
    std::optional<TemporalDelimiterObu>& output_temporal_delimiter,
    bool& continue_processing) {
  // Start from a clean slate so that stale results never leak to the caller.
  output_temporal_delimiter.reset();
  output_parameter_block_with_data.reset();
  output_audio_frame_with_data.reset();
  continue_processing = true;

  const auto peeked = ObuHeader::PeekObuTypeAndTotalObuSize(read_bit_buffer);
  if (!peeked.ok()) {
    if (peeked.status().code() != absl::StatusCode::kResourceExhausted) {
      // A genuine failure; hand it to the caller.
      return peeked.status();
    }
    // Too little data to even read the header. Not an error; simply stop for
    // now.
    continue_processing = false;
    return absl::OkStatus();
  }

  if (!read_bit_buffer.CanReadBytes(peeked->total_obu_size)) {
    // The OBU has not fully arrived. Not an error; simply stop for now.
    continue_processing = false;
    return absl::OkStatus();
  }

  const int64_t obu_start = read_bit_buffer.Tell();

  // Consume the header; this also yields the size of the payload in bytes.
  ObuHeader header;
  int64_t payload_size;
  RETURN_IF_NOT_OK(header.ReadAndValidate(read_bit_buffer, payload_size));

  // The usual suspects are audio frames, parameter blocks and temporal
  // delimiters. An IA Sequence Header announces a new IA Sequence, and
  // "reserved" OBUs are skipped gracefully.
  switch (header.obu_type) {
    case kObuIaAudioFrame:
    case kObuIaAudioFrameId0:
    case kObuIaAudioFrameId1:
    case kObuIaAudioFrameId2:
    case kObuIaAudioFrameId3:
    case kObuIaAudioFrameId4:
    case kObuIaAudioFrameId5:
    case kObuIaAudioFrameId6:
    case kObuIaAudioFrameId7:
    case kObuIaAudioFrameId8:
    case kObuIaAudioFrameId9:
    case kObuIaAudioFrameId10:
    case kObuIaAudioFrameId11:
    case kObuIaAudioFrameId12:
    case kObuIaAudioFrameId13:
    case kObuIaAudioFrameId14:
    case kObuIaAudioFrameId15:
    case kObuIaAudioFrameId16:
    case kObuIaAudioFrameId17: {
      RETURN_IF_NOT_OK(GetAndStoreAudioFrameWithData(
          header, payload_size, audio_elements_with_data,
          substream_id_to_audio_element, read_bit_buffer, global_timing_module,
          parameters_manager, output_audio_frame_with_data));
      break;
    }
    case kObuIaParameterBlock: {
      RETURN_IF_NOT_OK(GetAndStoreParameterBlockWithData(
          header, payload_size, param_definition_variants, read_bit_buffer,
          global_timing_module, output_parameter_block_with_data));
      break;
    }
    case kObuIaTemporalDelimiter: {
      // Processing here is not organized by temporal unit; the delimiter is
      // merely parsed and passed along.
      const auto parsed_delimiter = TemporalDelimiterObu::CreateFromBuffer(
          header, payload_size, read_bit_buffer);
      if (!parsed_delimiter.ok()) {
        return parsed_delimiter.status();
      }
      output_temporal_delimiter = *parsed_delimiter;
      break;
    }
    case kObuIaSequenceHeader:
    case kObuIaCodecConfig:
    case kObuIaAudioElement:
    case kObuIaMixPresentation:
      if (!header.obu_redundant_copy) {
        if (header.obu_type != kObuIaSequenceHeader) {
          // Fresh descriptors are not allowed inside a temporal unit.
          return absl::InvalidArgumentError(absl::StrCat(
              "Unexpected non-reserved OBU obu_type= ", header.obu_type));
        }
        // OK. The caller has to reconfigure its state for the next IA
        // Sequence.
        LOG(INFO) << "Detected the start of the next IA Sequence.";
        continue_processing = false;
        break;
      }
      // Redundant copies of IAMF v1.1.0 descriptor OBUs may be skipped (even
      // if the redundant flag is misleading); discard them below.
      [[fallthrough]];
    default: {
      // TODO(b/329705373): Read in the data as an `ArbitraryOBU` and output
      //                    it from this function.
      LOG(INFO) << "Detected a reserved or redundant OBU. Safely ignoring it.";
      std::vector<uint8_t> discarded_payload(payload_size);
      RETURN_IF_NOT_OK(
          read_bit_buffer.ReadUint8Span(absl::MakeSpan(discarded_payload)));
      break;
    }
  }

  if (!continue_processing) {
    // Put the buffer back to the start of the OBU which stopped processing.
    LOG(INFO) << "position_before_header: " << obu_start;
    RETURN_IF_NOT_OK(read_bit_buffer.Seek(obu_start));
  }

  return absl::OkStatus();
}
640
Create(bool is_exhaustive_and_exact,ReadBitBuffer * read_bit_buffer,bool & output_insufficient_data)641 std::unique_ptr<ObuProcessor> ObuProcessor::Create(
642 bool is_exhaustive_and_exact, ReadBitBuffer* read_bit_buffer,
643 bool& output_insufficient_data) {
644 // `output_insufficient_data` indicates a specific error condition and so is
645 // true iff we've received valid data but need more of it.
646 output_insufficient_data = false;
647 if (read_bit_buffer == nullptr) {
648 return nullptr;
649 }
650 std::unique_ptr<ObuProcessor> obu_processor =
651 absl::WrapUnique(new ObuProcessor(read_bit_buffer));
652 if (const auto status = obu_processor->InitializeInternal(
653 is_exhaustive_and_exact, output_insufficient_data);
654 !status.ok()) {
655 LOG(ERROR) << status;
656 return nullptr;
657 }
658 return obu_processor;
659 }
660
CreateForRendering(const Layout & desired_layout,const RenderingMixPresentationFinalizer::SampleProcessorFactory & sample_processor_factory,bool is_exhaustive_and_exact,ReadBitBuffer * read_bit_buffer,Layout & output_layout,bool & output_insufficient_data)661 std::unique_ptr<ObuProcessor> ObuProcessor::CreateForRendering(
662 const Layout& desired_layout,
663 const RenderingMixPresentationFinalizer::SampleProcessorFactory&
664 sample_processor_factory,
665 bool is_exhaustive_and_exact, ReadBitBuffer* read_bit_buffer,
666 Layout& output_layout, bool& output_insufficient_data) {
667 // `output_insufficient_data` indicates a specific error condition and so is
668 // true iff we've received valid data but need more of it.
669 output_insufficient_data = false;
670 if (read_bit_buffer == nullptr) {
671 return nullptr;
672 }
673 std::unique_ptr<ObuProcessor> obu_processor =
674 absl::WrapUnique(new ObuProcessor(read_bit_buffer));
675 if (const auto status = obu_processor->InitializeInternal(
676 is_exhaustive_and_exact, output_insufficient_data);
677 !status.ok()) {
678 LOG(ERROR) << status;
679 return nullptr;
680 }
681
682 if (const auto status = obu_processor->InitializeForRendering(
683 desired_layout, sample_processor_factory, output_layout);
684 !status.ok()) {
685 LOG(ERROR) << status;
686 return nullptr;
687 }
688 return obu_processor;
689 }
690
GetOutputSampleRate() const691 absl::StatusOr<uint32_t> ObuProcessor::GetOutputSampleRate() const {
692 RETURN_IF_NOT_OK(
693 ValidateHasValue(output_sample_rate_,
694 "Output sample rate, was this a trivial IA Sequence?"));
695 return *output_sample_rate_;
696 }
697
GetOutputFrameSize() const698 absl::StatusOr<uint32_t> ObuProcessor::GetOutputFrameSize() const {
699 RETURN_IF_NOT_OK(
700 ValidateHasValue(output_frame_size_,
701 "Output frame size, was this a trivial IA Sequence?"));
702 return *output_frame_size_;
703 }
704
InitializeForRendering(const Layout & desired_layout,const RenderingMixPresentationFinalizer::SampleProcessorFactory & sample_processor_factory,Layout & output_layout)705 absl::Status ObuProcessor::InitializeForRendering(
706 const Layout& desired_layout,
707 const RenderingMixPresentationFinalizer::SampleProcessorFactory&
708 sample_processor_factory,
709 Layout& output_layout) {
710 if (mix_presentations_.empty()) {
711 return absl::InvalidArgumentError("No mix presentation OBUs found.");
712 }
713 if (audio_elements_.empty()) {
714 return absl::InvalidArgumentError("No audio element OBUs found.");
715 }
716
717 // TODO(b/377747704): Decode only the frames selected for the playback
718 // layout.
719 audio_frame_decoder_.emplace();
720 for (const auto& [unused_id, audio_element_with_data] : audio_elements_) {
721 RETURN_IF_NOT_OK(audio_frame_decoder_->InitDecodersForSubstreams(
722 audio_element_with_data.substream_id_to_labels,
723 *audio_element_with_data.codec_config));
724 }
725 {
726 auto temp_demixing_module =
727 DemixingModule::CreateForReconstruction(audio_elements_);
728 if (!temp_demixing_module.ok()) {
729 return temp_demixing_module.status();
730 }
731 demixing_module_.emplace(*std::move(temp_demixing_module));
732 }
733
734 // TODO(b/340289717): Add a way to select the mix presentation if multiple
735 // are supported.
736 const std::list<MixPresentationObu*> supported_mix_presentations =
737 GetSupportedMixPresentations(audio_elements_, mix_presentations_);
738 if (supported_mix_presentations.empty()) {
739 return absl::NotFoundError("No supported mix presentation OBUs found.");
740 }
741 Layout playback_layout;
742 auto mix_presentation_to_render = GetPlaybackLayoutAndMixPresentation(
743 supported_mix_presentations, desired_layout, output_layout);
744 if (!mix_presentation_to_render.ok()) {
745 return mix_presentation_to_render.status();
746 }
747 int playback_sub_mix_index;
748 int playback_layout_index;
749 RETURN_IF_NOT_OK(GetIndicesForLayout(
750 (*mix_presentation_to_render)->sub_mixes_, output_layout,
751 playback_sub_mix_index, playback_layout_index));
752 decoding_layout_info_ = {
753 .mix_presentation_id =
754 (*mix_presentation_to_render)->GetMixPresentationId(),
755 .sub_mix_index = playback_sub_mix_index,
756 .layout_index = playback_layout_index,
757 };
758 auto forward_on_desired_layout =
759 [&sample_processor_factory, mix_presentation_to_render,
760 playback_sub_mix_index, playback_layout_index](
761 DecodedUleb128 mix_presentation_id, int sub_mix_index,
762 int layout_index, const Layout& layout, int num_channels,
763 int sample_rate, int bit_depth, size_t max_input_samples_per_frame)
764 -> std::unique_ptr<SampleProcessorBase> {
765 if (mix_presentation_id ==
766 (*mix_presentation_to_render)->GetMixPresentationId() &&
767 playback_sub_mix_index == sub_mix_index &&
768 playback_layout_index == layout_index) {
769 return sample_processor_factory(
770 mix_presentation_id, sub_mix_index, layout_index, layout,
771 num_channels, sample_rate, bit_depth, max_input_samples_per_frame);
772 }
773 return nullptr;
774 };
775
776 // Create the mix presentation finalizer which is used to render the output
777 // files. We neither trust the user-provided loudness, nor care about the
778 // calculated loudness.
779 const RendererFactory renderer_factory;
780 absl::StatusOr<RenderingMixPresentationFinalizer> mix_presentation_finalizer =
781 RenderingMixPresentationFinalizer::Create(
782 /*renderer_factory=*/&renderer_factory,
783 /*loudness_calculator_factory=*/nullptr, audio_elements_,
784 forward_on_desired_layout, mix_presentations_);
785 if (!mix_presentation_finalizer.ok()) {
786 return mix_presentation_finalizer.status();
787 }
788 mix_presentation_finalizer_.emplace(*std::move(mix_presentation_finalizer));
789
790 return absl::OkStatus();
791 }
792
ProcessTemporalUnitObu(std::optional<AudioFrameWithData> & output_audio_frame_with_data,std::optional<ParameterBlockWithData> & output_parameter_block_with_data,std::optional<TemporalDelimiterObu> & output_temporal_delimiter,bool & continue_processing)793 absl::Status ObuProcessor::ProcessTemporalUnitObu(
794 std::optional<AudioFrameWithData>& output_audio_frame_with_data,
795 std::optional<ParameterBlockWithData>& output_parameter_block_with_data,
796 std::optional<TemporalDelimiterObu>& output_temporal_delimiter,
797 bool& continue_processing) {
798 if (!parameters_manager_.has_value()) {
799 return absl::InvalidArgumentError(
800 "Parameters manager is not constructed; "
801 "remember to call `Initialize()` first.");
802 }
803 if (global_timing_module_ == nullptr) {
804 return absl::InvalidArgumentError(
805 "Global timing module is not constructed; "
806 "remember to call `Initialize()` first.");
807 }
808 if (read_bit_buffer_ == nullptr) {
809 return absl::InvalidArgumentError(
810 "Read bit buffer is not constructed; "
811 "remember to call `Initialize()` first.");
812 }
813
814 return ObuProcessor::ProcessTemporalUnitObu(
815 audio_elements_, codec_config_obus_, substream_id_to_audio_element_,
816 param_definition_variants_, *parameters_manager_, *read_bit_buffer_,
817 *global_timing_module_, output_audio_frame_with_data,
818 output_parameter_block_with_data, output_temporal_delimiter,
819 continue_processing);
820 }
821
ProcessTemporalUnit(bool eos_is_end_of_sequence,std::optional<OutputTemporalUnit> & output_temporal_unit,bool & continue_processing)822 absl::Status ObuProcessor::ProcessTemporalUnit(
823 bool eos_is_end_of_sequence,
824 std::optional<OutputTemporalUnit>& output_temporal_unit,
825 bool& continue_processing) {
826 continue_processing = true;
827 while (continue_processing) {
828 std::optional<AudioFrameWithData> audio_frame_with_data;
829 std::optional<ParameterBlockWithData> parameter_block_with_data;
830 std::optional<TemporalDelimiterObu> temporal_delimiter;
831 RETURN_IF_NOT_OK(
832 ProcessTemporalUnitObu(audio_frame_with_data, parameter_block_with_data,
833 temporal_delimiter, continue_processing));
834
835 // Collect OBUs into a temporal unit.
836 if (audio_frame_with_data.has_value()) {
837 TemporalUnitData::AddDataToCorrectTemporalUnit(
838 current_temporal_unit_, next_temporal_unit_,
839 *std::move(audio_frame_with_data));
840 } else if (parameter_block_with_data.has_value()) {
841 TemporalUnitData::AddDataToCorrectTemporalUnit(
842 current_temporal_unit_, next_temporal_unit_,
843 *std::move(parameter_block_with_data));
844 } else if (temporal_delimiter.has_value()) {
845 current_temporal_unit_.temporal_delimiter = *temporal_delimiter;
846 }
847
848 // The current temporal unit is considered finished if any of the
849 // following conditions is met:
850 // - The end of sequence is reached.
851 // - The timestamp has advanced (i.e. when the next temporal unit gets its
852 // timestamp).
853 // - A temporal delimiter is encountered.
854 if ((!continue_processing && eos_is_end_of_sequence) ||
855 next_temporal_unit_.timestamp.has_value() ||
856 current_temporal_unit_.temporal_delimiter.has_value()) {
857 output_temporal_unit = OutputTemporalUnit();
858 output_temporal_unit->output_audio_frames =
859 std::move(current_temporal_unit_.audio_frames);
860 output_temporal_unit->output_parameter_blocks =
861 std::move(current_temporal_unit_.parameter_blocks);
862 if (current_temporal_unit_.timestamp.has_value()) {
863 output_temporal_unit->output_timestamp =
864 current_temporal_unit_.timestamp.value();
865 }
866 current_temporal_unit_ = std::move(next_temporal_unit_);
867 next_temporal_unit_ = TemporalUnitData();
868 break;
869 }
870 }
871
872 return absl::OkStatus();
873 }
874
RenderTemporalUnitAndMeasureLoudness(InternalTimestamp start_timestamp,const std::list<AudioFrameWithData> & audio_frames,const std::list<ParameterBlockWithData> & parameter_blocks,absl::Span<const std::vector<int32_t>> & output_rendered_pcm_samples)875 absl::Status ObuProcessor::RenderTemporalUnitAndMeasureLoudness(
876 InternalTimestamp start_timestamp,
877 const std::list<AudioFrameWithData>& audio_frames,
878 const std::list<ParameterBlockWithData>& parameter_blocks,
879 absl::Span<const std::vector<int32_t>>& output_rendered_pcm_samples) {
880 if (audio_frames.empty()) {
881 // Nothing to decode, render, or measure loudness of.
882 return absl::OkStatus();
883 }
884
885 if (!audio_frame_decoder_.has_value()) {
886 return absl::InvalidArgumentError(
887 "Audio frame decoder is not constructed; "
888 "remember to call `InitializeForRendering()` first.");
889 }
890 if (!demixing_module_.has_value()) {
891 return absl::InvalidArgumentError(
892 "Demxing module is not constructed; "
893 "remember to call `InitializeForRendering()` first.");
894 }
895 if (!mix_presentation_finalizer_.has_value()) {
896 return absl::InvalidArgumentError(
897 "Mix presentation finalizer is not constructed; "
898 "remember to call `InitializeForRendering()` first.");
899 }
900
901 // Decode the temporal unit.
902 std::optional<InternalTimestamp> end_timestamp;
903
904 // This resizing should happen only once per IA sequence, since all the
905 // temporal units should contain the same number of audio frames.
906 decoded_frames_for_temporal_unit_.resize(audio_frames.size());
907 auto decoded_frames_iter = decoded_frames_for_temporal_unit_.begin();
908 for (const auto& audio_frame : audio_frames) {
909 if (!end_timestamp.has_value()) {
910 end_timestamp = audio_frame.end_timestamp;
911 }
912 RETURN_IF_NOT_OK(
913 CompareTimestamps(start_timestamp, audio_frame.start_timestamp,
914 "Audio frame has a different start timestamp than "
915 "the temporal unit: "));
916 RETURN_IF_NOT_OK(CompareTimestamps(*end_timestamp,
917 audio_frame.end_timestamp,
918 "Audio frame has a different end "
919 "timestamp than the temporal unit: "));
920 auto decoded_frame = audio_frame_decoder_->Decode(audio_frame);
921 if (!decoded_frame.ok()) {
922 return decoded_frame.status();
923 }
924 *decoded_frames_iter = std::move(*decoded_frame);
925 decoded_frames_iter++;
926 }
927
928 // Reconstruct the temporal unit and store the result in the output map.
929 const auto decoded_labeled_frames_for_temporal_unit =
930 demixing_module_->DemixDecodedAudioSamples(
931 decoded_frames_for_temporal_unit_);
932 if (!decoded_labeled_frames_for_temporal_unit.ok()) {
933 return decoded_labeled_frames_for_temporal_unit.status();
934 }
935
936 RETURN_IF_NOT_OK(mix_presentation_finalizer_->PushTemporalUnit(
937 *decoded_labeled_frames_for_temporal_unit, start_timestamp,
938 *end_timestamp, parameter_blocks));
939
940 auto rendered_samples =
941 mix_presentation_finalizer_->GetPostProcessedSamplesAsSpan(
942 decoding_layout_info_.mix_presentation_id,
943 decoding_layout_info_.sub_mix_index,
944 decoding_layout_info_.layout_index);
945 if (!rendered_samples.ok()) {
946 return rendered_samples.status();
947 }
948 output_rendered_pcm_samples = *rendered_samples;
949
950 // TODO(b/379122580): Add a call to `FinalizePushingTemporalUnits`, then a
951 // final call to `GetPostProcessedSamplesAsSpan` when there
952 // are no more temporal units to push. Those calls may
953 // belong elsewhere in the class depending on the
954 // interface.
955
956 return absl::OkStatus();
957 }
958
959 } // namespace iamf_tools
960