1 /*
2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12 #include "iamf/cli/obu_sequencer_base.h"
13
14 #include <algorithm>
15 #include <cstdint>
16 #include <functional>
17 #include <list>
18 #include <optional>
19 #include <utility>
20 #include <vector>
21
22 #include "absl/container/btree_map.h"
23 #include "absl/container/flat_hash_map.h"
24 #include "absl/container/flat_hash_set.h"
25 #include "absl/log/check.h"
26 #include "absl/log/log.h"
27 #include "absl/status/status.h"
28 #include "absl/types/span.h"
29 #include "iamf/cli/audio_element_with_data.h"
30 #include "iamf/cli/audio_frame_with_data.h"
31 #include "iamf/cli/cli_util.h"
32 #include "iamf/cli/parameter_block_with_data.h"
33 #include "iamf/cli/profile_filter.h"
34 #include "iamf/cli/temporal_unit_view.h"
35 #include "iamf/common/leb_generator.h"
36 #include "iamf/common/utils/macros.h"
37 #include "iamf/common/write_bit_buffer.h"
38 #include "iamf/obu/arbitrary_obu.h"
39 #include "iamf/obu/audio_element.h"
40 #include "iamf/obu/audio_frame.h"
41 #include "iamf/obu/codec_config.h"
42 #include "iamf/obu/ia_sequence_header.h"
43 #include "iamf/obu/mix_presentation.h"
44 #include "iamf/obu/obu_header.h"
45 #include "iamf/obu/parameter_block.h"
46 #include "iamf/obu/temporal_delimiter.h"
47 #include "iamf/obu/types.h"
48
49 namespace iamf_tools {
50
51 namespace {
52
// Starting size of the write buffer: 64 KB. The buffer will resize for larger
// OBUs if needed.
constexpr int64_t kBufferStartSize = int64_t{64} * 1024;
56
57 /*!\brief Map of start timestamp -> OBUs in that temporal unit.
58 *
59 * Map of temporal unit start time -> OBUs that overlap this temporal unit.
60 * Using absl::btree_map for convenience as this allows iterating by
61 * timestamp (which is the key).
62 */
63 typedef absl::btree_map<int32_t, TemporalUnitView> TemporalUnitMap;
64
65 /*!\brief Helper class to abort an `ObuSequencerBase` on destruction.
66 *
67 * This class calls on an `ObuSequencerBase::Abort` on destruction. Or does
68 * nothing if `CancelAbort` is called.
69 *
70 * Typically, this is useful to create an instance of this class on the stack,
71 * in the scope of a function which has many locations where it may return an
72 * un-recoverable error. When those exit points are reached, the sequencer will
73 * automatically be aborted.
74 *
75 * Before any successful exit point, `CancelAbort` should be called, which will
76 * prevent the sequencer from being aborting.
77 */
78 class AbortOnDestruct {
79 public:
80 /*!\brief Constructor
81 *
82 * \param obu_sequencer The `ObuSequencerBase` to abort on destruction.
83 */
AbortOnDestruct(ObuSequencerBase * obu_sequencer)84 explicit AbortOnDestruct(ObuSequencerBase* obu_sequencer)
85 : obu_sequencer(obu_sequencer) {}
86
87 /*!\brief Destructor */
~AbortOnDestruct()88 ~AbortOnDestruct() {
89 if (obu_sequencer != nullptr) {
90 obu_sequencer->Abort();
91 }
92 }
93
94 /*!\brief Cancels the abort on destruction. */
CancelAbort()95 void CancelAbort() { obu_sequencer = nullptr; }
96
97 private:
98 ObuSequencerBase* obu_sequencer;
99 };
100
// Collects every key of `map` into a vector and returns it ordered according
// to `comparator`.
template <typename KeyValueMap, typename KeyComparator>
std::vector<uint32_t> SortedKeys(const KeyValueMap& map,
                                 const KeyComparator& comparator) {
  std::vector<uint32_t> ordered_ids;
  ordered_ids.reserve(map.size());
  for (const auto& entry : map) {
    ordered_ids.push_back(entry.first);
  }
  std::sort(ordered_ids.begin(), ordered_ids.end(), comparator);
  return ordered_ids;
}
// Some IA Sequences can be "trivial": they are missing descriptor OBUs or
// audio frames, and would decode to an empty stream. When the true value is
// undefined, fall back to the reasonable, but arbitrary, defaults below.

// Samples per frame to report when there are no audio frames.
constexpr uint32_t kFallbackSamplesPerFrame{1024};
// Sample rate to report when there are no Codec Config OBUs.
constexpr uint32_t kFallbackSampleRate{48000};
// Bit-depth to report when there are no Codec Config OBUs.
constexpr uint8_t kFallbackBitDepth{16};
// Number of channels to report when there are no audio elements.
constexpr uint32_t kFallbackNumChannels{2};
// First PTS to report when there are no audio frames.
constexpr int64_t kFallbackFirstPts{0};
126
127 // Gets the sum of the number of channels for the given audio elements. Or falls
128 // back to a default value if there are no audio elements.
GetNumberOfChannels(const absl::flat_hash_map<uint32_t,AudioElementWithData> & audio_elements)129 int32_t GetNumberOfChannels(
130 const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements) {
131 if (audio_elements.empty()) {
132 // The muxer fails if we return the true value (0 channels).
133 return kFallbackNumChannels;
134 }
135
136 int32_t num_channels = 0;
137 for (const auto& [audio_element_id, audio_element] : audio_elements) {
138 // Add the number of channels for every substream in every audio element.
139 for (const auto& [substream_id, labels] :
140 audio_element.substream_id_to_labels) {
141 num_channels += static_cast<int32_t>(labels.size());
142 }
143 }
144 return num_channels;
145 }
146
147 // Gets the common sample rate and bit depth for the given codec config OBUs. Or
148 // falls back to default values if there are no codec configs.
GetCommonSampleRateAndBitDepth(const absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus,uint32_t & common_sample_rate,uint8_t & common_bit_depth,bool & requires_resampling)149 absl::Status GetCommonSampleRateAndBitDepth(
150 const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
151 uint32_t& common_sample_rate, uint8_t& common_bit_depth,
152 bool& requires_resampling) {
153 if (codec_config_obus.empty()) {
154 // The true value is undefined, but the muxer requires non-zero values.
155 common_sample_rate = kFallbackSampleRate;
156 common_bit_depth = kFallbackBitDepth;
157 requires_resampling = false;
158 return absl::OkStatus();
159 }
160
161 requires_resampling = false;
162 absl::flat_hash_set<uint32_t> sample_rates;
163 absl::flat_hash_set<uint8_t> bit_depths;
164 for (const auto& [unused_id, obu] : codec_config_obus) {
165 sample_rates.insert(obu.GetOutputSampleRate());
166 bit_depths.insert(obu.GetBitDepthToMeasureLoudness());
167 }
168
169 return ::iamf_tools::GetCommonSampleRateAndBitDepth(
170 sample_rates, bit_depths, common_sample_rate, common_bit_depth,
171 requires_resampling);
172 }
173
WriteObusWithHook(ArbitraryObu::InsertionHook insertion_hook,const std::vector<const ArbitraryObu * > & arbitrary_obus,WriteBitBuffer & wb)174 absl::Status WriteObusWithHook(
175 ArbitraryObu::InsertionHook insertion_hook,
176 const std::vector<const ArbitraryObu*>& arbitrary_obus,
177 WriteBitBuffer& wb) {
178 for (const auto& arbitrary_obu : arbitrary_obus) {
179 if (arbitrary_obu->insertion_hook_ == insertion_hook) {
180 RETURN_IF_NOT_OK(arbitrary_obu->ValidateAndWriteObu(wb));
181 }
182 }
183 return absl::OkStatus();
184 }
185
// Fills the sample-format fields of `descriptor_statistics` from the Codec
// Config OBUs.
//
// Every field is first reset to its fallback value. The common sample rate and
// bit-depth are then computed; if the Codec Config OBUs would require
// resampling an `UnimplementedError` is returned. Otherwise the result of
// `GetCommonSamplesPerFrame` is returned. `num_channels` is left at
// `kFallbackNumChannels`.
absl::Status FillDescriptorStatistics(
    const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
    auto& descriptor_statistics) {
  descriptor_statistics.num_channels = kFallbackNumChannels;
  descriptor_statistics.common_bit_depth = kFallbackBitDepth;
  descriptor_statistics.common_sample_rate = kFallbackSampleRate;
  descriptor_statistics.common_samples_per_frame = kFallbackSamplesPerFrame;

  bool needs_resampling = false;
  RETURN_IF_NOT_OK(GetCommonSampleRateAndBitDepth(
      codec_config_obus, descriptor_statistics.common_sample_rate,
      descriptor_statistics.common_bit_depth, needs_resampling));
  if (!needs_resampling) {
    // This assumes all Codec Configs have the same sample rate and frame size.
    // We may need to be more careful if IA Samples do not all (except the
    // final) have the same duration in the future.
    return GetCommonSamplesPerFrame(
        codec_config_obus, descriptor_statistics.common_samples_per_frame);
  }

  return absl::UnimplementedError(
      "Codec Config OBUs with different bit-depths and/or sample "
      "rates are not in base-enhanced/base/simple profile; they are not "
      "allowed in ISOBMFF.");
}
211
212 [[deprecated("Remove this, and related types when `PickAndPlace` is removed.")]]
GenerateTemporalUnitMap(const std::list<AudioFrameWithData> & audio_frames,const std::list<ParameterBlockWithData> & parameter_blocks,const std::list<ArbitraryObu> & arbitrary_obus,TemporalUnitMap & temporal_unit_map)213 absl::Status GenerateTemporalUnitMap(
214 const std::list<AudioFrameWithData>& audio_frames,
215 const std::list<ParameterBlockWithData>& parameter_blocks,
216 const std::list<ArbitraryObu>& arbitrary_obus,
217 TemporalUnitMap& temporal_unit_map) {
218 // Initially, guess the temporal units by the start time. Deeper validation
219 // and sanitization occurs when creating the TemporalUnitView.
220 struct UnsanitizedTemporalUnit {
221 std::vector<const ParameterBlockWithData*> parameter_blocks;
222 std::vector<const AudioFrameWithData*> audio_frames;
223 std::vector<const ArbitraryObu*> arbitrary_obus;
224 };
225 typedef absl::flat_hash_map<InternalTimestamp, UnsanitizedTemporalUnit>
226 UnsanitizedTemporalUnitMap;
227 UnsanitizedTemporalUnitMap unsanitized_temporal_unit_map;
228
229 for (const auto& parameter_block : parameter_blocks) {
230 unsanitized_temporal_unit_map[parameter_block.start_timestamp]
231 .parameter_blocks.push_back(¶meter_block);
232 }
233 for (auto& audio_frame : audio_frames) {
234 unsanitized_temporal_unit_map[audio_frame.start_timestamp]
235 .audio_frames.push_back(&audio_frame);
236 }
237 for (const auto& arbitrary_obu : arbitrary_obus) {
238 if (arbitrary_obu.insertion_tick_ == std::nullopt) {
239 continue;
240 }
241 unsanitized_temporal_unit_map[*arbitrary_obu.insertion_tick_]
242 .arbitrary_obus.push_back(&arbitrary_obu);
243 }
244 // Sanitize and build a map on the sanitized temporal units.
245 for (const auto& [timestamp, unsanitized_temporal_unit] :
246 unsanitized_temporal_unit_map) {
247 auto temporal_unit_view = TemporalUnitView::CreateFromPointers(
248 unsanitized_temporal_unit.parameter_blocks,
249 unsanitized_temporal_unit.audio_frames,
250 unsanitized_temporal_unit.arbitrary_obus);
251 if (!temporal_unit_view.ok()) {
252 return temporal_unit_view.status();
253 }
254 temporal_unit_map.emplace(timestamp, *std::move(temporal_unit_view));
255 }
256
257 return absl::OkStatus();
258 }
259
260 } // namespace
261
WriteTemporalUnit(bool include_temporal_delimiters,const TemporalUnitView & temporal_unit,WriteBitBuffer & wb,int & num_samples)262 absl::Status ObuSequencerBase::WriteTemporalUnit(
263 bool include_temporal_delimiters, const TemporalUnitView& temporal_unit,
264 WriteBitBuffer& wb, int& num_samples) {
265 num_samples += temporal_unit.num_untrimmed_samples_;
266
267 if (include_temporal_delimiters) {
268 // Temporal delimiter has no payload.
269 const TemporalDelimiterObu obu((ObuHeader()));
270 RETURN_IF_NOT_OK(obu.ValidateAndWriteObu(wb));
271 }
272
273 RETURN_IF_NOT_OK(
274 WriteObusWithHook(ArbitraryObu::kInsertionHookBeforeParameterBlocksAtTick,
275 temporal_unit.arbitrary_obus_, wb));
276
277 // Write the Parameter Block OBUs.
278 for (const auto& parameter_blocks : temporal_unit.parameter_blocks_) {
279 const auto& parameter_block = parameter_blocks;
280 RETURN_IF_NOT_OK(parameter_block->obu->ValidateAndWriteObu(wb));
281 }
282
283 RETURN_IF_NOT_OK(
284 WriteObusWithHook(ArbitraryObu::kInsertionHookAfterParameterBlocksAtTick,
285 temporal_unit.arbitrary_obus_, wb));
286
287 // Write Audio Frame OBUs.
288 for (const auto& audio_frame : temporal_unit.audio_frames_) {
289 RETURN_IF_NOT_OK(audio_frame->obu.ValidateAndWriteObu(wb));
290 LOG_FIRST_N(INFO, 10) << "wb.bit_offset= " << wb.bit_offset()
291 << " after Audio Frame";
292 }
293
294 RETURN_IF_NOT_OK(
295 WriteObusWithHook(ArbitraryObu::kInsertionHookAfterAudioFramesAtTick,
296 temporal_unit.arbitrary_obus_, wb));
297
298 if (!wb.IsByteAligned()) {
299 return absl::InvalidArgumentError("Write buffer not byte-aligned");
300 }
301
302 return absl::OkStatus();
303 }
304
305 // Writes the descriptor OBUs. Section 5.1.1
306 // (https://aomediacodec.github.io/iamf/#standalone-descriptor-obus) orders the
307 // OBUs by type.
308 //
309 // For Codec Config OBUs and Audio Element OBUs, the order is arbitrary. For
310 // determinism this implementation orders them by ascending ID.
311 //
312 // For Mix Presentation OBUs, the order is the same as the original order.
313 // Because the original ordering may be used downstream when selecting the mix
314 // presentation
315 // (https://aomediacodec.github.io/iamf/#processing-mixpresentation-selection).
316 //
317 // For Arbitrary OBUs, they are inserted in an order implied by the insertion
318 // hook. Ties are broken by the original order, when multiple OBUs have the same
319 // hook.
WriteDescriptorObus(const IASequenceHeaderObu & ia_sequence_header_obu,const absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus,const absl::flat_hash_map<uint32_t,AudioElementWithData> & audio_elements,const std::list<MixPresentationObu> & mix_presentation_obus,const std::list<ArbitraryObu> & arbitrary_obus,WriteBitBuffer & wb)320 absl::Status ObuSequencerBase::WriteDescriptorObus(
321 const IASequenceHeaderObu& ia_sequence_header_obu,
322 const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
323 const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
324 const std::list<MixPresentationObu>& mix_presentation_obus,
325 const std::list<ArbitraryObu>& arbitrary_obus, WriteBitBuffer& wb) {
326 // Write IA Sequence Header OBU.
327 RETURN_IF_NOT_OK(ia_sequence_header_obu.ValidateAndWriteObu(wb));
328 LOG(INFO) << "wb.bit_offset= " << wb.bit_offset()
329 << " after IA Sequence Header";
330
331 RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
332 ArbitraryObu::kInsertionHookAfterIaSequenceHeader, arbitrary_obus, wb));
333
334 // Write Codec Config OBUs in ascending order of Codec Config IDs.
335 // TODO(b/332956880): Support customizing the ordering.
336 const std::vector<uint32_t> codec_config_ids =
337 SortedKeys(codec_config_obus, std::less<uint32_t>());
338 for (const auto id : codec_config_ids) {
339 RETURN_IF_NOT_OK(codec_config_obus.at(id).ValidateAndWriteObu(wb));
340 LOG(INFO) << "wb.bit_offset= " << wb.bit_offset() << " after Codec Config";
341 }
342
343 RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
344 ArbitraryObu::kInsertionHookAfterCodecConfigs, arbitrary_obus, wb));
345
346 // Write Audio Element OBUs in ascending order of Audio Element IDs.
347 // TODO(b/332956880): Support customizing the ordering.
348 const std::vector<uint32_t> audio_element_ids =
349 SortedKeys(audio_elements, std::less<uint32_t>());
350 for (const auto id : audio_element_ids) {
351 RETURN_IF_NOT_OK(audio_elements.at(id).obu.ValidateAndWriteObu(wb));
352 LOG(INFO) << "wb.bit_offset= " << wb.bit_offset() << " after Audio Element";
353 }
354
355 RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
356 ArbitraryObu::kInsertionHookAfterAudioElements, arbitrary_obus, wb));
357
358 // TODO(b/269708630): Ensure at least one the profiles in the IA Sequence
359 // Header supports all of the layers for scalable audio
360 // elements.
361 // Maintain the original order of Mix Presentation OBUs.
362 for (const auto& mix_presentation_obu : mix_presentation_obus) {
363 // Make sure the mix presentation is valid for at least one of the profiles
364 // in the sequence header before writing it.
365 absl::flat_hash_set<ProfileVersion> profile_version = {
366 ia_sequence_header_obu.GetPrimaryProfile(),
367 ia_sequence_header_obu.GetAdditionalProfile()};
368 RETURN_IF_NOT_OK(ProfileFilter::FilterProfilesForMixPresentation(
369 audio_elements, mix_presentation_obu, profile_version));
370
371 RETURN_IF_NOT_OK(mix_presentation_obu.ValidateAndWriteObu(wb));
372 LOG(INFO) << "wb.bit_offset= " << wb.bit_offset()
373 << " after Mix Presentation";
374 }
375 RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
376 ArbitraryObu::kInsertionHookAfterMixPresentations, arbitrary_obus, wb));
377
378 return absl::OkStatus();
379 }
380
ObuSequencerBase(const LebGenerator & leb_generator,bool include_temporal_delimiters,bool delay_descriptors_until_first_untrimmed_sample)381 ObuSequencerBase::ObuSequencerBase(
382 const LebGenerator& leb_generator, bool include_temporal_delimiters,
383 bool delay_descriptors_until_first_untrimmed_sample)
384 : leb_generator_(leb_generator),
385 delay_descriptors_until_first_untrimmed_sample_(
386 delay_descriptors_until_first_untrimmed_sample),
387 include_temporal_delimiters_(include_temporal_delimiters),
388 wb_(kBufferStartSize, leb_generator) {}
389
~ObuSequencerBase()390 ObuSequencerBase::~ObuSequencerBase() {
391 switch (state_) {
392 case kInitialized:
393 return;
394 case kPushDescriptorObusCalled:
395 case kPushSerializedDescriptorsCalled:
396 LOG(ERROR) << "OBUs have been pushed, but `ObuSequencerBase` is being "
397 "destroyed without calling `Close` or `Abort`.";
398 return;
399 case kClosed:
400 return;
401 }
402 // The above switch is exhaustive.
403 LOG(FATAL) << "Unexpected state: " << static_cast<int>(state_);
404 };
405
// Serializes the descriptor OBUs (and adjacent arbitrary OBUs), caches them
// together with their statistics, and - unless descriptors are delayed until
// the first untrimmed sample - pushes them to the concrete sequencer.
//
// May only be called once, on a freshly constructed sequencer; otherwise
// `FailedPreconditionError` is returned. On every error return, including the
// precondition failures, the sequencer is aborted.
absl::Status ObuSequencerBase::PushDescriptorObus(
    const IASequenceHeaderObu& ia_sequence_header_obu,
    const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
    const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
    const std::list<MixPresentationObu>& mix_presentation_obus,
    const std::list<ArbitraryObu>& arbitrary_obus) {
  // Many failure points should call `Abort`. We want to avoid leaving
  // sequencers open if they may have invalid or corrupted IAMF data.
  AbortOnDestruct abort_guard(this);
  switch (state_) {
    case kClosed:
      return absl::FailedPreconditionError(
          "`PushDescriptorObus` cannot be called after `Close` or `Abort`.");
    case kPushDescriptorObusCalled:
    case kPushSerializedDescriptorsCalled:
      return absl::FailedPreconditionError(
          "`PushDescriptorObus` can only be called once.");
    case kInitialized:
      break;
  }
  state_ = kPushDescriptorObusCalled;
  wb_.Reset();

  // Serialize the descriptor OBUs, bracketed by the arbitrary OBUs hooked
  // before and after the descriptors.
  RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
      ArbitraryObu::kInsertionHookBeforeDescriptors, arbitrary_obus, wb_));
  RETURN_IF_NOT_OK(ObuSequencerBase::WriteDescriptorObus(
      ia_sequence_header_obu, codec_config_obus, audio_elements,
      mix_presentation_obus, arbitrary_obus, wb_));
  RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
      ArbitraryObu::kInsertionHookAfterDescriptors, arbitrary_obus, wb_));

  // Cache the descriptor OBUs, so we can validate "functional" equivalence if
  // the user calls `UpdateDescriptorObusAndClose`.
  DescriptorStatistics initial_statistics{.descriptor_obus = wb_.bit_buffer()};
  RETURN_IF_NOT_OK(
      FillDescriptorStatistics(codec_config_obus, initial_statistics));
  descriptor_statistics_.emplace(std::move(initial_statistics));

  if (delay_descriptors_until_first_untrimmed_sample_) {
    // The descriptors are pushed later, once `first_pts` is known.
    abort_guard.CancelAbort();
    return absl::OkStatus();
  }

  // Avoid unnecessary delay, for concrete classes that don't need `first_pts`.
  RETURN_IF_NOT_OK(PushSerializedDescriptorObus(
      descriptor_statistics_->common_samples_per_frame,
      descriptor_statistics_->common_sample_rate,
      descriptor_statistics_->common_bit_depth,
      descriptor_statistics_->first_untrimmed_timestamp,
      descriptor_statistics_->num_channels,
      descriptor_statistics_->descriptor_obus));
  state_ = kPushSerializedDescriptorsCalled;

  abort_guard.CancelAbort();
  return absl::OkStatus();
}
463
PushTemporalUnit(const TemporalUnitView & temporal_unit)464 absl::Status ObuSequencerBase::PushTemporalUnit(
465 const TemporalUnitView& temporal_unit) {
466 // Many failure points should call `Abort`. We want to avoid leaving
467 // sequencers open if they may have invalid or corrupted IAMF data.
468 AbortOnDestruct abort_on_destruct(this);
469 switch (state_) {
470 case kInitialized:
471 return absl::FailedPreconditionError(
472 "PushDescriptorObus must be called before PushTemporalUnit.");
473 break;
474 case kPushDescriptorObusCalled:
475 case kPushSerializedDescriptorsCalled:
476 break;
477 case kClosed:
478 return absl::FailedPreconditionError(
479 "PushTemporalUnit can only be called before `Close` or `Abort`.");
480 }
481 wb_.Reset();
482
483 // Cache the frame for later
484 const int64_t start_timestamp =
485 static_cast<int64_t>(temporal_unit.start_timestamp_);
486 int num_samples = 0;
487 RETURN_IF_NOT_OK(WriteTemporalUnit(include_temporal_delimiters_,
488 temporal_unit, wb_, num_samples));
489 cumulative_num_samples_for_logging_ += num_samples;
490 num_temporal_units_for_logging_++;
491
492 if (!descriptor_statistics_->first_untrimmed_timestamp.has_value()) {
493 // Treat the initial temporal units as a special case, this helps gather
494 // statistics about the first untrimmed sample.
495 RETURN_IF_NOT_OK(HandleInitialTemporalUnits(
496 temporal_unit, absl::MakeConstSpan(wb_.bit_buffer())));
497
498 } else if (temporal_unit.num_samples_to_trim_at_start_ > 0) {
499 return absl::InvalidArgumentError(
500 "A unit has samples to trim at start, but the first untrimmed sample "
501 "was already found.");
502 } else [[likely]] {
503 // This is by far the most common case, after we have seen the first real
504 // frame of audio, we can handle this simply.
505 RETURN_IF_NOT_OK(PushSerializedTemporalUnit(
506 start_timestamp, num_samples, absl::MakeConstSpan(wb_.bit_buffer())));
507 }
508
509 abort_on_destruct.CancelAbort();
510 return absl::OkStatus();
511 }
512
// Convenience entry point that sequences a complete IA Sequence in one call:
// pushes the descriptor OBUs, groups the audio frames, parameter blocks and
// arbitrary OBUs into temporal units, pushes each temporal unit in ascending
// timestamp order, and finally closes the sequencer.
//
// Returns the first error encountered, or the result of `Close`.
absl::Status ObuSequencerBase::PickAndPlace(
    const IASequenceHeaderObu& ia_sequence_header_obu,
    const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
    const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
    const std::list<MixPresentationObu>& mix_presentation_obus,
    const std::list<AudioFrameWithData>& audio_frames,
    const std::list<ParameterBlockWithData>& parameter_blocks,
    const std::list<ArbitraryObu>& arbitrary_obus) {
  RETURN_IF_NOT_OK(PushDescriptorObus(ia_sequence_header_obu, codec_config_obus,
                                      audio_elements, mix_presentation_obus,
                                      arbitrary_obus));

  TemporalUnitMap units_by_timestamp;
  RETURN_IF_NOT_OK(GenerateTemporalUnitMap(audio_frames, parameter_blocks,
                                           arbitrary_obus, units_by_timestamp));
  // The map iterates by timestamp, so units are pushed in that order.
  for (const auto& timestamp_and_unit : units_by_timestamp) {
    RETURN_IF_NOT_OK(PushTemporalUnit(timestamp_and_unit.second));
  }

  return Close();
}
533
// Re-serializes the (possibly updated) descriptor OBUs, checks that they are
// compatible with the descriptors cached by `PushDescriptorObus`, pushes them
// as the finalized descriptors, and closes the sequencer.
//
// Returns:
//  - `FailedPreconditionError` if `PushDescriptorObus` was never called, or the
//    sequencer was already closed or aborted.
//  - `FailedPreconditionError` if the samples per frame, sample rate,
//    bit-depth, or number of channels differ from the cached statistics.
//  - `UnimplementedError` if the serialized descriptors changed size.
//  - Any error from serializing, pushing, or closing.
//
// On every error return the sequencer is aborted.
absl::Status ObuSequencerBase::UpdateDescriptorObusAndClose(
    const IASequenceHeaderObu& ia_sequence_header_obu,
    const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
    const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
    const std::list<MixPresentationObu>& mix_presentation_obus,
    const std::list<ArbitraryObu>& arbitrary_obus) {
  // Many failure points should call `Abort`. We want to avoid leaving
  // sequencers open if they may have invalid or corrupted IAMF data.
  AbortOnDestruct abort_on_destruct(this);
  switch (state_) {
    case kInitialized:
      return absl::FailedPreconditionError(
          "`UpdateDescriptorObusAndClose` must be called after "
          "`PushDescriptorObus`.");
    case kPushDescriptorObusCalled:
    case kPushSerializedDescriptorsCalled:
      break;
    case kClosed:
      return absl::FailedPreconditionError(
          "`Abort` or `Close` previously called.");
  }
  wb_.Reset();

  // Serialize descriptor OBUS and adjacent arbitrary OBUs.
  RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
      ArbitraryObu::kInsertionHookBeforeDescriptors, arbitrary_obus, wb_));
  // Write out the descriptor OBUs.
  RETURN_IF_NOT_OK(ObuSequencerBase::WriteDescriptorObus(
      ia_sequence_header_obu, codec_config_obus, audio_elements,
      mix_presentation_obus, arbitrary_obus, wb_));
  RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
      ArbitraryObu::kInsertionHookAfterDescriptors, arbitrary_obus, wb_));

  // We're a bit loose with what types of metadata we allow to change. Check
  // at least the "functional" statistics are equivalent.
  DescriptorStatistics descriptor_statistics{.descriptor_obus =
                                                 wb_.bit_buffer()};
  RETURN_IF_NOT_OK(
      FillDescriptorStatistics(codec_config_obus, descriptor_statistics));
  if (descriptor_statistics_->common_samples_per_frame !=
          descriptor_statistics.common_samples_per_frame ||
      descriptor_statistics_->common_sample_rate !=
          descriptor_statistics.common_sample_rate ||
      descriptor_statistics_->common_bit_depth !=
          descriptor_statistics.common_bit_depth ||
      descriptor_statistics_->num_channels !=
          descriptor_statistics.num_channels) {
    // This branch is about the stream properties, not the serialized size.
    return absl::FailedPreconditionError(
        "Descriptor OBUs have changed samples per frame, sample rate, "
        "bit-depth, or number of channels between finalizing and closing.");
  }
  // The finalized descriptors must have the same serialized size as the
  // cached ones; a different size is reported as unimplemented.
  if (descriptor_statistics_->descriptor_obus.size() !=
      descriptor_statistics.descriptor_obus.size()) {
    return absl::UnimplementedError(
        "Descriptor OBUs have changed size between finalizing and closing.");
  }

  RETURN_IF_NOT_OK(
      PushFinalizedDescriptorObus(absl::MakeConstSpan(wb_.bit_buffer())));
  // Mark the descriptors as pushed, so `Close` does not push them again.
  state_ = kPushSerializedDescriptorsCalled;
  RETURN_IF_NOT_OK(Close());

  abort_on_destruct.CancelAbort();
  return absl::OkStatus();
}
599
Close()600 absl::Status ObuSequencerBase::Close() {
601 switch (state_) {
602 case kInitialized:
603 break;
604 case kPushDescriptorObusCalled: {
605 // Ok, trivial IA sequences don't have a first untrimmed timestamp. So
606 // we will simply push the descriptors with a fallback PTS of 0.
607 descriptor_statistics_->first_untrimmed_timestamp = kFallbackFirstPts;
608
609 RETURN_IF_NOT_OK(PushSerializedDescriptorObus(
610 descriptor_statistics_->common_samples_per_frame,
611 descriptor_statistics_->common_sample_rate,
612 descriptor_statistics_->common_bit_depth,
613 descriptor_statistics_->first_untrimmed_timestamp,
614 descriptor_statistics_->num_channels,
615 descriptor_statistics_->descriptor_obus));
616 state_ = kPushSerializedDescriptorsCalled;
617 break;
618 }
619 case kPushSerializedDescriptorsCalled:
620 break;
621 case kClosed:
622 return absl::FailedPreconditionError(
623 "`Abort` or `Close` previously called.");
624 }
625 CloseDerived();
626 state_ = kClosed;
627 return absl::OkStatus();
628 }
629
Abort()630 void ObuSequencerBase::Abort() {
631 AbortDerived();
632 state_ = kClosed;
633 }
634
HandleInitialTemporalUnits(const TemporalUnitView & temporal_unit,absl::Span<const uint8_t> serialized_temporal_unit)635 absl::Status ObuSequencerBase::HandleInitialTemporalUnits(
636 const TemporalUnitView& temporal_unit,
637 absl::Span<const uint8_t> serialized_temporal_unit) {
638 const bool found_first_untrimmed_sample =
639 temporal_unit.num_untrimmed_samples_ != 0;
640 if (found_first_untrimmed_sample) {
641 // Gather the PTS. For internal accuracy, we store this even if we don't
642 // need to delay the descriptors.
643 descriptor_statistics_->first_untrimmed_timestamp =
644 temporal_unit.start_timestamp_ +
645 temporal_unit.num_samples_to_trim_at_start_;
646 }
647
648 // Push immediately if we don't need to delay the descriptors.
649 if (!delay_descriptors_until_first_untrimmed_sample_) {
650 return PushSerializedTemporalUnit(temporal_unit.start_timestamp_,
651 temporal_unit.num_untrimmed_samples_,
652 serialized_temporal_unit);
653 }
654
655 if (!found_first_untrimmed_sample) {
656 // This frame is fully trimmed. Cache it for later.
657 delayed_temporal_units_.push_back(SerializedTemporalUnit{
658 .start_timestamp = temporal_unit.start_timestamp_,
659 .num_untrimmed_samples = temporal_unit.num_untrimmed_samples_,
660 .data = std::vector<uint8_t>(serialized_temporal_unit.begin(),
661 serialized_temporal_unit.end())});
662 return absl::OkStatus();
663 }
664
665 // Found the first untrimmed sample. Push out all delayed OBUs.
666 RETURN_IF_NOT_OK(PushSerializedDescriptorObus(
667 descriptor_statistics_->common_samples_per_frame,
668 descriptor_statistics_->common_sample_rate,
669 descriptor_statistics_->common_bit_depth,
670 descriptor_statistics_->first_untrimmed_timestamp,
671 descriptor_statistics_->num_channels,
672 descriptor_statistics_->descriptor_obus));
673 state_ = kPushSerializedDescriptorsCalled;
674
675 // Flush any delayed temporal units.
676 for (const auto& delayed_temporal_unit : delayed_temporal_units_) {
677 RETURN_IF_NOT_OK(PushSerializedTemporalUnit(
678 delayed_temporal_unit.start_timestamp,
679 delayed_temporal_unit.num_untrimmed_samples,
680 absl::MakeConstSpan(delayed_temporal_unit.data)));
681 }
682 delayed_temporal_units_.clear();
683 // Then finally, flush the current temporal unit.
684 return PushSerializedTemporalUnit(temporal_unit.start_timestamp_,
685 temporal_unit.num_untrimmed_samples_,
686 serialized_temporal_unit);
687 }
688
689 } // namespace iamf_tools
690