• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 3-Clause Clear License
5  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6  * License was not distributed with this source code in the LICENSE file, you
7  * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8  * Alliance for Open Media Patent License 1.0 was not distributed with this
9  * source code in the PATENTS file, you can obtain it at
10  * www.aomedia.org/license/patent.
11  */
12 #include "iamf/cli/obu_sequencer_base.h"
13 
14 #include <algorithm>
15 #include <cstdint>
16 #include <functional>
17 #include <list>
18 #include <optional>
19 #include <utility>
20 #include <vector>
21 
22 #include "absl/container/btree_map.h"
23 #include "absl/container/flat_hash_map.h"
24 #include "absl/container/flat_hash_set.h"
25 #include "absl/log/check.h"
26 #include "absl/log/log.h"
27 #include "absl/status/status.h"
28 #include "absl/types/span.h"
29 #include "iamf/cli/audio_element_with_data.h"
30 #include "iamf/cli/audio_frame_with_data.h"
31 #include "iamf/cli/cli_util.h"
32 #include "iamf/cli/parameter_block_with_data.h"
33 #include "iamf/cli/profile_filter.h"
34 #include "iamf/cli/temporal_unit_view.h"
35 #include "iamf/common/leb_generator.h"
36 #include "iamf/common/utils/macros.h"
37 #include "iamf/common/write_bit_buffer.h"
38 #include "iamf/obu/arbitrary_obu.h"
39 #include "iamf/obu/audio_element.h"
40 #include "iamf/obu/audio_frame.h"
41 #include "iamf/obu/codec_config.h"
42 #include "iamf/obu/ia_sequence_header.h"
43 #include "iamf/obu/mix_presentation.h"
44 #include "iamf/obu/obu_header.h"
45 #include "iamf/obu/parameter_block.h"
46 #include "iamf/obu/temporal_delimiter.h"
47 #include "iamf/obu/types.h"
48 
49 namespace iamf_tools {
50 
51 namespace {
52 
// Initial capacity of the write buffer: 64 KB. The buffer will resize when a
// larger OBU needs to be written.
constexpr int64_t kBufferStartSize = 64 * 1024;
56 
57 /*!\brief Map of start timestamp -> OBUs in that temporal unit.
58  *
59  * Map of temporal unit start time -> OBUs that overlap this temporal unit.
60  * Using absl::btree_map for convenience as this allows iterating by
61  * timestamp (which is the key).
62  */
63 typedef absl::btree_map<int32_t, TemporalUnitView> TemporalUnitMap;
64 
65 /*!\brief Helper class to abort an `ObuSequencerBase` on destruction.
66  *
67  * This class calls on an `ObuSequencerBase::Abort` on destruction. Or does
68  * nothing if `CancelAbort` is called.
69  *
70  * Typically, this is useful to create an instance of this class on the stack,
71  * in the scope of a function which has many locations where it may return an
72  * un-recoverable error. When those exit points are reached, the sequencer will
73  * automatically be aborted.
74  *
75  * Before any successful exit point, `CancelAbort` should be called, which will
76  * prevent the sequencer from being aborting.
77  */
78 class AbortOnDestruct {
79  public:
80   /*!\brief Constructor
81    *
82    * \param obu_sequencer The `ObuSequencerBase` to abort on destruction.
83    */
AbortOnDestruct(ObuSequencerBase * obu_sequencer)84   explicit AbortOnDestruct(ObuSequencerBase* obu_sequencer)
85       : obu_sequencer(obu_sequencer) {}
86 
87   /*!\brief Destructor */
~AbortOnDestruct()88   ~AbortOnDestruct() {
89     if (obu_sequencer != nullptr) {
90       obu_sequencer->Abort();
91     }
92   }
93 
94   /*!\brief Cancels the abort on destruction. */
CancelAbort()95   void CancelAbort() { obu_sequencer = nullptr; }
96 
97  private:
98   ObuSequencerBase* obu_sequencer;
99 };
100 
// Collects every key of `map` into a vector and orders the result using
// `comparator`. Keys are stored as `uint32_t`.
template <typename KeyValueMap, typename KeyComparator>
std::vector<uint32_t> SortedKeys(const KeyValueMap& map,
                                 const KeyComparator& comparator) {
  // Size the output up front, then fill each slot with the matching key.
  std::vector<uint32_t> ordered_ids(map.size());
  std::transform(map.begin(), map.end(), ordered_ids.begin(),
                 [](const auto& entry) { return entry.first; });
  std::sort(ordered_ids.begin(), ordered_ids.end(), comparator);
  return ordered_ids;
}

// A "trivial" IA Sequence may lack descriptor OBUs or audio frames, and would
// decode to an empty stream. When the true value is undefined, the constants
// below provide reasonable, but arbitrary, defaults.

// Samples per frame to report when there are no audio frames.
constexpr uint32_t kFallbackSamplesPerFrame{1024};
// Sample rate to report when there are no Codec Config OBUs.
constexpr uint32_t kFallbackSampleRate{48000};
// Bit-depth to report when there are no Codec Config OBUs.
constexpr uint8_t kFallbackBitDepth{16};
// Number of channels to report when there are no audio elements.
constexpr uint32_t kFallbackNumChannels{2};
// First PTS to report when there are no audio frames.
constexpr int64_t kFallbackFirstPts{0};
126 
127 // Gets the sum of the number of channels for the given audio elements. Or falls
128 // back to a default value if there are no audio elements.
GetNumberOfChannels(const absl::flat_hash_map<uint32_t,AudioElementWithData> & audio_elements)129 int32_t GetNumberOfChannels(
130     const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements) {
131   if (audio_elements.empty()) {
132     // The muxer fails if we return the true value (0 channels).
133     return kFallbackNumChannels;
134   }
135 
136   int32_t num_channels = 0;
137   for (const auto& [audio_element_id, audio_element] : audio_elements) {
138     // Add the number of channels for every substream in every audio element.
139     for (const auto& [substream_id, labels] :
140          audio_element.substream_id_to_labels) {
141       num_channels += static_cast<int32_t>(labels.size());
142     }
143   }
144   return num_channels;
145 }
146 
147 // Gets the common sample rate and bit depth for the given codec config OBUs. Or
148 // falls back to default values if there are no codec configs.
GetCommonSampleRateAndBitDepth(const absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus,uint32_t & common_sample_rate,uint8_t & common_bit_depth,bool & requires_resampling)149 absl::Status GetCommonSampleRateAndBitDepth(
150     const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
151     uint32_t& common_sample_rate, uint8_t& common_bit_depth,
152     bool& requires_resampling) {
153   if (codec_config_obus.empty()) {
154     // The true value is undefined, but the muxer requires non-zero values.
155     common_sample_rate = kFallbackSampleRate;
156     common_bit_depth = kFallbackBitDepth;
157     requires_resampling = false;
158     return absl::OkStatus();
159   }
160 
161   requires_resampling = false;
162   absl::flat_hash_set<uint32_t> sample_rates;
163   absl::flat_hash_set<uint8_t> bit_depths;
164   for (const auto& [unused_id, obu] : codec_config_obus) {
165     sample_rates.insert(obu.GetOutputSampleRate());
166     bit_depths.insert(obu.GetBitDepthToMeasureLoudness());
167   }
168 
169   return ::iamf_tools::GetCommonSampleRateAndBitDepth(
170       sample_rates, bit_depths, common_sample_rate, common_bit_depth,
171       requires_resampling);
172 }
173 
WriteObusWithHook(ArbitraryObu::InsertionHook insertion_hook,const std::vector<const ArbitraryObu * > & arbitrary_obus,WriteBitBuffer & wb)174 absl::Status WriteObusWithHook(
175     ArbitraryObu::InsertionHook insertion_hook,
176     const std::vector<const ArbitraryObu*>& arbitrary_obus,
177     WriteBitBuffer& wb) {
178   for (const auto& arbitrary_obu : arbitrary_obus) {
179     if (arbitrary_obu->insertion_hook_ == insertion_hook) {
180       RETURN_IF_NOT_OK(arbitrary_obu->ValidateAndWriteObu(wb));
181     }
182   }
183   return absl::OkStatus();
184 }
185 
// Fills the "functional" fields of `descriptor_statistics` (samples per frame,
// sample rate, bit-depth and number of channels).
//
// Every field starts at its fallback value. The sample rate and bit-depth are
// then derived from `codec_config_obus`, followed by the samples per frame.
// The number of channels is left at its fallback value by this function.
//
// Returns `UnimplementedError` when the codec configs would require
// resampling, or any error reported by the helpers.
absl::Status FillDescriptorStatistics(
    const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
    auto& descriptor_statistics) {
  auto& stats = descriptor_statistics;
  stats.num_channels = kFallbackNumChannels;
  stats.common_bit_depth = kFallbackBitDepth;
  stats.common_sample_rate = kFallbackSampleRate;
  stats.common_samples_per_frame = kFallbackSamplesPerFrame;

  bool needs_resampling = false;
  RETURN_IF_NOT_OK(GetCommonSampleRateAndBitDepth(
      codec_config_obus, stats.common_sample_rate, stats.common_bit_depth,
      needs_resampling));
  if (needs_resampling) {
    return absl::UnimplementedError(
        "Codec Config OBUs with different bit-depths and/or sample "
        "rates are not in base-enhanced/base/simple profile; they are not "
        "allowed in ISOBMFF.");
  }

  // This relies on all Codec Configs sharing one sample rate and frame size.
  // More care may be needed in the future if IA Samples (other than the final
  // one) stop having a uniform duration.
  return GetCommonSamplesPerFrame(codec_config_obus,
                                  stats.common_samples_per_frame);
}
211 
212 [[deprecated("Remove this, and related types when `PickAndPlace` is removed.")]]
GenerateTemporalUnitMap(const std::list<AudioFrameWithData> & audio_frames,const std::list<ParameterBlockWithData> & parameter_blocks,const std::list<ArbitraryObu> & arbitrary_obus,TemporalUnitMap & temporal_unit_map)213 absl::Status GenerateTemporalUnitMap(
214     const std::list<AudioFrameWithData>& audio_frames,
215     const std::list<ParameterBlockWithData>& parameter_blocks,
216     const std::list<ArbitraryObu>& arbitrary_obus,
217     TemporalUnitMap& temporal_unit_map) {
218   // Initially, guess the temporal units by the start time. Deeper validation
219   // and sanitization occurs when creating the TemporalUnitView.
220   struct UnsanitizedTemporalUnit {
221     std::vector<const ParameterBlockWithData*> parameter_blocks;
222     std::vector<const AudioFrameWithData*> audio_frames;
223     std::vector<const ArbitraryObu*> arbitrary_obus;
224   };
225   typedef absl::flat_hash_map<InternalTimestamp, UnsanitizedTemporalUnit>
226       UnsanitizedTemporalUnitMap;
227   UnsanitizedTemporalUnitMap unsanitized_temporal_unit_map;
228 
229   for (const auto& parameter_block : parameter_blocks) {
230     unsanitized_temporal_unit_map[parameter_block.start_timestamp]
231         .parameter_blocks.push_back(&parameter_block);
232   }
233   for (auto& audio_frame : audio_frames) {
234     unsanitized_temporal_unit_map[audio_frame.start_timestamp]
235         .audio_frames.push_back(&audio_frame);
236   }
237   for (const auto& arbitrary_obu : arbitrary_obus) {
238     if (arbitrary_obu.insertion_tick_ == std::nullopt) {
239       continue;
240     }
241     unsanitized_temporal_unit_map[*arbitrary_obu.insertion_tick_]
242         .arbitrary_obus.push_back(&arbitrary_obu);
243   }
244   // Sanitize and build a map on the sanitized temporal units.
245   for (const auto& [timestamp, unsanitized_temporal_unit] :
246        unsanitized_temporal_unit_map) {
247     auto temporal_unit_view = TemporalUnitView::CreateFromPointers(
248         unsanitized_temporal_unit.parameter_blocks,
249         unsanitized_temporal_unit.audio_frames,
250         unsanitized_temporal_unit.arbitrary_obus);
251     if (!temporal_unit_view.ok()) {
252       return temporal_unit_view.status();
253     }
254     temporal_unit_map.emplace(timestamp, *std::move(temporal_unit_view));
255   }
256 
257   return absl::OkStatus();
258 }
259 
260 }  // namespace
261 
WriteTemporalUnit(bool include_temporal_delimiters,const TemporalUnitView & temporal_unit,WriteBitBuffer & wb,int & num_samples)262 absl::Status ObuSequencerBase::WriteTemporalUnit(
263     bool include_temporal_delimiters, const TemporalUnitView& temporal_unit,
264     WriteBitBuffer& wb, int& num_samples) {
265   num_samples += temporal_unit.num_untrimmed_samples_;
266 
267   if (include_temporal_delimiters) {
268     // Temporal delimiter has no payload.
269     const TemporalDelimiterObu obu((ObuHeader()));
270     RETURN_IF_NOT_OK(obu.ValidateAndWriteObu(wb));
271   }
272 
273   RETURN_IF_NOT_OK(
274       WriteObusWithHook(ArbitraryObu::kInsertionHookBeforeParameterBlocksAtTick,
275                         temporal_unit.arbitrary_obus_, wb));
276 
277   // Write the Parameter Block OBUs.
278   for (const auto& parameter_blocks : temporal_unit.parameter_blocks_) {
279     const auto& parameter_block = parameter_blocks;
280     RETURN_IF_NOT_OK(parameter_block->obu->ValidateAndWriteObu(wb));
281   }
282 
283   RETURN_IF_NOT_OK(
284       WriteObusWithHook(ArbitraryObu::kInsertionHookAfterParameterBlocksAtTick,
285                         temporal_unit.arbitrary_obus_, wb));
286 
287   // Write Audio Frame OBUs.
288   for (const auto& audio_frame : temporal_unit.audio_frames_) {
289     RETURN_IF_NOT_OK(audio_frame->obu.ValidateAndWriteObu(wb));
290     LOG_FIRST_N(INFO, 10) << "wb.bit_offset= " << wb.bit_offset()
291                           << " after Audio Frame";
292   }
293 
294   RETURN_IF_NOT_OK(
295       WriteObusWithHook(ArbitraryObu::kInsertionHookAfterAudioFramesAtTick,
296                         temporal_unit.arbitrary_obus_, wb));
297 
298   if (!wb.IsByteAligned()) {
299     return absl::InvalidArgumentError("Write buffer not byte-aligned");
300   }
301 
302   return absl::OkStatus();
303 }
304 
305 // Writes the descriptor OBUs. Section 5.1.1
306 // (https://aomediacodec.github.io/iamf/#standalone-descriptor-obus) orders the
307 // OBUs by type.
308 //
309 // For Codec Config OBUs and Audio Element OBUs, the order is arbitrary. For
310 // determinism this implementation orders them by ascending ID.
311 //
312 // For Mix Presentation OBUs, the order is the same as the original order.
313 // Because the original ordering may be used downstream when selecting the mix
314 // presentation
315 // (https://aomediacodec.github.io/iamf/#processing-mixpresentation-selection).
316 //
317 // For Arbitrary OBUs, they are inserted in an order implied by the insertion
318 // hook. Ties are broken by the original order, when multiple OBUs have the same
319 // hook.
WriteDescriptorObus(const IASequenceHeaderObu & ia_sequence_header_obu,const absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus,const absl::flat_hash_map<uint32_t,AudioElementWithData> & audio_elements,const std::list<MixPresentationObu> & mix_presentation_obus,const std::list<ArbitraryObu> & arbitrary_obus,WriteBitBuffer & wb)320 absl::Status ObuSequencerBase::WriteDescriptorObus(
321     const IASequenceHeaderObu& ia_sequence_header_obu,
322     const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
323     const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
324     const std::list<MixPresentationObu>& mix_presentation_obus,
325     const std::list<ArbitraryObu>& arbitrary_obus, WriteBitBuffer& wb) {
326   // Write IA Sequence Header OBU.
327   RETURN_IF_NOT_OK(ia_sequence_header_obu.ValidateAndWriteObu(wb));
328   LOG(INFO) << "wb.bit_offset= " << wb.bit_offset()
329             << " after IA Sequence Header";
330 
331   RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
332       ArbitraryObu::kInsertionHookAfterIaSequenceHeader, arbitrary_obus, wb));
333 
334   // Write Codec Config OBUs in ascending order of Codec Config IDs.
335   // TODO(b/332956880): Support customizing the ordering.
336   const std::vector<uint32_t> codec_config_ids =
337       SortedKeys(codec_config_obus, std::less<uint32_t>());
338   for (const auto id : codec_config_ids) {
339     RETURN_IF_NOT_OK(codec_config_obus.at(id).ValidateAndWriteObu(wb));
340     LOG(INFO) << "wb.bit_offset= " << wb.bit_offset() << " after Codec Config";
341   }
342 
343   RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
344       ArbitraryObu::kInsertionHookAfterCodecConfigs, arbitrary_obus, wb));
345 
346   // Write Audio Element OBUs in ascending order of Audio Element IDs.
347   // TODO(b/332956880): Support customizing the ordering.
348   const std::vector<uint32_t> audio_element_ids =
349       SortedKeys(audio_elements, std::less<uint32_t>());
350   for (const auto id : audio_element_ids) {
351     RETURN_IF_NOT_OK(audio_elements.at(id).obu.ValidateAndWriteObu(wb));
352     LOG(INFO) << "wb.bit_offset= " << wb.bit_offset() << " after Audio Element";
353   }
354 
355   RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
356       ArbitraryObu::kInsertionHookAfterAudioElements, arbitrary_obus, wb));
357 
358   // TODO(b/269708630): Ensure at least one the profiles in the IA Sequence
359   //                    Header supports all of the layers for scalable audio
360   //                    elements.
361   // Maintain the original order of Mix Presentation OBUs.
362   for (const auto& mix_presentation_obu : mix_presentation_obus) {
363     // Make sure the mix presentation is valid for at least one of the profiles
364     // in the sequence header before writing it.
365     absl::flat_hash_set<ProfileVersion> profile_version = {
366         ia_sequence_header_obu.GetPrimaryProfile(),
367         ia_sequence_header_obu.GetAdditionalProfile()};
368     RETURN_IF_NOT_OK(ProfileFilter::FilterProfilesForMixPresentation(
369         audio_elements, mix_presentation_obu, profile_version));
370 
371     RETURN_IF_NOT_OK(mix_presentation_obu.ValidateAndWriteObu(wb));
372     LOG(INFO) << "wb.bit_offset= " << wb.bit_offset()
373               << " after Mix Presentation";
374   }
375   RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
376       ArbitraryObu::kInsertionHookAfterMixPresentations, arbitrary_obus, wb));
377 
378   return absl::OkStatus();
379 }
380 
ObuSequencerBase(const LebGenerator & leb_generator,bool include_temporal_delimiters,bool delay_descriptors_until_first_untrimmed_sample)381 ObuSequencerBase::ObuSequencerBase(
382     const LebGenerator& leb_generator, bool include_temporal_delimiters,
383     bool delay_descriptors_until_first_untrimmed_sample)
384     : leb_generator_(leb_generator),
385       delay_descriptors_until_first_untrimmed_sample_(
386           delay_descriptors_until_first_untrimmed_sample),
387       include_temporal_delimiters_(include_temporal_delimiters),
388       wb_(kBufferStartSize, leb_generator) {}
389 
~ObuSequencerBase()390 ObuSequencerBase::~ObuSequencerBase() {
391   switch (state_) {
392     case kInitialized:
393       return;
394     case kPushDescriptorObusCalled:
395     case kPushSerializedDescriptorsCalled:
396       LOG(ERROR) << "OBUs have been pushed, but `ObuSequencerBase` is being "
397                     "destroyed without calling `Close` or `Abort`.";
398       return;
399     case kClosed:
400       return;
401   }
402   // The above switch is exhaustive.
403   LOG(FATAL) << "Unexpected state: " << static_cast<int>(state_);
404 };
405 
PushDescriptorObus(const IASequenceHeaderObu & ia_sequence_header_obu,const absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus,const absl::flat_hash_map<uint32_t,AudioElementWithData> & audio_elements,const std::list<MixPresentationObu> & mix_presentation_obus,const std::list<ArbitraryObu> & arbitrary_obus)406 absl::Status ObuSequencerBase::PushDescriptorObus(
407     const IASequenceHeaderObu& ia_sequence_header_obu,
408     const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
409     const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
410     const std::list<MixPresentationObu>& mix_presentation_obus,
411     const std::list<ArbitraryObu>& arbitrary_obus) {
412   // Many failure points should call `Abort`. We want to avoid leaving
413   // sequencers open if they may have invalid or corrupted IAMF data.
414   AbortOnDestruct abort_on_destruct(this);
415   switch (state_) {
416     case kInitialized:
417       break;
418     case kPushDescriptorObusCalled:
419     case kPushSerializedDescriptorsCalled:
420       return absl::FailedPreconditionError(
421           "`PushDescriptorObus` can only be called once.");
422     case kClosed:
423       return absl::FailedPreconditionError(
424           "`PushDescriptorObus` cannot be called after `Close` or `Abort`.");
425   }
426   state_ = kPushDescriptorObusCalled;
427   wb_.Reset();
428 
429   // Serialize descriptor OBUS and adjacent arbitrary OBUs.
430   RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
431       ArbitraryObu::kInsertionHookBeforeDescriptors, arbitrary_obus, wb_));
432   // Write out the descriptor OBUs.
433   RETURN_IF_NOT_OK(ObuSequencerBase::WriteDescriptorObus(
434       ia_sequence_header_obu, codec_config_obus, audio_elements,
435       mix_presentation_obus, arbitrary_obus, wb_));
436   RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
437       ArbitraryObu::kInsertionHookAfterDescriptors, arbitrary_obus, wb_));
438   // Cache the descriptor OBUs, so we can validate "functional" equivalence if
439   // the user calls `UpdateDescriptorObusAndClose`.
440   DescriptorStatistics descriptor_statistics{.descriptor_obus =
441                                                  wb_.bit_buffer()};
442   RETURN_IF_NOT_OK(
443       FillDescriptorStatistics(codec_config_obus, descriptor_statistics));
444   descriptor_statistics_.emplace(std::move(descriptor_statistics));
445 
446   if (!delay_descriptors_until_first_untrimmed_sample_) {
447     // Avoid unnecessary delay, for concrete classes that don't need
448     // `first_pts`.
449     RETURN_IF_NOT_OK(PushSerializedDescriptorObus(
450         descriptor_statistics_->common_samples_per_frame,
451         descriptor_statistics_->common_sample_rate,
452         descriptor_statistics_->common_bit_depth,
453         descriptor_statistics_->first_untrimmed_timestamp,
454         descriptor_statistics_->num_channels,
455         descriptor_statistics_->descriptor_obus));
456 
457     state_ = kPushSerializedDescriptorsCalled;
458   }
459 
460   abort_on_destruct.CancelAbort();
461   return absl::OkStatus();
462 }
463 
PushTemporalUnit(const TemporalUnitView & temporal_unit)464 absl::Status ObuSequencerBase::PushTemporalUnit(
465     const TemporalUnitView& temporal_unit) {
466   // Many failure points should call `Abort`. We want to avoid leaving
467   // sequencers open if they may have invalid or corrupted IAMF data.
468   AbortOnDestruct abort_on_destruct(this);
469   switch (state_) {
470     case kInitialized:
471       return absl::FailedPreconditionError(
472           "PushDescriptorObus must be called before PushTemporalUnit.");
473       break;
474     case kPushDescriptorObusCalled:
475     case kPushSerializedDescriptorsCalled:
476       break;
477     case kClosed:
478       return absl::FailedPreconditionError(
479           "PushTemporalUnit can only be called before `Close` or `Abort`.");
480   }
481   wb_.Reset();
482 
483   // Cache the frame for later
484   const int64_t start_timestamp =
485       static_cast<int64_t>(temporal_unit.start_timestamp_);
486   int num_samples = 0;
487   RETURN_IF_NOT_OK(WriteTemporalUnit(include_temporal_delimiters_,
488                                      temporal_unit, wb_, num_samples));
489   cumulative_num_samples_for_logging_ += num_samples;
490   num_temporal_units_for_logging_++;
491 
492   if (!descriptor_statistics_->first_untrimmed_timestamp.has_value()) {
493     // Treat the initial temporal units as a special case, this helps gather
494     // statistics about the first untrimmed sample.
495     RETURN_IF_NOT_OK(HandleInitialTemporalUnits(
496         temporal_unit, absl::MakeConstSpan(wb_.bit_buffer())));
497 
498   } else if (temporal_unit.num_samples_to_trim_at_start_ > 0) {
499     return absl::InvalidArgumentError(
500         "A unit has samples to trim at start, but the first untrimmed sample "
501         "was already found.");
502   } else [[likely]] {
503     // This is by far the most common case, after we have seen the first real
504     // frame of audio, we can handle this simply.
505     RETURN_IF_NOT_OK(PushSerializedTemporalUnit(
506         start_timestamp, num_samples, absl::MakeConstSpan(wb_.bit_buffer())));
507   }
508 
509   abort_on_destruct.CancelAbort();
510   return absl::OkStatus();
511 }
512 
// Convenience entry point: pushes the descriptor OBUs, groups the remaining
// OBUs into temporal units, pushes those units in ascending timestamp order
// and finally closes the sequencer.
//
// Returns the first error encountered. On every failing path the sequencer is
// aborted, so it is never left open with partially written data:
//   - `PushDescriptorObus` and `PushTemporalUnit` abort the sequencer
//     themselves when they fail.
//   - Failures of `GenerateTemporalUnitMap` and `Close` are followed by an
//     explicit `Abort` here.
absl::Status ObuSequencerBase::PickAndPlace(
    const IASequenceHeaderObu& ia_sequence_header_obu,
    const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
    const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
    const std::list<MixPresentationObu>& mix_presentation_obus,
    const std::list<AudioFrameWithData>& audio_frames,
    const std::list<ParameterBlockWithData>& parameter_blocks,
    const std::list<ArbitraryObu>& arbitrary_obus) {
  RETURN_IF_NOT_OK(PushDescriptorObus(ia_sequence_header_obu, codec_config_obus,
                                      audio_elements, mix_presentation_obus,
                                      arbitrary_obus));

  TemporalUnitMap temporal_unit_map;
  const absl::Status map_status = GenerateTemporalUnitMap(
      audio_frames, parameter_blocks, arbitrary_obus, temporal_unit_map);
  if (!map_status.ok()) {
    // Descriptors were already accepted above; do not leave the sequencer
    // open after an unrecoverable error.
    Abort();
    return map_status;
  }

  for (const auto& [timestamp, temporal_unit] : temporal_unit_map) {
    RETURN_IF_NOT_OK(PushTemporalUnit(temporal_unit));
  }

  const absl::Status close_status = Close();
  if (!close_status.ok()) {
    // `Close` does not abort on failure, mirror the handling in
    // `UpdateDescriptorObusAndClose`.
    Abort();
  }
  return close_status;
}
533 
UpdateDescriptorObusAndClose(const IASequenceHeaderObu & ia_sequence_header_obu,const absl::flat_hash_map<uint32_t,CodecConfigObu> & codec_config_obus,const absl::flat_hash_map<uint32_t,AudioElementWithData> & audio_elements,const std::list<MixPresentationObu> & mix_presentation_obus,const std::list<ArbitraryObu> & arbitrary_obus)534 absl::Status ObuSequencerBase::UpdateDescriptorObusAndClose(
535     const IASequenceHeaderObu& ia_sequence_header_obu,
536     const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
537     const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
538     const std::list<MixPresentationObu>& mix_presentation_obus,
539     const std::list<ArbitraryObu>& arbitrary_obus) {
540   // Many failure points should call `Abort`. We want to avoid leaving
541   // sequencers open if they may have invalid or corrupted IAMF data.
542   AbortOnDestruct abort_on_destruct(this);
543   switch (state_) {
544     case kInitialized:
545       return absl::FailedPreconditionError(
546           "`UpdateDescriptorObusAndClose` must be called after "
547           "`PushDescriptorObus`.");
548     case kPushDescriptorObusCalled:
549     case kPushSerializedDescriptorsCalled:
550       break;
551     case kClosed:
552       return absl::FailedPreconditionError(
553           "`Abort` or `Close` previously called.");
554   }
555   wb_.Reset();
556 
557   // Serialize descriptor OBUS and adjacent arbitrary OBUs.
558   RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
559       ArbitraryObu::kInsertionHookBeforeDescriptors, arbitrary_obus, wb_));
560   // Write out the descriptor OBUs.
561   RETURN_IF_NOT_OK(ObuSequencerBase::WriteDescriptorObus(
562       ia_sequence_header_obu, codec_config_obus, audio_elements,
563       mix_presentation_obus, arbitrary_obus, wb_));
564   RETURN_IF_NOT_OK(ArbitraryObu::WriteObusWithHook(
565       ArbitraryObu::kInsertionHookAfterDescriptors, arbitrary_obus, wb_));
566 
567   // We're a bit loose with what types of metadata we allow to change. Check
568   // at least the "functional" statistics are equivalent.
569   DescriptorStatistics descriptor_statistics{.descriptor_obus =
570                                                  wb_.bit_buffer()};
571   RETURN_IF_NOT_OK(
572       FillDescriptorStatistics(codec_config_obus, descriptor_statistics));
573   if (descriptor_statistics_->common_samples_per_frame !=
574           descriptor_statistics.common_samples_per_frame ||
575       descriptor_statistics_->common_sample_rate !=
576           descriptor_statistics.common_sample_rate ||
577       descriptor_statistics_->common_bit_depth !=
578           descriptor_statistics.common_bit_depth ||
579       descriptor_statistics_->num_channels !=
580           descriptor_statistics.num_channels) {
581     return absl::FailedPreconditionError(
582         "Descriptor OBUs have changed size between finalizing and "
583         "closing.");
584   }
585   if (descriptor_statistics_->descriptor_obus.size() !=
586       descriptor_statistics.descriptor_obus.size()) {
587     return absl::UnimplementedError(
588         "Descriptor OBUs have changed size between finalizing and closing.");
589   }
590 
591   RETURN_IF_NOT_OK(
592       PushFinalizedDescriptorObus(absl::MakeConstSpan(wb_.bit_buffer())));
593   state_ = kPushSerializedDescriptorsCalled;
594   RETURN_IF_NOT_OK(Close());
595 
596   abort_on_destruct.CancelAbort();
597   return absl::OkStatus();
598 }
599 
Close()600 absl::Status ObuSequencerBase::Close() {
601   switch (state_) {
602     case kInitialized:
603       break;
604     case kPushDescriptorObusCalled: {
605       // Ok, trivial IA sequences don't have a first untrimmed timestamp. So
606       // we will simply push the descriptors with a fallback PTS of 0.
607       descriptor_statistics_->first_untrimmed_timestamp = kFallbackFirstPts;
608 
609       RETURN_IF_NOT_OK(PushSerializedDescriptorObus(
610           descriptor_statistics_->common_samples_per_frame,
611           descriptor_statistics_->common_sample_rate,
612           descriptor_statistics_->common_bit_depth,
613           descriptor_statistics_->first_untrimmed_timestamp,
614           descriptor_statistics_->num_channels,
615           descriptor_statistics_->descriptor_obus));
616       state_ = kPushSerializedDescriptorsCalled;
617       break;
618     }
619     case kPushSerializedDescriptorsCalled:
620       break;
621     case kClosed:
622       return absl::FailedPreconditionError(
623           "`Abort` or `Close` previously called.");
624   }
625   CloseDerived();
626   state_ = kClosed;
627   return absl::OkStatus();
628 }
629 
Abort()630 void ObuSequencerBase::Abort() {
631   AbortDerived();
632   state_ = kClosed;
633 }
634 
HandleInitialTemporalUnits(const TemporalUnitView & temporal_unit,absl::Span<const uint8_t> serialized_temporal_unit)635 absl::Status ObuSequencerBase::HandleInitialTemporalUnits(
636     const TemporalUnitView& temporal_unit,
637     absl::Span<const uint8_t> serialized_temporal_unit) {
638   const bool found_first_untrimmed_sample =
639       temporal_unit.num_untrimmed_samples_ != 0;
640   if (found_first_untrimmed_sample) {
641     // Gather the PTS. For internal accuracy, we store this even if we don't
642     // need to delay the descriptors.
643     descriptor_statistics_->first_untrimmed_timestamp =
644         temporal_unit.start_timestamp_ +
645         temporal_unit.num_samples_to_trim_at_start_;
646   }
647 
648   // Push immediately if we don't need to delay the descriptors.
649   if (!delay_descriptors_until_first_untrimmed_sample_) {
650     return PushSerializedTemporalUnit(temporal_unit.start_timestamp_,
651                                       temporal_unit.num_untrimmed_samples_,
652                                       serialized_temporal_unit);
653   }
654 
655   if (!found_first_untrimmed_sample) {
656     // This frame is fully trimmed. Cache it for later.
657     delayed_temporal_units_.push_back(SerializedTemporalUnit{
658         .start_timestamp = temporal_unit.start_timestamp_,
659         .num_untrimmed_samples = temporal_unit.num_untrimmed_samples_,
660         .data = std::vector<uint8_t>(serialized_temporal_unit.begin(),
661                                      serialized_temporal_unit.end())});
662     return absl::OkStatus();
663   }
664 
665   // Found the first untrimmed sample. Push out all delayed OBUs.
666   RETURN_IF_NOT_OK(PushSerializedDescriptorObus(
667       descriptor_statistics_->common_samples_per_frame,
668       descriptor_statistics_->common_sample_rate,
669       descriptor_statistics_->common_bit_depth,
670       descriptor_statistics_->first_untrimmed_timestamp,
671       descriptor_statistics_->num_channels,
672       descriptor_statistics_->descriptor_obus));
673   state_ = kPushSerializedDescriptorsCalled;
674 
675   // Flush any delayed temporal units.
676   for (const auto& delayed_temporal_unit : delayed_temporal_units_) {
677     RETURN_IF_NOT_OK(PushSerializedTemporalUnit(
678         delayed_temporal_unit.start_timestamp,
679         delayed_temporal_unit.num_untrimmed_samples,
680         absl::MakeConstSpan(delayed_temporal_unit.data)));
681   }
682   delayed_temporal_units_.clear();
683   // Then finally, flush the current temporal unit.
684   return PushSerializedTemporalUnit(temporal_unit.start_timestamp_,
685                                     temporal_unit.num_untrimmed_samples_,
686                                     serialized_temporal_unit);
687 }
688 
689 }  // namespace iamf_tools
690