• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 3-Clause Clear License
5  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6  * License was not distributed with this source code in the LICENSE file, you
7  * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8  * Alliance for Open Media Patent License 1.0 was not distributed with this
9  * source code in the PATENTS file, you can obtain it at
10  * www.aomedia.org/license/patent.
11  */
12 
13 #include "iamf/api/decoder/iamf_decoder.h"
14 
15 #include <cstddef>
16 #include <cstdint>
17 #include <memory>
18 #include <optional>
19 #include <queue>
20 #include <utility>
21 #include <vector>
22 
23 #include "absl/log/log.h"
24 #include "absl/status/status.h"
25 #include "absl/types/span.h"
26 #include "iamf/api/conversion/mix_presentation_conversion.h"
27 #include "iamf/api/types.h"
28 #include "iamf/cli/obu_processor.h"
29 #include "iamf/cli/rendering_mix_presentation_finalizer.h"
30 #include "iamf/common/read_bit_buffer.h"
31 #include "iamf/common/utils/macros.h"
32 #include "iamf/common/utils/sample_processing_utils.h"
33 #include "iamf/obu/mix_presentation.h"
34 
35 namespace iamf_tools {
36 namespace api {
37 
// Lifecycle phases of the decoder.
enum class Status {
  // More data may still be supplied through Decode().
  kAcceptingData,
  // Flush() has been called; Decode() is rejected from this point on.
  kFlushCalled,
};
39 
40 // Holds the internal state of the decoder to hide it and necessary includes
41 // from API users.
42 struct IamfDecoder::DecoderState {
DecoderStateiamf_tools::api::IamfDecoder::DecoderState43   DecoderState(std::unique_ptr<StreamBasedReadBitBuffer> read_bit_buffer,
44                const Layout& initial_requested_layout)
45       : read_bit_buffer(std::move(read_bit_buffer)),
46         layout(initial_requested_layout) {}
47 
48   // Current status of the decoder.
49   Status status = Status::kAcceptingData;
50 
51   // Used to process descriptor OBUs and temporal units. Is only created after
52   // the descriptor OBUs have been parsed.
53   std::unique_ptr<ObuProcessor> obu_processor;
54 
55   // Buffer that is filled with data from Decode().
56   std::unique_ptr<StreamBasedReadBitBuffer> read_bit_buffer;
57 
58   // Rendered PCM samples. Each element in the queue corresponds to a
59   // temporal unit. A temporal unit will never be partially filled, so the
60   // number of elements in the outer vector is equal to the number of decoded
61   // temporal units currently available.
62   std::queue<std::vector<std::vector<int32_t>>> rendered_pcm_samples;
63 
64   // The layout used for the rendered output audio.
65   // Initially set to the requested Layout but updated by ObuProcessor.
66   Layout layout;
67 
68   // TODO(b/379122580):  Use the bit depth of the underlying content.
69   // Defaulting to int32 for now.
70   OutputSampleType output_sample_type = OutputSampleType::kInt32LittleEndian;
71 };
72 
73 namespace {
// Capacity requested from `StreamBasedReadBitBuffer::Create()` when a decoder
// is constructed.
// NOTE(review): the unit is presumably bytes; confirm against the buffer's
// documentation.
constexpr int kInitialBufferSize{1024};
75 
76 // Creates an ObuProcessor; an ObuProcessor is only created once all descriptor
77 // OBUs have been processed. Contracted to only return a resource exhausted
78 // error if there is not enough data to process the descriptor OBUs.
CreateObuProcessor(bool contains_all_descriptor_obus,absl::Span<const uint8_t> bitstream,StreamBasedReadBitBuffer * read_bit_buffer,Layout & in_out_layout)79 absl::StatusOr<std::unique_ptr<ObuProcessor>> CreateObuProcessor(
80     bool contains_all_descriptor_obus, absl::Span<const uint8_t> bitstream,
81     StreamBasedReadBitBuffer* read_bit_buffer, Layout& in_out_layout) {
82   // Happens only in the pure streaming case.
83   auto start_position = read_bit_buffer->Tell();
84   bool insufficient_data;
85   auto obu_processor = ObuProcessor::CreateForRendering(
86       in_out_layout,
87       RenderingMixPresentationFinalizer::ProduceNoSampleProcessors,
88       /*is_exhaustive_and_exact=*/contains_all_descriptor_obus, read_bit_buffer,
89       in_out_layout, insufficient_data);
90   if (obu_processor == nullptr) {
91     // `insufficient_data` is true iff everything so far is valid but more data
92     // is needed.
93     if (insufficient_data && !contains_all_descriptor_obus) {
94       return absl::ResourceExhaustedError(
95           "Have not received enough data yet to process descriptor "
96           "OBUs. Please call Decode() again with more data.");
97     }
98     return absl::InvalidArgumentError("Failed to create OBU processor.");
99   }
100   auto num_bits_read = read_bit_buffer->Tell() - start_position;
101   RETURN_IF_NOT_OK(read_bit_buffer->Flush(num_bits_read / 8));
102   return obu_processor;
103 }
104 
ProcessAllTemporalUnits(StreamBasedReadBitBuffer * read_bit_buffer,ObuProcessor * obu_processor,std::queue<std::vector<std::vector<int32_t>>> & rendered_pcm_samples)105 absl::Status ProcessAllTemporalUnits(
106     StreamBasedReadBitBuffer* read_bit_buffer, ObuProcessor* obu_processor,
107     std::queue<std::vector<std::vector<int32_t>>>& rendered_pcm_samples) {
108   LOG(INFO) << "Processing Temporal Units";
109   int32_t num_bits_read = 0;
110   bool continue_processing = true;
111   while (continue_processing) {
112     auto start_position_for_temporal_unit = read_bit_buffer->Tell();
113     std::optional<ObuProcessor::OutputTemporalUnit> output_temporal_unit;
114     // TODO(b/395889878): Add support for partial temporal units.
115     RETURN_IF_NOT_OK(obu_processor->ProcessTemporalUnit(
116         /*eos_is_end_of_sequence=*/false, output_temporal_unit,
117         continue_processing));
118     if (!output_temporal_unit.has_value()) {
119       break;
120     }
121 
122     // Trivial IA Sequences may have empty temporal units. Do not try to
123     // render empty temporal unit.
124     if (output_temporal_unit.has_value()) {
125       absl::Span<const std::vector<int32_t>>
126           rendered_pcm_samples_for_temporal_unit;
127       RETURN_IF_NOT_OK(obu_processor->RenderTemporalUnitAndMeasureLoudness(
128           output_temporal_unit->output_timestamp,
129           output_temporal_unit->output_audio_frames,
130           output_temporal_unit->output_parameter_blocks,
131           rendered_pcm_samples_for_temporal_unit));
132       rendered_pcm_samples.push(
133           std::vector(rendered_pcm_samples_for_temporal_unit.begin(),
134                       rendered_pcm_samples_for_temporal_unit.end()));
135     }
136     num_bits_read +=
137         (read_bit_buffer->Tell() - start_position_for_temporal_unit);
138   }
139   // Empty the buffer of the data that was processed thus far.
140   RETURN_IF_NOT_OK(read_bit_buffer->Flush(num_bits_read / 8));
141   LOG(INFO) << "Rendered " << rendered_pcm_samples.size()
142             << " temporal units. Please call GetOutputTemporalUnit() to get "
143                "the rendered PCM samples.";
144   return absl::OkStatus();
145 }
146 
BytesPerSample(OutputSampleType sample_type)147 size_t BytesPerSample(OutputSampleType sample_type) {
148   switch (sample_type) {
149     case OutputSampleType::kInt16LittleEndian:
150       return 2;
151     case OutputSampleType::kInt32LittleEndian:
152       return 4;
153     default:
154       return 0;
155   }
156 }
157 
WriteFrameToSpan(const std::vector<std::vector<int32_t>> & frame,OutputSampleType sample_type,absl::Span<uint8_t> & output_bytes,size_t & bytes_written)158 absl::Status WriteFrameToSpan(const std::vector<std::vector<int32_t>>& frame,
159                               OutputSampleType sample_type,
160                               absl::Span<uint8_t>& output_bytes,
161                               size_t& bytes_written) {
162   const size_t bytes_per_sample = BytesPerSample(sample_type);
163   const size_t bits_per_sample = bytes_per_sample * 8;
164   const size_t required_size =
165       frame.size() * frame[0].size() * bytes_per_sample;
166   if (output_bytes.size() < required_size) {
167     return absl::InvalidArgumentError(
168         "Span does not have enough space to write output bytes.");
169   }
170   const bool big_endian = false;
171   size_t write_position = 0;
172   uint8_t* data = output_bytes.data();
173   for (int t = 0; t < frame.size(); t++) {
174     for (int c = 0; c < frame[0].size(); ++c) {
175       const uint32_t sample = static_cast<uint32_t>(frame[t][c]);
176       RETURN_IF_NOT_OK(WritePcmSample(sample, bits_per_sample, big_endian, data,
177                                       write_position));
178     }
179   }
180   bytes_written = write_position;
181   return absl::OkStatus();
182 }
183 
184 }  // namespace
185 
IamfDecoder(std::unique_ptr<DecoderState> state)186 IamfDecoder::IamfDecoder(std::unique_ptr<DecoderState> state)
187     : state_(std::move(state)) {}
188 
189 // While these are all `= default`, they must be here in the source file because
190 // the unique_ptr of the partial class, DecoderState, prevents them from being
191 // inline.
192 IamfDecoder::~IamfDecoder() = default;
193 IamfDecoder::IamfDecoder(IamfDecoder&&) = default;
194 IamfDecoder& IamfDecoder::operator=(IamfDecoder&&) = default;
195 
Create(const OutputLayout & requested_layout)196 absl::StatusOr<IamfDecoder> IamfDecoder::Create(
197     const OutputLayout& requested_layout) {
198   std::unique_ptr<StreamBasedReadBitBuffer> read_bit_buffer =
199       StreamBasedReadBitBuffer::Create(kInitialBufferSize);
200   if (read_bit_buffer == nullptr) {
201     return absl::InternalError("Failed to create read bit buffer.");
202   }
203   std::unique_ptr<DecoderState> state = std::make_unique<DecoderState>(
204       std::move(read_bit_buffer), ApiToInternalType(requested_layout));
205   return IamfDecoder(std::move(state));
206 }
207 
CreateFromDescriptors(const OutputLayout & requested_layout,absl::Span<const uint8_t> descriptor_obus)208 absl::StatusOr<IamfDecoder> IamfDecoder::CreateFromDescriptors(
209     const OutputLayout& requested_layout,
210     absl::Span<const uint8_t> descriptor_obus) {
211   absl::StatusOr<IamfDecoder> decoder = Create(requested_layout);
212   if (!decoder.ok()) {
213     return decoder.status();
214   }
215   RETURN_IF_NOT_OK(
216       decoder->state_->read_bit_buffer->PushBytes(descriptor_obus));
217   absl::StatusOr<std::unique_ptr<ObuProcessor>> obu_processor =
218       CreateObuProcessor(/*contains_all_descriptor_obus=*/true, descriptor_obus,
219                          decoder->state_->read_bit_buffer.get(),
220                          decoder->state_->layout);
221   if (!obu_processor.ok()) {
222     return obu_processor.status();
223   }
224   decoder->state_->obu_processor = *std::move(obu_processor);
225   return decoder;
226 }
227 
Decode(absl::Span<const uint8_t> bitstream)228 absl::Status IamfDecoder::Decode(absl::Span<const uint8_t> bitstream) {
229   if (state_->status == Status::kFlushCalled) {
230     return absl::FailedPreconditionError(
231         "Decode() cannot be called after Flush() has been called.");
232   }
233   RETURN_IF_NOT_OK(state_->read_bit_buffer->PushBytes(bitstream));
234   if (!IsDescriptorProcessingComplete()) {
235     auto obu_processor = CreateObuProcessor(
236         /*contains_all_descriptor_obus=*/false, bitstream,
237         state_->read_bit_buffer.get(), state_->layout);
238     if (obu_processor.ok()) {
239       state_->obu_processor = *std::move(obu_processor);
240       return absl::OkStatus();
241     } else if (absl::IsResourceExhausted(obu_processor.status())) {
242       // Don't have enough data to process the descriptor OBUs yet, but no
243       // errors have occurred.
244       return absl::OkStatus();
245     } else {
246       // Corrupted data or other errors.
247       return obu_processor.status();
248     }
249   }
250 
251   // At this stage, we know that we've processed all descriptor OBUs.
252   RETURN_IF_NOT_OK(ProcessAllTemporalUnits(state_->read_bit_buffer.get(),
253                                            state_->obu_processor.get(),
254                                            state_->rendered_pcm_samples));
255   return absl::OkStatus();
256 }
257 
ConfigureMixPresentationId(MixPresentationId mix_presentation_id)258 absl::Status IamfDecoder::ConfigureMixPresentationId(
259     MixPresentationId mix_presentation_id) {
260   return absl::UnimplementedError(
261       "ConfigureMixPresentationId is not yet implemented.");
262 }
263 
ConfigureOutputSampleType(OutputSampleType output_sample_type)264 void IamfDecoder::ConfigureOutputSampleType(
265     OutputSampleType output_sample_type) {
266   state_->output_sample_type = output_sample_type;
267 }
268 
GetOutputTemporalUnit(absl::Span<uint8_t> output_bytes,size_t & bytes_written)269 absl::Status IamfDecoder::GetOutputTemporalUnit(
270     absl::Span<uint8_t> output_bytes, size_t& bytes_written) {
271   bytes_written = 0;
272   if (state_->rendered_pcm_samples.empty()) {
273     return absl::OkStatus();
274   }
275   OutputSampleType output_sample_type = GetOutputSampleType();
276   absl::Status status =
277       WriteFrameToSpan(state_->rendered_pcm_samples.front(), output_sample_type,
278                        output_bytes, bytes_written);
279   if (status.ok()) {
280     state_->rendered_pcm_samples.pop();
281     return absl::OkStatus();
282   }
283   return status;
284 }
285 
IsTemporalUnitAvailable() const286 bool IamfDecoder::IsTemporalUnitAvailable() const {
287   return !state_->rendered_pcm_samples.empty();
288 }
289 
IsDescriptorProcessingComplete() const290 bool IamfDecoder::IsDescriptorProcessingComplete() const {
291   return state_->obu_processor != nullptr;
292 }
293 
GetOutputLayout() const294 absl::StatusOr<OutputLayout> IamfDecoder::GetOutputLayout() const {
295   if (!IsDescriptorProcessingComplete()) {
296     return absl::FailedPreconditionError(
297         "GetOutputLayout() cannot be called before descriptor processing is "
298         "complete.");
299   }
300   return InternalToApiType(state_->layout);
301 }
302 
GetNumberOfOutputChannels() const303 absl::StatusOr<int> IamfDecoder::GetNumberOfOutputChannels() const {
304   if (!IsDescriptorProcessingComplete()) {
305     return absl::FailedPreconditionError(
306         "GetNumberOfOutputChannels() cannot be called before descriptor "
307         "processing is complete.");
308   }
309   int num_channels;
310   RETURN_IF_NOT_OK(MixPresentationObu::GetNumChannelsFromLayout(state_->layout,
311                                                                 num_channels));
312   return num_channels;
313 }
314 
GetMixPresentations(std::vector<MixPresentationMetadata> & output_mix_presentation_metadata) const315 absl::Status IamfDecoder::GetMixPresentations(
316     std::vector<MixPresentationMetadata>& output_mix_presentation_metadata)
317     const {
318   return absl::UnimplementedError(
319       "GetMixPresentations is not yet implemented.");
320 }
GetOutputSampleType() const321 OutputSampleType IamfDecoder::GetOutputSampleType() const {
322   return state_->output_sample_type;
323 }
324 
GetSampleRate() const325 absl::StatusOr<uint32_t> IamfDecoder::GetSampleRate() const {
326   if (!IsDescriptorProcessingComplete()) {
327     return absl::FailedPreconditionError(
328         "GetSampleRate() cannot be called before descriptor processing is "
329         "complete.");
330   }
331   return state_->obu_processor->GetOutputSampleRate();
332 }
333 
GetFrameSize() const334 absl::StatusOr<uint32_t> IamfDecoder::GetFrameSize() const {
335   if (!IsDescriptorProcessingComplete()) {
336     return absl::FailedPreconditionError(
337         "GetFrameSize() cannot be called before descriptor processing is "
338         "complete.");
339   }
340 
341   return state_->obu_processor->GetOutputFrameSize();
342 }
343 
Flush(absl::Span<uint8_t> output_bytes,size_t & bytes_written,bool & output_is_done)344 absl::Status IamfDecoder::Flush(absl::Span<uint8_t> output_bytes,
345                                 size_t& bytes_written, bool& output_is_done) {
346   state_->status = Status::kFlushCalled;
347   RETURN_IF_NOT_OK(GetOutputTemporalUnit(output_bytes, bytes_written));
348   output_is_done = state_->rendered_pcm_samples.empty();
349   return absl::OkStatus();
350 }
351 
Close()352 absl::Status IamfDecoder::Close() { return absl::OkStatus(); }
353 
354 }  // namespace api
355 }  // namespace iamf_tools
356