1 /*
2 * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12
13 #include "iamf/api/decoder/iamf_decoder.h"
14
15 #include <cstddef>
16 #include <cstdint>
17 #include <memory>
18 #include <optional>
19 #include <queue>
20 #include <utility>
21 #include <vector>
22
23 #include "absl/log/log.h"
24 #include "absl/status/status.h"
25 #include "absl/types/span.h"
26 #include "iamf/api/conversion/mix_presentation_conversion.h"
27 #include "iamf/api/types.h"
28 #include "iamf/cli/obu_processor.h"
29 #include "iamf/cli/rendering_mix_presentation_finalizer.h"
30 #include "iamf/common/read_bit_buffer.h"
31 #include "iamf/common/utils/macros.h"
32 #include "iamf/common/utils/sample_processing_utils.h"
33 #include "iamf/obu/mix_presentation.h"
34
35 namespace iamf_tools {
36 namespace api {
37
// Lifecycle phase of the decoder.
enum class Status {
  // Decode() may still be called to supply more data.
  kAcceptingData,
  // Flush() has been called; Decode() rejects any further input.
  kFlushCalled,
};
39
40 // Holds the internal state of the decoder to hide it and necessary includes
41 // from API users.
42 struct IamfDecoder::DecoderState {
DecoderStateiamf_tools::api::IamfDecoder::DecoderState43 DecoderState(std::unique_ptr<StreamBasedReadBitBuffer> read_bit_buffer,
44 const Layout& initial_requested_layout)
45 : read_bit_buffer(std::move(read_bit_buffer)),
46 layout(initial_requested_layout) {}
47
48 // Current status of the decoder.
49 Status status = Status::kAcceptingData;
50
51 // Used to process descriptor OBUs and temporal units. Is only created after
52 // the descriptor OBUs have been parsed.
53 std::unique_ptr<ObuProcessor> obu_processor;
54
55 // Buffer that is filled with data from Decode().
56 std::unique_ptr<StreamBasedReadBitBuffer> read_bit_buffer;
57
58 // Rendered PCM samples. Each element in the queue corresponds to a
59 // temporal unit. A temporal unit will never be partially filled, so the
60 // number of elements in the outer vector is equal to the number of decoded
61 // temporal units currently available.
62 std::queue<std::vector<std::vector<int32_t>>> rendered_pcm_samples;
63
64 // The layout used for the rendered output audio.
65 // Initially set to the requested Layout but updated by ObuProcessor.
66 Layout layout;
67
68 // TODO(b/379122580): Use the bit depth of the underlying content.
69 // Defaulting to int32 for now.
70 OutputSampleType output_sample_type = OutputSampleType::kInt32LittleEndian;
71 };
72
73 namespace {
// Size handed to `StreamBasedReadBitBuffer::Create()` when a decoder is
// constructed. NOTE(review): the unit (bits vs. bytes) is not visible from
// this file; confirm against the read bit buffer's documentation.
constexpr int kInitialBufferSize = 1024;
75
76 // Creates an ObuProcessor; an ObuProcessor is only created once all descriptor
77 // OBUs have been processed. Contracted to only return a resource exhausted
78 // error if there is not enough data to process the descriptor OBUs.
CreateObuProcessor(bool contains_all_descriptor_obus,absl::Span<const uint8_t> bitstream,StreamBasedReadBitBuffer * read_bit_buffer,Layout & in_out_layout)79 absl::StatusOr<std::unique_ptr<ObuProcessor>> CreateObuProcessor(
80 bool contains_all_descriptor_obus, absl::Span<const uint8_t> bitstream,
81 StreamBasedReadBitBuffer* read_bit_buffer, Layout& in_out_layout) {
82 // Happens only in the pure streaming case.
83 auto start_position = read_bit_buffer->Tell();
84 bool insufficient_data;
85 auto obu_processor = ObuProcessor::CreateForRendering(
86 in_out_layout,
87 RenderingMixPresentationFinalizer::ProduceNoSampleProcessors,
88 /*is_exhaustive_and_exact=*/contains_all_descriptor_obus, read_bit_buffer,
89 in_out_layout, insufficient_data);
90 if (obu_processor == nullptr) {
91 // `insufficient_data` is true iff everything so far is valid but more data
92 // is needed.
93 if (insufficient_data && !contains_all_descriptor_obus) {
94 return absl::ResourceExhaustedError(
95 "Have not received enough data yet to process descriptor "
96 "OBUs. Please call Decode() again with more data.");
97 }
98 return absl::InvalidArgumentError("Failed to create OBU processor.");
99 }
100 auto num_bits_read = read_bit_buffer->Tell() - start_position;
101 RETURN_IF_NOT_OK(read_bit_buffer->Flush(num_bits_read / 8));
102 return obu_processor;
103 }
104
ProcessAllTemporalUnits(StreamBasedReadBitBuffer * read_bit_buffer,ObuProcessor * obu_processor,std::queue<std::vector<std::vector<int32_t>>> & rendered_pcm_samples)105 absl::Status ProcessAllTemporalUnits(
106 StreamBasedReadBitBuffer* read_bit_buffer, ObuProcessor* obu_processor,
107 std::queue<std::vector<std::vector<int32_t>>>& rendered_pcm_samples) {
108 LOG(INFO) << "Processing Temporal Units";
109 int32_t num_bits_read = 0;
110 bool continue_processing = true;
111 while (continue_processing) {
112 auto start_position_for_temporal_unit = read_bit_buffer->Tell();
113 std::optional<ObuProcessor::OutputTemporalUnit> output_temporal_unit;
114 // TODO(b/395889878): Add support for partial temporal units.
115 RETURN_IF_NOT_OK(obu_processor->ProcessTemporalUnit(
116 /*eos_is_end_of_sequence=*/false, output_temporal_unit,
117 continue_processing));
118 if (!output_temporal_unit.has_value()) {
119 break;
120 }
121
122 // Trivial IA Sequences may have empty temporal units. Do not try to
123 // render empty temporal unit.
124 if (output_temporal_unit.has_value()) {
125 absl::Span<const std::vector<int32_t>>
126 rendered_pcm_samples_for_temporal_unit;
127 RETURN_IF_NOT_OK(obu_processor->RenderTemporalUnitAndMeasureLoudness(
128 output_temporal_unit->output_timestamp,
129 output_temporal_unit->output_audio_frames,
130 output_temporal_unit->output_parameter_blocks,
131 rendered_pcm_samples_for_temporal_unit));
132 rendered_pcm_samples.push(
133 std::vector(rendered_pcm_samples_for_temporal_unit.begin(),
134 rendered_pcm_samples_for_temporal_unit.end()));
135 }
136 num_bits_read +=
137 (read_bit_buffer->Tell() - start_position_for_temporal_unit);
138 }
139 // Empty the buffer of the data that was processed thus far.
140 RETURN_IF_NOT_OK(read_bit_buffer->Flush(num_bits_read / 8));
141 LOG(INFO) << "Rendered " << rendered_pcm_samples.size()
142 << " temporal units. Please call GetOutputTemporalUnit() to get "
143 "the rendered PCM samples.";
144 return absl::OkStatus();
145 }
146
BytesPerSample(OutputSampleType sample_type)147 size_t BytesPerSample(OutputSampleType sample_type) {
148 switch (sample_type) {
149 case OutputSampleType::kInt16LittleEndian:
150 return 2;
151 case OutputSampleType::kInt32LittleEndian:
152 return 4;
153 default:
154 return 0;
155 }
156 }
157
WriteFrameToSpan(const std::vector<std::vector<int32_t>> & frame,OutputSampleType sample_type,absl::Span<uint8_t> & output_bytes,size_t & bytes_written)158 absl::Status WriteFrameToSpan(const std::vector<std::vector<int32_t>>& frame,
159 OutputSampleType sample_type,
160 absl::Span<uint8_t>& output_bytes,
161 size_t& bytes_written) {
162 const size_t bytes_per_sample = BytesPerSample(sample_type);
163 const size_t bits_per_sample = bytes_per_sample * 8;
164 const size_t required_size =
165 frame.size() * frame[0].size() * bytes_per_sample;
166 if (output_bytes.size() < required_size) {
167 return absl::InvalidArgumentError(
168 "Span does not have enough space to write output bytes.");
169 }
170 const bool big_endian = false;
171 size_t write_position = 0;
172 uint8_t* data = output_bytes.data();
173 for (int t = 0; t < frame.size(); t++) {
174 for (int c = 0; c < frame[0].size(); ++c) {
175 const uint32_t sample = static_cast<uint32_t>(frame[t][c]);
176 RETURN_IF_NOT_OK(WritePcmSample(sample, bits_per_sample, big_endian, data,
177 write_position));
178 }
179 }
180 bytes_written = write_position;
181 return absl::OkStatus();
182 }
183
184 } // namespace
185
// Constructs a decoder that takes ownership of the already-prepared `state`.
IamfDecoder::IamfDecoder(std::unique_ptr<DecoderState> state)
    : state_(std::move(state)) {}
188
// While these are all `= default`, they must be defined here in the source
// file: `DecoderState` is an incomplete type in the header, and the
// `std::unique_ptr<DecoderState>` member needs the complete type wherever the
// destructor and move operations are defined, which prevents them from being
// inline.
IamfDecoder::~IamfDecoder() = default;
IamfDecoder::IamfDecoder(IamfDecoder&&) = default;
IamfDecoder& IamfDecoder::operator=(IamfDecoder&&) = default;
195
Create(const OutputLayout & requested_layout)196 absl::StatusOr<IamfDecoder> IamfDecoder::Create(
197 const OutputLayout& requested_layout) {
198 std::unique_ptr<StreamBasedReadBitBuffer> read_bit_buffer =
199 StreamBasedReadBitBuffer::Create(kInitialBufferSize);
200 if (read_bit_buffer == nullptr) {
201 return absl::InternalError("Failed to create read bit buffer.");
202 }
203 std::unique_ptr<DecoderState> state = std::make_unique<DecoderState>(
204 std::move(read_bit_buffer), ApiToInternalType(requested_layout));
205 return IamfDecoder(std::move(state));
206 }
207
CreateFromDescriptors(const OutputLayout & requested_layout,absl::Span<const uint8_t> descriptor_obus)208 absl::StatusOr<IamfDecoder> IamfDecoder::CreateFromDescriptors(
209 const OutputLayout& requested_layout,
210 absl::Span<const uint8_t> descriptor_obus) {
211 absl::StatusOr<IamfDecoder> decoder = Create(requested_layout);
212 if (!decoder.ok()) {
213 return decoder.status();
214 }
215 RETURN_IF_NOT_OK(
216 decoder->state_->read_bit_buffer->PushBytes(descriptor_obus));
217 absl::StatusOr<std::unique_ptr<ObuProcessor>> obu_processor =
218 CreateObuProcessor(/*contains_all_descriptor_obus=*/true, descriptor_obus,
219 decoder->state_->read_bit_buffer.get(),
220 decoder->state_->layout);
221 if (!obu_processor.ok()) {
222 return obu_processor.status();
223 }
224 decoder->state_->obu_processor = *std::move(obu_processor);
225 return decoder;
226 }
227
Decode(absl::Span<const uint8_t> bitstream)228 absl::Status IamfDecoder::Decode(absl::Span<const uint8_t> bitstream) {
229 if (state_->status == Status::kFlushCalled) {
230 return absl::FailedPreconditionError(
231 "Decode() cannot be called after Flush() has been called.");
232 }
233 RETURN_IF_NOT_OK(state_->read_bit_buffer->PushBytes(bitstream));
234 if (!IsDescriptorProcessingComplete()) {
235 auto obu_processor = CreateObuProcessor(
236 /*contains_all_descriptor_obus=*/false, bitstream,
237 state_->read_bit_buffer.get(), state_->layout);
238 if (obu_processor.ok()) {
239 state_->obu_processor = *std::move(obu_processor);
240 return absl::OkStatus();
241 } else if (absl::IsResourceExhausted(obu_processor.status())) {
242 // Don't have enough data to process the descriptor OBUs yet, but no
243 // errors have occurred.
244 return absl::OkStatus();
245 } else {
246 // Corrupted data or other errors.
247 return obu_processor.status();
248 }
249 }
250
251 // At this stage, we know that we've processed all descriptor OBUs.
252 RETURN_IF_NOT_OK(ProcessAllTemporalUnits(state_->read_bit_buffer.get(),
253 state_->obu_processor.get(),
254 state_->rendered_pcm_samples));
255 return absl::OkStatus();
256 }
257
// Not yet implemented: `mix_presentation_id` is ignored and the call always
// returns `UnimplementedError`.
absl::Status IamfDecoder::ConfigureMixPresentationId(
    MixPresentationId mix_presentation_id) {
  return absl::UnimplementedError(
      "ConfigureMixPresentationId is not yet implemented.");
}
263
// Stores `output_sample_type` in the decoder state; GetOutputSampleType()
// returns it from then on.
void IamfDecoder::ConfigureOutputSampleType(
    OutputSampleType output_sample_type) {
  state_->output_sample_type = output_sample_type;
}
268
GetOutputTemporalUnit(absl::Span<uint8_t> output_bytes,size_t & bytes_written)269 absl::Status IamfDecoder::GetOutputTemporalUnit(
270 absl::Span<uint8_t> output_bytes, size_t& bytes_written) {
271 bytes_written = 0;
272 if (state_->rendered_pcm_samples.empty()) {
273 return absl::OkStatus();
274 }
275 OutputSampleType output_sample_type = GetOutputSampleType();
276 absl::Status status =
277 WriteFrameToSpan(state_->rendered_pcm_samples.front(), output_sample_type,
278 output_bytes, bytes_written);
279 if (status.ok()) {
280 state_->rendered_pcm_samples.pop();
281 return absl::OkStatus();
282 }
283 return status;
284 }
285
// Returns true when at least one rendered temporal unit is queued.
bool IamfDecoder::IsTemporalUnitAvailable() const {
  return !state_->rendered_pcm_samples.empty();
}
289
// Returns true once the `ObuProcessor` has been created, which only happens
// after the descriptor OBUs were parsed successfully.
bool IamfDecoder::IsDescriptorProcessingComplete() const {
  return state_->obu_processor != nullptr;
}
293
GetOutputLayout() const294 absl::StatusOr<OutputLayout> IamfDecoder::GetOutputLayout() const {
295 if (!IsDescriptorProcessingComplete()) {
296 return absl::FailedPreconditionError(
297 "GetOutputLayout() cannot be called before descriptor processing is "
298 "complete.");
299 }
300 return InternalToApiType(state_->layout);
301 }
302
GetNumberOfOutputChannels() const303 absl::StatusOr<int> IamfDecoder::GetNumberOfOutputChannels() const {
304 if (!IsDescriptorProcessingComplete()) {
305 return absl::FailedPreconditionError(
306 "GetNumberOfOutputChannels() cannot be called before descriptor "
307 "processing is complete.");
308 }
309 int num_channels;
310 RETURN_IF_NOT_OK(MixPresentationObu::GetNumChannelsFromLayout(state_->layout,
311 num_channels));
312 return num_channels;
313 }
314
// Not yet implemented: `output_mix_presentation_metadata` is left untouched
// and the call always returns `UnimplementedError`.
absl::Status IamfDecoder::GetMixPresentations(
    std::vector<MixPresentationMetadata>& output_mix_presentation_metadata)
    const {
  return absl::UnimplementedError(
      "GetMixPresentations is not yet implemented.");
}
// Returns the sample type currently stored in the decoder state.
OutputSampleType IamfDecoder::GetOutputSampleType() const {
  return state_->output_sample_type;
}
324
GetSampleRate() const325 absl::StatusOr<uint32_t> IamfDecoder::GetSampleRate() const {
326 if (!IsDescriptorProcessingComplete()) {
327 return absl::FailedPreconditionError(
328 "GetSampleRate() cannot be called before descriptor processing is "
329 "complete.");
330 }
331 return state_->obu_processor->GetOutputSampleRate();
332 }
333
GetFrameSize() const334 absl::StatusOr<uint32_t> IamfDecoder::GetFrameSize() const {
335 if (!IsDescriptorProcessingComplete()) {
336 return absl::FailedPreconditionError(
337 "GetFrameSize() cannot be called before descriptor processing is "
338 "complete.");
339 }
340
341 return state_->obu_processor->GetOutputFrameSize();
342 }
343
Flush(absl::Span<uint8_t> output_bytes,size_t & bytes_written,bool & output_is_done)344 absl::Status IamfDecoder::Flush(absl::Span<uint8_t> output_bytes,
345 size_t& bytes_written, bool& output_is_done) {
346 state_->status = Status::kFlushCalled;
347 RETURN_IF_NOT_OK(GetOutputTemporalUnit(output_bytes, bytes_written));
348 output_is_done = state_->rendered_pcm_samples.empty();
349 return absl::OkStatus();
350 }
351
// Currently performs no work and always returns `absl::OkStatus()`.
absl::Status IamfDecoder::Close() { return absl::OkStatus(); }
353
354 } // namespace api
355 } // namespace iamf_tools
356