1 /*
2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12 #include "iamf/obu/audio_element.h"
13
14 #include <algorithm>
15 #include <array>
16 #include <cstdint>
17 #include <cstdlib>
18 #include <sstream>
19 #include <string>
20 #include <utility>
21 #include <variant>
22 #include <vector>
23
24 #include "absl/container/flat_hash_set.h"
25 #include "absl/log/log.h"
26 #include "absl/status/status.h"
27 #include "absl/strings/str_cat.h"
28 #include "absl/types/span.h"
29 #include "iamf/common/read_bit_buffer.h"
30 #include "iamf/common/utils/macros.h"
31 #include "iamf/common/utils/numeric_utils.h"
32 #include "iamf/common/utils/validation_utils.h"
33 #include "iamf/common/write_bit_buffer.h"
34 #include "iamf/obu/demixing_param_definition.h"
35 #include "iamf/obu/obu_base.h"
36 #include "iamf/obu/obu_header.h"
37 #include "iamf/obu/param_definitions.h"
38 #include "iamf/obu/types.h"
39
40 namespace iamf_tools {
41
42 using absl::OkStatus;
43
44 namespace {
45
46 // Returns the number of elements in the demixing_matrix.
GetNumDemixingMatrixElements(const AmbisonicsProjectionConfig & config)47 size_t GetNumDemixingMatrixElements(const AmbisonicsProjectionConfig& config) {
48 const size_t c = static_cast<size_t>(config.output_channel_count);
49 const size_t n = static_cast<size_t>(config.substream_count);
50 const size_t m = static_cast<size_t>(config.coupled_substream_count);
51
52 return (n + m) * c;
53 }
54
LogChannelBased(const ScalableChannelLayoutConfig & channel_config)55 void LogChannelBased(const ScalableChannelLayoutConfig& channel_config) {
56 LOG(INFO) << " scalable_channel_layout_config:";
57 LOG(INFO) << " num_layers= " << absl::StrCat(channel_config.num_layers);
58 LOG(INFO) << " reserved= " << absl::StrCat(channel_config.reserved);
59 for (int i = 0; i < channel_config.num_layers; ++i) {
60 LOG(INFO) << " channel_audio_layer_configs[" << i << "]:";
61 const auto& channel_audio_layer_config =
62 channel_config.channel_audio_layer_configs[i];
63 LOG(INFO) << " loudspeaker_layout= "
64 << absl::StrCat(channel_audio_layer_config.loudspeaker_layout);
65 LOG(INFO) << " output_gain_is_present_flag= "
66 << absl::StrCat(
67 channel_audio_layer_config.output_gain_is_present_flag);
68 LOG(INFO) << " recon_gain_is_present_flag= "
69 << absl::StrCat(
70 channel_audio_layer_config.recon_gain_is_present_flag);
71 LOG(INFO) << " reserved= "
72 << absl::StrCat(channel_audio_layer_config.reserved_a);
73 LOG(INFO) << " substream_count= "
74 << absl::StrCat(channel_audio_layer_config.substream_count);
75 LOG(INFO) << " coupled_substream_count= "
76 << absl::StrCat(
77 channel_audio_layer_config.coupled_substream_count);
78 if (channel_audio_layer_config.output_gain_is_present_flag == 1) {
79 LOG(INFO) << " output_gain_flag= "
80 << absl::StrCat(channel_audio_layer_config.output_gain_flag);
81 LOG(INFO) << " reserved= "
82 << absl::StrCat(channel_audio_layer_config.reserved_b);
83 LOG(INFO) << " output_gain= "
84 << channel_audio_layer_config.output_gain;
85 }
86 if (channel_audio_layer_config.expanded_loudspeaker_layout.has_value()) {
87 LOG(INFO) << " expanded_loudspeaker_layout= "
88 << absl::StrCat(
89 *channel_audio_layer_config.expanded_loudspeaker_layout);
90 } else {
91 LOG(INFO) << " expanded_loudspeaker_layout= Not present.";
92 }
93 }
94 }
95
LogAmbisonicsMonoConfig(const AmbisonicsMonoConfig & mono_config)96 void LogAmbisonicsMonoConfig(const AmbisonicsMonoConfig& mono_config) {
97 LOG(INFO) << " ambisonics_mono_config:";
98 LOG(INFO) << " output_channel_count:"
99 << absl::StrCat(mono_config.output_channel_count);
100 LOG(INFO) << " substream_count:"
101 << absl::StrCat(mono_config.substream_count);
102 std::stringstream channel_mapping_stream;
103 for (int c = 0; c < mono_config.output_channel_count; c++) {
104 channel_mapping_stream << absl::StrCat(mono_config.channel_mapping[c])
105 << ", ";
106 }
107 LOG(INFO) << " channel_mapping: [ " << channel_mapping_stream.str() << "]";
108 }
109
LogAmbisonicsProjectionConfig(const AmbisonicsProjectionConfig & projection_config)110 void LogAmbisonicsProjectionConfig(
111 const AmbisonicsProjectionConfig& projection_config) {
112 LOG(INFO) << " ambisonics_projection_config:";
113 LOG(INFO) << " output_channel_count:"
114 << absl::StrCat(projection_config.output_channel_count);
115 LOG(INFO) << " substream_count:"
116 << absl::StrCat(projection_config.substream_count);
117 LOG(INFO) << " coupled_substream_count:"
118 << absl::StrCat(projection_config.coupled_substream_count);
119 std::string demixing_matrix_string;
120 for (int i = 0; i < (projection_config.substream_count +
121 projection_config.coupled_substream_count) *
122 projection_config.output_channel_count;
123 i++) {
124 absl::StrAppend(&demixing_matrix_string,
125 projection_config.demixing_matrix[i], ",");
126 }
127 LOG(INFO) << " demixing_matrix: [ " << demixing_matrix_string << "]";
128 }
129
LogSceneBased(const AmbisonicsConfig & ambisonics_config)130 void LogSceneBased(const AmbisonicsConfig& ambisonics_config) {
131 LOG(INFO) << " ambisonics_config:";
132 LOG(INFO) << " ambisonics_mode= "
133 << absl::StrCat(ambisonics_config.ambisonics_mode);
134 if (ambisonics_config.ambisonics_mode ==
135 AmbisonicsConfig::kAmbisonicsModeMono) {
136 LogAmbisonicsMonoConfig(
137 std::get<AmbisonicsMonoConfig>(ambisonics_config.ambisonics_config));
138 } else if (ambisonics_config.ambisonics_mode ==
139 AmbisonicsConfig::kAmbisonicsModeProjection) {
140 LogAmbisonicsProjectionConfig(std::get<AmbisonicsProjectionConfig>(
141 ambisonics_config.ambisonics_config));
142 }
143 }
144
145 // Returns `absl::OkStatus()` if all parameters have a unique
146 // `param_definition_type` in the OBU. `absl::InvalidArgumentError()`
147 // otherwise.
ValidateUniqueParamDefinitionType(const std::vector<AudioElementParam> & audio_element_params)148 absl::Status ValidateUniqueParamDefinitionType(
149 const std::vector<AudioElementParam>& audio_element_params) {
150 std::vector<ParamDefinition::ParameterDefinitionType>
151 collected_param_definition_types;
152 collected_param_definition_types.reserve(audio_element_params.size());
153 for (const auto& param : audio_element_params) {
154 collected_param_definition_types.push_back(param.GetType());
155 }
156
157 return ValidateUnique(collected_param_definition_types.begin(),
158 collected_param_definition_types.end(),
159 "audio_element_params");
160 }
161
ValidateOutputChannelCount(const uint8_t channel_count)162 absl::Status ValidateOutputChannelCount(const uint8_t channel_count) {
163 uint8_t next_valid_output_channel_count;
164 RETURN_IF_NOT_OK(AmbisonicsConfig ::GetNextValidOutputChannelCount(
165 channel_count, next_valid_output_channel_count));
166
167 if (next_valid_output_channel_count == channel_count) {
168 return absl::OkStatus();
169 }
170
171 return absl::InvalidArgumentError(absl::StrCat(
172 "Invalid Ambisonics output channel_count = ", channel_count));
173 }
174
175 // Writes an element of the `audio_element_params` array of a scalable channel
176 // `AudioElementObu`.
ValidateAndWriteAudioElementParam(const AudioElementParam & param,WriteBitBuffer & wb)177 absl::Status ValidateAndWriteAudioElementParam(const AudioElementParam& param,
178 WriteBitBuffer& wb) {
179 const auto param_definition_type = param.GetType();
180
181 // Write the main portion of the `AudioElementParam`.
182 RETURN_IF_NOT_OK(
183 wb.WriteUleb128(static_cast<DecodedUleb128>(param_definition_type)));
184
185 if (param_definition_type == ParamDefinition::kParameterDefinitionMixGain) {
186 return absl::InvalidArgumentError(
187 "Mix Gain parameter type is explicitly forbidden for "
188 "Audio Element OBUs.");
189 }
190 RETURN_IF_NOT_OK(std::visit(
191 [&wb](auto& param_definition) {
192 return param_definition.ValidateAndWrite(wb);
193 },
194 param.param_definition));
195
196 return absl::OkStatus();
197 }
198
199 // Writes the `ScalableChannelLayoutConfig` of an `AudioElementObu`.
ValidateAndWriteScalableChannelLayout(const ScalableChannelLayoutConfig & layout,const DecodedUleb128 num_substreams,WriteBitBuffer & wb)200 absl::Status ValidateAndWriteScalableChannelLayout(
201 const ScalableChannelLayoutConfig& layout,
202 const DecodedUleb128 num_substreams, WriteBitBuffer& wb) {
203 RETURN_IF_NOT_OK(layout.Validate(num_substreams));
204
205 // Write the main portion of the `ScalableChannelLayoutConfig`.
206 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(layout.num_layers, 3));
207 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(layout.reserved, 5));
208
209 // Loop to write the `channel_audio_layer_configs` array.
210 for (const auto& layer_config : layout.channel_audio_layer_configs) {
211 RETURN_IF_NOT_OK(layer_config.Write(wb));
212 }
213
214 return absl::OkStatus();
215 }
216
217 // Reads the `ScalableChannelLayoutConfig` of an `AudioElementObu`.
ReadAndValidateScalableChannelLayout(ScalableChannelLayoutConfig & layout,const DecodedUleb128 num_substreams,ReadBitBuffer & rb)218 absl::Status ReadAndValidateScalableChannelLayout(
219 ScalableChannelLayoutConfig& layout, const DecodedUleb128 num_substreams,
220 ReadBitBuffer& rb) {
221 // Read the main portion of the `ScalableChannelLayoutConfig`.
222 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(3, layout.num_layers));
223 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(5, layout.reserved));
224
225 for (int i = 0; i < layout.num_layers; ++i) {
226 ChannelAudioLayerConfig layer_config;
227 RETURN_IF_NOT_OK(layer_config.Read(rb));
228 layout.channel_audio_layer_configs.push_back(layer_config);
229 }
230
231 RETURN_IF_NOT_OK(layout.Validate(num_substreams));
232
233 return absl::OkStatus();
234 }
235
236 // Writes the `AmbisonicsMonoConfig` of an ambisonics mono `AudioElementObu`.
ValidateAndWriteAmbisonicsMono(const AmbisonicsMonoConfig & mono_config,DecodedUleb128 num_substreams,WriteBitBuffer & wb)237 absl::Status ValidateAndWriteAmbisonicsMono(
238 const AmbisonicsMonoConfig& mono_config, DecodedUleb128 num_substreams,
239 WriteBitBuffer& wb) {
240 RETURN_IF_NOT_OK(mono_config.Validate(num_substreams));
241
242 RETURN_IF_NOT_OK(
243 wb.WriteUnsignedLiteral(mono_config.output_channel_count, 8));
244 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(mono_config.substream_count, 8));
245
246 RETURN_IF_NOT_OK(
247 wb.WriteUint8Span(absl::MakeConstSpan(mono_config.channel_mapping)));
248
249 return absl::OkStatus();
250 }
251
252 // Writes the `AmbisonicsProjectionConfig` of an ambisonics projection
253 // `AudioElementObu`.
ValidateAndWriteAmbisonicsProjection(const AmbisonicsProjectionConfig & projection_config,DecodedUleb128 num_substreams,WriteBitBuffer & wb)254 absl::Status ValidateAndWriteAmbisonicsProjection(
255 const AmbisonicsProjectionConfig& projection_config,
256 DecodedUleb128 num_substreams, WriteBitBuffer& wb) {
257 RETURN_IF_NOT_OK(projection_config.Validate(num_substreams));
258
259 // Write the main portion of the `AmbisonicsProjectionConfig`.
260 RETURN_IF_NOT_OK(
261 wb.WriteUnsignedLiteral(projection_config.output_channel_count, 8));
262 RETURN_IF_NOT_OK(
263 wb.WriteUnsignedLiteral(projection_config.substream_count, 8));
264 RETURN_IF_NOT_OK(
265 wb.WriteUnsignedLiteral(projection_config.coupled_substream_count, 8));
266
267 // Loop to write the `demixing_matrix`.
268 for (size_t i = 0; i < projection_config.demixing_matrix.size(); i++) {
269 RETURN_IF_NOT_OK(wb.WriteSigned16(projection_config.demixing_matrix[i]));
270 }
271
272 return absl::OkStatus();
273 }
274
275 // Writes the `AmbisonicsConfig` of an ambisonics `AudioElementObu`.
ValidateAndWriteAmbisonicsConfig(const AmbisonicsConfig & config,DecodedUleb128 num_substreams,WriteBitBuffer & wb)276 absl::Status ValidateAndWriteAmbisonicsConfig(const AmbisonicsConfig& config,
277 DecodedUleb128 num_substreams,
278 WriteBitBuffer& wb) {
279 // Write the main portion of the `AmbisonicsConfig`.
280 RETURN_IF_NOT_OK(
281 wb.WriteUleb128(static_cast<DecodedUleb128>(config.ambisonics_mode)));
282
283 // Write the specific config based on `ambisonics_mode`.
284 switch (config.ambisonics_mode) {
285 using enum AmbisonicsConfig::AmbisonicsMode;
286 case kAmbisonicsModeMono:
287 return ValidateAndWriteAmbisonicsMono(
288 std::get<AmbisonicsMonoConfig>(config.ambisonics_config),
289 num_substreams, wb);
290 case kAmbisonicsModeProjection:
291 return ValidateAndWriteAmbisonicsProjection(
292 std::get<AmbisonicsProjectionConfig>(config.ambisonics_config),
293 num_substreams, wb);
294 default:
295 return absl::OkStatus();
296 }
297 }
298
ReadAndValidateAmbisonicsProjection(AmbisonicsProjectionConfig & projection_config,DecodedUleb128 num_substreams,ReadBitBuffer & rb)299 absl::Status ReadAndValidateAmbisonicsProjection(
300 AmbisonicsProjectionConfig& projection_config,
301 DecodedUleb128 num_substreams, ReadBitBuffer& rb) {
302 RETURN_IF_NOT_OK(
303 rb.ReadUnsignedLiteral(8, projection_config.output_channel_count));
304 RETURN_IF_NOT_OK(
305 rb.ReadUnsignedLiteral(8, projection_config.substream_count));
306 RETURN_IF_NOT_OK(
307 rb.ReadUnsignedLiteral(8, projection_config.coupled_substream_count));
308 const size_t demixing_matrix_size =
309 GetNumDemixingMatrixElements(projection_config);
310 for (size_t i = 0; i < demixing_matrix_size; ++i) {
311 int16_t demixing_matrix_value;
312 RETURN_IF_NOT_OK(rb.ReadSigned16(demixing_matrix_value));
313 projection_config.demixing_matrix.push_back(demixing_matrix_value);
314 }
315 RETURN_IF_NOT_OK(projection_config.Validate(num_substreams));
316 return OkStatus();
317 }
318
ReadAndValidateAmbisonicsMonoConfig(AmbisonicsMonoConfig & mono_config,DecodedUleb128 num_substreams,ReadBitBuffer & rb)319 absl::Status ReadAndValidateAmbisonicsMonoConfig(
320 AmbisonicsMonoConfig& mono_config, DecodedUleb128 num_substreams,
321 ReadBitBuffer& rb) {
322 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(8, mono_config.output_channel_count));
323 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(8, mono_config.substream_count));
324 const size_t channel_mapping_size = mono_config.output_channel_count;
325 mono_config.channel_mapping.resize(channel_mapping_size);
326 RETURN_IF_NOT_OK(
327 rb.ReadUint8Span(absl::MakeSpan(mono_config.channel_mapping)));
328 RETURN_IF_NOT_OK(mono_config.Validate(num_substreams));
329 return OkStatus();
330 }
331
332 // Reads the `AmbisonicsConfig` of an ambisonics `AudioElementObu`.
ReadAndValidateAmbisonicsConfig(AmbisonicsConfig & config,DecodedUleb128 num_substreams,ReadBitBuffer & rb)333 absl::Status ReadAndValidateAmbisonicsConfig(AmbisonicsConfig& config,
334 DecodedUleb128 num_substreams,
335 ReadBitBuffer& rb) {
336 DecodedUleb128 ambisonics_mode;
337 RETURN_IF_NOT_OK(rb.ReadULeb128(ambisonics_mode));
338 config.ambisonics_mode =
339 static_cast<AmbisonicsConfig::AmbisonicsMode>(ambisonics_mode);
340 switch (config.ambisonics_mode) {
341 using enum AmbisonicsConfig::AmbisonicsMode;
342 case kAmbisonicsModeMono: {
343 config.ambisonics_config = AmbisonicsMonoConfig();
344 return ReadAndValidateAmbisonicsMonoConfig(
345 std::get<AmbisonicsMonoConfig>(config.ambisonics_config),
346 num_substreams, rb);
347 }
348 case kAmbisonicsModeProjection: {
349 config.ambisonics_config = AmbisonicsProjectionConfig();
350 return ReadAndValidateAmbisonicsProjection(
351 std::get<AmbisonicsProjectionConfig>(config.ambisonics_config),
352 num_substreams, rb);
353 }
354 default:
355 return OkStatus();
356 }
357 }
358
359 } // namespace
360
ReadAndValidate(uint32_t audio_element_id,ReadBitBuffer & rb)361 absl::Status AudioElementParam::ReadAndValidate(uint32_t audio_element_id,
362 ReadBitBuffer& rb) {
363 // Reads the main portion of the `AudioElementParam`.
364 DecodedUleb128 param_definition_type_uleb;
365 RETURN_IF_NOT_OK(rb.ReadULeb128(param_definition_type_uleb));
366 const auto param_definition_type =
367 static_cast<ParamDefinition::ParameterDefinitionType>(
368 param_definition_type_uleb);
369
370 switch (param_definition_type) {
371 case ParamDefinition::kParameterDefinitionMixGain: {
372 return absl::InvalidArgumentError(
373 "Mix Gain parameter type is explicitly forbidden for Audio Element "
374 "OBUs.");
375 }
376 case ParamDefinition::kParameterDefinitionReconGain: {
377 auto& recon_gain_param_definition =
378 param_definition.emplace<ReconGainParamDefinition>(audio_element_id);
379 RETURN_IF_NOT_OK(recon_gain_param_definition.ReadAndValidate(rb));
380 return absl::OkStatus();
381 }
382 case ParamDefinition::kParameterDefinitionDemixing: {
383 auto& demixing_param_definition =
384 param_definition.emplace<DemixingParamDefinition>();
385 RETURN_IF_NOT_OK(demixing_param_definition.ReadAndValidate(rb));
386 return absl::OkStatus();
387 }
388 default:
389 auto& extended_param_definition =
390 param_definition.emplace<ExtendedParamDefinition>(
391 param_definition_type);
392 RETURN_IF_NOT_OK(extended_param_definition.ReadAndValidate(rb));
393 return absl::OkStatus();
394 }
395 }
396
Write(WriteBitBuffer & wb) const397 absl::Status ChannelAudioLayerConfig::Write(WriteBitBuffer& wb) const {
398 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(loudspeaker_layout, 4));
399 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(output_gain_is_present_flag, 1));
400 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(recon_gain_is_present_flag, 1));
401 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(reserved_a, 2));
402 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(substream_count, 8));
403 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(coupled_substream_count, 8));
404
405 if (output_gain_is_present_flag == 1) {
406 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(output_gain_flag, 6));
407 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(reserved_b, 2));
408 RETURN_IF_NOT_OK(wb.WriteSigned16(output_gain));
409 }
410
411 if (loudspeaker_layout == kLayoutExpanded) {
412 RETURN_IF_NOT_OK(ValidateHasValue(expanded_loudspeaker_layout,
413 "`expanded_loudspeaker_layout`"));
414 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(*expanded_loudspeaker_layout, 8));
415 }
416
417 return absl::OkStatus();
418 }
419
Read(ReadBitBuffer & rb)420 absl::Status ChannelAudioLayerConfig::Read(ReadBitBuffer& rb) {
421 uint8_t loudspeaker_layout_uint8;
422 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(4, loudspeaker_layout_uint8));
423 loudspeaker_layout = static_cast<ChannelAudioLayerConfig::LoudspeakerLayout>(
424 loudspeaker_layout_uint8);
425 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(1, output_gain_is_present_flag));
426 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(1, recon_gain_is_present_flag));
427 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(2, reserved_a));
428 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(8, substream_count));
429 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(8, coupled_substream_count));
430
431 if (output_gain_is_present_flag == 1) {
432 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(6, output_gain_flag));
433 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(2, reserved_b));
434 RETURN_IF_NOT_OK(rb.ReadSigned16(output_gain));
435 }
436
437 if (loudspeaker_layout == kLayoutExpanded) {
438 uint8_t expanded_loudspeaker_layout_uint8;
439 RETURN_IF_NOT_OK(
440 rb.ReadUnsignedLiteral(8, expanded_loudspeaker_layout_uint8));
441 expanded_loudspeaker_layout =
442 static_cast<ChannelAudioLayerConfig::ExpandedLoudspeakerLayout>(
443 expanded_loudspeaker_layout_uint8);
444 }
445
446 return absl::OkStatus();
447 }
448
Validate(DecodedUleb128 num_substreams_in_audio_element) const449 absl::Status ScalableChannelLayoutConfig::Validate(
450 DecodedUleb128 num_substreams_in_audio_element) const {
451 if (num_layers == 0 || num_layers > 6) {
452 return absl::InvalidArgumentError(
453 absl::StrCat("Expected `num_layers` in [1, 6]; got ", num_layers));
454 }
455 RETURN_IF_NOT_OK(ValidateContainerSizeEqual(
456 "channel_audio_layer_configs", channel_audio_layer_configs, num_layers));
457
458 // Determine whether any binaural layouts are found and the total number of
459 // substreams.
460 DecodedUleb128 cumulative_substream_count = 0;
461 bool has_binaural_layout = false;
462 for (const auto& layer_config : channel_audio_layer_configs) {
463 if (layer_config.loudspeaker_layout ==
464 ChannelAudioLayerConfig::kLayoutBinaural) {
465 has_binaural_layout = true;
466 }
467
468 cumulative_substream_count +=
469 static_cast<DecodedUleb128>(layer_config.substream_count);
470 }
471
472 if (cumulative_substream_count != num_substreams_in_audio_element) {
473 return absl::InvalidArgumentError(
474 "Cumulative substream count from all layers is not equal to "
475 "the `num_substreams` in the OBU.");
476 }
477
478 if (has_binaural_layout && num_layers != 1) {
479 return absl::InvalidArgumentError(
480 "There must be exactly 1 layer if there is a binaural layout.");
481 }
482
483 return absl::OkStatus();
484 }
485
Validate(DecodedUleb128 num_substreams_in_audio_element) const486 absl::Status AmbisonicsMonoConfig::Validate(
487 DecodedUleb128 num_substreams_in_audio_element) const {
488 MAYBE_RETURN_IF_NOT_OK(ValidateOutputChannelCount(output_channel_count));
489 RETURN_IF_NOT_OK(ValidateContainerSizeEqual(
490 "channel_mapping", channel_mapping, output_channel_count));
491 if (substream_count > output_channel_count) {
492 return absl::InvalidArgumentError(
493 absl::StrCat("Expected substream_count=", substream_count,
494 " to be less than or equal to `output_channel_count`=",
495 output_channel_count, "."));
496 }
497 if (num_substreams_in_audio_element != substream_count) {
498 return absl::InvalidArgumentError(
499 absl::StrCat("Expected substream_count=", substream_count,
500 " to be equal to num_substreams_in_audio_element=",
501 num_substreams_in_audio_element, "."));
502 }
503
504 // Track the number of unique substream indices in the mapping.
505 absl::flat_hash_set<uint8_t> unique_substream_indices;
506 for (const auto& substream_index : channel_mapping) {
507 if (substream_index == kInactiveAmbisonicsChannelNumber) {
508 // OK. This implies the nth ambisonics channel number is dropped (i.e. the
509 // user wants mixed-order ambisonics).
510 continue;
511 }
512 if (substream_index >= substream_count) {
513 return absl::InvalidArgumentError(absl::StrCat(
514 "Mapping out of bounds. When substream_count= ", substream_count,
515 " there is no substream_index= ", substream_index, "."));
516 }
517
518 unique_substream_indices.insert(substream_index);
519 }
520
521 if (unique_substream_indices.size() != substream_count) {
522 return absl::InvalidArgumentError(absl::StrCat(
523 "A substream is in limbo; it has no associated ACN. ",
524 "substream_count= ", substream_count,
525 ", unique_substream_indices.size()= ", unique_substream_indices.size(),
526 "."));
527 }
528
529 return absl::OkStatus();
530 }
531
Validate(DecodedUleb128 num_substreams_in_audio_element) const532 absl::Status AmbisonicsProjectionConfig::Validate(
533 DecodedUleb128 num_substreams_in_audio_element) const {
534 RETURN_IF_NOT_OK(ValidateOutputChannelCount(output_channel_count));
535 if (coupled_substream_count > substream_count) {
536 return absl::InvalidArgumentError(absl::StrCat(
537 "Expected coupled_substream_count= ", coupled_substream_count,
538 " to be less than or equal to substream_count= ", substream_count));
539 }
540
541 if ((static_cast<int>(substream_count) +
542 static_cast<int>(coupled_substream_count)) > output_channel_count) {
543 return absl::InvalidArgumentError(absl::StrCat(
544 "Expected coupled_substream_count= ", coupled_substream_count,
545 " + substream_count= ", substream_count,
546 " to be less than or equal to `output_channel_count`= ",
547 output_channel_count, "."));
548 }
549 if (num_substreams_in_audio_element != substream_count) {
550 return absl::InvalidArgumentError(
551 absl::StrCat("Expected substream_count= ", substream_count,
552 " to be equal to num_substreams_in_audio_element= ",
553 num_substreams_in_audio_element, "."));
554 }
555
556 const size_t expected_num_elements = GetNumDemixingMatrixElements(*this);
557 RETURN_IF_NOT_OK(ValidateContainerSizeEqual(
558 "demixing_matrix", demixing_matrix, expected_num_elements));
559
560 return absl::OkStatus();
561 }
562
GetNextValidOutputChannelCount(uint8_t requested_output_channel_count,uint8_t & next_valid_output_channel_count)563 absl::Status AmbisonicsConfig::GetNextValidOutputChannelCount(
564 uint8_t requested_output_channel_count,
565 uint8_t& next_valid_output_channel_count) {
566 // Valid values are `(1+n)^2`, for integer `n` in the range [0, 14].
567 static constexpr auto kValidAmbisonicChannelCounts = []() -> auto {
568 std::array<uint8_t, 15> channel_count_i;
569 for (int i = 0; i < channel_count_i.size(); ++i) {
570 channel_count_i[i] = (i + 1) * (i + 1);
571 }
572 return channel_count_i;
573 }();
574
575 // Lookup the next higher or equal valid channel count.
576 auto valid_channel_count_iter = std::lower_bound(
577 kValidAmbisonicChannelCounts.begin(), kValidAmbisonicChannelCounts.end(),
578 requested_output_channel_count);
579 if (valid_channel_count_iter != kValidAmbisonicChannelCounts.end()) {
580 next_valid_output_channel_count = *valid_channel_count_iter;
581 return absl::OkStatus();
582 }
583
584 return absl::InvalidArgumentError(absl::StrCat(
585 "Output channel count is too large. requested_output_channel_count= ",
586 requested_output_channel_count,
587 ". Max=", kValidAmbisonicChannelCounts.back(), "."));
588 }
589
AudioElementObu(const ObuHeader & header,DecodedUleb128 audio_element_id,AudioElementType audio_element_type,const uint8_t reserved,DecodedUleb128 codec_config_id)590 AudioElementObu::AudioElementObu(const ObuHeader& header,
591 DecodedUleb128 audio_element_id,
592 AudioElementType audio_element_type,
593 const uint8_t reserved,
594 DecodedUleb128 codec_config_id)
595 : ObuBase(header, kObuIaAudioElement),
596 num_substreams_(0),
597 num_parameters_(0),
598 audio_element_id_(audio_element_id),
599 audio_element_type_(audio_element_type),
600 reserved_(reserved),
601 codec_config_id_(codec_config_id) {}
602
CreateFromBuffer(const ObuHeader & header,int64_t payload_size,ReadBitBuffer & rb)603 absl::StatusOr<AudioElementObu> AudioElementObu::CreateFromBuffer(
604 const ObuHeader& header, int64_t payload_size, ReadBitBuffer& rb) {
605 AudioElementObu audio_element_obu(header);
606 RETURN_IF_NOT_OK(audio_element_obu.ReadAndValidatePayload(payload_size, rb));
607 return audio_element_obu;
608 }
609
InitializeAudioSubstreams(DecodedUleb128 num_substreams)610 void AudioElementObu::InitializeAudioSubstreams(DecodedUleb128 num_substreams) {
611 num_substreams_ = num_substreams;
612 audio_substream_ids_.resize(static_cast<size_t>(num_substreams));
613 }
614
InitializeParams(const DecodedUleb128 num_parameters)615 void AudioElementObu::InitializeParams(const DecodedUleb128 num_parameters) {
616 num_parameters_ = num_parameters;
617 audio_element_params_.reserve(static_cast<size_t>(num_parameters));
618 }
619
620 // Initializes the scalable channel portion of an `AudioElementObu`.
InitializeScalableChannelLayout(const uint32_t num_layers,const uint32_t reserved)621 absl::Status AudioElementObu::InitializeScalableChannelLayout(
622 const uint32_t num_layers, const uint32_t reserved) {
623 // Validate the audio element type is correct.
624 if (audio_element_type_ != kAudioElementChannelBased) {
625 return absl::InvalidArgumentError(absl::StrCat(
626 "`InitializeScalableChannelLayout()` can only be called ",
627 "when `audio_element_type_ == kAudioElementChannelBased`, ", "but got ",
628 audio_element_type_));
629 }
630
631 ScalableChannelLayoutConfig config;
632 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
633 "ScalableChannelLayoutConfig.num_layers", num_layers, config.num_layers));
634 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
635 "ScalableChannelLayoutConfig.reserved", reserved, config.reserved));
636 config.channel_audio_layer_configs.resize(num_layers);
637 config_ = config;
638 return absl::OkStatus();
639 }
640
641 // Initializes the ambisonics mono portion of an `AudioElementObu`.
InitializeAmbisonicsMono(const uint32_t output_channel_count,const uint32_t substream_count)642 absl::Status AudioElementObu::InitializeAmbisonicsMono(
643 const uint32_t output_channel_count, const uint32_t substream_count) {
644 // Validate the audio element type and ambisonics mode are correct.
645 if (audio_element_type_ != kAudioElementSceneBased) {
646 return absl::InvalidArgumentError(
647 absl::StrCat("`InitializeAmbisonicsMono()` can only be called ",
648 "when `audio_element_type_ == kAudioElementSceneBased`, ",
649 "but got ", audio_element_type_));
650 }
651
652 AmbisonicsConfig config;
653 config.ambisonics_mode = AmbisonicsConfig::kAmbisonicsModeMono;
654
655 AmbisonicsMonoConfig mono_config;
656 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
657 "AmbisonicsMonoConfig.output_channel_count", output_channel_count,
658 mono_config.output_channel_count));
659 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
660 "AmbisonicsMonoConfig.substream_count", substream_count,
661 mono_config.substream_count));
662 mono_config.channel_mapping.resize(output_channel_count);
663 config.ambisonics_config = mono_config;
664 config_ = config;
665
666 return absl::OkStatus();
667 }
668
669 // Initializes the ambisonics projection portion of an `AudioElementObu`.
InitializeAmbisonicsProjection(const uint32_t output_channel_count,const uint32_t substream_count,const uint32_t coupled_substream_count)670 absl::Status AudioElementObu::InitializeAmbisonicsProjection(
671 const uint32_t output_channel_count, const uint32_t substream_count,
672 const uint32_t coupled_substream_count) {
673 // Validate the audio element type and ambisonics mode are correct.
674 if (audio_element_type_ != kAudioElementSceneBased) {
675 return absl::InvalidArgumentError(
676 absl::StrCat("`InitializeAmbisonicsProjection()` can only be called ",
677 "when `audio_element_type_ == kAudioElementSceneBased`, ",
678 "but got ", audio_element_type_));
679 }
680
681 AmbisonicsConfig config;
682 config.ambisonics_mode = AmbisonicsConfig::kAmbisonicsModeProjection;
683
684 AmbisonicsProjectionConfig projection_config;
685 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
686 "AmbisonicsProjectionConfig.output_channel_count", output_channel_count,
687 projection_config.output_channel_count));
688 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
689 "AmbisonicsProjectionConfig.substream_count", substream_count,
690 projection_config.substream_count));
691 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
692 "AmbisonicsProjectionConfig.coupled_substream_count",
693 coupled_substream_count, projection_config.coupled_substream_count));
694 const size_t num_elements = GetNumDemixingMatrixElements(projection_config);
695 projection_config.demixing_matrix.resize(num_elements);
696 config.ambisonics_config = projection_config;
697 config_ = config;
698
699 return absl::OkStatus();
700 }
701
InitializeExtensionConfig(const DecodedUleb128 audio_element_config_size)702 void AudioElementObu::InitializeExtensionConfig(
703 const DecodedUleb128 audio_element_config_size) {
704 config_ =
705 ExtensionConfig{.audio_element_config_size = audio_element_config_size};
706 }
707
PrintObu() const708 void AudioElementObu::PrintObu() const {
709 LOG(INFO) << "Audio Element OBU:";
710 LOG(INFO) << " audio_element_id= " << audio_element_id_;
711 LOG(INFO) << " audio_element_type= " << absl::StrCat(audio_element_type_);
712 LOG(INFO) << " reserved= " << absl::StrCat(reserved_);
713 LOG(INFO) << " codec_config_id= " << codec_config_id_;
714 LOG(INFO) << " num_substreams= " << num_substreams_;
715 for (int i = 0; i < num_substreams_; ++i) {
716 const auto& substream_id = audio_substream_ids_[i];
717 LOG(INFO) << " audio_substream_ids[" << i << "]= " << substream_id;
718 }
719 LOG(INFO) << " num_parameters= " << num_parameters_;
720 for (int i = 0; i < num_parameters_; ++i) {
721 LOG(INFO) << " params[" << i << "]";
722 std::visit([](const auto& param_definition) { param_definition.Print(); },
723 audio_element_params_[i].param_definition);
724 }
725 if (audio_element_type_ == kAudioElementChannelBased) {
726 LogChannelBased(std::get<ScalableChannelLayoutConfig>(config_));
727 } else if (audio_element_type_ == kAudioElementSceneBased) {
728 LogSceneBased(std::get<AmbisonicsConfig>(config_));
729 }
730 }
731
ValidateAndWritePayload(WriteBitBuffer & wb) const732 absl::Status AudioElementObu::ValidateAndWritePayload(
733 WriteBitBuffer& wb) const {
734 RETURN_IF_NOT_OK(ValidateUniqueParamDefinitionType(audio_element_params_));
735
736 RETURN_IF_NOT_OK(wb.WriteUleb128(audio_element_id_));
737 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(audio_element_type_, 3));
738 RETURN_IF_NOT_OK(wb.WriteUnsignedLiteral(reserved_, 5));
739 RETURN_IF_NOT_OK(wb.WriteUleb128(codec_config_id_));
740 RETURN_IF_NOT_OK(wb.WriteUleb128(num_substreams_));
741
742 // Loop to write the audio substream IDs portion of the obu.
743 RETURN_IF_NOT_OK(ValidateContainerSizeEqual(
744 "audio_substream_ids", audio_substream_ids_, num_substreams_));
745 for (const auto& audio_substream_id : audio_substream_ids_) {
746 RETURN_IF_NOT_OK(wb.WriteUleb128(audio_substream_id));
747 }
748
749 RETURN_IF_NOT_OK(wb.WriteUleb128(num_parameters_));
750
751 // Loop to write the parameter portion of the obu.
752 RETURN_IF_NOT_OK(ValidateContainerSizeEqual(
753 "audio_element_params_", audio_element_params_, num_parameters_));
754 for (const auto& audio_element_param : audio_element_params_) {
755 RETURN_IF_NOT_OK(
756 ValidateAndWriteAudioElementParam(audio_element_param, wb));
757 }
758
759 // Write the specific `audio_element_type`'s config.
760 switch (audio_element_type_) {
761 case kAudioElementChannelBased:
762 return ValidateAndWriteScalableChannelLayout(
763 std::get<ScalableChannelLayoutConfig>(config_), num_substreams_, wb);
764 case kAudioElementSceneBased:
765 return ValidateAndWriteAmbisonicsConfig(
766 std::get<AmbisonicsConfig>(config_), num_substreams_, wb);
767 default: {
768 const auto& extension_config = std::get<ExtensionConfig>(config_);
769 RETURN_IF_NOT_OK(
770 wb.WriteUleb128(extension_config.audio_element_config_size));
771 RETURN_IF_NOT_OK(ValidateContainerSizeEqual(
772 "audio_element_config_bytes",
773 extension_config.audio_element_config_bytes,
774 extension_config.audio_element_config_size));
775 RETURN_IF_NOT_OK(wb.WriteUint8Span(
776 absl::MakeConstSpan(extension_config.audio_element_config_bytes)));
777
778 return absl::OkStatus();
779 }
780 }
781
782 return absl::OkStatus();
783 }
784
ReadAndValidatePayloadDerived(int64_t,ReadBitBuffer & rb)785 absl::Status AudioElementObu::ReadAndValidatePayloadDerived(
786 int64_t /*payload_size*/, ReadBitBuffer& rb) {
787 RETURN_IF_NOT_OK(rb.ReadULeb128(audio_element_id_));
788 uint8_t audio_element_type;
789 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(3, audio_element_type));
790 audio_element_type_ = static_cast<AudioElementType>(audio_element_type);
791 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(5, reserved_));
792 RETURN_IF_NOT_OK(rb.ReadULeb128(codec_config_id_));
793 RETURN_IF_NOT_OK(rb.ReadULeb128(num_substreams_));
794
795 // Loop to read the audio substream IDs portion of the obu.
796 for (int i = 0; i < num_substreams_; ++i) {
797 DecodedUleb128 audio_substream_id;
798 RETURN_IF_NOT_OK(rb.ReadULeb128(audio_substream_id));
799 audio_substream_ids_.push_back(audio_substream_id);
800 }
801 RETURN_IF_NOT_OK(ValidateContainerSizeEqual(
802 "audio_substream_ids", audio_substream_ids_, num_substreams_));
803
804 RETURN_IF_NOT_OK(rb.ReadULeb128(num_parameters_));
805
806 // Loop to read the parameter portion of the obu.
807 for (int i = 0; i < num_parameters_; ++i) {
808 AudioElementParam audio_element_param;
809 RETURN_IF_NOT_OK(
810 audio_element_param.ReadAndValidate(audio_element_id_, rb));
811 audio_element_params_.push_back(std::move(audio_element_param));
812 }
813 RETURN_IF_NOT_OK(ValidateContainerSizeEqual(
814 "num_parameters", audio_element_params_, num_parameters_));
815
816 // Write the specific `audio_element_type`'s config.
817 switch (audio_element_type_) {
818 case kAudioElementChannelBased:
819 config_ = ScalableChannelLayoutConfig();
820 return ReadAndValidateScalableChannelLayout(
821 std::get<ScalableChannelLayoutConfig>(config_), num_substreams_, rb);
822 case kAudioElementSceneBased:
823 config_ = AmbisonicsConfig();
824 return ReadAndValidateAmbisonicsConfig(
825 std::get<AmbisonicsConfig>(config_), num_substreams_, rb);
826 default: {
827 ExtensionConfig extension_config;
828 RETURN_IF_NOT_OK(
829 rb.ReadULeb128(extension_config.audio_element_config_size));
830 for (int i = 0; i < extension_config.audio_element_config_size; ++i) {
831 uint8_t config_bytes;
832 RETURN_IF_NOT_OK(rb.ReadUnsignedLiteral(8, config_bytes));
833 extension_config.audio_element_config_bytes.push_back(config_bytes);
834 }
835
836 RETURN_IF_NOT_OK(ValidateContainerSizeEqual(
837 "audio_element_config_bytes",
838 extension_config.audio_element_config_bytes,
839 extension_config.audio_element_config_size));
840
841 return absl::OkStatus();
842 }
843 }
844 RETURN_IF_NOT_OK(ValidateUniqueParamDefinitionType(audio_element_params_));
845 return absl::OkStatus();
846 }
847
848 } // namespace iamf_tools
849