1 /*
2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12 #include "iamf/cli/proto_conversion/proto_to_obu/audio_element_generator.h"
13
14 #include <cstdint>
15 #include <optional>
16 #include <utility>
17 #include <vector>
18
19 #include "absl/base/no_destructor.h"
20 #include "absl/container/flat_hash_map.h"
21 #include "absl/container/flat_hash_set.h"
22 #include "absl/log/check.h"
23 #include "absl/log/log.h"
24 #include "absl/status/status.h"
25 #include "absl/strings/str_cat.h"
26 #include "absl/types/span.h"
27 #include "iamf/cli/audio_element_with_data.h"
28 #include "iamf/cli/channel_label.h"
29 #include "iamf/cli/obu_with_data_generator.h"
30 #include "iamf/cli/proto/audio_element.pb.h"
31 #include "iamf/cli/proto/param_definitions.pb.h"
32 #include "iamf/cli/proto_conversion/lookup_tables.h"
33 #include "iamf/cli/proto_conversion/proto_utils.h"
34 #include "iamf/common/utils/macros.h"
35 #include "iamf/common/utils/map_utils.h"
36 #include "iamf/common/utils/numeric_utils.h"
37 #include "iamf/obu/audio_element.h"
38 #include "iamf/obu/codec_config.h"
39 #include "iamf/obu/demixing_param_definition.h"
40 #include "iamf/obu/param_definitions.h"
41 #include "iamf/obu/types.h"
42
43 namespace iamf_tools {
44
45 using absl::InvalidArgumentError;
46 using absl::StrCat;
47
48 using enum ChannelLabel::Label;
49
50 namespace {
51 // Copies the `ParameterDefinitionType` based on the input data.
CopyAudioElementParamDefinitionType(iamf_tools_cli_proto::AudioElementParam user_data_parameter,ParamDefinition::ParameterDefinitionType & output_param_definition_type)52 absl::Status CopyAudioElementParamDefinitionType(
53 iamf_tools_cli_proto::AudioElementParam user_data_parameter,
54 ParamDefinition::ParameterDefinitionType& output_param_definition_type) {
55 if (user_data_parameter.has_deprecated_param_definition_type()) {
56 return InvalidArgumentError(
57 "Please upgrade the `deprecated_param_definition_type` "
58 "field to the new `param_definition_type` field."
59 "\nSuggested upgrades:\n"
60 "- `deprecated_param_definition_type: 1` -> `param_definition_type: "
61 "PARAM_DEFINITION_TYPE_DEMIXING`\n"
62 "- `deprecated_param_definition_type: 2` -> `param_definition_type: "
63 "PARAM_DEFINITION_TYPE_RECON_GAIN`\n");
64 }
65 if (!user_data_parameter.has_param_definition_type()) {
66 return InvalidArgumentError("Missing `param_definition_type` field.");
67 }
68
69 switch (user_data_parameter.param_definition_type()) {
70 using enum iamf_tools_cli_proto::ParamDefinitionType;
71 using enum ParamDefinition::ParameterDefinitionType;
72 case PARAM_DEFINITION_TYPE_DEMIXING:
73 output_param_definition_type = kParameterDefinitionDemixing;
74 return absl::OkStatus();
75 case PARAM_DEFINITION_TYPE_RECON_GAIN:
76 output_param_definition_type = kParameterDefinitionReconGain;
77 return absl::OkStatus();
78 case PARAM_DEFINITION_TYPE_MIX_GAIN:
79 return InvalidArgumentError(absl::StrCat(
80 "Mix gain parameters are not permitted in audio elements"));
81 case PARAM_DEFINITION_TYPE_RESERVED_3:
82 output_param_definition_type = kParameterDefinitionReservedStart;
83 return absl::OkStatus();
84 default:
85 return InvalidArgumentError(
86 StrCat("Unknown or invalid param_definition_type= ",
87 user_data_parameter.param_definition_type()));
88 }
89 }
90
GenerateAudioSubstreams(const iamf_tools_cli_proto::AudioElementObuMetadata & audio_element_metadata,AudioElementObu & audio_element_obu)91 absl::Status GenerateAudioSubstreams(
92 const iamf_tools_cli_proto::AudioElementObuMetadata& audio_element_metadata,
93 AudioElementObu& audio_element_obu) {
94 if (audio_element_metadata.num_substreams() !=
95 audio_element_metadata.audio_substream_ids_size()) {
96 return InvalidArgumentError(
97 StrCat("User data has inconsistent `num_substreams` and "
98 "`audio_substream_ids`. User provided ",
99 audio_element_metadata.audio_substream_ids_size(),
100 " substreams in `audio_substream_ids`, and `num_substreams`= ",
101 audio_element_metadata.num_substreams()));
102 }
103
104 audio_element_obu.InitializeAudioSubstreams(
105 audio_element_metadata.num_substreams());
106 for (int i = 0; i < audio_element_metadata.num_substreams(); ++i) {
107 audio_element_obu.audio_substream_ids_[i] =
108 audio_element_metadata.audio_substream_ids(i);
109 }
110 return absl::OkStatus();
111 }
112
GenerateParameterDefinitions(const iamf_tools_cli_proto::AudioElementObuMetadata & audio_element_metadata,const CodecConfigObu & codec_config_obu,AudioElementObu & audio_element_obu)113 absl::Status GenerateParameterDefinitions(
114 const iamf_tools_cli_proto::AudioElementObuMetadata& audio_element_metadata,
115 const CodecConfigObu& codec_config_obu,
116 AudioElementObu& audio_element_obu) {
117 if (audio_element_metadata.num_parameters() !=
118 audio_element_metadata.audio_element_params_size()) {
119 return InvalidArgumentError(StrCat(
120 "User data has inconsistent `num_parameters`. Found: ",
121 audio_element_metadata.audio_element_params_size(),
122 " parameters, expected: ", audio_element_metadata.num_parameters()));
123 }
124
125 audio_element_obu.InitializeParams(audio_element_metadata.num_parameters());
126 for (int i = 0; i < audio_element_metadata.num_parameters(); ++i) {
127 const auto& user_data_parameter =
128 audio_element_metadata.audio_element_params(i);
129
130 ParamDefinition::ParameterDefinitionType copied_param_definition_type;
131 RETURN_IF_NOT_OK(CopyAudioElementParamDefinitionType(
132 user_data_parameter, copied_param_definition_type));
133 switch (copied_param_definition_type) {
134 using enum ParamDefinition::ParameterDefinitionType;
135 case kParameterDefinitionDemixing: {
136 DemixingParamDefinition demixing_param_definition;
137 RETURN_IF_NOT_OK(CopyParamDefinition(
138 user_data_parameter.demixing_param().param_definition(),
139 demixing_param_definition));
140 // Copy the `DemixingInfoParameterData` in the IAMF spec.
141 RETURN_IF_NOT_OK(CopyDemixingInfoParameterData(
142 user_data_parameter.demixing_param()
143 .default_demixing_info_parameter_data(),
144 demixing_param_definition.default_demixing_info_parameter_data_));
145 // Copy the extension portion of `DefaultDemixingInfoParameterData` in
146 // the IAMF spec.
147 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
148 "DemixingParamDefinition.default_w",
149 user_data_parameter.demixing_param().default_w(),
150 demixing_param_definition.default_demixing_info_parameter_data_
151 .default_w));
152 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
153 "DemixingParamDefinition.reserved",
154 user_data_parameter.demixing_param().reserved(),
155 demixing_param_definition.default_demixing_info_parameter_data_
156 .reserved_for_future_use));
157 if (demixing_param_definition.duration_ !=
158 codec_config_obu.GetCodecConfig().num_samples_per_frame) {
159 return InvalidArgumentError(
160 StrCat("Demixing parameter duration= ",
161 demixing_param_definition.duration_,
162 " is inconsistent with num_samples_per_frame=",
163 codec_config_obu.GetCodecConfig().num_samples_per_frame));
164 }
165 audio_element_obu.audio_element_params_.emplace_back(
166 AudioElementParam{demixing_param_definition});
167 break;
168 }
169 case kParameterDefinitionReconGain: {
170 ReconGainParamDefinition recon_gain_param_definition(
171 audio_element_obu.GetAudioElementId());
172 RETURN_IF_NOT_OK(CopyParamDefinition(
173 user_data_parameter.recon_gain_param().param_definition(),
174 recon_gain_param_definition));
175 if (recon_gain_param_definition.duration_ !=
176 codec_config_obu.GetCodecConfig().num_samples_per_frame) {
177 return InvalidArgumentError(
178 StrCat("Recon gain parameter duration= ",
179 recon_gain_param_definition.duration_,
180 " is inconsistent with num_samples_per_frame=",
181 codec_config_obu.GetCodecConfig().num_samples_per_frame));
182 }
183 audio_element_obu.audio_element_params_.emplace_back(
184 AudioElementParam{recon_gain_param_definition});
185 break;
186 }
187 case kParameterDefinitionMixGain:
188 return InvalidArgumentError(
189 "Mix gain parameters are not permitted in audio elements.");
190 default: {
191 const auto& user_param_definition =
192 user_data_parameter.param_definition_extension();
193 ExtendedParamDefinition extended_param_definition(
194 copied_param_definition_type);
195 // Copy the extension bytes.
196 extended_param_definition.param_definition_size_ =
197 user_param_definition.param_definition_size();
198 extended_param_definition.param_definition_bytes_.resize(
199 user_param_definition.param_definition_size());
200 RETURN_IF_NOT_OK(StaticCastSpanIfInRange(
201 "param_definition_bytes",
202 absl::MakeConstSpan(user_param_definition.param_definition_bytes()),
203 absl::MakeSpan(extended_param_definition.param_definition_bytes_)));
204
205 audio_element_obu.audio_element_params_.emplace_back(
206 AudioElementParam{extended_param_definition});
207 } break;
208 }
209 }
210
211 return absl::OkStatus();
212 }
213
ValidateReconGainDefined(const CodecConfigObu & codec_config_obu,const AudioElementObu & audio_element_obu)214 absl::Status ValidateReconGainDefined(
215 const CodecConfigObu& codec_config_obu,
216 const AudioElementObu& audio_element_obu) {
217 bool recon_gain_required = false;
218 const auto channel_config =
219 std::get<ScalableChannelLayoutConfig>(audio_element_obu.config_);
220 const auto& channel_audio_layer_configs =
221 channel_config.channel_audio_layer_configs;
222 for (int i = 0; i < channel_config.num_layers; i++) {
223 uint8_t expected_recon_gain_is_present_flag;
224 if (i == 0) {
225 // First layer: there is no demixed channel, so recon gain is not
226 // required.
227 expected_recon_gain_is_present_flag = 0;
228 } else if (codec_config_obu.IsLossless()) {
229 // Lossless codec does not require recon gain.
230 expected_recon_gain_is_present_flag = 0;
231 } else {
232 expected_recon_gain_is_present_flag = 1;
233 recon_gain_required = true;
234 }
235 if (channel_audio_layer_configs[i].recon_gain_is_present_flag !=
236 expected_recon_gain_is_present_flag) {
237 return InvalidArgumentError(
238 StrCat("`recon_gain_is_present_flag` for layer ", i, " should be ",
239 expected_recon_gain_is_present_flag, " but is ",
240 channel_audio_layer_configs[i].recon_gain_is_present_flag));
241 }
242 }
243
244 // Look for recon gain definitions.
245 bool recon_gain_defined = false;
246 for (const auto& audio_element_param :
247 audio_element_obu.audio_element_params_) {
248 const auto param_definition_type = audio_element_param.GetType();
249 if (param_definition_type ==
250 ParamDefinition::kParameterDefinitionReconGain) {
251 recon_gain_defined = true;
252 break;
253 }
254 }
255
256 if (recon_gain_defined != recon_gain_required) {
257 return InvalidArgumentError(
258 StrCat("Recon gain is ", (recon_gain_required ? "" : "not "),
259 "required but is ", (recon_gain_defined ? "" : "not "),
260 "defined in Audio Element OBU ID= ",
261 audio_element_obu.GetAudioElementId()));
262 }
263
264 return absl::OkStatus();
265 }
266
267 // Copies the `LoudspeakerLayout` based on the input data.
CopyLoudspeakerLayout(const iamf_tools_cli_proto::ChannelAudioLayerConfig & input_channel_audio_layer_config,ChannelAudioLayerConfig::LoudspeakerLayout & output_loudspeaker_layout)268 absl::Status CopyLoudspeakerLayout(
269 const iamf_tools_cli_proto::ChannelAudioLayerConfig&
270 input_channel_audio_layer_config,
271 ChannelAudioLayerConfig::LoudspeakerLayout& output_loudspeaker_layout) {
272 if (input_channel_audio_layer_config.has_deprecated_loudspeaker_layout()) {
273 return InvalidArgumentError(
274 "Please upgrade the `deprecated_loudspeaker_layout` field to the new "
275 "`loudspeaker_layout` field.\n"
276 "Suggested upgrades:\n"
277 "- `deprecated_loudspeaker_layout: 0` -> `loudspeaker_layout: "
278 "LOUDSPEAKER_LAYOUT_MONO`\n"
279 "- `deprecated_loudspeaker_layout: 1` -> `loudspeaker_layout: "
280 "LOUDSPEAKER_LAYOUT_STEREO`\n"
281 "- `deprecated_loudspeaker_layout: 2` -> `loudspeaker_layout: "
282 "LOUDSPEAKER_LAYOUT_5_1_CH`\n"
283 "- `deprecated_loudspeaker_layout: 3` -> `loudspeaker_layout: "
284 "LOUDSPEAKER_LAYOUT_5_1_2_CH`\n"
285 "- `deprecated_loudspeaker_layout: 4` -> `loudspeaker_layout: "
286 "LOUDSPEAKER_LAYOUT_5_1_4_CH`\n"
287 "- `deprecated_loudspeaker_layout: 5` -> `loudspeaker_layout: "
288 "LOUDSPEAKER_LAYOUT_7_1_CH`\n"
289 "- `deprecated_loudspeaker_layout: 6` -> `loudspeaker_layout: "
290 "LOUDSPEAKER_LAYOUT_7_1_2_CH`\n"
291 "- `deprecated_loudspeaker_layout: 7` -> `loudspeaker_layout: "
292 "LOUDSPEAKER_LAYOUT_7_1_4_CH`\n"
293 "- `deprecated_loudspeaker_layout: 8` -> `loudspeaker_layout: "
294 "LOUDSPEAKER_LAYOUT_3_1_2_CH`\n"
295 "- `deprecated_loudspeaker_layout: 9` -> `loudspeaker_layout: "
296 "LOUDSPEAKER_LAYOUT_BINAURAL`\n");
297 }
298
299 static const auto kProtoToInternalLoudspeakerLayout = BuildStaticMapFromPairs(
300 LookupTables::kProtoAndInternalLoudspeakerLayouts);
301
302 return CopyFromMap(*kProtoToInternalLoudspeakerLayout,
303 input_channel_audio_layer_config.loudspeaker_layout(),
304 "Internal version of proto `LoudspeakerLayout`= ",
305 output_loudspeaker_layout);
306 }
307
308 // Copies the `ExpandedLoudspeakerLayout` based on the input data.
CopyExpandedLoudspeakerLayout(iamf_tools_cli_proto::ExpandedLoudspeakerLayout input_expanded_loudspeaker_layout,ChannelAudioLayerConfig::ExpandedLoudspeakerLayout & output_expanded_loudspeaker_layout)309 absl::Status CopyExpandedLoudspeakerLayout(
310 iamf_tools_cli_proto::ExpandedLoudspeakerLayout
311 input_expanded_loudspeaker_layout,
312 ChannelAudioLayerConfig::ExpandedLoudspeakerLayout&
313 output_expanded_loudspeaker_layout) {
314 static const auto kProtoToInternalExpandedLoudspeakerLayout =
315 BuildStaticMapFromPairs(
316 LookupTables::kProtoAndInternalExpandedLoudspeakerLayouts);
317
318 return CopyFromMap(*kProtoToInternalExpandedLoudspeakerLayout,
319 input_expanded_loudspeaker_layout,
320 "Internal version of proto `ExpandedLoudspeakerLayout`= ",
321 output_expanded_loudspeaker_layout);
322 }
323
324 // Copies the `LoudspeakerLayout` and `ExpandedLoudspeakerLayout` based on the
325 // input data.
CopyLoudspeakerLayoutAndExpandedLoudspeakerLayout(const iamf_tools_cli_proto::ChannelAudioLayerConfig & input_layer_config,ChannelAudioLayerConfig::LoudspeakerLayout & output_loudspeaker_layout,std::optional<ChannelAudioLayerConfig::ExpandedLoudspeakerLayout> & output_expanded_loudspeaker_layout)326 absl::Status CopyLoudspeakerLayoutAndExpandedLoudspeakerLayout(
327 const iamf_tools_cli_proto::ChannelAudioLayerConfig& input_layer_config,
328 ChannelAudioLayerConfig::LoudspeakerLayout& output_loudspeaker_layout,
329 std::optional<ChannelAudioLayerConfig::ExpandedLoudspeakerLayout>&
330 output_expanded_loudspeaker_layout) {
331 RETURN_IF_NOT_OK(
332 CopyLoudspeakerLayout(input_layer_config, output_loudspeaker_layout));
333
334 if (output_loudspeaker_layout == ChannelAudioLayerConfig::kLayoutExpanded) {
335 ChannelAudioLayerConfig::ExpandedLoudspeakerLayout
336 expanded_loudspeaker_layout;
337 RETURN_IF_NOT_OK(CopyExpandedLoudspeakerLayout(
338 input_layer_config.expanded_loudspeaker_layout(),
339 expanded_loudspeaker_layout));
340 output_expanded_loudspeaker_layout = expanded_loudspeaker_layout;
341 } else {
342 // Ignore user input since it would not be in the bitstream as of IAMF
343 // v1.1.0.
344 output_expanded_loudspeaker_layout = std::nullopt;
345 }
346
347 return absl::OkStatus();
348 }
349
FillScalableChannelLayoutConfig(const iamf_tools_cli_proto::AudioElementObuMetadata & audio_element_metadata,const CodecConfigObu & codec_config_obu,AudioElementWithData & audio_element)350 absl::Status FillScalableChannelLayoutConfig(
351 const iamf_tools_cli_proto::AudioElementObuMetadata& audio_element_metadata,
352 const CodecConfigObu& codec_config_obu,
353 AudioElementWithData& audio_element) {
354 if (!audio_element_metadata.has_scalable_channel_layout_config()) {
355 return InvalidArgumentError(StrCat(
356 "Audio Element Metadata [", audio_element_metadata.audio_element_id(),
357 " is of type AUDIO_ELEMENT_CHANNEL_BASED but does not have",
358 " the `scalable_channel_layout_config` field."));
359 }
360
361 const auto& input_config =
362 audio_element_metadata.scalable_channel_layout_config();
363 RETURN_IF_NOT_OK(audio_element.obu.InitializeScalableChannelLayout(
364 input_config.num_layers(), input_config.reserved()));
365 auto& config =
366 std::get<ScalableChannelLayoutConfig>(audio_element.obu.config_);
367 if (config.num_layers != input_config.channel_audio_layer_configs_size()) {
368 return InvalidArgumentError(StrCat(
369 "Expected ", config.num_layers, " layers in the metadata. Found ",
370 input_config.channel_audio_layer_configs_size(), " layers."));
371 }
372 for (int i = 0; i < config.num_layers; ++i) {
373 ChannelAudioLayerConfig* const layer_config =
374 &config.channel_audio_layer_configs[i];
375
376 const auto& input_layer_config =
377 input_config.channel_audio_layer_configs(i);
378
379 RETURN_IF_NOT_OK(CopyLoudspeakerLayoutAndExpandedLoudspeakerLayout(
380 input_layer_config, layer_config->loudspeaker_layout,
381 layer_config->expanded_loudspeaker_layout));
382 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
383 "ChannelAudioLayerConfig.output_gain_is_present_flag",
384 input_layer_config.output_gain_is_present_flag(),
385 layer_config->output_gain_is_present_flag));
386 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
387 "ChannelAudioLayerConfig.recon_gain_is_present_flag",
388 input_layer_config.recon_gain_is_present_flag(),
389 layer_config->recon_gain_is_present_flag));
390 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
391 "ChannelAudioLayerConfig.reserved_a", input_layer_config.reserved_a(),
392 layer_config->reserved_a));
393 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
394 "ChannelAudioLayerConfig.substream_count",
395 input_layer_config.substream_count(), layer_config->substream_count));
396 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
397 "ChannelAudioLayerConfig.coupled_substream_count",
398 input_layer_config.coupled_substream_count(),
399 layer_config->coupled_substream_count));
400
401 if (layer_config->output_gain_is_present_flag == 1) {
402 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
403 "ChannelAudioLayerConfig.output_gain_flag",
404 input_layer_config.output_gain_flag(),
405 layer_config->output_gain_flag));
406 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
407 "ChannelAudioLayerConfig.reserved_b", input_layer_config.reserved_b(),
408 layer_config->reserved_b));
409 RETURN_IF_NOT_OK(StaticCastIfInRange<int32_t, int16_t>(
410 "ChannelAudioLayerConfig.output_gain",
411 input_layer_config.output_gain(), layer_config->output_gain));
412 }
413 }
414
415 RETURN_IF_NOT_OK(
416 ValidateReconGainDefined(codec_config_obu, audio_element.obu));
417
418 return ObuWithDataGenerator::FinalizeScalableChannelLayoutConfig(
419 audio_element.obu.audio_substream_ids_, config,
420 audio_element.substream_id_to_labels, audio_element.label_to_output_gain,
421 audio_element.channel_numbers_for_layers);
422 }
423
FillAmbisonicsMonoConfig(const iamf_tools_cli_proto::AmbisonicsConfig & input_config,const DecodedUleb128 audio_element_id,AudioElementObu & audio_element_obu,SubstreamIdLabelsMap & substream_id_to_labels)424 absl::Status FillAmbisonicsMonoConfig(
425 const iamf_tools_cli_proto::AmbisonicsConfig& input_config,
426 const DecodedUleb128 audio_element_id, AudioElementObu& audio_element_obu,
427 SubstreamIdLabelsMap& substream_id_to_labels) {
428 if (!input_config.has_ambisonics_mono_config()) {
429 return InvalidArgumentError(
430 StrCat("Audio Element Metadata [", audio_element_id,
431 " is of mode AMBISONICS_MODE_MONO but does not have the "
432 "`ambisonics_mono_config` field."));
433 }
434 const auto& input_mono_config = input_config.ambisonics_mono_config();
435 RETURN_IF_NOT_OK(audio_element_obu.InitializeAmbisonicsMono(
436 input_mono_config.output_channel_count(),
437 input_mono_config.substream_count()));
438 auto& mono_config = std::get<AmbisonicsMonoConfig>(
439 std::get<AmbisonicsConfig>(audio_element_obu.config_).ambisonics_config);
440 if (input_mono_config.channel_mapping_size() !=
441 input_mono_config.output_channel_count()) {
442 return InvalidArgumentError(StrCat(
443 "Audio Element Metadata [", audio_element_id,
444 " has output_channel_count= ", input_mono_config.output_channel_count(),
445 ", but `channel_mapping` has ",
446 input_mono_config.channel_mapping_size(), " elements."));
447 }
448
449 for (int i = 0; i < input_mono_config.channel_mapping_size(); ++i) {
450 RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
451 "AmbisonicsMonoConfig.channel_mapping",
452 input_mono_config.channel_mapping(i), mono_config.channel_mapping[i]));
453 }
454
455 // Validate the mono config. This ensures no substream indices should be out
456 // of bounds.
457 RETURN_IF_NOT_OK(mono_config.Validate(audio_element_obu.num_substreams_));
458 // Populate substream_id_to_labels.
459 RETURN_IF_NOT_OK(ObuWithDataGenerator::FinalizeAmbisonicsConfig(
460 audio_element_obu, substream_id_to_labels));
461 return absl::OkStatus();
462 }
463
FillAmbisonicsProjectionConfig(const iamf_tools_cli_proto::AmbisonicsConfig & input_config,const DecodedUleb128 audio_element_id,AudioElementObu & audio_element_obu,SubstreamIdLabelsMap & substream_id_to_labels)464 absl::Status FillAmbisonicsProjectionConfig(
465 const iamf_tools_cli_proto::AmbisonicsConfig& input_config,
466 const DecodedUleb128 audio_element_id, AudioElementObu& audio_element_obu,
467 SubstreamIdLabelsMap& substream_id_to_labels) {
468 if (!input_config.has_ambisonics_projection_config()) {
469 return InvalidArgumentError(
470 StrCat("Audio Element Metadata [", audio_element_id,
471 " is of mode AMBISONICS_MODE_PROJECTION but does not have"
472 " the `AMBISONICS_MODE_PROJECTION` field."));
473 }
474 const auto& input_projection_config =
475 input_config.ambisonics_projection_config();
476 RETURN_IF_NOT_OK(audio_element_obu.InitializeAmbisonicsProjection(
477 input_projection_config.output_channel_count(),
478 input_projection_config.substream_count(),
479 input_projection_config.coupled_substream_count()));
480 auto& projection_config = std::get<AmbisonicsProjectionConfig>(
481 std::get<AmbisonicsConfig>(audio_element_obu.config_).ambisonics_config);
482 const int expected_demixing_matrix_size =
483 (input_projection_config.substream_count() +
484 input_projection_config.coupled_substream_count()) *
485 input_projection_config.output_channel_count();
486 if (input_projection_config.demixing_matrix_size() !=
487 expected_demixing_matrix_size) {
488 return InvalidArgumentError(
489 StrCat("Audio Element Metadata [", audio_element_id,
490 " expects demixing_matrix_size= ", expected_demixing_matrix_size,
491 ", but `demixing_matrix` has ",
492 input_projection_config.demixing_matrix_size(), " elements."));
493 }
494
495 for (int i = 0; i < input_projection_config.demixing_matrix_size(); ++i) {
496 RETURN_IF_NOT_OK(StaticCastIfInRange<int32_t, int16_t>(
497 absl::StrCat("AmbisonicsProjectionConfig.demixing_matrix[", i, "]"),
498 input_projection_config.demixing_matrix(i),
499 projection_config.demixing_matrix[i]));
500 }
501 RETURN_IF_NOT_OK(ObuWithDataGenerator::FinalizeAmbisonicsConfig(
502 audio_element_obu, substream_id_to_labels));
503 return absl::OkStatus();
504 }
505
FillAmbisonicsConfig(const iamf_tools_cli_proto::AudioElementObuMetadata & audio_element_metadata,AudioElementWithData & audio_element)506 absl::Status FillAmbisonicsConfig(
507 const iamf_tools_cli_proto::AudioElementObuMetadata& audio_element_metadata,
508 AudioElementWithData& audio_element) {
509 if (!audio_element_metadata.has_ambisonics_config()) {
510 LOG(ERROR) << "Audio Element Metadata ["
511 << audio_element_metadata.audio_element_id()
512 << " is of type AUDIO_ELEMENT_SCENE_BASED but does not have"
513 << " the `ambisonics_config` field.";
514 return InvalidArgumentError(StrCat(
515 "Audio Element Metadata [", audio_element_metadata.audio_element_id(),
516 " is of type AUDIO_ELEMENT_SCENE_BASED but does not have"
517 " the `ambisonics_config` field."));
518 }
519
520 const auto& input_config = audio_element_metadata.ambisonics_config();
521 AmbisonicsConfig::AmbisonicsMode ambisonics_mode;
522 switch (input_config.ambisonics_mode()) {
523 using enum iamf_tools_cli_proto::AmbisonicsMode;
524 using enum AmbisonicsConfig::AmbisonicsMode;
525 case AMBISONICS_MODE_MONO:
526 ambisonics_mode = kAmbisonicsModeMono;
527 RETURN_IF_NOT_OK(FillAmbisonicsMonoConfig(
528 input_config, audio_element_metadata.audio_element_id(),
529 audio_element.obu, audio_element.substream_id_to_labels));
530 break;
531 case AMBISONICS_MODE_PROJECTION:
532 ambisonics_mode = kAmbisonicsModeProjection;
533 RETURN_IF_NOT_OK(FillAmbisonicsProjectionConfig(
534 input_config, audio_element_metadata.audio_element_id(),
535 audio_element.obu, audio_element.substream_id_to_labels));
536 break;
537 default:
538 LOG(ERROR) << "Unrecognized ambisonics_mode: "
539 << input_config.ambisonics_mode();
540 return InvalidArgumentError(StrCat("Unrecognized ambisonics_mode: ",
541 input_config.ambisonics_mode()));
542 }
543 std::get<AmbisonicsConfig>(audio_element.obu.config_).ambisonics_mode =
544 ambisonics_mode;
545
546 return absl::OkStatus();
547 }
548
LogAudioElements(const absl::flat_hash_map<DecodedUleb128,AudioElementWithData> & audio_elements)549 void LogAudioElements(
550 const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
551 audio_elements) {
552 // Examine Audio Element OBUs.
553 for (const auto& [audio_element_id, audio_element] : audio_elements) {
554 audio_element.obu.PrintObu();
555
556 // Log `substream_id_to_labels` separately.
557 for (const auto& [substream_id, labels] :
558 audio_element.substream_id_to_labels) {
559 LOG(INFO) << "Substream ID: " << substream_id;
560 LOG(INFO) << " num_channels= " << labels.size();
561 }
562 }
563 }
564
565 } // namespace
566
// Generates one `AudioElementWithData` per entry of the user metadata held
// by this generator and inserts it into `audio_elements`, keyed by audio
// element ID.
//
// Args:
//   codec_configs: Codec Config OBUs keyed by codec config ID. Each generated
//       element stores a pointer to its matching entry.
//   audio_elements: Output map. An element is inserted before its
//       type-specific config is filled, so an entry may remain in the map
//       when an error is returned.
//
// Returns `InvalidArgumentError` for an unrecognized element type, an unknown
// `codec_config_id`, or a duplicate audio element ID; propagates any error
// from the helpers. On success all generated elements are logged.
absl::Status AudioElementGenerator::Generate(
    const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_configs,
    absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements) {
  for (const auto& metadata : audio_element_metadata_) {
    // Fields shared by every audio element type.
    const auto audio_element_id = metadata.audio_element_id();

    AudioElementObu::AudioElementType audio_element_type;
    switch (metadata.audio_element_type()) {
      using enum iamf_tools_cli_proto::AudioElementType;
      using enum AudioElementObu::AudioElementType;
      case AUDIO_ELEMENT_CHANNEL_BASED:
        audio_element_type = kAudioElementChannelBased;
        break;
      case AUDIO_ELEMENT_SCENE_BASED:
        audio_element_type = kAudioElementSceneBased;
        break;
      default:
        return InvalidArgumentError(
            StrCat("Unrecognized audio_element_type= ",
                   metadata.audio_element_type()));
    }
    uint8_t reserved;
    RETURN_IF_NOT_OK(StaticCastIfInRange<uint32_t, uint8_t>(
        "AudioElementObuMetadata.reserved", metadata.reserved(), reserved));
    const auto codec_config_id = metadata.codec_config_id();

    AudioElementObu audio_element_obu(
        GetHeaderFromMetadata(metadata.obu_header()), audio_element_id,
        audio_element_type, reserved, codec_config_id);

    // Audio substreams.
    RETURN_IF_NOT_OK(GenerateAudioSubstreams(metadata, audio_element_obu));

    // Parameter definitions need the matching Codec Config OBU.
    const auto codec_config_iter = codec_configs.find(codec_config_id);
    if (codec_config_iter == codec_configs.end()) {
      return InvalidArgumentError(StrCat(
          "Failed to find matching codec_config_id=", codec_config_id));
    }
    const CodecConfigObu& codec_config_obu = codec_config_iter->second;
    RETURN_IF_NOT_OK(GenerateParameterDefinitions(metadata, codec_config_obu,
                                                  audio_element_obu));

    // Insert before filling the type-specific config so that, even if the
    // remaining steps fail, the OBU will be destroyed by one of the
    // transitive callers of this function.
    const auto insertion = audio_elements.emplace(
        audio_element_id, AudioElementWithData{
                              .obu = std::move(audio_element_obu),
                              .codec_config = &codec_config_obu,
                          });
    if (!insertion.second) {
      return InvalidArgumentError(StrCat(
          "Inserting Audio Element with ID ", metadata.audio_element_id(),
          " failed because there is a duplicated element with the same ID"));
    }
    AudioElementWithData& new_audio_element = insertion.first->second;

    // Config data based on `audio_element_type`.
    switch (new_audio_element.obu.GetAudioElementType()) {
      using enum AudioElementObu::AudioElementType;
      case kAudioElementChannelBased:
        RETURN_IF_NOT_OK(FillScalableChannelLayoutConfig(
            metadata, codec_config_obu, new_audio_element));
        break;
      case kAudioElementSceneBased:
        RETURN_IF_NOT_OK(FillAmbisonicsConfig(metadata, new_audio_element));
        break;
      default:
        return InvalidArgumentError(
            StrCat("Unrecognized audio_element_type= ",
                   new_audio_element.obu.GetAudioElementType()));
    }
  }

  LogAudioElements(audio_elements);
  return absl::OkStatus();
}
650
651 } // namespace iamf_tools
652