1 /*
2 * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12
13 #include "iamf/cli/obu_with_data_generator.h"
14
15 #include <cstdint>
16 #include <list>
17 #include <memory>
18 #include <optional>
19 #include <utility>
20 #include <variant>
21 #include <vector>
22
23 #include "absl/container/flat_hash_map.h"
24 #include "absl/log/check.h"
25 #include "absl/log/log.h"
26 #include "absl/status/status.h"
27 #include "absl/status/statusor.h"
28 #include "absl/strings/str_cat.h"
29 #include "iamf/cli/audio_element_with_data.h"
30 #include "iamf/cli/audio_frame_with_data.h"
31 #include "iamf/cli/channel_label.h"
32 #include "iamf/cli/cli_util.h"
33 #include "iamf/cli/global_timing_module.h"
34 #include "iamf/cli/parameter_block_with_data.h"
35 #include "iamf/cli/parameters_manager.h"
36 #include "iamf/common/utils/macros.h"
37 #include "iamf/common/utils/numeric_utils.h"
38 #include "iamf/common/utils/validation_utils.h"
39 #include "iamf/obu/audio_element.h"
40 #include "iamf/obu/audio_frame.h"
41 #include "iamf/obu/codec_config.h"
42 #include "iamf/obu/demixing_info_parameter_data.h"
43 #include "iamf/obu/param_definitions.h"
44 #include "iamf/obu/parameter_block.h"
45 #include "iamf/obu/recon_gain_info_parameter_data.h"
46 #include "iamf/obu/types.h"
47
48 namespace iamf_tools {
49
50 using absl::InvalidArgumentError;
51 using absl::StrCat;
52
53 using enum ChannelLabel::Label;
54
55 namespace {
56
LoudspeakerLayoutToChannels(const ChannelAudioLayerConfig::LoudspeakerLayout loudspeaker_layout,ChannelNumbers & channels)57 absl::Status LoudspeakerLayoutToChannels(
58 const ChannelAudioLayerConfig::LoudspeakerLayout loudspeaker_layout,
59 ChannelNumbers& channels) {
60 switch (loudspeaker_layout) {
61 using enum ChannelAudioLayerConfig::LoudspeakerLayout;
62 case kLayoutMono:
63 channels = {1, 0, 0};
64 break;
65 case kLayoutStereo:
66 channels = {2, 0, 0};
67 break;
68 case kLayout5_1_ch:
69 channels = {5, 1, 0};
70 break;
71 case kLayout5_1_2_ch:
72 channels = {5, 1, 2};
73 break;
74 case kLayout5_1_4_ch:
75 channels = {5, 1, 4};
76 break;
77 case kLayout7_1_ch:
78 channels = {7, 1, 0};
79 break;
80 case kLayout7_1_2_ch:
81 channels = {7, 1, 2};
82 break;
83 case kLayout7_1_4_ch:
84 channels = {7, 1, 4};
85 break;
86 case kLayout3_1_2_ch:
87 channels = {3, 1, 2};
88 break;
89 case kLayoutBinaural:
90 channels = {2, 0, 0};
91 break;
92 default:
93 return InvalidArgumentError(
94 StrCat("Unknown loudspeaker_layout= ", loudspeaker_layout));
95 }
96 return absl::OkStatus();
97 }
98
99 // For the Base Channel Group (BCG). This is the first layer of a scalable audio
100 // element.
101 // https://aomediacodec.github.io/iamf/#scalablechannelaudio-channelgroupformat
CollectBaseChannelGroupLabels(const ChannelNumbers & layer_channels,std::list<ChannelLabel::Label> * coupled_substream_labels,std::list<ChannelLabel::Label> * non_coupled_substream_labels)102 absl::Status CollectBaseChannelGroupLabels(
103 const ChannelNumbers& layer_channels,
104 std::list<ChannelLabel::Label>* coupled_substream_labels,
105 std::list<ChannelLabel::Label>* non_coupled_substream_labels) {
106 switch (layer_channels.surround) {
107 case 1:
108 non_coupled_substream_labels->push_back(kMono);
109 break;
110 case 2:
111 coupled_substream_labels->push_back(kL2);
112 coupled_substream_labels->push_back(kR2);
113 break;
114 case 3:
115 coupled_substream_labels->push_back(kL3);
116 coupled_substream_labels->push_back(kR3);
117 non_coupled_substream_labels->push_back(kCentre);
118 break;
119 case 5:
120 coupled_substream_labels->push_back(kL5);
121 coupled_substream_labels->push_back(kR5);
122 coupled_substream_labels->push_back(kLs5);
123 coupled_substream_labels->push_back(kRs5);
124 non_coupled_substream_labels->push_back(kCentre);
125 break;
126 case 7:
127 coupled_substream_labels->push_back(kL7);
128 coupled_substream_labels->push_back(kR7);
129 coupled_substream_labels->push_back(kLss7);
130 coupled_substream_labels->push_back(kRss7);
131 coupled_substream_labels->push_back(kLrs7);
132 coupled_substream_labels->push_back(kRrs7);
133 non_coupled_substream_labels->push_back(kCentre);
134 break;
135 default:
136 LOG(ERROR) << "Unsupported number of surround channels: "
137 << layer_channels.surround;
138 return InvalidArgumentError(
139 StrCat("Unsupported number of surround channels: ",
140 layer_channels.surround));
141 }
142 switch (layer_channels.height) {
143 case 0:
144 // Not adding anything.
145 break;
146 case 2:
147 if (layer_channels.surround == 3) {
148 coupled_substream_labels->push_back(kLtf3);
149 coupled_substream_labels->push_back(kRtf3);
150 } else {
151 coupled_substream_labels->push_back(kLtf2);
152 coupled_substream_labels->push_back(kRtf2);
153 }
154 break;
155 case 4:
156 coupled_substream_labels->push_back(kLtf4);
157 coupled_substream_labels->push_back(kRtf4);
158 coupled_substream_labels->push_back(kLtb4);
159 coupled_substream_labels->push_back(kRtb4);
160 break;
161 default:
162 LOG(ERROR) << "Unsupported number of height channels: "
163 << layer_channels.height;
164 return InvalidArgumentError(StrCat(
165 "Unsupported number of height channels: ", layer_channels.height));
166 }
167 switch (layer_channels.lfe) {
168 case 0:
169 // Not adding anything.
170 break;
171 case 1:
172 non_coupled_substream_labels->push_back(kLFE);
173 break;
174 default:
175 return InvalidArgumentError(
176 StrCat("Unsupported number of LFE channels: ", layer_channels.lfe));
177 }
178
179 return absl::OkStatus();
180 }
181
CollectChannelLayersAndLabelsForExpandedLoudspeakerLayout(int layer_index,std::optional<ChannelAudioLayerConfig::ExpandedLoudspeakerLayout> expanded_loudspeaker_layout,ChannelNumbers & channel_numbers,std::list<ChannelLabel::Label> & coupled_substream_labels,std::list<ChannelLabel::Label> & non_coupled_substream_labels)182 absl::Status CollectChannelLayersAndLabelsForExpandedLoudspeakerLayout(
183 int layer_index,
184 std::optional<ChannelAudioLayerConfig::ExpandedLoudspeakerLayout>
185 expanded_loudspeaker_layout,
186 ChannelNumbers& channel_numbers,
187 std::list<ChannelLabel::Label>& coupled_substream_labels,
188 std::list<ChannelLabel::Label>& non_coupled_substream_labels) {
189 if (layer_index != 0) {
190 return absl::InvalidArgumentError(
191 "Expanded layout is only permitted when there is a single layer.");
192 }
193 RETURN_IF_NOT_OK(ValidateHasValue(expanded_loudspeaker_layout,
194 "Expanded layout is required."));
195
196 switch (*expanded_loudspeaker_layout) {
197 using enum ChannelAudioLayerConfig::ExpandedLoudspeakerLayout;
198 case kExpandedLayoutLFE:
199 channel_numbers = {0, 1, 0};
200 non_coupled_substream_labels = {kLFE};
201 break;
202 case kExpandedLayoutStereoS:
203 channel_numbers = {2, 0, 0};
204 coupled_substream_labels = {kLs5, kRs5};
205 break;
206 case kExpandedLayoutStereoSS:
207 channel_numbers = {2, 0, 0};
208 coupled_substream_labels = {kLss7, kRss7};
209 break;
210 case kExpandedLayoutStereoRS:
211 channel_numbers = {2, 0, 0};
212 coupled_substream_labels = {kLrs7, kRrs7};
213 break;
214 case kExpandedLayoutStereoTF:
215 channel_numbers = {0, 0, 2};
216 coupled_substream_labels = {kLtf4, kRtf4};
217 break;
218 case kExpandedLayoutStereoTB:
219 channel_numbers = {0, 0, 2};
220 coupled_substream_labels = {kLtb4, kRtb4};
221 break;
222 case kExpandedLayoutTop4Ch:
223 channel_numbers = {0, 0, 4};
224 coupled_substream_labels = {kLtf4, kRtf4, kLtb4, kRtb4};
225 break;
226 case kExpandedLayout3_0_ch:
227 channel_numbers = {3, 0, 0};
228 coupled_substream_labels = {kL7, kR7};
229 non_coupled_substream_labels = {kCentre};
230 break;
231 case kExpandedLayout9_1_6_ch:
232 channel_numbers = {9, 1, 6};
233 coupled_substream_labels = {kFLc, kFRc, kFL, kFR, kSiL,
234 kSiR, kBL, kBR, kTpFL, kTpFR,
235 kTpSiL, kTpSiR, kTpBL, kTpBR};
236 non_coupled_substream_labels = {kFC, kLFE};
237 break;
238 case kExpandedLayoutStereoF:
239 channel_numbers = {2, 0, 0};
240 coupled_substream_labels = {kFL, kFR};
241 break;
242 case kExpandedLayoutStereoSi:
243 channel_numbers = {2, 0, 0};
244 coupled_substream_labels = {kSiL, kSiR};
245 break;
246 case kExpandedLayoutStereoTpSi:
247 channel_numbers = {0, 0, 2};
248 coupled_substream_labels = {kTpSiL, kTpSiR};
249 break;
250 case kExpandedLayoutTop6Ch:
251 channel_numbers = {0, 0, 6};
252 coupled_substream_labels = {kTpFL, kTpFR, kTpSiL, kTpSiR, kTpBL, kTpBR};
253 break;
254 default:
255 return absl::InvalidArgumentError(
256 StrCat("Unsupported expanded loudspeaker layout= ",
257 *expanded_loudspeaker_layout));
258 }
259
260 LOG(INFO) << "Layer[" << layer_index << "]:";
261 LogChannelNumbers(" layer_channels", channel_numbers);
262
263 return absl::OkStatus();
264 }
265
266 // For the Demixed Channel Groups (DCG). This all layers after the first layer
267 // in a scalable audio element.
268 // https://aomediacodec.github.io/iamf/#scalablechannelaudio-channelgroupformat
CollectDemixedChannelGroupLabels(const ChannelNumbers & accumulated_channels,const ChannelNumbers & layer_channels,std::list<ChannelLabel::Label> * coupled_substream_labels,std::list<ChannelLabel::Label> * non_coupled_substream_labels)269 absl::Status CollectDemixedChannelGroupLabels(
270 const ChannelNumbers& accumulated_channels,
271 const ChannelNumbers& layer_channels,
272 std::list<ChannelLabel::Label>* coupled_substream_labels,
273 std::list<ChannelLabel::Label>* non_coupled_substream_labels) {
274 bool push_l2_in_the_end = false;
275 for (int surround = accumulated_channels.surround + 1;
276 surround <= layer_channels.surround; surround++) {
277 switch (surround) {
278 case 2:
279 // This is the special case where layer 1 is Mono and layer 2 is
280 // Stereo. According to the Spec 3.7.2
281 // (https://aomediacodec.github.io/iamf/#syntax-scalable-channel-layout-config):
282 // "The Centre (or Front Centre) channel comes first and is followed by
283 // the LFE (or LFE1) channel, and then the L channel.". Save pushing
284 // kL2 till the end.
285 push_l2_in_the_end = true;
286 break;
287 case 3:
288 non_coupled_substream_labels->push_back(kCentre);
289 break;
290 case 5:
291 coupled_substream_labels->push_back(kL5);
292 coupled_substream_labels->push_back(kR5);
293 break;
294 case 7:
295 coupled_substream_labels->push_back(kLss7);
296 coupled_substream_labels->push_back(kRss7);
297 break;
298 default:
299 if (surround > 7) {
300 return InvalidArgumentError(
301 StrCat("Unsupported number of surround channels: ", surround));
302 }
303 break;
304 }
305 }
306
307 if (layer_channels.height > accumulated_channels.height) {
308 if (accumulated_channels.height == 0) {
309 if (layer_channels.height == 4) {
310 coupled_substream_labels->push_back(kLtf4);
311 coupled_substream_labels->push_back(kRtf4);
312 coupled_substream_labels->push_back(kLtb4);
313 coupled_substream_labels->push_back(kRtb4);
314 } else if (layer_channels.height == 2) {
315 if (layer_channels.surround == 3) {
316 coupled_substream_labels->push_back(kLtf3);
317 coupled_substream_labels->push_back(kRtf3);
318 } else {
319 coupled_substream_labels->push_back(kLtf2);
320 coupled_substream_labels->push_back(kRtf2);
321 }
322 } else {
323 return InvalidArgumentError(StrCat(
324 "Unsupported number of height channels: ", layer_channels.height));
325 }
326 } else if (accumulated_channels.height == 2) {
327 coupled_substream_labels->push_back(kLtf4);
328 coupled_substream_labels->push_back(kRtf4);
329 } else {
330 return InvalidArgumentError(
331 absl::StrCat("Unsupported number of height channels: ",
332 accumulated_channels.height));
333 }
334 }
335
336 if (layer_channels.lfe > accumulated_channels.lfe) {
337 if (layer_channels.lfe == 1) {
338 non_coupled_substream_labels->push_back(kLFE);
339 } else {
340 return InvalidArgumentError(
341 StrCat("Unsupported number of LFE channels: ", layer_channels.lfe));
342 }
343 }
344
345 if (push_l2_in_the_end) {
346 non_coupled_substream_labels->push_back(kL2);
347 }
348
349 return absl::OkStatus();
350 }
351
AddSubstreamLabels(const std::list<ChannelLabel::Label> & coupled_substream_labels,const std::list<ChannelLabel::Label> & non_coupled_substream_labels,const std::vector<DecodedUleb128> & substream_ids,SubstreamIdLabelsMap & substream_id_to_labels,int & substream_index)352 absl::Status AddSubstreamLabels(
353 const std::list<ChannelLabel::Label>& coupled_substream_labels,
354 const std::list<ChannelLabel::Label>& non_coupled_substream_labels,
355 const std::vector<DecodedUleb128>& substream_ids,
356 SubstreamIdLabelsMap& substream_id_to_labels, int& substream_index) {
357 CHECK_EQ(coupled_substream_labels.size() % 2, 0);
358 // Determine how many substream IDs will be used below. This helps prevent
359 // indexing `substream_ids` out of bounds.
360 const auto substreams_to_add =
361 coupled_substream_labels.size() / 2 + non_coupled_substream_labels.size();
362 if (substream_index + substreams_to_add > substream_ids.size()) {
363 return absl::OutOfRangeError(
364 absl::StrCat("Too few substream IDs are present to assign all labels. "
365 "substream_ids.size()= ",
366 substream_ids.size()));
367 }
368
369 // First add coupled substream labels, two at a time.
370 for (auto iter = coupled_substream_labels.begin();
371 iter != coupled_substream_labels.end() &&
372 substream_index < substream_ids.size();) {
373 const auto substream_id = substream_ids[substream_index++];
374 auto& labels_for_substream_id = substream_id_to_labels[substream_id];
375 const auto first_label = *iter++;
376 const auto second_label = *iter++;
377
378 labels_for_substream_id.push_back(first_label);
379 labels_for_substream_id.push_back(second_label);
380 LOG(INFO) << " substream_id_to_labels[" << substream_id
381 << "]: " << first_label << "/" << second_label;
382 }
383
384 // Then add non-coupled substream labels.
385 for (auto iter = non_coupled_substream_labels.begin();
386 iter != non_coupled_substream_labels.end();) {
387 const auto substream_id = substream_ids[substream_index++];
388 substream_id_to_labels[substream_id].push_back(*iter++);
389 LOG(INFO) << " substream_id_to_labels[" << substream_id
390 << "]: " << substream_id_to_labels[substream_id].back();
391 }
392 return absl::OkStatus();
393 }
394
ValidateSubstreamCounts(const std::list<ChannelLabel::Label> & coupled_substream_labels,const std::list<ChannelLabel::Label> & non_coupled_substream_labels,const ChannelAudioLayerConfig & layer_config)395 absl::Status ValidateSubstreamCounts(
396 const std::list<ChannelLabel::Label>& coupled_substream_labels,
397 const std::list<ChannelLabel::Label>& non_coupled_substream_labels,
398 const ChannelAudioLayerConfig& layer_config) {
399 const auto num_required_coupled_channels =
400 static_cast<uint32_t>(coupled_substream_labels.size()) / 2;
401 const auto num_required_non_coupled_channels =
402 static_cast<uint32_t>(non_coupled_substream_labels.size());
403 LOG(INFO) << "num_required_coupled_channels = "
404 << num_required_coupled_channels;
405 LOG(INFO) << "num_required_non_coupled_channels= "
406 << num_required_non_coupled_channels;
407
408 const auto coupled_substream_count_in_obu =
409 static_cast<uint32_t>(layer_config.coupled_substream_count);
410 const auto substream_count_in_obu =
411 static_cast<uint32_t>(layer_config.substream_count);
412 if (coupled_substream_count_in_obu != num_required_coupled_channels) {
413 return InvalidArgumentError(StrCat(
414 "Coupled substream count different from the required number. In OBU: ",
415 coupled_substream_count_in_obu,
416 " vs expected: ", num_required_coupled_channels));
417 }
418
419 // The sum of coupled and non-coupled channels must be the same as
420 // the `substream_count` recorded in the OBU.
421 if (substream_count_in_obu !=
422 (num_required_non_coupled_channels + num_required_coupled_channels)) {
423 return InvalidArgumentError(StrCat(
424 "Substream count different from the #non-coupled substreams. In OBU: ",
425 substream_count_in_obu, " vs expected: ",
426 num_required_non_coupled_channels + num_required_coupled_channels));
427 }
428
429 return absl::OkStatus();
430 }
431
OutputGainApplies(const uint8_t output_gain_flag,ChannelLabel::Label label)432 bool OutputGainApplies(const uint8_t output_gain_flag,
433 ChannelLabel::Label label) {
434 switch (label) {
435 case kMono:
436 case kL2:
437 case kL3:
438 return output_gain_flag & (1 << 5);
439 case kR2:
440 case kR3:
441 return output_gain_flag & (1 << 4);
442 case kLs5:
443 return output_gain_flag & (1 << 3);
444 case kRs5:
445 return output_gain_flag & (1 << 2);
446 case kLtf2:
447 case kLtf3:
448 return output_gain_flag & (1 << 1);
449 case kRtf2:
450 case kRtf3:
451 return output_gain_flag & 1;
452 default:
453 return false;
454 }
455 }
456
FinalizeAmbisonicsMonoConfig(const AudioElementObu & audio_element_obu,const AmbisonicsMonoConfig & mono_config,SubstreamIdLabelsMap & substream_id_to_labels)457 absl::Status FinalizeAmbisonicsMonoConfig(
458 const AudioElementObu& audio_element_obu,
459 const AmbisonicsMonoConfig& mono_config,
460 SubstreamIdLabelsMap& substream_id_to_labels) {
461 // Fill `substream_id_to_labels`. `channel_mapping` encodes the mapping of
462 // Ambisonics Channel Number (ACN) to substream index.
463 for (int ambisonics_channel_number = 0;
464 ambisonics_channel_number < mono_config.channel_mapping.size();
465 ++ambisonics_channel_number) {
466 const uint8_t obu_substream_index =
467 mono_config.channel_mapping[ambisonics_channel_number];
468 if (obu_substream_index ==
469 AmbisonicsMonoConfig::kInactiveAmbisonicsChannelNumber) {
470 LOG(INFO) << "Detected mixed-order ambisonics with A"
471 << ambisonics_channel_number << " dropped.";
472 continue;
473 }
474 const DecodedUleb128 substream_id =
475 audio_element_obu.audio_substream_ids_[obu_substream_index];
476
477 // Add the associated ACN to the labels associated with that substream.
478 const auto ambisonics_label =
479 ChannelLabel::AmbisonicsChannelNumberToLabel(ambisonics_channel_number);
480 if (!ambisonics_label.ok()) {
481 return ambisonics_label.status();
482 }
483 substream_id_to_labels[substream_id].push_back(*ambisonics_label);
484 }
485 return absl::OkStatus();
486 }
487
FinalizeAmbisonicsProjectionConfig(const AudioElementObu & audio_element_obu,const AmbisonicsProjectionConfig & projection_config,SubstreamIdLabelsMap & substream_id_to_labels)488 absl::Status FinalizeAmbisonicsProjectionConfig(
489 const AudioElementObu& audio_element_obu,
490 const AmbisonicsProjectionConfig& projection_config,
491 SubstreamIdLabelsMap& substream_id_to_labels) {
492 if (audio_element_obu.num_substreams_ !=
493 static_cast<uint32_t>(projection_config.substream_count)) {
494 return InvalidArgumentError(
495 StrCat("`num_substreams` different from `substream_count`: (",
496 audio_element_obu.num_substreams_, " vs ",
497 projection_config.substream_count, ")"));
498 }
499
500 // For projection mode, assume coupled substreams (using 2 channels) come
501 // first and are followed by non-coupled substreams (using 1 channel each).
502 for (int i = 0; i < audio_element_obu.num_substreams_; ++i) {
503 const std::list<int> ambisonic_channel_numbers =
504 i < projection_config.coupled_substream_count
505 ? std::list<int>{2 * i, 2 * i + 1}
506 : std::list<int>{2 * projection_config.coupled_substream_count + i};
507 for (const auto ambisonic_channel_number : ambisonic_channel_numbers) {
508 const auto ambisonics_label =
509 ChannelLabel::AmbisonicsChannelNumberToLabel(
510 ambisonic_channel_number);
511 if (!ambisonics_label.ok()) {
512 return ambisonics_label.status();
513 }
514 substream_id_to_labels[audio_element_obu.audio_substream_ids_[i]]
515 .push_back(*ambisonics_label);
516 }
517 }
518 return absl::OkStatus();
519 }
520
CollectChannelLayersAndLabelsForLoudspeakerLayout(int layer_index,ChannelAudioLayerConfig::LoudspeakerLayout loudspeaker_layout,const ChannelNumbers & accumulated_channels,ChannelNumbers & layer_channels,std::list<ChannelLabel::Label> & coupled_substream_labels,std::list<ChannelLabel::Label> & non_coupled_substream_labels)521 absl::Status CollectChannelLayersAndLabelsForLoudspeakerLayout(
522 int layer_index,
523 ChannelAudioLayerConfig::LoudspeakerLayout loudspeaker_layout,
524 const ChannelNumbers& accumulated_channels, ChannelNumbers& layer_channels,
525 std::list<ChannelLabel::Label>& coupled_substream_labels,
526 std::list<ChannelLabel::Label>& non_coupled_substream_labels) {
527 // Figure out the `ChannelNumber` representation of ChannelGroup #i, i.e.
528 // the additional channels presented in this layer.
529 RETURN_IF_NOT_OK(
530 LoudspeakerLayoutToChannels(loudspeaker_layout, layer_channels));
531
532 // Channel number in each group can only grow or stay the same.
533 if (layer_channels.surround < accumulated_channels.surround ||
534 layer_channels.lfe < accumulated_channels.lfe ||
535 layer_channels.height < accumulated_channels.height) {
536 LogChannelNumbers("From", accumulated_channels);
537 LogChannelNumbers("To", layer_channels);
538 return InvalidArgumentError(
539 StrCat("At least one channel number decreased from "
540 "accumulated_channels to layer_channels"));
541 }
542
543 LOG(INFO) << "Layer[" << layer_index << "]:";
544 LogChannelNumbers(" layer_channels", layer_channels);
545 LogChannelNumbers(" accumulated_channels", accumulated_channels);
546
547 if (layer_index == 0) {
548 return CollectBaseChannelGroupLabels(layer_channels,
549 &coupled_substream_labels,
550 &non_coupled_substream_labels);
551 } else {
552 return CollectDemixedChannelGroupLabels(
553 accumulated_channels, layer_channels, &coupled_substream_labels,
554 &non_coupled_substream_labels);
555 }
556 }
557
558 } // namespace
559
560 absl::StatusOr<absl::flat_hash_map<DecodedUleb128, AudioElementWithData>>
GenerateAudioElementsWithData(const absl::flat_hash_map<DecodedUleb128,CodecConfigObu> & codec_config_obus,absl::flat_hash_map<DecodedUleb128,AudioElementObu> & audio_element_obus)561 ObuWithDataGenerator::GenerateAudioElementsWithData(
562 const absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
563 codec_config_obus,
564 absl::flat_hash_map<DecodedUleb128, AudioElementObu>& audio_element_obus) {
565 absl::flat_hash_map<DecodedUleb128, AudioElementWithData>
566 audio_element_with_data;
567 for (auto& [audio_element_id, audio_element_obu] : audio_element_obus) {
568 SubstreamIdLabelsMap substream_id_to_labels;
569 LabelGainMap label_to_output_gain;
570 std::vector<ChannelNumbers> channel_numbers_for_layers;
571 if (audio_element_obu.GetAudioElementType() ==
572 AudioElementObu::AudioElementType::kAudioElementChannelBased) {
573 if (!std::holds_alternative<ScalableChannelLayoutConfig>(
574 audio_element_obu.config_)) {
575 return absl::InvalidArgumentError(
576 "Audio Element OBU signals it holds a scalable channel layout "
577 "config, but one is not present.");
578 }
579
580 RETURN_IF_NOT_OK(
581 ObuWithDataGenerator::FinalizeScalableChannelLayoutConfig(
582 audio_element_obu.audio_substream_ids_,
583 std::get<ScalableChannelLayoutConfig>(audio_element_obu.config_),
584 substream_id_to_labels, label_to_output_gain,
585 channel_numbers_for_layers));
586 }
587 if (audio_element_obu.GetAudioElementType() ==
588 AudioElementObu::AudioElementType::kAudioElementSceneBased) {
589 RETURN_IF_NOT_OK(ObuWithDataGenerator::FinalizeAmbisonicsConfig(
590 audio_element_obu, substream_id_to_labels));
591 }
592 auto iter = codec_config_obus.find(audio_element_obu.GetCodecConfigId());
593 if (iter == codec_config_obus.end()) {
594 return absl::InvalidArgumentError(
595 "codec_config_obus does not contain codec_config_id");
596 }
597 audio_element_with_data.emplace(
598 audio_element_id,
599 AudioElementWithData{
600 .obu = std::move(audio_element_obu),
601 .codec_config = &iter->second,
602 .substream_id_to_labels = substream_id_to_labels,
603 .label_to_output_gain = label_to_output_gain,
604 .channel_numbers_for_layers = channel_numbers_for_layers});
605 }
606 audio_element_obus.clear();
607 return audio_element_with_data;
608 }
609
610 absl::StatusOr<AudioFrameWithData>
GenerateAudioFrameWithData(const AudioElementWithData & audio_element_with_data,const AudioFrameObu & audio_frame_obu,GlobalTimingModule & global_timing_module,ParametersManager & parameters_manager)611 ObuWithDataGenerator::GenerateAudioFrameWithData(
612 const AudioElementWithData& audio_element_with_data,
613 const AudioFrameObu& audio_frame_obu,
614 GlobalTimingModule& global_timing_module,
615 ParametersManager& parameters_manager) {
616 const auto audio_substream_id = audio_frame_obu.GetSubstreamId();
617 const auto audio_element_id = audio_element_with_data.obu.GetAudioElementId();
618
619 // Make sure we have the correct audio element.
620 if (!audio_element_with_data.substream_id_to_labels.contains(
621 audio_substream_id)) {
622 return absl::InvalidArgumentError(absl::StrCat(
623 "Audio element with ID= ", audio_element_id,
624 " does not contain a substream with ID= ", audio_substream_id));
625 }
626
627 const uint32_t duration =
628 audio_element_with_data.codec_config->GetNumSamplesPerFrame();
629
630 // Get the timestamps and demixing and recon-gain parameters to fill in
631 // `AudioFrameWithData`.
632 InternalTimestamp start_timestamp;
633 InternalTimestamp end_timestamp;
634 RETURN_IF_NOT_OK(global_timing_module.GetNextAudioFrameTimestamps(
635 audio_substream_id, duration, start_timestamp, end_timestamp));
636 DownMixingParams down_mixing_params;
637 RETURN_IF_NOT_OK(parameters_manager.GetDownMixingParameters(
638 audio_element_id, down_mixing_params));
639 ReconGainInfoParameterData recon_gain_info_parameter_data;
640 RETURN_IF_NOT_OK(parameters_manager.GetReconGainInfoParameterData(
641 audio_element_id,
642 audio_element_with_data.channel_numbers_for_layers.size(),
643 recon_gain_info_parameter_data));
644
645 return AudioFrameWithData{
646 .obu = std::move(audio_frame_obu),
647 .start_timestamp = start_timestamp,
648 .end_timestamp = end_timestamp,
649 .pcm_samples = std::nullopt, // The PCM samples cannot be derived from
650 // the bitstream.
651 .down_mixing_params = down_mixing_params,
652 .recon_gain_info_parameter_data = recon_gain_info_parameter_data,
653 .audio_element_with_data = &audio_element_with_data};
654 }
655
656 absl::StatusOr<ParameterBlockWithData>
GenerateParameterBlockWithData(InternalTimestamp input_start_timestamp,GlobalTimingModule & global_timing_module,std::unique_ptr<ParameterBlockObu> parameter_block_obu)657 ObuWithDataGenerator::GenerateParameterBlockWithData(
658 InternalTimestamp input_start_timestamp,
659 GlobalTimingModule& global_timing_module,
660 std::unique_ptr<ParameterBlockObu> parameter_block_obu) {
661 InternalTimestamp start_timestamp;
662 InternalTimestamp end_timestamp;
663 RETURN_IF_NOT_OK(global_timing_module.GetNextParameterBlockTimestamps(
664 parameter_block_obu->parameter_id_, input_start_timestamp,
665 parameter_block_obu->GetDuration(), start_timestamp, end_timestamp));
666 return ParameterBlockWithData{.obu = std::move(parameter_block_obu),
667 .start_timestamp = start_timestamp,
668 .end_timestamp = end_timestamp};
669 }
670
FinalizeScalableChannelLayoutConfig(const std::vector<DecodedUleb128> & audio_substream_ids,const ScalableChannelLayoutConfig & config,SubstreamIdLabelsMap & substream_id_to_labels,LabelGainMap & label_to_output_gain,std::vector<ChannelNumbers> & channel_numbers_for_layers)671 absl::Status ObuWithDataGenerator::FinalizeScalableChannelLayoutConfig(
672 const std::vector<DecodedUleb128>& audio_substream_ids,
673 const ScalableChannelLayoutConfig& config,
674 SubstreamIdLabelsMap& substream_id_to_labels,
675 LabelGainMap& label_to_output_gain,
676 std::vector<ChannelNumbers>& channel_numbers_for_layers) {
677 RETURN_IF_NOT_OK(ValidateUnique(audio_substream_ids.begin(),
678 audio_substream_ids.end(),
679 "audio_substream_ids"));
680 // Starting from no channel at all.
681 ChannelNumbers accumulated_channels = {0, 0, 0};
682 int substream_index = 0;
683 channel_numbers_for_layers.reserve(config.num_layers);
684 for (int i = 0; i < config.num_layers; ++i) {
685 const int previous_layer_substream_index = substream_index;
686
687 ChannelNumbers layer_channels;
688 std::list<ChannelLabel::Label> coupled_substream_labels;
689 std::list<ChannelLabel::Label> non_coupled_substream_labels;
690 const auto& layer_config = config.channel_audio_layer_configs[i];
691 if (layer_config.loudspeaker_layout ==
692 ChannelAudioLayerConfig::kLayoutExpanded) {
693 RETURN_IF_NOT_OK(
694 CollectChannelLayersAndLabelsForExpandedLoudspeakerLayout(
695 i, layer_config.expanded_loudspeaker_layout, layer_channels,
696 coupled_substream_labels, non_coupled_substream_labels));
697 } else {
698 RETURN_IF_NOT_OK(CollectChannelLayersAndLabelsForLoudspeakerLayout(
699 i, layer_config.loudspeaker_layout, accumulated_channels,
700 layer_channels, coupled_substream_labels,
701 non_coupled_substream_labels));
702 }
703
704 channel_numbers_for_layers.push_back(layer_channels);
705
706 RETURN_IF_NOT_OK(AddSubstreamLabels(
707 coupled_substream_labels, non_coupled_substream_labels,
708 audio_substream_ids, substream_id_to_labels, substream_index));
709 RETURN_IF_NOT_OK(ValidateSubstreamCounts(
710 coupled_substream_labels, non_coupled_substream_labels, layer_config));
711
712 accumulated_channels = layer_channels;
713
714 // Handle output gains.
715 if (layer_config.output_gain_is_present_flag == 1) {
716 // Loop through all substream IDs added in this layer.
717 for (int i = previous_layer_substream_index; i < substream_index; i++) {
718 const auto substream_id = audio_substream_ids[i];
719
720 LOG(INFO) << "Output gain for substream ID: " << substream_id << ":";
721 for (const auto& label : substream_id_to_labels.at(substream_id)) {
722 if (OutputGainApplies(layer_config.output_gain_flag, label)) {
723 label_to_output_gain[label] = Q7_8ToFloat(layer_config.output_gain);
724 LOG(INFO) << " " << label << ": Q7.8= " << layer_config.output_gain
725 << "; dB= " << label_to_output_gain[label];
726 } else {
727 LOG(INFO) << " " << label << ": (not found)";
728 }
729 }
730 }
731 }
732 }
733
734 // Validate that all substreams were assigned at least one label.
735 RETURN_IF_NOT_OK(ValidateEqual(
736 audio_substream_ids.size(), substream_id_to_labels.size(),
737 "audio_substream_ids.size() vs. substream_id_to_labels.size()"));
738
739 return absl::OkStatus();
740 }
741
742 // TODO(b/340540080): Add tests for this function and remove fragility, for
743 // example, null pointers, get<> that can fail, etc.
FinalizeAmbisonicsConfig(const AudioElementObu & audio_element_obu,SubstreamIdLabelsMap & substream_id_to_labels)744 absl::Status ObuWithDataGenerator::FinalizeAmbisonicsConfig(
745 const AudioElementObu& audio_element_obu,
746 SubstreamIdLabelsMap& substream_id_to_labels) {
747 if (audio_element_obu.GetAudioElementType() !=
748 AudioElementObu::AudioElementType::kAudioElementSceneBased) {
749 return InvalidArgumentError(
750 "Cannot finalize AmbisonicsMonoConfig for a non-scene-based Audio "
751 "Element OBU.");
752 }
753 const auto& ambisonics_config =
754 std::get<AmbisonicsConfig>(audio_element_obu.config_);
755 switch (ambisonics_config.ambisonics_mode) {
756 case AmbisonicsConfig::AmbisonicsMode::kAmbisonicsModeMono:
757 return FinalizeAmbisonicsMonoConfig(
758 audio_element_obu,
759 std::get<AmbisonicsMonoConfig>(ambisonics_config.ambisonics_config),
760 substream_id_to_labels);
761 case AmbisonicsConfig::AmbisonicsMode::kAmbisonicsModeProjection:
762 return FinalizeAmbisonicsProjectionConfig(
763 audio_element_obu,
764 std::get<AmbisonicsProjectionConfig>(
765 ambisonics_config.ambisonics_config),
766 substream_id_to_labels);
767 default:
768 return absl::UnimplementedError(
769 StrCat("Unimplemented Ambisonics mode: ",
770 ambisonics_config.ambisonics_mode));
771 }
772 }
773
774 } // namespace iamf_tools
775