1 /*
2 * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 3-Clause Clear License
5 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6 * License was not distributed with this source code in the LICENSE file, you
7 * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8 * Alliance for Open Media Patent License 1.0 was not distributed with this
9 * source code in the PATENTS file, you can obtain it at
10 * www.aomedia.org/license/patent.
11 */
12
13 #include "iamf/cli/adm_to_user_metadata/adm/xml_to_adm.h"
14
#include <algorithm>
#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <ios>
#include <iterator>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
24
25 #include "absl/base/no_destructor.h"
26 #include "absl/container/flat_hash_set.h"
27 #include "absl/log/check.h"
28 #include "absl/log/log.h"
29 #include "absl/status/status.h"
30 #include "absl/status/statusor.h"
31 #include "absl/strings/numbers.h"
32 #include "absl/strings/str_cat.h"
33 #include "absl/strings/string_view.h"
34 #include "expat/lib/expat.h"
35 #include "expat/lib/expat_external.h"
36 #include "iamf/cli/adm_to_user_metadata/adm/adm_elements.h"
37
38 namespace iamf_tools {
39 namespace adm_to_user_metadata {
40
41 namespace {
42 constexpr absl::string_view kTypeDefinitionDirectSpeakers = "0001";
43 constexpr absl::string_view kTypeDefinitionObject = "0003";
44 constexpr absl::string_view kTypeDefinitionHOA = "0004";
45 constexpr absl::string_view kTypeDefinitionBinaural = "0005";
46
// ADM elements the parser distinguishes. `Handler::parent` holds the most
// recently opened one so that character data can be routed to it.
enum AdmElement {
  kAudioProgramme,  // `audioProgramme` element.
  kAudioContent,    // `audioContent` element.
  kAudioObject,     // `audioObject` element.
  kAudioPack,       // `audioPackFormat` element.
  kAudioChannel,    // `audioChannelFormat` element.
  kAudioBlock,      // `audioBlockFormat` element.
  kElementDefault   // No recognized element has been opened.
};
57
// Sub-elements of an audio programme whose character data is collected.
enum AdmProgrammeElement {
  kAudioContentIDRef,                   // `audioContentIDRef` element.
  kIntegratedLoudness,                  // `integratedLoudness` element.
  kMaxTruePeak,                         // `maxTruePeak` element.
  kDialogueLoudness,                    // `dialogueLoudness` element.
  kAudioProgrammeAudioPackFormatIDRef,  // `audioPackFormatIDRef` element.
  kProgrammeDefault                     // No sub-element is pending.
};
67
// Sub-elements of an audio content whose character data is collected.
enum AdmContentElement {
  kAudioObjectIDRef,  // `audioObjectIDRef` element.
  kContentDefault     // No sub-element is pending.
};
70
// Sub-elements of an audio object whose character data is collected.
enum AdmObjectElement {
  kAudioObjectAudioPackFormatIDRef,  // `audioPackFormatIDRef` element.
  kAudioTrackUIDRef,                 // `audioTrackUIDRef` element.
  kAudioComplementaryObjectIDRef,    // `audioComplementaryObjectIDRef`.
  kGain,                             // `gain` element.
  kAudioObjectLabel,                 // `audioObjectLabel` element.
  kObjectDefault                     // No sub-element is pending.
};
80
// Sub-elements of an audio pack format whose character data is collected.
enum AdmPackFormat {
  kAudioPackAudioChannelFormatIDRef,  // `audioChannelFormatIDRef` element.
  kAudioPackLabel,                    // `audioPackLabel` element.
  kPackDefault                        // No sub-element is pending.
};
87
// Sub-elements of an audio channel format whose character data is collected.
enum AdmChannelFormat {
  kAudioChannelLabel,  // A label sub-element is pending.
  kChannelDefault      // No sub-element is pending.
};
90
// Sub-elements of an audio block format whose character data is collected.
enum AdmBlockFormat {
  kX,                // `position` element carrying the attribute value "X".
  kY,                // `position` element carrying the attribute value "Y".
  kZ,                // `position` element carrying the attribute value "Z".
  kAudioBlockLabel,  // `audioBlockFormatID` element.
  kBlockDefault      // No sub-element is pending.
};
99
100 // This class is used by xml parser to collect and store various attributes and
101 // information of xml.
102 struct Handler {
103 ADM adm;
104 absl::flat_hash_set<std::string> invalid_audio_objects;
105 std::string audio_object_id;
106
107 AdmElement parent = kElementDefault;
108 AdmProgrammeElement audio_programme_tag = kProgrammeDefault;
109 AdmContentElement audio_content_tag = kContentDefault;
110 AdmObjectElement audio_object_tag = kObjectDefault;
111 AdmPackFormat audio_pack_tag = kPackDefault;
112 AdmChannelFormat audio_channel_tag = kChannelDefault;
113 AdmBlockFormat audio_block_tag = kBlockDefault;
114
115 absl::Status status = absl::OkStatus();
116 };
117
UpdateErrorStatusIfFalse(bool status,absl::string_view field_name,Handler & handler)118 void UpdateErrorStatusIfFalse(bool status, absl::string_view field_name,
119 Handler& handler) {
120 if (!status && handler.status.ok()) {
121 handler.status = absl::InvalidArgumentError(
122 absl::StrCat("Failed to parse ", field_name));
123 }
124 }
125
126 // This function sets the handler's tag for program, content, or object based
127 // upon the name attribute.
SetHandlerTag(absl::string_view name,const char ** atts,Handler & handler)128 void SetHandlerTag(absl::string_view name, const char** atts,
129 Handler& handler) {
130 if (name == "audioContentIDRef") {
131 handler.audio_programme_tag = kAudioContentIDRef;
132 } else if (name == "integratedLoudness") {
133 handler.audio_programme_tag = kIntegratedLoudness;
134 } else if (name == "maxTruePeak") {
135 handler.audio_programme_tag = kMaxTruePeak;
136 } else if (name == "dialogueLoudness") {
137 handler.audio_programme_tag = kDialogueLoudness;
138 } else if (name == "audioObjectIDRef") {
139 handler.audio_content_tag = kAudioObjectIDRef;
140 } else if (name == "audioPackFormatIDRef") {
141 if (handler.parent == kAudioProgramme) {
142 handler.audio_programme_tag = kAudioProgrammeAudioPackFormatIDRef;
143 } else {
144 handler.audio_object_tag = kAudioObjectAudioPackFormatIDRef;
145 }
146 } else if (name == "audioTrackUIDRef") {
147 handler.audio_object_tag = kAudioTrackUIDRef;
148 } else if (name == "audioComplementaryObjectIDRef") {
149 handler.audio_object_tag = kAudioComplementaryObjectIDRef;
150 } else if (name == "gain") {
151 handler.audio_object_tag = kGain;
152 } else if (name == "audioObjectLabel") {
153 handler.audio_object_tag = kAudioObjectLabel;
154 } else if (name == "audioPackLabel") {
155 handler.audio_pack_tag = kAudioPackLabel;
156 } else if (name == "audioChannelFormatIDRef") {
157 handler.audio_pack_tag = kAudioPackAudioChannelFormatIDRef;
158 } else if (name == "position") {
159 handler.audio_block_tag = kBlockDefault;
160 for (int32_t i = 0; atts[i]; i += 2) {
161 if ((std::string)atts[i + 1] == "X") {
162 handler.audio_block_tag = kX;
163 } else if ((std::string)atts[i + 1] == "Y") {
164 handler.audio_block_tag = kY;
165 } else if ((std::string)atts[i + 1] == "Z") {
166 handler.audio_block_tag = kZ;
167 }
168 }
169 } else if (name == "audioBlockFormatID") {
170 handler.audio_block_tag = kAudioBlockLabel;
171 }
172 }
173
174 // Sets the attributes of AudioProgramme.
SetAudioProgrammeValue(absl::string_view key,absl::string_view value,AudioProgramme & audio_programme)175 void SetAudioProgrammeValue(absl::string_view key, absl::string_view value,
176 AudioProgramme& audio_programme) {
177 if (key == "audioProgrammeID") {
178 audio_programme.id = value;
179 } else if (key == "audioProgrammeName") {
180 audio_programme.name = value;
181 } else if (key == "audioProgrammeLabel") {
182 audio_programme.audio_programme_label = value;
183 }
184 }
185
186 // Sets the attributes of AudioContent.
SetAudioContentValue(absl::string_view key,absl::string_view value,AudioContent & audio_content)187 void SetAudioContentValue(absl::string_view key, absl::string_view value,
188 AudioContent& audio_content) {
189 if (key == "audioContentID") {
190 audio_content.id = value;
191 } else if (key == "audioContentName") {
192 audio_content.name = value;
193 }
194 }
195
196 // Sets the attributes of AudioObject.
SetAudioObjectValue(absl::string_view key,absl::string_view value,AudioObject & audio_object,Handler & handler)197 void SetAudioObjectValue(absl::string_view key, absl::string_view value,
198 AudioObject& audio_object, Handler& handler) {
199 if (key == "audioObjectID") {
200 handler.audio_object_id = value;
201 audio_object.id = value;
202 } else if (key == "audioObjectName") {
203 audio_object.name = value;
204 } else if (key == "importance") {
205 UpdateErrorStatusIfFalse(absl::SimpleAtoi(value, &audio_object.importance),
206 "importance", handler);
207 }
208 }
209
210 // Sets the attributes of AudioPack.
SetAudioPackValue(absl::string_view key,absl::string_view value,AudioPackFormat & audio_pack)211 void SetAudioPackValue(absl::string_view key, absl::string_view value,
212 AudioPackFormat& audio_pack) {
213 if (key == "audioPackFormatID") {
214 audio_pack.id = (std::string)value;
215 } else if (key == "audioPackFormatName") {
216 audio_pack.name = (std::string)value;
217 } else if (key == "typeLabel") {
218 audio_pack.audio_pack_label = (std::string)value;
219 }
220 }
221
222 // Sets the attributes of AudioChannel.
SetAudioChannelValue(absl::string_view key,absl::string_view value,AudioChannelFormat & audio_channel)223 void SetAudioChannelValue(absl::string_view key, absl::string_view value,
224 AudioChannelFormat& audio_channel) {
225 if (key == "audioChannelFormatID") {
226 audio_channel.id = (std::string)value;
227 } else if (key == "audioChannelFormatName") {
228 audio_channel.name = (std::string)value;
229 } else if (key == "typeLabel") {
230 audio_channel.audio_channel_label = (std::string)value;
231 }
232 }
233
234 // Parse and store the timing information in Audio Block.
235 // The input string which holds the timing information will be in the format
236 // 'hh:mm:ss.zzzzz'.
ParseTimingInfo(absl::string_view time_string,BlockTime & time)237 void ParseTimingInfo(absl::string_view time_string, BlockTime& time) {
238 time.hour = std::stoi(std::string(time_string.substr(0, 2)));
239 time.minute = std::stoi(std::string(time_string.substr(3, 2)));
240 time.second = std::stod(std::string(time_string.substr(6)));
241 }
242
243 // Sets the attributes of AudioBlock.
SetAudioBlockValue(absl::string_view key,absl::string_view value,AudioBlockFormat & audio_block)244 void SetAudioBlockValue(absl::string_view key, absl::string_view value,
245 AudioBlockFormat& audio_block) {
246 if (key == "audioBlockFormatID") {
247 audio_block.id = value;
248 } else if (key == "rtime") {
249 ParseTimingInfo(value, audio_block.rtime);
250 } else if (key == "duration") {
251 ParseTimingInfo(value, audio_block.duration);
252 }
253 }
254
255 // Removes objects from the ADM structure based on the given importance
256 // threshold. Also, removes audio objects with IDs found in the set of invalid
257 // audio objects.
RemoveLowImportanceAndInvalidAudioObjects(int32_t importance_threshold,Handler & handler)258 void RemoveLowImportanceAndInvalidAudioObjects(int32_t importance_threshold,
259 Handler& handler) {
260 std::vector<AudioObject>& audio_object_list = handler.adm.audio_objects;
261 audio_object_list.erase(
262 std::remove_if(audio_object_list.begin(), audio_object_list.end(),
263 [&](AudioObject value) {
264 return value.importance < importance_threshold ||
265 (handler.invalid_audio_objects.find(value.id) !=
266 handler.invalid_audio_objects.end());
267 }),
268 audio_object_list.end());
269 }
270
271 // Checks if the metadata is user defined or part of the common definitions.
272 // NOTE: An ADM audioPackFormatID AP_yyyyxxxx which belongs to common
273 // definitions would have 'xxxx' in the range [0x0001, 0x0FFF].
IsUserMetadataDefined(absl::string_view xxxx_substring)274 bool IsUserMetadataDefined(absl::string_view xxxx_substring) {
275 std::istringstream iss(std::string(xxxx_substring), std::ios_base::in);
276 int32_t int_value;
277 iss >> std::hex >> int_value;
278 return int_value > 0x0fff;
279 }
280
281 // Validates the specific layout in terms of the 'xxxx' digits of
282 // audioPackFormatId (AP_yyyyxxxx) in ADM.
IsLoudspeakerLayoutValid(absl::string_view xxxx_substring)283 bool IsLoudspeakerLayoutValid(absl::string_view xxxx_substring) {
284 static const absl::NoDestructor<absl::flat_hash_set<std::string>>
285 kValidLoudspeakerLayouts({
286 {"0001"}, // Mono
287 {"0002"}, // Stereo
288 {"0003"}, // 5.1
289 {"0004"}, // 5.1.2
290 {"0005"}, // 5.1.4
291 {"000f"}, // 7.1
292 {"0017"}, // 7.1.4
293 });
294 return kValidLoudspeakerLayouts->contains(xxxx_substring);
295 }
296
297 // Validates the HOA layout in terms of the 'xxxx' digits of audioPackFormatId
298 // (AP_yyyyxxxx) in ADM.
IsHoaLayoutValid(absl::string_view xxxx_substring)299 bool IsHoaLayoutValid(absl::string_view xxxx_substring) {
300 static const absl::NoDestructor<absl::flat_hash_set<std::string>>
301 kValidHoaLayouts({
302 {"0001"}, // First-order ambisonics.
303 {"0002"}, // Second-order ambisonics.
304 {"0003"}, // Third-order ambisonics.
305 });
306 return kValidHoaLayouts->contains(xxxx_substring);
307 }
308
309 // Validates the Binaural layout in terms of the 'xxxx' digits of
310 // audioPackFormatId (AP_yyyyxxxx) in ADM.
IsBinauralLayoutValid(absl::string_view xxxx_substring)311 bool IsBinauralLayoutValid(absl::string_view xxxx_substring) {
312 constexpr absl::string_view kValidBinauralLayout = "0001";
313 return xxxx_substring == kValidBinauralLayout;
314 }
315
316 // Converts channel names to their abbreviated channel codes and creates an
317 // audio pack layout string of channel codes separated by commas.
CreatePackLayout(const std::vector<std::string> & channel_names)318 absl::StatusOr<std::string> CreatePackLayout(
319 const std::vector<std::string>& channel_names) {
320 static const std::unordered_map<std::string, std::string> channel_name_map = {
321 {"RoomCentricLeft", "L"},
322 {"RoomCentricRight", "R"},
323 {"RoomCentricCenter", "C"},
324 {"RoomCentricLFE", "LFE"},
325 {"RoomCentricLeftSideSurround", "Lss"},
326 {"RoomCentricRightSideSurround", "Rss"},
327 {"RoomCentricLeftRearSurround", "Lrs"},
328 {"RoomCentricRightRearSurround", "Rrs"},
329 {"RoomCentricLeftTopSurround", "Lts"},
330 {"RoomCentricRightTopSurround", "Rts"},
331 {"RoomCentricLeftSurround", "Ls"},
332 {"RoomCentricRightSurround", "Rs"}};
333
334 std::string pack_layout = "";
335 for (const auto& channel_name : channel_names) {
336 auto channel_name_iter = channel_name_map.find(channel_name);
337 if (channel_name_iter != channel_name_map.end()) {
338 pack_layout += channel_name_iter->second;
339 } else {
340 return absl::InvalidArgumentError(
341 absl::StrCat("Invalid channel format= ", channel_name));
342 }
343 pack_layout += ",";
344 }
345
346 if (!pack_layout.empty()) {
347 pack_layout.pop_back();
348 }
349 return pack_layout;
350 }
351
352 // Determines whether the given audio pack layout string exists within known
353 // valid pack layouts. Returns an error if invalid.
ValidatePackLayout(const std::string & pack_layout)354 absl::Status ValidatePackLayout(const std::string& pack_layout) {
355 static const absl::NoDestructor<absl::flat_hash_set<std::string>>
356 kValidPackLayouts({
357 {"L,R"},
358 {"L,R,C"},
359 {"L,R,C,Ls,Rs"},
360 {"L,R,C,LFE,Ls,Rs"},
361 {"L,R,C,Lss,Rss,Lrs,Rrs"},
362 {"L,R,C,LFE,Lss,Rss,Lrs,Rrs"},
363 {"L,R,C,Lss,Rss,Lrs,Rrs,Lts,Rts"},
364 {"L,R,C,LFE,Lss,Rss,Lrs,Rrs,Lts,Rts"},
365 });
366
367 if (!kValidPackLayouts->contains(pack_layout)) {
368 return absl::InvalidArgumentError(
369 absl::StrCat("Invalid pack layout= ", pack_layout));
370 }
371
372 return absl::OkStatus();
373 }
374
375 // Check if the metadata belongs to the common definitions (Recommendation ITU-R
376 // BS.2094)
ValidateAdmObjectForDefaultAdm(absl::string_view type_definition,absl::string_view audio_pack_id_yyyy_part)377 absl::Status ValidateAdmObjectForDefaultAdm(
378 absl::string_view type_definition,
379 absl::string_view audio_pack_id_yyyy_part) {
380 if (IsUserMetadataDefined(audio_pack_id_yyyy_part)) {
381 return absl::InvalidArgumentError("Not under common definition.");
382 }
383
384 if (type_definition == kTypeDefinitionDirectSpeakers) {
385 if (!IsLoudspeakerLayoutValid(audio_pack_id_yyyy_part)) {
386 return absl::InvalidArgumentError(
387 "Loudspeaker layout is not supported by IAMF");
388 }
389 } else if (type_definition == kTypeDefinitionHOA) {
390 if (!IsHoaLayoutValid(audio_pack_id_yyyy_part)) {
391 return absl::InvalidArgumentError("HOA layout is not known");
392 }
393 } else if (type_definition == kTypeDefinitionBinaural) {
394 if (!IsBinauralLayoutValid(audio_pack_id_yyyy_part)) {
395 return absl::InvalidArgumentError("Binaural layout is not known.");
396 }
397 } else {
398 return absl::InvalidArgumentError(
399 absl::StrCat("Unsupported type_definition= ", type_definition));
400 }
401
402 return absl::OkStatus();
403 }
404
ValidateAdmObjectForDolbyAdm(const ADM & adm,const AudioObject & audio_object,absl::string_view type_definition)405 absl::Status ValidateAdmObjectForDolbyAdm(const ADM& adm,
406 const AudioObject& audio_object,
407 absl::string_view type_definition) {
408 if (type_definition != kTypeDefinitionDirectSpeakers &&
409 type_definition != kTypeDefinitionObject) {
410 return absl::InvalidArgumentError(
411 absl::StrCat("Unsupported type_definition= ", type_definition,
412 " when processing a Dolby ADM."));
413 }
414 if (audio_object.audio_pack_format_id_refs.size() != 1) {
415 return absl::InvalidArgumentError(
416 "Expected only one audio pack ID ref for an audio object in a Dolby "
417 "ADM file.");
418 }
419
420 absl::string_view audio_pack_id = audio_object.audio_pack_format_id_refs[0];
421 auto pack_id = std::find_if(adm.audio_packs.begin(), adm.audio_packs.end(),
422 [&audio_pack_id](const AudioPackFormat& pack) {
423 return pack.id == audio_pack_id;
424 });
425 size_t pack_index = (pack_id != adm.audio_packs.end())
426 ? std::distance(adm.audio_packs.begin(), pack_id)
427 : 0;
428
429 auto num_channels_in_pack =
430 adm.audio_packs[pack_index].audio_channel_format_id_refs_map.size();
431 auto num_tracks_in_object = audio_object.audio_track_uid_ref.size();
432 if (type_definition == kTypeDefinitionObject) {
433 if (num_tracks_in_object != 1) {
434 return absl::InvalidArgumentError(
435 "Audio object should have only 1 track ID ref for type definition "
436 "object");
437 }
438 if (num_channels_in_pack != 1) {
439 return absl::InvalidArgumentError(
440 "Audio pack should have only 1 channel ID ref for type definition "
441 "object");
442 }
443 } else {
444 CHECK_EQ(type_definition, kTypeDefinitionDirectSpeakers);
445 if (num_tracks_in_object > 10) {
446 return absl::InvalidArgumentError(
447 "Maximum number of occurrences of track UID refs for DirectSpeakers "
448 "is 10.");
449 }
450 if (num_channels_in_pack > 10) {
451 return absl::InvalidArgumentError(
452 "Maximum number of occurrences of channel ID refs for DirectSpeakers "
453 "is 10.");
454 }
455
456 // Create an audio pack layout string based on channel names present within
457 // an audio pack.
458 std::vector<std::string> channel_names;
459 for (auto& channel_ref :
460 adm.audio_packs[pack_index].audio_channel_format_id_refs_map) {
461 auto& audio_channel = adm.audio_channels[channel_ref.second];
462 channel_names.push_back(audio_channel.name);
463 }
464
465 // Validate audio pack layout.
466 auto audio_pack_layout = CreatePackLayout(channel_names);
467 if (!audio_pack_layout.ok()) {
468 return audio_pack_layout.status();
469 }
470 return ValidatePackLayout(*audio_pack_layout);
471 }
472
473 return absl::OkStatus();
474 }
475
476 // Validates audio objects based on the input file type.
ValidateAudioObjects(const ADM & adm,Handler & handler)477 void ValidateAudioObjects(const ADM& adm, Handler& handler) {
478 absl::Status status = absl::OkStatus();
479 std::vector<std::string> audio_pack_layouts;
480
481 for (auto& audio_object : adm.audio_objects) {
482 if (audio_object.audio_pack_format_id_refs.empty()) {
483 // Skip the empty audio objects.
484 continue;
485 }
486
487 absl::string_view audio_pack_id = audio_object.audio_pack_format_id_refs[0];
488 absl::string_view type_definition = audio_pack_id.substr(3, 4);
489 absl::string_view audio_pack_id_yyyy_part = audio_pack_id.substr(7, 4);
490 if (adm.file_type == kAdmFileTypeDefault) {
491 status = ValidateAdmObjectForDefaultAdm(type_definition,
492 audio_pack_id_yyyy_part);
493 } else {
494 CHECK_EQ(adm.file_type, kAdmFileTypeDolby);
495 status = ValidateAdmObjectForDolbyAdm(adm, audio_object, type_definition);
496 }
497 if (!status.ok()) {
498 LOG(WARNING) << "Ignoring unknown object with audio_object_id= "
499 << audio_object.id << ". Error: " << status;
500 handler.invalid_audio_objects.insert(audio_object.id);
501 }
502 }
503 }
504
505 // A function to use with `expat::XML_SetCharacterDataHandler`. This function
506 // is responsible for storing character data encountered while parsing an AXML
507 // chunk in their respective handler.adm class attributes.
XMLCharacterDataHandlerForExpat(void * parser_data,const XML_Char * text,int32_t len)508 void XMLCharacterDataHandlerForExpat(void* parser_data, const XML_Char* text,
509 int32_t len) {
510 Handler& handler = *static_cast<Handler*>(parser_data);
511
512 int32_t idx = 0;
513 switch (handler.parent) {
514 case kAudioProgramme: {
515 // Populates audio programme class.
516 idx = handler.adm.audio_programmes.size();
517 auto& loudness_metadata =
518 handler.adm.audio_programmes[idx - 1].loudness_metadata;
519 ReferenceLayout reference_layout;
520 switch (handler.audio_programme_tag) {
521 case kAudioContentIDRef: {
522 handler.adm.audio_programmes[idx - 1].audio_content_id_refs.push_back(
523 std::string(text, len));
524 break;
525 }
526 case kIntegratedLoudness: {
527 UpdateErrorStatusIfFalse(
528 absl::SimpleAtof(absl::string_view(text, len),
529 &loudness_metadata.integrated_loudness),
530 "integrated_loudness", handler);
531 break;
532 }
533 case kMaxTruePeak: {
534 // Activate the optional then read it in.
535 loudness_metadata.max_true_peak = 0.0;
536 UpdateErrorStatusIfFalse(
537 absl::SimpleAtof(absl::string_view(text, len),
538 &loudness_metadata.max_true_peak.value()),
539 "max_true_peak", handler);
540 break;
541 }
542 case kDialogueLoudness: {
543 // Activate the optional then read it in.
544 loudness_metadata.dialogue_loudness = 0.0;
545 UpdateErrorStatusIfFalse(
546 absl::SimpleAtof(absl::string_view(text, len),
547 &loudness_metadata.dialogue_loudness.value()),
548 "dialogue_loudness", handler);
549
550 break;
551 }
552 case kAudioProgrammeAudioPackFormatIDRef: {
553 reference_layout.audio_pack_format_id_ref.push_back(
554 std::string(text, len));
555 handler.adm.audio_programmes[idx - 1]
556 .authoring_information.reference_layout = reference_layout;
557 break;
558 }
559 case kProgrammeDefault: {
560 break;
561 }
562 default: {
563 LOG(ERROR) << "Unexpected case";
564 }
565 }
566 // To handle unwanted character like spaces, new lines.
567 handler.audio_programme_tag = kProgrammeDefault;
568 break;
569 }
570 case kAudioContent: {
571 // Populates audio content class.
572 idx = handler.adm.audio_contents.size();
573 switch (handler.audio_content_tag) {
574 case kAudioObjectIDRef: {
575 handler.adm.audio_contents[idx - 1].audio_object_id_ref.push_back(
576 std::string(text, len));
577 break;
578 }
579 case kContentDefault: {
580 break;
581 }
582 default: {
583 LOG(ERROR) << "Unexpected case";
584 }
585 }
586 // To handle unwanted character like spaces, new lines.
587 handler.audio_content_tag = kContentDefault;
588 break;
589 }
590 case kAudioObject: {
591 // Populates audio object class.
592 idx = handler.adm.audio_objects.size();
593 switch (handler.audio_object_tag) {
594 case kAudioObjectAudioPackFormatIDRef: {
595 handler.adm.audio_objects[idx - 1]
596 .audio_pack_format_id_refs.push_back(std::string(text, len));
597 break;
598 }
599 case kAudioTrackUIDRef: {
600 handler.adm.audio_objects[idx - 1].audio_track_uid_ref.push_back(
601 std::string(text, len));
602 break;
603 }
604 case kAudioComplementaryObjectIDRef: {
605 handler.adm.audio_objects[idx - 1]
606 .audio_comple_object_id_ref.push_back(std::string(text, len));
607 break;
608 }
609 case kGain: {
610 UpdateErrorStatusIfFalse(
611 absl::SimpleAtof(absl::string_view(text, len),
612 &handler.adm.audio_objects[idx - 1].gain),
613 "gain", handler);
614 break;
615 }
616 case kAudioObjectLabel: {
617 handler.adm.audio_objects[idx - 1].audio_object_label =
618 (std::string(text, len));
619 break;
620 }
621 case kObjectDefault: {
622 break;
623 }
624 default: {
625 LOG(ERROR) << "Unexpected case";
626 }
627 }
628 // To handle unwanted character like spaces, new lines.
629 handler.audio_object_tag = kObjectDefault;
630 break;
631 }
632 case kAudioPack: {
633 // Populates audio pack object.
634 idx = handler.adm.audio_packs.size();
635 switch (handler.audio_pack_tag) {
636 case kAudioPackAudioChannelFormatIDRef: {
637 handler.adm.audio_packs[idx - 1]
638 .audio_channel_format_id_refs_map.emplace_back(
639 std::string(text, len), size_t(-1));
640 break;
641 }
642 case kAudioPackLabel: {
643 handler.adm.audio_packs[idx - 1].id = (std::string(text, len));
644 break;
645 }
646 case kPackDefault: {
647 break;
648 }
649 default: {
650 LOG(ERROR) << "Unexpected case";
651 }
652 }
653
654 // To handle unwanted character like spaces, new lines.
655 handler.audio_pack_tag = kPackDefault;
656 break;
657 }
658 case kAudioChannel: {
659 // Populates audio channel object.
660 idx = handler.adm.audio_channels.size();
661 switch (handler.audio_channel_tag) {
662 case kAudioChannelLabel: {
663 handler.adm.audio_channels[idx - 1].id = (std::string(text, len));
664 break;
665 }
666 case kChannelDefault: {
667 break;
668 }
669 default: {
670 LOG(ERROR) << "Unexpected case";
671 }
672 }
673
674 // To handle unwanted character like spaces, new lines.
675 handler.audio_channel_tag = kChannelDefault;
676 break;
677 }
678 case kAudioBlock: {
679 // Populates audio block object.
680 idx = handler.adm.audio_channels.size();
681 auto& audio_blocks = handler.adm.audio_channels[idx - 1].audio_blocks;
682 switch (handler.audio_block_tag) {
683 case kX: {
684 UpdateErrorStatusIfFalse(
685 absl::SimpleAtof(absl::string_view(text, len),
686 &audio_blocks.back().position.x),
687 "position", handler);
688 break;
689 }
690 case kY: {
691 UpdateErrorStatusIfFalse(
692 absl::SimpleAtof(absl::string_view(text, len),
693 &audio_blocks.back().position.y),
694 "position", handler);
695 break;
696 }
697 case kZ: {
698 UpdateErrorStatusIfFalse(
699 absl::SimpleAtof(absl::string_view(text, len),
700 &audio_blocks.back().position.z),
701 "position", handler);
702 break;
703 }
704 case kAudioBlockLabel: {
705 audio_blocks.back().id = (std::string(text, len));
706 break;
707 }
708 case kBlockDefault: {
709 break;
710 }
711 default: {
712 LOG(ERROR) << "Unexpected case";
713 }
714 }
715
716 // To handle unwanted characters like spaces, new lines.
717 handler.audio_block_tag = kBlockDefault;
718 break;
719 }
720 case kElementDefault: {
721 break;
722 }
723 default: {
724 LOG(ERROR) << "Unexpected case";
725 }
726 }
727 }
728
729 // A function to use with `expat::XML_SetStartElementHandler`. It sets the
730 // handler's parent tag depending on the name attribute.
XMLStartTagHandlerForExpat(void * parser_data,const char * name,const char ** atts)731 void XMLStartTagHandlerForExpat(void* parser_data, const char* name,
732 const char** atts) {
733 Handler& handler = *static_cast<Handler*>(parser_data);
734 absl::string_view adm_element(name);
735 if (adm_element == "audioProgramme") {
736 // If the tag 'audioProgramme' is encountered while parsing the axml, create
737 // an instance of AudioProgramme class, populate its attributes and add it
738 // to ADM.
739 handler.parent = kAudioProgramme;
740 AudioProgramme audio_programme;
741 LoudnessMetadata loudness_metadata;
742 AuthoringInformation authoring_information;
743 audio_programme.loudness_metadata = loudness_metadata;
744 audio_programme.authoring_information = authoring_information;
745 for (int32_t i = 0; atts[i]; i += 2) {
746 SetAudioProgrammeValue(absl::string_view(atts[i]),
747 absl::string_view(atts[i + 1]), audio_programme);
748 }
749 handler.adm.audio_programmes.push_back(audio_programme);
750 } else if (adm_element == "audioContent") {
751 // If the tag 'audioContent' is encountered while parsing the axml, create
752 // an instance of AudioContent class, populate its attributes and add it to
753 // ADM.
754 handler.parent = kAudioContent;
755 AudioContent audio_content;
756 for (int32_t i = 0; atts[i]; i += 2) {
757 SetAudioContentValue(absl::string_view(atts[i]),
758 absl::string_view(atts[i + 1]), audio_content);
759 }
760 handler.adm.audio_contents.push_back(audio_content);
761 } else if (adm_element == "audioObject") {
762 // If the tag 'audioObject' is encountered while parsing the axml, create an
763 // instance of AudioObject class, populate its attributes and add it to ADM.
764 handler.parent = kAudioObject;
765 AudioObject audio_object;
766 for (int32_t i = 0; atts[i]; i += 2) {
767 SetAudioObjectValue(absl::string_view(atts[i]),
768 absl::string_view(atts[i + 1]), audio_object,
769 handler);
770 }
771 handler.adm.audio_objects.push_back(audio_object);
772 } else if (adm_element == "audioPackFormat") {
773 // If the tag 'audioPackFormat' is encountered while parsing the axml,
774 // create an instance of AudioPack class, populate its attributes and add it
775 // to ADM.
776 handler.parent = kAudioPack;
777 AudioPackFormat audio_pack;
778 for (int32_t i = 0; atts[i]; i += 2) {
779 SetAudioPackValue(absl::string_view(atts[i]),
780 absl::string_view(atts[i + 1]), audio_pack);
781 }
782 handler.adm.audio_packs.push_back(audio_pack);
783 } else if (adm_element == "audioChannelFormat") {
784 // If the tag 'audioChannelFormat' is encountered while parsing the axml,
785 // create an instance of AudioChannel class, populate its attributes and add
786 // it to ADM.
787 handler.parent = kAudioChannel;
788 AudioChannelFormat audio_channel;
789 for (int32_t i = 0; atts[i]; i += 2) {
790 SetAudioChannelValue(absl::string_view(atts[i]),
791 absl::string_view(atts[i + 1]), audio_channel);
792 }
793 handler.adm.audio_channels.push_back(audio_channel);
794 } else if (adm_element == "audioBlockFormat") {
795 // If the tag 'audioBlockFormat' is encountered while parsing the axml,
796 // create an instance of AudioBlockFormat class, populate its attributes and
797 // add it to ADM.
798 handler.parent = kAudioBlock;
799 AudioBlockFormat audio_block;
800 CartesianPosition position;
801 audio_block.position = position;
802 for (int32_t i = 0; atts[i]; i += 2) {
803 SetAudioBlockValue(absl::string_view(atts[i]),
804 absl::string_view(atts[i + 1]), audio_block);
805 }
806 handler.adm.audio_channels.back().audio_blocks.push_back(audio_block);
807 } else {
808 SetHandlerTag(adm_element, atts, handler);
809 }
810 }
811
812 // A function to map each audio pack to their corresponding audio channel
813 // formats. It sets the corresponding indices into a vector of pairs inside each
814 // audio pack instance.
SetChannelIndices(ADM & adm)815 void SetChannelIndices(ADM& adm) {
816 // Iterate over all audio packs
817 for (auto& audio_pack : adm.audio_packs) {
818 for (auto& id_ref_and_index : audio_pack.audio_channel_format_id_refs_map) {
819 const std::string& channel_id_ref = id_ref_and_index.first;
820 auto channel_id =
821 std::find_if(adm.audio_channels.begin(), adm.audio_channels.end(),
822 [&channel_id_ref](const AudioChannelFormat& channel) {
823 return channel.id == channel_id_ref;
824 });
825
826 if (channel_id != adm.audio_channels.end()) {
827 size_t channel_index =
828 std::distance(adm.audio_channels.begin(), channel_id);
829 id_ref_and_index.second = channel_index;
830 } else {
831 LOG(WARNING) << "Channel ID ref " << channel_id_ref << " not found!";
832 }
833 }
834 }
835 }
836 } // namespace
837
ParseXmlToAdm(absl::string_view xml_data,int32_t importance_threshold,AdmFileType file_type)838 absl::StatusOr<ADM> ParseXmlToAdm(absl::string_view xml_data,
839 int32_t importance_threshold,
840 AdmFileType file_type) {
841 Handler handler;
842
843 // Creating an XML parser and attaching a handler object to it. Also, parser
844 // is linked with functions that have logic to deal with the start tag of XML
845 // and the character of XML.
846 XML_Parser parser = XML_ParserCreate(nullptr);
847 XML_SetUserData(parser, &handler);
848 handler.adm.file_type = file_type;
849 XML_SetStartElementHandler(parser, XMLStartTagHandlerForExpat);
850 XML_SetCharacterDataHandler(parser, XMLCharacterDataHandlerForExpat);
851
852 switch (const auto xml_status =
853 XML_Parse(parser, xml_data.data(), xml_data.length(), true)) {
854 case XML_STATUS_OK:
855 SetChannelIndices(handler.adm);
856 ValidateAudioObjects(handler.adm, handler);
857 RemoveLowImportanceAndInvalidAudioObjects(importance_threshold, handler);
858 XML_ParserFree(parser);
859 if (!handler.status.ok()) {
860 return handler.status;
861 }
862 return handler.adm;
863 case XML_STATUS_ERROR:
864 XML_ParserFree(parser);
865 return absl::InvalidArgumentError(
866 absl::StrCat("XML parsing error. XML_Parse() returned ", xml_status));
867 case XML_STATUS_SUSPENDED:
868 XML_ParserFree(parser);
869 return absl::FailedPreconditionError(absl::StrCat(
870 "XML parsing suspended. XML_Parse() returned ", xml_status));
871 default:
872 XML_ParserFree(parser);
873 return absl::UnknownError(absl::StrCat(
874 "XML parsing failed. XML_Parse() returned ", xml_status));
875 }
876 }
877
878 } // namespace adm_to_user_metadata
879 } // namespace iamf_tools
880