• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 3-Clause Clear License
5  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
6  * License was not distributed with this source code in the LICENSE file, you
7  * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
8  * Alliance for Open Media Patent License 1.0 was not distributed with this
9  * source code in the PATENTS file, you can obtain it at
10  * www.aomedia.org/license/patent.
11  */
12 
13 #include "iamf/cli/adm_to_user_metadata/adm/xml_to_adm.h"
14 
15 #include <algorithm>
16 #include <cstddef>
17 #include <cstdint>
18 #include <ios>
19 #include <iterator>
20 #include <sstream>
21 #include <string>
22 #include <unordered_map>
23 #include <vector>
24 
25 #include "absl/base/no_destructor.h"
26 #include "absl/container/flat_hash_set.h"
27 #include "absl/log/check.h"
28 #include "absl/log/log.h"
29 #include "absl/status/status.h"
30 #include "absl/status/statusor.h"
31 #include "absl/strings/numbers.h"
32 #include "absl/strings/str_cat.h"
33 #include "absl/strings/string_view.h"
34 #include "expat/lib/expat.h"
35 #include "expat/lib/expat_external.h"
36 #include "iamf/cli/adm_to_user_metadata/adm/adm_elements.h"
37 
38 namespace iamf_tools {
39 namespace adm_to_user_metadata {
40 
41 namespace {
42 constexpr absl::string_view kTypeDefinitionDirectSpeakers = "0001";
43 constexpr absl::string_view kTypeDefinitionObject = "0003";
44 constexpr absl::string_view kTypeDefinitionHOA = "0004";
45 constexpr absl::string_view kTypeDefinitionBinaural = "0005";
46 
// Top-level ADM elements. The parser records the most recently opened one as
// the "parent" of any nested tags and character data.
enum AdmElement {
  kAudioProgramme,  // 0
  kAudioContent,    // 1
  kAudioObject,     // 2
  kAudioPack,       // 3
  kAudioChannel,    // 4
  kAudioBlock,      // 5
  kElementDefault   // 6: no tracked element is currently open.
};
57 
// Child tags of an audio programme whose character data is captured.
enum AdmProgrammeElement {
  kAudioContentIDRef,                   // 0
  kIntegratedLoudness,                  // 1
  kMaxTruePeak,                         // 2
  kDialogueLoudness,                    // 3
  kAudioProgrammeAudioPackFormatIDRef,  // 4
  kProgrammeDefault                     // 5: no programme tag is pending.
};
67 
// Child tags of an audio content whose character data is captured.
enum AdmContentElement {
  kAudioObjectIDRef,  // 0
  kContentDefault     // 1: no content tag is pending.
};
70 
// Child tags of an audio object whose character data is captured.
enum AdmObjectElement {
  kAudioObjectAudioPackFormatIDRef,  // 0
  kAudioTrackUIDRef,                 // 1
  kAudioComplementaryObjectIDRef,    // 2
  kGain,                             // 3
  kAudioObjectLabel,                 // 4
  kObjectDefault                     // 5: no object tag is pending.
};
80 
// Child tags of an audio pack format whose character data is captured.
enum AdmPackFormat {
  kAudioPackAudioChannelFormatIDRef,  // 0
  kAudioPackLabel,                    // 1
  kPackDefault                        // 2: no pack tag is pending.
};
87 
// Child tags of an audio channel format whose character data is captured.
enum AdmChannelFormat {
  kAudioChannelLabel,  // 0
  kChannelDefault      // 1: no channel tag is pending.
};
90 
// Child tags of an audio block format whose character data is captured. The
// first three select which position coordinate the data belongs to.
enum AdmBlockFormat {
  kX,                // 0
  kY,                // 1
  kZ,                // 2
  kAudioBlockLabel,  // 3
  kBlockDefault      // 4: no block tag is pending.
};
99 
100 // This class is used by xml parser to collect and store various attributes and
101 // information of xml.
102 struct Handler {
103   ADM adm;
104   absl::flat_hash_set<std::string> invalid_audio_objects;
105   std::string audio_object_id;
106 
107   AdmElement parent = kElementDefault;
108   AdmProgrammeElement audio_programme_tag = kProgrammeDefault;
109   AdmContentElement audio_content_tag = kContentDefault;
110   AdmObjectElement audio_object_tag = kObjectDefault;
111   AdmPackFormat audio_pack_tag = kPackDefault;
112   AdmChannelFormat audio_channel_tag = kChannelDefault;
113   AdmBlockFormat audio_block_tag = kBlockDefault;
114 
115   absl::Status status = absl::OkStatus();
116 };
117 
UpdateErrorStatusIfFalse(bool status,absl::string_view field_name,Handler & handler)118 void UpdateErrorStatusIfFalse(bool status, absl::string_view field_name,
119                               Handler& handler) {
120   if (!status && handler.status.ok()) {
121     handler.status = absl::InvalidArgumentError(
122         absl::StrCat("Failed to parse ", field_name));
123   }
124 }
125 
126 // This function sets the handler's tag for program, content, or object based
127 // upon the name attribute.
SetHandlerTag(absl::string_view name,const char ** atts,Handler & handler)128 void SetHandlerTag(absl::string_view name, const char** atts,
129                    Handler& handler) {
130   if (name == "audioContentIDRef") {
131     handler.audio_programme_tag = kAudioContentIDRef;
132   } else if (name == "integratedLoudness") {
133     handler.audio_programme_tag = kIntegratedLoudness;
134   } else if (name == "maxTruePeak") {
135     handler.audio_programme_tag = kMaxTruePeak;
136   } else if (name == "dialogueLoudness") {
137     handler.audio_programme_tag = kDialogueLoudness;
138   } else if (name == "audioObjectIDRef") {
139     handler.audio_content_tag = kAudioObjectIDRef;
140   } else if (name == "audioPackFormatIDRef") {
141     if (handler.parent == kAudioProgramme) {
142       handler.audio_programme_tag = kAudioProgrammeAudioPackFormatIDRef;
143     } else {
144       handler.audio_object_tag = kAudioObjectAudioPackFormatIDRef;
145     }
146   } else if (name == "audioTrackUIDRef") {
147     handler.audio_object_tag = kAudioTrackUIDRef;
148   } else if (name == "audioComplementaryObjectIDRef") {
149     handler.audio_object_tag = kAudioComplementaryObjectIDRef;
150   } else if (name == "gain") {
151     handler.audio_object_tag = kGain;
152   } else if (name == "audioObjectLabel") {
153     handler.audio_object_tag = kAudioObjectLabel;
154   } else if (name == "audioPackLabel") {
155     handler.audio_pack_tag = kAudioPackLabel;
156   } else if (name == "audioChannelFormatIDRef") {
157     handler.audio_pack_tag = kAudioPackAudioChannelFormatIDRef;
158   } else if (name == "position") {
159     handler.audio_block_tag = kBlockDefault;
160     for (int32_t i = 0; atts[i]; i += 2) {
161       if ((std::string)atts[i + 1] == "X") {
162         handler.audio_block_tag = kX;
163       } else if ((std::string)atts[i + 1] == "Y") {
164         handler.audio_block_tag = kY;
165       } else if ((std::string)atts[i + 1] == "Z") {
166         handler.audio_block_tag = kZ;
167       }
168     }
169   } else if (name == "audioBlockFormatID") {
170     handler.audio_block_tag = kAudioBlockLabel;
171   }
172 }
173 
174 // Sets the attributes of AudioProgramme.
SetAudioProgrammeValue(absl::string_view key,absl::string_view value,AudioProgramme & audio_programme)175 void SetAudioProgrammeValue(absl::string_view key, absl::string_view value,
176                             AudioProgramme& audio_programme) {
177   if (key == "audioProgrammeID") {
178     audio_programme.id = value;
179   } else if (key == "audioProgrammeName") {
180     audio_programme.name = value;
181   } else if (key == "audioProgrammeLabel") {
182     audio_programme.audio_programme_label = value;
183   }
184 }
185 
186 // Sets the attributes of AudioContent.
SetAudioContentValue(absl::string_view key,absl::string_view value,AudioContent & audio_content)187 void SetAudioContentValue(absl::string_view key, absl::string_view value,
188                           AudioContent& audio_content) {
189   if (key == "audioContentID") {
190     audio_content.id = value;
191   } else if (key == "audioContentName") {
192     audio_content.name = value;
193   }
194 }
195 
196 // Sets the attributes of AudioObject.
SetAudioObjectValue(absl::string_view key,absl::string_view value,AudioObject & audio_object,Handler & handler)197 void SetAudioObjectValue(absl::string_view key, absl::string_view value,
198                          AudioObject& audio_object, Handler& handler) {
199   if (key == "audioObjectID") {
200     handler.audio_object_id = value;
201     audio_object.id = value;
202   } else if (key == "audioObjectName") {
203     audio_object.name = value;
204   } else if (key == "importance") {
205     UpdateErrorStatusIfFalse(absl::SimpleAtoi(value, &audio_object.importance),
206                              "importance", handler);
207   }
208 }
209 
210 // Sets the attributes of AudioPack.
SetAudioPackValue(absl::string_view key,absl::string_view value,AudioPackFormat & audio_pack)211 void SetAudioPackValue(absl::string_view key, absl::string_view value,
212                        AudioPackFormat& audio_pack) {
213   if (key == "audioPackFormatID") {
214     audio_pack.id = (std::string)value;
215   } else if (key == "audioPackFormatName") {
216     audio_pack.name = (std::string)value;
217   } else if (key == "typeLabel") {
218     audio_pack.audio_pack_label = (std::string)value;
219   }
220 }
221 
222 // Sets the attributes of AudioChannel.
SetAudioChannelValue(absl::string_view key,absl::string_view value,AudioChannelFormat & audio_channel)223 void SetAudioChannelValue(absl::string_view key, absl::string_view value,
224                           AudioChannelFormat& audio_channel) {
225   if (key == "audioChannelFormatID") {
226     audio_channel.id = (std::string)value;
227   } else if (key == "audioChannelFormatName") {
228     audio_channel.name = (std::string)value;
229   } else if (key == "typeLabel") {
230     audio_channel.audio_channel_label = (std::string)value;
231   }
232 }
233 
234 // Parse and store the timing information in Audio Block.
235 // The input string which holds the timing information will be in the format
236 // 'hh:mm:ss.zzzzz'.
ParseTimingInfo(absl::string_view time_string,BlockTime & time)237 void ParseTimingInfo(absl::string_view time_string, BlockTime& time) {
238   time.hour = std::stoi(std::string(time_string.substr(0, 2)));
239   time.minute = std::stoi(std::string(time_string.substr(3, 2)));
240   time.second = std::stod(std::string(time_string.substr(6)));
241 }
242 
243 // Sets the attributes of AudioBlock.
SetAudioBlockValue(absl::string_view key,absl::string_view value,AudioBlockFormat & audio_block)244 void SetAudioBlockValue(absl::string_view key, absl::string_view value,
245                         AudioBlockFormat& audio_block) {
246   if (key == "audioBlockFormatID") {
247     audio_block.id = value;
248   } else if (key == "rtime") {
249     ParseTimingInfo(value, audio_block.rtime);
250   } else if (key == "duration") {
251     ParseTimingInfo(value, audio_block.duration);
252   }
253 }
254 
255 // Removes objects from the ADM structure based on the given importance
256 // threshold. Also, removes audio objects with IDs found in the set of invalid
257 // audio objects.
RemoveLowImportanceAndInvalidAudioObjects(int32_t importance_threshold,Handler & handler)258 void RemoveLowImportanceAndInvalidAudioObjects(int32_t importance_threshold,
259                                                Handler& handler) {
260   std::vector<AudioObject>& audio_object_list = handler.adm.audio_objects;
261   audio_object_list.erase(
262       std::remove_if(audio_object_list.begin(), audio_object_list.end(),
263                      [&](AudioObject value) {
264                        return value.importance < importance_threshold ||
265                               (handler.invalid_audio_objects.find(value.id) !=
266                                handler.invalid_audio_objects.end());
267                      }),
268       audio_object_list.end());
269 }
270 
271 // Checks if the metadata is user defined or part of the common definitions.
272 // NOTE: An ADM audioPackFormatID AP_yyyyxxxx which belongs to common
273 // definitions would have 'xxxx' in the range [0x0001, 0x0FFF].
IsUserMetadataDefined(absl::string_view xxxx_substring)274 bool IsUserMetadataDefined(absl::string_view xxxx_substring) {
275   std::istringstream iss(std::string(xxxx_substring), std::ios_base::in);
276   int32_t int_value;
277   iss >> std::hex >> int_value;
278   return int_value > 0x0fff;
279 }
280 
281 // Validates the specific layout in terms of the 'xxxx' digits of
282 // audioPackFormatId (AP_yyyyxxxx) in ADM.
IsLoudspeakerLayoutValid(absl::string_view xxxx_substring)283 bool IsLoudspeakerLayoutValid(absl::string_view xxxx_substring) {
284   static const absl::NoDestructor<absl::flat_hash_set<std::string>>
285       kValidLoudspeakerLayouts({
286           {"0001"},  // Mono
287           {"0002"},  // Stereo
288           {"0003"},  // 5.1
289           {"0004"},  // 5.1.2
290           {"0005"},  // 5.1.4
291           {"000f"},  // 7.1
292           {"0017"},  // 7.1.4
293       });
294   return kValidLoudspeakerLayouts->contains(xxxx_substring);
295 }
296 
297 // Validates the HOA layout in terms of the 'xxxx' digits of audioPackFormatId
298 // (AP_yyyyxxxx) in ADM.
IsHoaLayoutValid(absl::string_view xxxx_substring)299 bool IsHoaLayoutValid(absl::string_view xxxx_substring) {
300   static const absl::NoDestructor<absl::flat_hash_set<std::string>>
301       kValidHoaLayouts({
302           {"0001"},  // First-order ambisonics.
303           {"0002"},  // Second-order ambisonics.
304           {"0003"},  // Third-order ambisonics.
305       });
306   return kValidHoaLayouts->contains(xxxx_substring);
307 }
308 
309 // Validates the Binaural layout in terms of the 'xxxx' digits of
310 // audioPackFormatId (AP_yyyyxxxx) in ADM.
IsBinauralLayoutValid(absl::string_view xxxx_substring)311 bool IsBinauralLayoutValid(absl::string_view xxxx_substring) {
312   constexpr absl::string_view kValidBinauralLayout = "0001";
313   return xxxx_substring == kValidBinauralLayout;
314 }
315 
316 // Converts channel names to their abbreviated channel codes and creates an
317 // audio pack layout string of channel codes separated by commas.
CreatePackLayout(const std::vector<std::string> & channel_names)318 absl::StatusOr<std::string> CreatePackLayout(
319     const std::vector<std::string>& channel_names) {
320   static const std::unordered_map<std::string, std::string> channel_name_map = {
321       {"RoomCentricLeft", "L"},
322       {"RoomCentricRight", "R"},
323       {"RoomCentricCenter", "C"},
324       {"RoomCentricLFE", "LFE"},
325       {"RoomCentricLeftSideSurround", "Lss"},
326       {"RoomCentricRightSideSurround", "Rss"},
327       {"RoomCentricLeftRearSurround", "Lrs"},
328       {"RoomCentricRightRearSurround", "Rrs"},
329       {"RoomCentricLeftTopSurround", "Lts"},
330       {"RoomCentricRightTopSurround", "Rts"},
331       {"RoomCentricLeftSurround", "Ls"},
332       {"RoomCentricRightSurround", "Rs"}};
333 
334   std::string pack_layout = "";
335   for (const auto& channel_name : channel_names) {
336     auto channel_name_iter = channel_name_map.find(channel_name);
337     if (channel_name_iter != channel_name_map.end()) {
338       pack_layout += channel_name_iter->second;
339     } else {
340       return absl::InvalidArgumentError(
341           absl::StrCat("Invalid channel format= ", channel_name));
342     }
343     pack_layout += ",";
344   }
345 
346   if (!pack_layout.empty()) {
347     pack_layout.pop_back();
348   }
349   return pack_layout;
350 }
351 
352 // Determines whether the given audio pack layout string exists within known
353 // valid pack layouts. Returns an error if invalid.
ValidatePackLayout(const std::string & pack_layout)354 absl::Status ValidatePackLayout(const std::string& pack_layout) {
355   static const absl::NoDestructor<absl::flat_hash_set<std::string>>
356       kValidPackLayouts({
357           {"L,R"},
358           {"L,R,C"},
359           {"L,R,C,Ls,Rs"},
360           {"L,R,C,LFE,Ls,Rs"},
361           {"L,R,C,Lss,Rss,Lrs,Rrs"},
362           {"L,R,C,LFE,Lss,Rss,Lrs,Rrs"},
363           {"L,R,C,Lss,Rss,Lrs,Rrs,Lts,Rts"},
364           {"L,R,C,LFE,Lss,Rss,Lrs,Rrs,Lts,Rts"},
365       });
366 
367   if (!kValidPackLayouts->contains(pack_layout)) {
368     return absl::InvalidArgumentError(
369         absl::StrCat("Invalid pack layout= ", pack_layout));
370   }
371 
372   return absl::OkStatus();
373 }
374 
375 // Check if the metadata belongs to the common definitions (Recommendation ITU-R
376 // BS.2094)
ValidateAdmObjectForDefaultAdm(absl::string_view type_definition,absl::string_view audio_pack_id_yyyy_part)377 absl::Status ValidateAdmObjectForDefaultAdm(
378     absl::string_view type_definition,
379     absl::string_view audio_pack_id_yyyy_part) {
380   if (IsUserMetadataDefined(audio_pack_id_yyyy_part)) {
381     return absl::InvalidArgumentError("Not under common definition.");
382   }
383 
384   if (type_definition == kTypeDefinitionDirectSpeakers) {
385     if (!IsLoudspeakerLayoutValid(audio_pack_id_yyyy_part)) {
386       return absl::InvalidArgumentError(
387           "Loudspeaker layout is not supported by IAMF");
388     }
389   } else if (type_definition == kTypeDefinitionHOA) {
390     if (!IsHoaLayoutValid(audio_pack_id_yyyy_part)) {
391       return absl::InvalidArgumentError("HOA layout is not known");
392     }
393   } else if (type_definition == kTypeDefinitionBinaural) {
394     if (!IsBinauralLayoutValid(audio_pack_id_yyyy_part)) {
395       return absl::InvalidArgumentError("Binaural layout is not known.");
396     }
397   } else {
398     return absl::InvalidArgumentError(
399         absl::StrCat("Unsupported type_definition= ", type_definition));
400   }
401 
402   return absl::OkStatus();
403 }
404 
ValidateAdmObjectForDolbyAdm(const ADM & adm,const AudioObject & audio_object,absl::string_view type_definition)405 absl::Status ValidateAdmObjectForDolbyAdm(const ADM& adm,
406                                           const AudioObject& audio_object,
407                                           absl::string_view type_definition) {
408   if (type_definition != kTypeDefinitionDirectSpeakers &&
409       type_definition != kTypeDefinitionObject) {
410     return absl::InvalidArgumentError(
411         absl::StrCat("Unsupported type_definition= ", type_definition,
412                      " when processing a Dolby ADM."));
413   }
414   if (audio_object.audio_pack_format_id_refs.size() != 1) {
415     return absl::InvalidArgumentError(
416         "Expected only one audio pack ID ref for an audio object in a Dolby "
417         "ADM file.");
418   }
419 
420   absl::string_view audio_pack_id = audio_object.audio_pack_format_id_refs[0];
421   auto pack_id = std::find_if(adm.audio_packs.begin(), adm.audio_packs.end(),
422                               [&audio_pack_id](const AudioPackFormat& pack) {
423                                 return pack.id == audio_pack_id;
424                               });
425   size_t pack_index = (pack_id != adm.audio_packs.end())
426                           ? std::distance(adm.audio_packs.begin(), pack_id)
427                           : 0;
428 
429   auto num_channels_in_pack =
430       adm.audio_packs[pack_index].audio_channel_format_id_refs_map.size();
431   auto num_tracks_in_object = audio_object.audio_track_uid_ref.size();
432   if (type_definition == kTypeDefinitionObject) {
433     if (num_tracks_in_object != 1) {
434       return absl::InvalidArgumentError(
435           "Audio object should have only 1 track ID ref for type definition "
436           "object");
437     }
438     if (num_channels_in_pack != 1) {
439       return absl::InvalidArgumentError(
440           "Audio pack should have only 1 channel ID ref for type definition "
441           "object");
442     }
443   } else {
444     CHECK_EQ(type_definition, kTypeDefinitionDirectSpeakers);
445     if (num_tracks_in_object > 10) {
446       return absl::InvalidArgumentError(
447           "Maximum number of occurrences of track UID refs for DirectSpeakers "
448           "is 10.");
449     }
450     if (num_channels_in_pack > 10) {
451       return absl::InvalidArgumentError(
452           "Maximum number of occurrences of channel ID refs for DirectSpeakers "
453           "is 10.");
454     }
455 
456     // Create an audio pack layout string based on channel names present within
457     // an audio pack.
458     std::vector<std::string> channel_names;
459     for (auto& channel_ref :
460          adm.audio_packs[pack_index].audio_channel_format_id_refs_map) {
461       auto& audio_channel = adm.audio_channels[channel_ref.second];
462       channel_names.push_back(audio_channel.name);
463     }
464 
465     // Validate audio pack layout.
466     auto audio_pack_layout = CreatePackLayout(channel_names);
467     if (!audio_pack_layout.ok()) {
468       return audio_pack_layout.status();
469     }
470     return ValidatePackLayout(*audio_pack_layout);
471   }
472 
473   return absl::OkStatus();
474 }
475 
476 // Validates audio objects based on the input file type.
ValidateAudioObjects(const ADM & adm,Handler & handler)477 void ValidateAudioObjects(const ADM& adm, Handler& handler) {
478   absl::Status status = absl::OkStatus();
479   std::vector<std::string> audio_pack_layouts;
480 
481   for (auto& audio_object : adm.audio_objects) {
482     if (audio_object.audio_pack_format_id_refs.empty()) {
483       // Skip the empty audio objects.
484       continue;
485     }
486 
487     absl::string_view audio_pack_id = audio_object.audio_pack_format_id_refs[0];
488     absl::string_view type_definition = audio_pack_id.substr(3, 4);
489     absl::string_view audio_pack_id_yyyy_part = audio_pack_id.substr(7, 4);
490     if (adm.file_type == kAdmFileTypeDefault) {
491       status = ValidateAdmObjectForDefaultAdm(type_definition,
492                                               audio_pack_id_yyyy_part);
493     } else {
494       CHECK_EQ(adm.file_type, kAdmFileTypeDolby);
495       status = ValidateAdmObjectForDolbyAdm(adm, audio_object, type_definition);
496     }
497     if (!status.ok()) {
498       LOG(WARNING) << "Ignoring unknown object with audio_object_id= "
499                    << audio_object.id << ". Error: " << status;
500       handler.invalid_audio_objects.insert(audio_object.id);
501     }
502   }
503 }
504 
505 //  A function to use with `expat::XML_SetCharacterDataHandler`. This function
506 //  is responsible for storing character data encountered while parsing an AXML
507 //  chunk in their respective handler.adm class attributes.
XMLCharacterDataHandlerForExpat(void * parser_data,const XML_Char * text,int32_t len)508 void XMLCharacterDataHandlerForExpat(void* parser_data, const XML_Char* text,
509                                      int32_t len) {
510   Handler& handler = *static_cast<Handler*>(parser_data);
511 
512   int32_t idx = 0;
513   switch (handler.parent) {
514     case kAudioProgramme: {
515       // Populates audio programme class.
516       idx = handler.adm.audio_programmes.size();
517       auto& loudness_metadata =
518           handler.adm.audio_programmes[idx - 1].loudness_metadata;
519       ReferenceLayout reference_layout;
520       switch (handler.audio_programme_tag) {
521         case kAudioContentIDRef: {
522           handler.adm.audio_programmes[idx - 1].audio_content_id_refs.push_back(
523               std::string(text, len));
524           break;
525         }
526         case kIntegratedLoudness: {
527           UpdateErrorStatusIfFalse(
528               absl::SimpleAtof(absl::string_view(text, len),
529                                &loudness_metadata.integrated_loudness),
530               "integrated_loudness", handler);
531           break;
532         }
533         case kMaxTruePeak: {
534           // Activate the optional then read it in.
535           loudness_metadata.max_true_peak = 0.0;
536           UpdateErrorStatusIfFalse(
537               absl::SimpleAtof(absl::string_view(text, len),
538                                &loudness_metadata.max_true_peak.value()),
539               "max_true_peak", handler);
540           break;
541         }
542         case kDialogueLoudness: {
543           // Activate the optional then read it in.
544           loudness_metadata.dialogue_loudness = 0.0;
545           UpdateErrorStatusIfFalse(
546               absl::SimpleAtof(absl::string_view(text, len),
547                                &loudness_metadata.dialogue_loudness.value()),
548               "dialogue_loudness", handler);
549 
550           break;
551         }
552         case kAudioProgrammeAudioPackFormatIDRef: {
553           reference_layout.audio_pack_format_id_ref.push_back(
554               std::string(text, len));
555           handler.adm.audio_programmes[idx - 1]
556               .authoring_information.reference_layout = reference_layout;
557           break;
558         }
559         case kProgrammeDefault: {
560           break;
561         }
562         default: {
563           LOG(ERROR) << "Unexpected case";
564         }
565       }
566       // To handle unwanted character like spaces, new lines.
567       handler.audio_programme_tag = kProgrammeDefault;
568       break;
569     }
570     case kAudioContent: {
571       // Populates audio content class.
572       idx = handler.adm.audio_contents.size();
573       switch (handler.audio_content_tag) {
574         case kAudioObjectIDRef: {
575           handler.adm.audio_contents[idx - 1].audio_object_id_ref.push_back(
576               std::string(text, len));
577           break;
578         }
579         case kContentDefault: {
580           break;
581         }
582         default: {
583           LOG(ERROR) << "Unexpected case";
584         }
585       }
586       // To handle unwanted character like spaces, new lines.
587       handler.audio_content_tag = kContentDefault;
588       break;
589     }
590     case kAudioObject: {
591       // Populates audio object class.
592       idx = handler.adm.audio_objects.size();
593       switch (handler.audio_object_tag) {
594         case kAudioObjectAudioPackFormatIDRef: {
595           handler.adm.audio_objects[idx - 1]
596               .audio_pack_format_id_refs.push_back(std::string(text, len));
597           break;
598         }
599         case kAudioTrackUIDRef: {
600           handler.adm.audio_objects[idx - 1].audio_track_uid_ref.push_back(
601               std::string(text, len));
602           break;
603         }
604         case kAudioComplementaryObjectIDRef: {
605           handler.adm.audio_objects[idx - 1]
606               .audio_comple_object_id_ref.push_back(std::string(text, len));
607           break;
608         }
609         case kGain: {
610           UpdateErrorStatusIfFalse(
611               absl::SimpleAtof(absl::string_view(text, len),
612                                &handler.adm.audio_objects[idx - 1].gain),
613               "gain", handler);
614           break;
615         }
616         case kAudioObjectLabel: {
617           handler.adm.audio_objects[idx - 1].audio_object_label =
618               (std::string(text, len));
619           break;
620         }
621         case kObjectDefault: {
622           break;
623         }
624         default: {
625           LOG(ERROR) << "Unexpected case";
626         }
627       }
628       // To handle unwanted character like spaces, new lines.
629       handler.audio_object_tag = kObjectDefault;
630       break;
631     }
632     case kAudioPack: {
633       // Populates audio pack object.
634       idx = handler.adm.audio_packs.size();
635       switch (handler.audio_pack_tag) {
636         case kAudioPackAudioChannelFormatIDRef: {
637           handler.adm.audio_packs[idx - 1]
638               .audio_channel_format_id_refs_map.emplace_back(
639                   std::string(text, len), size_t(-1));
640           break;
641         }
642         case kAudioPackLabel: {
643           handler.adm.audio_packs[idx - 1].id = (std::string(text, len));
644           break;
645         }
646         case kPackDefault: {
647           break;
648         }
649         default: {
650           LOG(ERROR) << "Unexpected case";
651         }
652       }
653 
654       // To handle unwanted character like spaces, new lines.
655       handler.audio_pack_tag = kPackDefault;
656       break;
657     }
658     case kAudioChannel: {
659       // Populates audio channel object.
660       idx = handler.adm.audio_channels.size();
661       switch (handler.audio_channel_tag) {
662         case kAudioChannelLabel: {
663           handler.adm.audio_channels[idx - 1].id = (std::string(text, len));
664           break;
665         }
666         case kChannelDefault: {
667           break;
668         }
669         default: {
670           LOG(ERROR) << "Unexpected case";
671         }
672       }
673 
674       // To handle unwanted character like spaces, new lines.
675       handler.audio_channel_tag = kChannelDefault;
676       break;
677     }
678     case kAudioBlock: {
679       // Populates audio block object.
680       idx = handler.adm.audio_channels.size();
681       auto& audio_blocks = handler.adm.audio_channels[idx - 1].audio_blocks;
682       switch (handler.audio_block_tag) {
683         case kX: {
684           UpdateErrorStatusIfFalse(
685               absl::SimpleAtof(absl::string_view(text, len),
686                                &audio_blocks.back().position.x),
687               "position", handler);
688           break;
689         }
690         case kY: {
691           UpdateErrorStatusIfFalse(
692               absl::SimpleAtof(absl::string_view(text, len),
693                                &audio_blocks.back().position.y),
694               "position", handler);
695           break;
696         }
697         case kZ: {
698           UpdateErrorStatusIfFalse(
699               absl::SimpleAtof(absl::string_view(text, len),
700                                &audio_blocks.back().position.z),
701               "position", handler);
702           break;
703         }
704         case kAudioBlockLabel: {
705           audio_blocks.back().id = (std::string(text, len));
706           break;
707         }
708         case kBlockDefault: {
709           break;
710         }
711         default: {
712           LOG(ERROR) << "Unexpected case";
713         }
714       }
715 
716       // To handle unwanted characters like spaces, new lines.
717       handler.audio_block_tag = kBlockDefault;
718       break;
719     }
720     case kElementDefault: {
721       break;
722     }
723     default: {
724       LOG(ERROR) << "Unexpected case";
725     }
726   }
727 }
728 
729 // A function to use with `expat::XML_SetStartElementHandler`. It sets the
730 // handler's parent tag depending on the name attribute.
XMLStartTagHandlerForExpat(void * parser_data,const char * name,const char ** atts)731 void XMLStartTagHandlerForExpat(void* parser_data, const char* name,
732                                 const char** atts) {
733   Handler& handler = *static_cast<Handler*>(parser_data);
734   absl::string_view adm_element(name);
735   if (adm_element == "audioProgramme") {
736     // If the tag 'audioProgramme' is encountered while parsing the axml, create
737     // an instance of AudioProgramme class, populate its attributes and add it
738     // to ADM.
739     handler.parent = kAudioProgramme;
740     AudioProgramme audio_programme;
741     LoudnessMetadata loudness_metadata;
742     AuthoringInformation authoring_information;
743     audio_programme.loudness_metadata = loudness_metadata;
744     audio_programme.authoring_information = authoring_information;
745     for (int32_t i = 0; atts[i]; i += 2) {
746       SetAudioProgrammeValue(absl::string_view(atts[i]),
747                              absl::string_view(atts[i + 1]), audio_programme);
748     }
749     handler.adm.audio_programmes.push_back(audio_programme);
750   } else if (adm_element == "audioContent") {
751     // If the tag 'audioContent' is encountered while parsing the axml, create
752     // an instance of AudioContent class, populate its attributes and add it to
753     // ADM.
754     handler.parent = kAudioContent;
755     AudioContent audio_content;
756     for (int32_t i = 0; atts[i]; i += 2) {
757       SetAudioContentValue(absl::string_view(atts[i]),
758                            absl::string_view(atts[i + 1]), audio_content);
759     }
760     handler.adm.audio_contents.push_back(audio_content);
761   } else if (adm_element == "audioObject") {
762     // If the tag 'audioObject' is encountered while parsing the axml, create an
763     // instance of AudioObject class, populate its attributes and add it to ADM.
764     handler.parent = kAudioObject;
765     AudioObject audio_object;
766     for (int32_t i = 0; atts[i]; i += 2) {
767       SetAudioObjectValue(absl::string_view(atts[i]),
768                           absl::string_view(atts[i + 1]), audio_object,
769                           handler);
770     }
771     handler.adm.audio_objects.push_back(audio_object);
772   } else if (adm_element == "audioPackFormat") {
773     // If the tag 'audioPackFormat' is encountered while parsing the axml,
774     // create an instance of AudioPack class, populate its attributes and add it
775     // to ADM.
776     handler.parent = kAudioPack;
777     AudioPackFormat audio_pack;
778     for (int32_t i = 0; atts[i]; i += 2) {
779       SetAudioPackValue(absl::string_view(atts[i]),
780                         absl::string_view(atts[i + 1]), audio_pack);
781     }
782     handler.adm.audio_packs.push_back(audio_pack);
783   } else if (adm_element == "audioChannelFormat") {
784     // If the tag 'audioChannelFormat' is encountered while parsing the axml,
785     // create an instance of AudioChannel class, populate its attributes and add
786     // it to ADM.
787     handler.parent = kAudioChannel;
788     AudioChannelFormat audio_channel;
789     for (int32_t i = 0; atts[i]; i += 2) {
790       SetAudioChannelValue(absl::string_view(atts[i]),
791                            absl::string_view(atts[i + 1]), audio_channel);
792     }
793     handler.adm.audio_channels.push_back(audio_channel);
794   } else if (adm_element == "audioBlockFormat") {
795     // If the tag 'audioBlockFormat' is encountered while parsing the axml,
796     // create an instance of AudioBlockFormat class, populate its attributes and
797     // add it to ADM.
798     handler.parent = kAudioBlock;
799     AudioBlockFormat audio_block;
800     CartesianPosition position;
801     audio_block.position = position;
802     for (int32_t i = 0; atts[i]; i += 2) {
803       SetAudioBlockValue(absl::string_view(atts[i]),
804                          absl::string_view(atts[i + 1]), audio_block);
805     }
806     handler.adm.audio_channels.back().audio_blocks.push_back(audio_block);
807   } else {
808     SetHandlerTag(adm_element, atts, handler);
809   }
810 }
811 
812 // A function to map each audio pack to their corresponding audio channel
813 // formats. It sets the corresponding indices into a vector of pairs inside each
814 // audio pack instance.
SetChannelIndices(ADM & adm)815 void SetChannelIndices(ADM& adm) {
816   // Iterate over all audio packs
817   for (auto& audio_pack : adm.audio_packs) {
818     for (auto& id_ref_and_index : audio_pack.audio_channel_format_id_refs_map) {
819       const std::string& channel_id_ref = id_ref_and_index.first;
820       auto channel_id =
821           std::find_if(adm.audio_channels.begin(), adm.audio_channels.end(),
822                        [&channel_id_ref](const AudioChannelFormat& channel) {
823                          return channel.id == channel_id_ref;
824                        });
825 
826       if (channel_id != adm.audio_channels.end()) {
827         size_t channel_index =
828             std::distance(adm.audio_channels.begin(), channel_id);
829         id_ref_and_index.second = channel_index;
830       } else {
831         LOG(WARNING) << "Channel ID ref " << channel_id_ref << " not found!";
832       }
833     }
834   }
835 }
836 }  // namespace
837 
ParseXmlToAdm(absl::string_view xml_data,int32_t importance_threshold,AdmFileType file_type)838 absl::StatusOr<ADM> ParseXmlToAdm(absl::string_view xml_data,
839                                   int32_t importance_threshold,
840                                   AdmFileType file_type) {
841   Handler handler;
842 
843   // Creating an XML parser and attaching a handler object to it. Also, parser
844   // is linked with functions that have logic to deal with the start tag of XML
845   // and the character of XML.
846   XML_Parser parser = XML_ParserCreate(nullptr);
847   XML_SetUserData(parser, &handler);
848   handler.adm.file_type = file_type;
849   XML_SetStartElementHandler(parser, XMLStartTagHandlerForExpat);
850   XML_SetCharacterDataHandler(parser, XMLCharacterDataHandlerForExpat);
851 
852   switch (const auto xml_status =
853               XML_Parse(parser, xml_data.data(), xml_data.length(), true)) {
854     case XML_STATUS_OK:
855       SetChannelIndices(handler.adm);
856       ValidateAudioObjects(handler.adm, handler);
857       RemoveLowImportanceAndInvalidAudioObjects(importance_threshold, handler);
858       XML_ParserFree(parser);
859       if (!handler.status.ok()) {
860         return handler.status;
861       }
862       return handler.adm;
863     case XML_STATUS_ERROR:
864       XML_ParserFree(parser);
865       return absl::InvalidArgumentError(
866           absl::StrCat("XML parsing error. XML_Parse() returned ", xml_status));
867     case XML_STATUS_SUSPENDED:
868       XML_ParserFree(parser);
869       return absl::FailedPreconditionError(absl::StrCat(
870           "XML parsing suspended. XML_Parse() returned ", xml_status));
871     default:
872       XML_ParserFree(parser);
873       return absl::UnknownError(absl::StrCat(
874           "XML parsing failed. XML_Parse() returned ", xml_status));
875   }
876 }
877 
878 }  // namespace adm_to_user_metadata
879 }  // namespace iamf_tools
880