• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 The Libphonenumber Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.i18n.phonenumbers;
18 
19 import com.google.i18n.phonenumbers.Phonemetadata.NumberFormat;
20 import com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadata;
21 import com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadataCollection;
22 import com.google.i18n.phonenumbers.Phonemetadata.PhoneNumberDesc;
23 import java.io.File;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.HashSet;
27 import java.util.List;
28 import java.util.Map;
29 import java.util.Set;
30 import java.util.TreeMap;
31 import java.util.TreeSet;
32 import java.util.logging.Level;
33 import java.util.logging.Logger;
34 import java.util.regex.Pattern;
35 import java.util.regex.PatternSyntaxException;
36 import javax.xml.parsers.DocumentBuilder;
37 import javax.xml.parsers.DocumentBuilderFactory;
38 import org.w3c.dom.Document;
39 import org.w3c.dom.Element;
40 import org.w3c.dom.NodeList;
41 
42 /**
43  * Library to build phone number metadata from the XML format.
44  *
45  * @author Shaopeng Jia
46  */
47 public class BuildMetadataFromXml {
48   private static final Logger logger = Logger.getLogger(BuildMetadataFromXml.class.getName());
49 
50   // String constants used to fetch the XML nodes and attributes.
51   private static final String CARRIER_CODE_FORMATTING_RULE = "carrierCodeFormattingRule";
52   private static final String CARRIER_SPECIFIC = "carrierSpecific";
53   private static final String COUNTRY_CODE = "countryCode";
54   private static final String EMERGENCY = "emergency";
55   private static final String EXAMPLE_NUMBER = "exampleNumber";
56   private static final String FIXED_LINE = "fixedLine";
57   private static final String FORMAT = "format";
58   private static final String GENERAL_DESC = "generalDesc";
59   private static final String INTERNATIONAL_PREFIX = "internationalPrefix";
60   private static final String INTL_FORMAT = "intlFormat";
61   private static final String LEADING_DIGITS = "leadingDigits";
62   private static final String MAIN_COUNTRY_FOR_CODE = "mainCountryForCode";
63   private static final String MOBILE = "mobile";
64   private static final String MOBILE_NUMBER_PORTABLE_REGION = "mobileNumberPortableRegion";
65   private static final String NATIONAL_NUMBER_PATTERN = "nationalNumberPattern";
66   private static final String NATIONAL_PREFIX = "nationalPrefix";
67   private static final String NATIONAL_PREFIX_FORMATTING_RULE = "nationalPrefixFormattingRule";
68   private static final String NATIONAL_PREFIX_OPTIONAL_WHEN_FORMATTING =
69       "nationalPrefixOptionalWhenFormatting";
70   private static final String NATIONAL_PREFIX_FOR_PARSING = "nationalPrefixForParsing";
71   private static final String NATIONAL_PREFIX_TRANSFORM_RULE = "nationalPrefixTransformRule";
72   private static final String NO_INTERNATIONAL_DIALLING = "noInternationalDialling";
73   private static final String NUMBER_FORMAT = "numberFormat";
74   private static final String PAGER = "pager";
75   private static final String PATTERN = "pattern";
76   private static final String PERSONAL_NUMBER = "personalNumber";
77   private static final String POSSIBLE_LENGTHS = "possibleLengths";
78   private static final String NATIONAL = "national";
79   private static final String LOCAL_ONLY = "localOnly";
80   private static final String PREFERRED_EXTN_PREFIX = "preferredExtnPrefix";
81   private static final String PREFERRED_INTERNATIONAL_PREFIX = "preferredInternationalPrefix";
82   private static final String PREMIUM_RATE = "premiumRate";
83   private static final String SHARED_COST = "sharedCost";
84   private static final String SHORT_CODE = "shortCode";
85   private static final String SMS_SERVICES = "smsServices";
86   private static final String STANDARD_RATE = "standardRate";
87   private static final String TOLL_FREE = "tollFree";
88   private static final String UAN = "uan";
89   private static final String VOICEMAIL = "voicemail";
90   private static final String VOIP = "voip";
91 
92   private static final Set<String> PHONE_NUMBER_DESCS_WITHOUT_MATCHING_TYPES =
93       new HashSet<String>(Arrays.asList(new String[]{NO_INTERNATIONAL_DIALLING}));
94 
95   // Build the PhoneMetadataCollection from the input XML file.
buildPhoneMetadataCollection(String inputXmlFile, boolean liteBuild, boolean specialBuild)96   public static PhoneMetadataCollection buildPhoneMetadataCollection(String inputXmlFile,
97       boolean liteBuild, boolean specialBuild) throws Exception {
98     DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
99     DocumentBuilder builder = builderFactory.newDocumentBuilder();
100     File xmlFile = new File(inputXmlFile);
101     Document document = builder.parse(xmlFile);
102     // TODO: Look for other uses of these constants and possibly pull them out into a separate
103     // constants file.
104     boolean isShortNumberMetadata = inputXmlFile.contains("ShortNumberMetadata");
105     boolean isAlternateFormatsMetadata = inputXmlFile.contains("PhoneNumberAlternateFormats");
106     return buildPhoneMetadataCollection(document, liteBuild, specialBuild,
107         isShortNumberMetadata, isAlternateFormatsMetadata);
108   }
109 
110   // @VisibleForTesting
buildPhoneMetadataCollection(Document document, boolean liteBuild, boolean specialBuild, boolean isShortNumberMetadata, boolean isAlternateFormatsMetadata)111   static PhoneMetadataCollection buildPhoneMetadataCollection(Document document,
112       boolean liteBuild, boolean specialBuild, boolean isShortNumberMetadata,
113       boolean isAlternateFormatsMetadata) throws Exception {
114     document.getDocumentElement().normalize();
115     Element rootElement = document.getDocumentElement();
116     NodeList territory = rootElement.getElementsByTagName("territory");
117     PhoneMetadataCollection.Builder metadataCollection = PhoneMetadataCollection.newBuilder();
118     int numOfTerritories = territory.getLength();
119     // TODO: Infer filter from a single flag.
120     MetadataFilter metadataFilter = getMetadataFilter(liteBuild, specialBuild);
121     for (int i = 0; i < numOfTerritories; i++) {
122       Element territoryElement = (Element) territory.item(i);
123       String regionCode = "";
124       // For the main metadata file this should always be set, but for other supplementary data
125       // files the country calling code may be all that is needed.
126       if (territoryElement.hasAttribute("id")) {
127         regionCode = territoryElement.getAttribute("id");
128       }
129       PhoneMetadata.Builder metadata = loadCountryMetadata(regionCode, territoryElement,
130           isShortNumberMetadata, isAlternateFormatsMetadata);
131       metadataFilter.filterMetadata(metadata);
132       metadataCollection.addMetadata(metadata.build());
133     }
134     return metadataCollection.build();
135   }
136 
137   // Build a mapping from a country calling code to the region codes which denote the country/region
138   // represented by that country code. In the case of multiple countries sharing a calling code,
139   // such as the NANPA countries, the one indicated with "isMainCountryForCode" in the metadata
140   // should be first.
buildCountryCodeToRegionCodeMap( PhoneMetadataCollection metadataCollection)141   public static Map<Integer, List<String>> buildCountryCodeToRegionCodeMap(
142       PhoneMetadataCollection metadataCollection) {
143     Map<Integer, List<String>> countryCodeToRegionCodeMap = new TreeMap<Integer, List<String>>();
144     for (PhoneMetadata metadata : metadataCollection.getMetadataList()) {
145       String regionCode = metadata.getId();
146       int countryCode = metadata.getCountryCode();
147       if (countryCodeToRegionCodeMap.containsKey(countryCode)) {
148         if (metadata.getMainCountryForCode()) {
149           countryCodeToRegionCodeMap.get(countryCode).add(0, regionCode);
150         } else {
151           countryCodeToRegionCodeMap.get(countryCode).add(regionCode);
152         }
153       } else {
154         // For most countries, there will be only one region code for the country calling code.
155         List<String> listWithRegionCode = new ArrayList<String>(1);
156         if (!regionCode.equals("")) {  // For alternate formats, there are no region codes at all.
157           listWithRegionCode.add(regionCode);
158         }
159         countryCodeToRegionCodeMap.put(countryCode, listWithRegionCode);
160       }
161     }
162     return countryCodeToRegionCodeMap;
163   }
164 
validateRE(String regex)165   private static String validateRE(String regex) {
166     return validateRE(regex, false);
167   }
168 
169   // @VisibleForTesting
validateRE(String regex, boolean removeWhitespace)170   static String validateRE(String regex, boolean removeWhitespace) {
171     // Removes all the whitespace and newline from the regexp. Not using pattern compile options to
172     // make it work across programming languages.
173     String compressedRegex = removeWhitespace ? regex.replaceAll("\\s", "") : regex;
174     Pattern.compile(compressedRegex);
175     // We don't ever expect to see | followed by a ) in our metadata - this would be an indication
176     // of a bug. If one wants to make something optional, we prefer ? to using an empty group.
177     int errorIndex = compressedRegex.indexOf("|)");
178     if (errorIndex >= 0) {
179       logger.log(Level.SEVERE, "Error with original regex: " + regex
180           + "\n| should not be followed directly by ) in phone number regular expressions.");
181       throw new PatternSyntaxException("| followed by )", compressedRegex, errorIndex);
182     }
183     // return the regex if it is of correct syntax, i.e. compile did not fail with a
184     // PatternSyntaxException.
185     return compressedRegex;
186   }
187 
188   /**
189    * Returns the national prefix of the provided country element.
190    */
191   // @VisibleForTesting
getNationalPrefix(Element element)192   static String getNationalPrefix(Element element) {
193     return element.hasAttribute(NATIONAL_PREFIX) ? element.getAttribute(NATIONAL_PREFIX) : "";
194   }
195 
196   // @VisibleForTesting
loadTerritoryTagMetadata(String regionCode, Element element, String nationalPrefix)197   static PhoneMetadata.Builder loadTerritoryTagMetadata(String regionCode, Element element,
198                                                         String nationalPrefix) {
199     PhoneMetadata.Builder metadata = PhoneMetadata.newBuilder();
200     metadata.setId(regionCode);
201     if (element.hasAttribute(COUNTRY_CODE)) {
202       metadata.setCountryCode(Integer.parseInt(element.getAttribute(COUNTRY_CODE)));
203     }
204     if (element.hasAttribute(LEADING_DIGITS)) {
205       metadata.setLeadingDigits(validateRE(element.getAttribute(LEADING_DIGITS)));
206     }
207     if (element.hasAttribute(INTERNATIONAL_PREFIX)) {
208       metadata.setInternationalPrefix(validateRE(element.getAttribute(INTERNATIONAL_PREFIX)));
209     }
210     if (element.hasAttribute(PREFERRED_INTERNATIONAL_PREFIX)) {
211       metadata.setPreferredInternationalPrefix(
212           element.getAttribute(PREFERRED_INTERNATIONAL_PREFIX));
213     }
214     if (element.hasAttribute(NATIONAL_PREFIX_FOR_PARSING)) {
215       metadata.setNationalPrefixForParsing(
216           validateRE(element.getAttribute(NATIONAL_PREFIX_FOR_PARSING), true));
217       if (element.hasAttribute(NATIONAL_PREFIX_TRANSFORM_RULE)) {
218         metadata.setNationalPrefixTransformRule(
219             validateRE(element.getAttribute(NATIONAL_PREFIX_TRANSFORM_RULE)));
220       }
221     }
222     if (!nationalPrefix.isEmpty()) {
223       metadata.setNationalPrefix(nationalPrefix);
224       if (!metadata.hasNationalPrefixForParsing()) {
225         metadata.setNationalPrefixForParsing(nationalPrefix);
226       }
227     }
228     if (element.hasAttribute(PREFERRED_EXTN_PREFIX)) {
229       metadata.setPreferredExtnPrefix(element.getAttribute(PREFERRED_EXTN_PREFIX));
230     }
231     if (element.hasAttribute(MAIN_COUNTRY_FOR_CODE)) {
232       metadata.setMainCountryForCode(true);
233     }
234     if (element.hasAttribute(MOBILE_NUMBER_PORTABLE_REGION)) {
235       metadata.setMobileNumberPortableRegion(true);
236     }
237     return metadata;
238   }
239 
240   /**
241    * Extracts the pattern for international format. If there is no intlFormat, default to using the
242    * national format. If the intlFormat is set to "NA" the intlFormat should be ignored.
243    *
244    * @throws  RuntimeException if multiple intlFormats have been encountered.
245    * @return  whether an international number format is defined.
246    */
247   // @VisibleForTesting
loadInternationalFormat(PhoneMetadata.Builder metadata, Element numberFormatElement, NumberFormat nationalFormat)248   static boolean loadInternationalFormat(PhoneMetadata.Builder metadata,
249                                          Element numberFormatElement,
250                                          NumberFormat nationalFormat) {
251     NumberFormat.Builder intlFormat = NumberFormat.newBuilder();
252     NodeList intlFormatPattern = numberFormatElement.getElementsByTagName(INTL_FORMAT);
253     boolean hasExplicitIntlFormatDefined = false;
254 
255     if (intlFormatPattern.getLength() > 1) {
256       logger.log(Level.SEVERE,
257           "A maximum of one intlFormat pattern for a numberFormat element should be defined.");
258       String countryId = metadata.getId().length() > 0 ? metadata.getId()
259           : Integer.toString(metadata.getCountryCode());
260       throw new RuntimeException("Invalid number of intlFormat patterns for country: " + countryId);
261     } else if (intlFormatPattern.getLength() == 0) {
262       // Default to use the same as the national pattern if none is defined.
263       intlFormat.mergeFrom(nationalFormat);
264     } else {
265       intlFormat.setPattern(numberFormatElement.getAttribute(PATTERN));
266       setLeadingDigitsPatterns(numberFormatElement, intlFormat);
267       String intlFormatPatternValue = intlFormatPattern.item(0).getFirstChild().getNodeValue();
268       if (!intlFormatPatternValue.equals("NA")) {
269         intlFormat.setFormat(intlFormatPatternValue);
270       }
271       hasExplicitIntlFormatDefined = true;
272     }
273 
274     if (intlFormat.hasFormat()) {
275       metadata.addIntlNumberFormat(intlFormat.build());
276     }
277     return hasExplicitIntlFormatDefined;
278   }
279 
280   /**
281    * Extracts the pattern for the national format.
282    *
283    * @throws  RuntimeException if multiple or no formats have been encountered.
284    */
285   // @VisibleForTesting
loadNationalFormat(PhoneMetadata.Builder metadata, Element numberFormatElement, NumberFormat.Builder format)286   static void loadNationalFormat(PhoneMetadata.Builder metadata, Element numberFormatElement,
287                                  NumberFormat.Builder format) {
288     setLeadingDigitsPatterns(numberFormatElement, format);
289     format.setPattern(validateRE(numberFormatElement.getAttribute(PATTERN)));
290 
291     NodeList formatPattern = numberFormatElement.getElementsByTagName(FORMAT);
292     int numFormatPatterns = formatPattern.getLength();
293     if (numFormatPatterns != 1) {
294       logger.log(Level.SEVERE, "One format pattern for a numberFormat element should be defined.");
295       String countryId = metadata.getId().length() > 0 ? metadata.getId()
296           : Integer.toString(metadata.getCountryCode());
297       throw new RuntimeException("Invalid number of format patterns (" + numFormatPatterns
298           + ") for country: " + countryId);
299     }
300     format.setFormat(formatPattern.item(0).getFirstChild().getNodeValue());
301   }
302 
303   /**
304    * Extracts the available formats from the provided DOM element. If it does not contain any
305    * nationalPrefixFormattingRule, the one passed-in is retained; similarly for
306    * nationalPrefixOptionalWhenFormatting. The nationalPrefix, nationalPrefixFormattingRule and
307    * nationalPrefixOptionalWhenFormatting values are provided from the parent (territory) element.
308    */
309   // @VisibleForTesting
loadAvailableFormats(PhoneMetadata.Builder metadata, Element element, String nationalPrefix, String nationalPrefixFormattingRule, boolean nationalPrefixOptionalWhenFormatting)310   static void loadAvailableFormats(PhoneMetadata.Builder metadata,
311                                    Element element, String nationalPrefix,
312                                    String nationalPrefixFormattingRule,
313                                    boolean nationalPrefixOptionalWhenFormatting) {
314     String carrierCodeFormattingRule = "";
315     if (element.hasAttribute(CARRIER_CODE_FORMATTING_RULE)) {
316       carrierCodeFormattingRule = validateRE(
317           getDomesticCarrierCodeFormattingRuleFromElement(element, nationalPrefix));
318     }
319     NodeList numberFormatElements = element.getElementsByTagName(NUMBER_FORMAT);
320     boolean hasExplicitIntlFormatDefined = false;
321 
322     int numOfFormatElements = numberFormatElements.getLength();
323     if (numOfFormatElements > 0) {
324       for (int i = 0; i < numOfFormatElements; i++) {
325         Element numberFormatElement = (Element) numberFormatElements.item(i);
326         NumberFormat.Builder format = NumberFormat.newBuilder();
327 
328         if (numberFormatElement.hasAttribute(NATIONAL_PREFIX_FORMATTING_RULE)) {
329           format.setNationalPrefixFormattingRule(
330               getNationalPrefixFormattingRuleFromElement(numberFormatElement, nationalPrefix));
331         } else if (!nationalPrefixFormattingRule.equals("")) {
332           format.setNationalPrefixFormattingRule(nationalPrefixFormattingRule);
333         }
334         if (numberFormatElement.hasAttribute(NATIONAL_PREFIX_OPTIONAL_WHEN_FORMATTING)) {
335           format.setNationalPrefixOptionalWhenFormatting(
336               Boolean.valueOf(numberFormatElement.getAttribute(
337                   NATIONAL_PREFIX_OPTIONAL_WHEN_FORMATTING)));
338         } else if (format.getNationalPrefixOptionalWhenFormatting()
339             != nationalPrefixOptionalWhenFormatting) {
340           // Inherit from the parent field if it is not already the same as the default.
341           format.setNationalPrefixOptionalWhenFormatting(nationalPrefixOptionalWhenFormatting);
342         }
343         if (numberFormatElement.hasAttribute(CARRIER_CODE_FORMATTING_RULE)) {
344           format.setDomesticCarrierCodeFormattingRule(validateRE(
345               getDomesticCarrierCodeFormattingRuleFromElement(numberFormatElement,
346                                                               nationalPrefix)));
347         } else if (!carrierCodeFormattingRule.equals("")) {
348           format.setDomesticCarrierCodeFormattingRule(carrierCodeFormattingRule);
349         }
350         loadNationalFormat(metadata, numberFormatElement, format);
351         metadata.addNumberFormat(format);
352 
353         if (loadInternationalFormat(metadata, numberFormatElement, format.build())) {
354           hasExplicitIntlFormatDefined = true;
355         }
356       }
357       // Only a small number of regions need to specify the intlFormats in the xml. For the majority
358       // of countries the intlNumberFormat metadata is an exact copy of the national NumberFormat
359       // metadata. To minimize the size of the metadata file, we only keep intlNumberFormats that
360       // actually differ in some way to the national formats.
361       if (!hasExplicitIntlFormatDefined) {
362         metadata.clearIntlNumberFormat();
363       }
364     }
365   }
366 
367   // @VisibleForTesting
setLeadingDigitsPatterns(Element numberFormatElement, NumberFormat.Builder format)368   static void setLeadingDigitsPatterns(Element numberFormatElement, NumberFormat.Builder format) {
369     NodeList leadingDigitsPatternNodes = numberFormatElement.getElementsByTagName(LEADING_DIGITS);
370     int numOfLeadingDigitsPatterns = leadingDigitsPatternNodes.getLength();
371     if (numOfLeadingDigitsPatterns > 0) {
372       for (int i = 0; i < numOfLeadingDigitsPatterns; i++) {
373         format.addLeadingDigitsPattern(
374             validateRE((leadingDigitsPatternNodes.item(i)).getFirstChild().getNodeValue(), true));
375       }
376     }
377   }
378 
379   // @VisibleForTesting
getNationalPrefixFormattingRuleFromElement(Element element, String nationalPrefix)380   static String getNationalPrefixFormattingRuleFromElement(Element element,
381                                                            String nationalPrefix) {
382     String nationalPrefixFormattingRule = element.getAttribute(NATIONAL_PREFIX_FORMATTING_RULE);
383     // Replace $NP with national prefix and $FG with the first group ($1).
384     nationalPrefixFormattingRule =
385         nationalPrefixFormattingRule.replaceFirst("\\$NP", nationalPrefix)
386             .replaceFirst("\\$FG", "\\$1");
387     return nationalPrefixFormattingRule;
388   }
389 
390   // @VisibleForTesting
getDomesticCarrierCodeFormattingRuleFromElement(Element element, String nationalPrefix)391   static String getDomesticCarrierCodeFormattingRuleFromElement(Element element,
392                                                                 String nationalPrefix) {
393     String carrierCodeFormattingRule = element.getAttribute(CARRIER_CODE_FORMATTING_RULE);
394     // Replace $FG with the first group ($1) and $NP with the national prefix.
395     carrierCodeFormattingRule = carrierCodeFormattingRule.replaceFirst("\\$FG", "\\$1")
396         .replaceFirst("\\$NP", nationalPrefix);
397     return carrierCodeFormattingRule;
398   }
399 
400   /**
401    * Checks if the possible lengths provided as a sorted set are equal to the possible lengths
402    * stored already in the description pattern. Note that possibleLengths may be empty but must not
403    * be null, and the PhoneNumberDesc passed in should also not be null.
404    */
arePossibleLengthsEqual(TreeSet<Integer> possibleLengths, PhoneNumberDesc desc)405   private static boolean arePossibleLengthsEqual(TreeSet<Integer> possibleLengths,
406       PhoneNumberDesc desc) {
407     if (possibleLengths.size() != desc.getPossibleLengthCount()) {
408       return false;
409     }
410     // Note that both should be sorted already, and we know they are the same length.
411     int i = 0;
412     for (Integer length : possibleLengths) {
413       if (length != desc.getPossibleLength(i)) {
414         return false;
415       }
416       i++;
417     }
418     return true;
419   }
420 
421   /**
422    * Processes a phone number description element from the XML file and returns it as a
423    * PhoneNumberDesc. If the description element is a fixed line or mobile number, the parent
424    * description will be used to fill in the whole element if necessary, or any components that are
425    * missing. For all other types, the parent description will only be used to fill in missing
426    * components if the type has a partial definition. For example, if no "tollFree" element exists,
427    * we assume there are no toll free numbers for that locale, and return a phone number description
428    * with no national number data and [-1] for the possible lengths. Note that the parent
429    * description must therefore already be processed before this method is called on any child
430    * elements.
431    *
432    * @param parentDesc  a generic phone number description that will be used to fill in missing
433    *     parts of the description, or null if this is the root node. This must be processed before
434    *     this is run on any child elements.
435    * @param countryElement  the XML element representing all the country information
436    * @param numberType  the name of the number type, corresponding to the appropriate tag in the XML
437    *     file with information about that type
438    * @return  complete description of that phone number type
439    */
440   // @VisibleForTesting
processPhoneNumberDescElement(PhoneNumberDesc.Builder parentDesc, Element countryElement, String numberType)441   static PhoneNumberDesc.Builder processPhoneNumberDescElement(PhoneNumberDesc.Builder parentDesc,
442                                                                Element countryElement,
443                                                                String numberType) {
444     NodeList phoneNumberDescList = countryElement.getElementsByTagName(numberType);
445     PhoneNumberDesc.Builder numberDesc = PhoneNumberDesc.newBuilder();
446     if (phoneNumberDescList.getLength() == 0) {
447       // -1 will never match a possible phone number length, so is safe to use to ensure this never
448       // matches. We don't leave it empty, since for compression reasons, we use the empty list to
449       // mean that the generalDesc possible lengths apply.
450       numberDesc.addPossibleLength(-1);
451       return numberDesc;
452     }
453     if (phoneNumberDescList.getLength() > 0) {
454       if (phoneNumberDescList.getLength() > 1) {
455         throw new RuntimeException(
456             String.format("Multiple elements with type %s found.", numberType));
457       }
458       Element element = (Element) phoneNumberDescList.item(0);
459       if (parentDesc != null) {
460         // New way of handling possible number lengths. We don't do this for the general
461         // description, since these tags won't be present; instead we will calculate its values
462         // based on the values for all the other number type descriptions (see
463         // setPossibleLengthsGeneralDesc).
464         TreeSet<Integer> lengths = new TreeSet<Integer>();
465         TreeSet<Integer> localOnlyLengths = new TreeSet<Integer>();
466         populatePossibleLengthSets(element, lengths, localOnlyLengths);
467         setPossibleLengths(lengths, localOnlyLengths, parentDesc.build(), numberDesc);
468       }
469 
470       NodeList validPattern = element.getElementsByTagName(NATIONAL_NUMBER_PATTERN);
471       if (validPattern.getLength() > 0) {
472         numberDesc.setNationalNumberPattern(
473             validateRE(validPattern.item(0).getFirstChild().getNodeValue(), true));
474       }
475 
476       NodeList exampleNumber = element.getElementsByTagName(EXAMPLE_NUMBER);
477       if (exampleNumber.getLength() > 0) {
478         numberDesc.setExampleNumber(exampleNumber.item(0).getFirstChild().getNodeValue());
479       }
480     }
481     return numberDesc;
482   }
483 
484   // @VisibleForTesting
setRelevantDescPatterns(PhoneMetadata.Builder metadata, Element element, boolean isShortNumberMetadata)485   static void setRelevantDescPatterns(PhoneMetadata.Builder metadata, Element element,
486       boolean isShortNumberMetadata) {
487     PhoneNumberDesc.Builder generalDesc = processPhoneNumberDescElement(null, element,
488         GENERAL_DESC);
489     // Calculate the possible lengths for the general description. This will be based on the
490     // possible lengths of the child elements.
491     setPossibleLengthsGeneralDesc(generalDesc, metadata.getId(), element, isShortNumberMetadata);
492     metadata.setGeneralDesc(generalDesc);
493 
494     if (!isShortNumberMetadata) {
495       // Set fields used by regular length phone numbers.
496       metadata.setFixedLine(processPhoneNumberDescElement(generalDesc, element, FIXED_LINE));
497       metadata.setMobile(processPhoneNumberDescElement(generalDesc, element, MOBILE));
498       metadata.setSharedCost(processPhoneNumberDescElement(generalDesc, element, SHARED_COST));
499       metadata.setVoip(processPhoneNumberDescElement(generalDesc, element, VOIP));
500       metadata.setPersonalNumber(processPhoneNumberDescElement(generalDesc, element,
501           PERSONAL_NUMBER));
502       metadata.setPager(processPhoneNumberDescElement(generalDesc, element, PAGER));
503       metadata.setUan(processPhoneNumberDescElement(generalDesc, element, UAN));
504       metadata.setVoicemail(processPhoneNumberDescElement(generalDesc, element, VOICEMAIL));
505       metadata.setNoInternationalDialling(processPhoneNumberDescElement(generalDesc, element,
506           NO_INTERNATIONAL_DIALLING));
507       boolean mobileAndFixedAreSame = metadata.getMobile().getNationalNumberPattern()
508           .equals(metadata.getFixedLine().getNationalNumberPattern());
509       if (metadata.getSameMobileAndFixedLinePattern() != mobileAndFixedAreSame) {
510         // Set this if it is not the same as the default.
511         metadata.setSameMobileAndFixedLinePattern(mobileAndFixedAreSame);
512       }
513       metadata.setTollFree(processPhoneNumberDescElement(generalDesc, element, TOLL_FREE));
514       metadata.setPremiumRate(processPhoneNumberDescElement(generalDesc, element, PREMIUM_RATE));
515     } else {
516       // Set fields used by short numbers.
517       metadata.setStandardRate(processPhoneNumberDescElement(generalDesc, element, STANDARD_RATE));
518       metadata.setShortCode(processPhoneNumberDescElement(generalDesc, element, SHORT_CODE));
519       metadata.setCarrierSpecific(processPhoneNumberDescElement(generalDesc, element,
520           CARRIER_SPECIFIC));
521       metadata.setEmergency(processPhoneNumberDescElement(generalDesc, element, EMERGENCY));
522       metadata.setTollFree(processPhoneNumberDescElement(generalDesc, element, TOLL_FREE));
523       metadata.setPremiumRate(processPhoneNumberDescElement(generalDesc, element, PREMIUM_RATE));
524       metadata.setSmsServices(processPhoneNumberDescElement(generalDesc, element, SMS_SERVICES));
525     }
526   }
527 
528   /**
529    * Parses a possible length string into a set of the integers that are covered.
530    *
531    * @param possibleLengthString  a string specifying the possible lengths of phone numbers. Follows
532    *     this syntax: ranges or elements are separated by commas, and ranges are specified in
533    *     [min-max] notation, inclusive. For example, [3-5],7,9,[11-14] should be parsed to
534    *     3,4,5,7,9,11,12,13,14.
535    */
parsePossibleLengthStringToSet(String possibleLengthString)536   private static Set<Integer> parsePossibleLengthStringToSet(String possibleLengthString) {
537     if (possibleLengthString.length() == 0) {
538       throw new RuntimeException("Empty possibleLength string found.");
539     }
540     String[] lengths = possibleLengthString.split(",");
541     Set<Integer> lengthSet = new TreeSet<Integer>();
542     for (int i = 0; i < lengths.length; i++) {
543       String lengthSubstring = lengths[i];
544       if (lengthSubstring.length() == 0) {
545         throw new RuntimeException(String.format("Leading, trailing or adjacent commas in possible "
546             + "length string %s, these should only separate numbers or ranges.",
547             possibleLengthString));
548       } else if (lengthSubstring.charAt(0) == '[') {
549         if (lengthSubstring.charAt(lengthSubstring.length() - 1) != ']') {
550           throw new RuntimeException(String.format("Missing end of range character in possible "
551               + "length string %s.", possibleLengthString));
552         }
553         // Strip the leading and trailing [], and split on the -.
554         String[] minMax = lengthSubstring.substring(1, lengthSubstring.length() - 1).split("-");
555         if (minMax.length != 2) {
556           throw new RuntimeException(String.format("Ranges must have exactly one - character: "
557               + "missing for %s.", possibleLengthString));
558         }
559         int min = Integer.parseInt(minMax[0]);
560         int max = Integer.parseInt(minMax[1]);
561         // We don't even accept [6-7] since we prefer the shorter 6,7 variant; for a range to be in
562         // use the hyphen needs to replace at least one digit.
563         if (max - min < 2) {
564           throw new RuntimeException(String.format("The first number in a range should be two or "
565               + "more digits lower than the second. Culprit possibleLength string: %s",
566               possibleLengthString));
567         }
568         for (int j = min; j <= max; j++) {
569           if (!lengthSet.add(j)) {
570             throw new RuntimeException(String.format("Duplicate length element found (%d) in "
571                 + "possibleLength string %s", j, possibleLengthString));
572           }
573         }
574       } else {
575         int length = Integer.parseInt(lengthSubstring);
576         if (!lengthSet.add(length)) {
577             throw new RuntimeException(String.format("Duplicate length element found (%d) in "
578                 + "possibleLength string %s", length, possibleLengthString));
579           }
580       }
581     }
582     return lengthSet;
583   }
584 
585   /**
586    * Reads the possible lengths present in the metadata and splits them into two sets: one for
587    * full-length numbers, one for local numbers.
588    *
589    * @param data  one or more phone number descriptions, represented as XML nodes
590    * @param lengths  a set to which to add possible lengths of full phone numbers
591    * @param localOnlyLengths  a set to which to add possible lengths of phone numbers only diallable
592    *     locally (e.g. within a province)
593    */
populatePossibleLengthSets(Element data, TreeSet<Integer> lengths, TreeSet<Integer> localOnlyLengths)594   private static void populatePossibleLengthSets(Element data, TreeSet<Integer> lengths,
595       TreeSet<Integer> localOnlyLengths) {
596     NodeList possibleLengths = data.getElementsByTagName(POSSIBLE_LENGTHS);
597     for (int i = 0; i < possibleLengths.getLength(); i++) {
598       Element element = (Element) possibleLengths.item(i);
599       String nationalLengths = element.getAttribute(NATIONAL);
600       // We don't add to the phone metadata yet, since we want to sort length elements found under
601       // different nodes first, make sure there are no duplicates between them and that the
602       // localOnly lengths don't overlap with the others.
603       Set<Integer> thisElementLengths = parsePossibleLengthStringToSet(nationalLengths);
604       if (element.hasAttribute(LOCAL_ONLY)) {
605         String localLengths = element.getAttribute(LOCAL_ONLY);
606         Set<Integer> thisElementLocalOnlyLengths = parsePossibleLengthStringToSet(localLengths);
607         Set<Integer> intersection = new HashSet<Integer>(thisElementLengths);
608         intersection.retainAll(thisElementLocalOnlyLengths);
609         if (!intersection.isEmpty()) {
610           throw new RuntimeException(String.format(
611               "Possible length(s) found specified as a normal and local-only length: %s",
612               intersection));
613         }
614         // We check again when we set these lengths on the metadata itself in setPossibleLengths
615         // that the elements in localOnly are not also in lengths. For e.g. the generalDesc, it
616         // might have a local-only length for one type that is a normal length for another type. We
617         // don't consider this an error, but we do want to remove the local-only lengths.
618         localOnlyLengths.addAll(thisElementLocalOnlyLengths);
619       }
620       // It is okay if at this time we have duplicates, because the same length might be possible
621       // for e.g. fixed-line and for mobile numbers, and this method operates potentially on
622       // multiple phoneNumberDesc XML elements.
623       lengths.addAll(thisElementLengths);
624     }
625   }
626 
627   /**
628    * Sets possible lengths in the general description, derived from certain child elements.
629    */
630   // @VisibleForTesting
setPossibleLengthsGeneralDesc(PhoneNumberDesc.Builder generalDesc, String metadataId, Element data, boolean isShortNumberMetadata)631   static void setPossibleLengthsGeneralDesc(PhoneNumberDesc.Builder generalDesc, String metadataId,
632       Element data, boolean isShortNumberMetadata) {
633     TreeSet<Integer> lengths = new TreeSet<Integer>();
634     TreeSet<Integer> localOnlyLengths = new TreeSet<Integer>();
635     // The general description node should *always* be present if metadata for other types is
636     // present, aside from in some unit tests.
637     // (However, for e.g. formatting metadata in PhoneNumberAlternateFormats, no PhoneNumberDesc
638     // elements are present).
639     NodeList generalDescNodes = data.getElementsByTagName(GENERAL_DESC);
640     if (generalDescNodes.getLength() > 0) {
641       Element generalDescNode = (Element) generalDescNodes.item(0);
642       populatePossibleLengthSets(generalDescNode, lengths, localOnlyLengths);
643       if (!lengths.isEmpty() || !localOnlyLengths.isEmpty()) {
644         // We shouldn't have anything specified at the "general desc" level: we are going to
645         // calculate this ourselves from child elements.
646         throw new RuntimeException(String.format("Found possible lengths specified at general "
647             + "desc: this should be derived from child elements. Affected country: %s",
648             metadataId));
649       }
650     }
651     if (!isShortNumberMetadata) {
652       // Make a copy here since we want to remove some nodes, but we don't want to do that on our
653       // actual data.
654       Element allDescData = (Element) data.cloneNode(true /* deep copy */);
655       for (String tag : PHONE_NUMBER_DESCS_WITHOUT_MATCHING_TYPES) {
656         NodeList nodesToRemove = allDescData.getElementsByTagName(tag);
657         if (nodesToRemove.getLength() > 0) {
658           // We check when we process phone number descriptions that there are only one of each
659           // type, so this is safe to do.
660           allDescData.removeChild(nodesToRemove.item(0));
661         }
662       }
663       populatePossibleLengthSets(allDescData, lengths, localOnlyLengths);
664     } else {
665       // For short number metadata, we want to copy the lengths from the "short code" section only.
666       // This is because it's the more detailed validation pattern, it's not a sub-type of short
667       // codes. The other lengths will be checked later to see that they are a sub-set of these
668       // possible lengths.
669       NodeList shortCodeDescList = data.getElementsByTagName(SHORT_CODE);
670       if (shortCodeDescList.getLength() > 0) {
671         Element shortCodeDesc = (Element) shortCodeDescList.item(0);
672         populatePossibleLengthSets(shortCodeDesc, lengths, localOnlyLengths);
673       }
674       if (localOnlyLengths.size() > 0) {
675         throw new RuntimeException("Found local-only lengths in short-number metadata");
676       }
677     }
678     setPossibleLengths(lengths, localOnlyLengths, null, generalDesc);
679   }
680 
681   /**
682    * Sets the possible length fields in the metadata from the sets of data passed in. Checks that
683    * the length is covered by the "parent" phone number description element if one is present, and
684    * if the lengths are exactly the same as this, they are not filled in for efficiency reasons.
685    *
686    * @param parentDesc  the "general description" element or null if desc is the generalDesc itself
687    * @param desc  the PhoneNumberDesc object that we are going to set lengths for
688    */
setPossibleLengths(TreeSet<Integer> lengths, TreeSet<Integer> localOnlyLengths, PhoneNumberDesc parentDesc, PhoneNumberDesc.Builder desc)689   private static void setPossibleLengths(TreeSet<Integer> lengths,
690       TreeSet<Integer> localOnlyLengths, PhoneNumberDesc parentDesc, PhoneNumberDesc.Builder desc) {
691     // We clear these fields since the metadata tends to inherit from the parent element for other
692     // fields (via a mergeFrom).
693     desc.clearPossibleLength();
694     desc.clearPossibleLengthLocalOnly();
695     // Only add the lengths to this sub-type if they aren't exactly the same as the possible
696     // lengths in the general desc (for metadata size reasons).
697     if (parentDesc == null || !arePossibleLengthsEqual(lengths, parentDesc)) {
698       for (Integer length : lengths) {
699         if (parentDesc == null || parentDesc.getPossibleLengthList().contains(length)) {
700           desc.addPossibleLength(length);
701         } else {
702           // We shouldn't have possible lengths defined in a child element that are not covered by
703           // the general description. We check this here even though the general description is
704           // derived from child elements because it is only derived from a subset, and we need to
705           // ensure *all* child elements have a valid possible length.
706           throw new RuntimeException(String.format(
707               "Out-of-range possible length found (%d), parent lengths %s.",
708               length, parentDesc.getPossibleLengthList()));
709         }
710       }
711     }
712     // We check that the local-only length isn't also a normal possible length (only relevant for
713     // the general-desc, since within elements such as fixed-line we would throw an exception if we
714     // saw this) before adding it to the collection of possible local-only lengths.
715     for (Integer length : localOnlyLengths) {
716       if (!lengths.contains(length)) {
717         // We check it is covered by either of the possible length sets of the parent
718         // PhoneNumberDesc, because for example 7 might be a valid localOnly length for mobile, but
719         // a valid national length for fixedLine, so the generalDesc would have the 7 removed from
720         // localOnly.
721         if (parentDesc == null || parentDesc.getPossibleLengthLocalOnlyList().contains(length)
722           || parentDesc.getPossibleLengthList().contains(length)) {
723           desc.addPossibleLengthLocalOnly(length);
724         } else {
725           throw new RuntimeException(String.format(
726               "Out-of-range local-only possible length found (%d), parent length %s.",
727               length, parentDesc.getPossibleLengthLocalOnlyList()));
728         }
729       }
730     }
731   }
732 
733   // @VisibleForTesting
loadCountryMetadata(String regionCode, Element element, boolean isShortNumberMetadata, boolean isAlternateFormatsMetadata)734   static PhoneMetadata.Builder loadCountryMetadata(String regionCode,
735       Element element,
736       boolean isShortNumberMetadata,
737       boolean isAlternateFormatsMetadata) {
738     String nationalPrefix = getNationalPrefix(element);
739     PhoneMetadata.Builder metadata = loadTerritoryTagMetadata(regionCode, element, nationalPrefix);
740     String nationalPrefixFormattingRule =
741         getNationalPrefixFormattingRuleFromElement(element, nationalPrefix);
742     loadAvailableFormats(metadata, element, nationalPrefix,
743                          nationalPrefixFormattingRule,
744                          element.hasAttribute(NATIONAL_PREFIX_OPTIONAL_WHEN_FORMATTING));
745     if (!isAlternateFormatsMetadata) {
746       // The alternate formats metadata does not need most of the patterns to be set.
747       setRelevantDescPatterns(metadata, element, isShortNumberMetadata);
748     }
749     return metadata;
750   }
751 
752   /**
753    * Processes the custom build flags and gets a {@code MetadataFilter} which may be used to
754    * filter {@code PhoneMetadata} objects. Incompatible flag combinations throw RuntimeException.
755    *
756    * @param liteBuild  The liteBuild flag value as given by the command-line
757    * @param specialBuild  The specialBuild flag value as given by the command-line
758    */
759   // @VisibleForTesting
getMetadataFilter(boolean liteBuild, boolean specialBuild)760   static MetadataFilter getMetadataFilter(boolean liteBuild, boolean specialBuild) {
761     if (specialBuild) {
762       if (liteBuild) {
763         throw new RuntimeException("liteBuild and specialBuild may not both be set");
764       }
765       return MetadataFilter.forSpecialBuild();
766     }
767     if (liteBuild) {
768       return MetadataFilter.forLiteBuild();
769     }
770     return MetadataFilter.emptyFilter();
771   }
772 }
773