1 /* 2 * Copyright (C) 2009 The Libphonenumber Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.i18n.phonenumbers; 18 19 import com.google.i18n.phonenumbers.Phonemetadata.NumberFormat; 20 import com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadata; 21 import com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadataCollection; 22 import com.google.i18n.phonenumbers.Phonemetadata.PhoneNumberDesc; 23 import java.io.File; 24 import java.util.ArrayList; 25 import java.util.Arrays; 26 import java.util.HashSet; 27 import java.util.List; 28 import java.util.Map; 29 import java.util.Set; 30 import java.util.TreeMap; 31 import java.util.TreeSet; 32 import java.util.logging.Level; 33 import java.util.logging.Logger; 34 import java.util.regex.Pattern; 35 import java.util.regex.PatternSyntaxException; 36 import javax.xml.parsers.DocumentBuilder; 37 import javax.xml.parsers.DocumentBuilderFactory; 38 import org.w3c.dom.Document; 39 import org.w3c.dom.Element; 40 import org.w3c.dom.NodeList; 41 42 /** 43 * Library to build phone number metadata from the XML format. 44 * 45 * @author Shaopeng Jia 46 */ 47 public class BuildMetadataFromXml { 48 private static final Logger logger = Logger.getLogger(BuildMetadataFromXml.class.getName()); 49 50 // String constants used to fetch the XML nodes and attributes. 51 private static final String CARRIER_CODE_FORMATTING_RULE = "carrierCodeFormattingRule"; 52 private static final String CARRIER_SPECIFIC = "carrierSpecific"; 53 private static final String COUNTRY_CODE = "countryCode"; 54 private static final String EMERGENCY = "emergency"; 55 private static final String EXAMPLE_NUMBER = "exampleNumber"; 56 private static final String FIXED_LINE = "fixedLine"; 57 private static final String FORMAT = "format"; 58 private static final String GENERAL_DESC = "generalDesc"; 59 private static final String INTERNATIONAL_PREFIX = "internationalPrefix"; 60 private static final String INTL_FORMAT = "intlFormat"; 61 private static final String LEADING_DIGITS = "leadingDigits"; 62 private static final String MAIN_COUNTRY_FOR_CODE = "mainCountryForCode"; 63 private static final String MOBILE = "mobile"; 64 private static final String MOBILE_NUMBER_PORTABLE_REGION = "mobileNumberPortableRegion"; 65 private static final String NATIONAL_NUMBER_PATTERN = "nationalNumberPattern"; 66 private static final String NATIONAL_PREFIX = "nationalPrefix"; 67 private static final String NATIONAL_PREFIX_FORMATTING_RULE = "nationalPrefixFormattingRule"; 68 private static final String NATIONAL_PREFIX_OPTIONAL_WHEN_FORMATTING = 69 "nationalPrefixOptionalWhenFormatting"; 70 private static final String NATIONAL_PREFIX_FOR_PARSING = "nationalPrefixForParsing"; 71 private static final String NATIONAL_PREFIX_TRANSFORM_RULE = "nationalPrefixTransformRule"; 72 private static final String NO_INTERNATIONAL_DIALLING = "noInternationalDialling"; 73 private static final String NUMBER_FORMAT = "numberFormat"; 74 private static final String PAGER = "pager"; 75 private static final String PATTERN = "pattern"; 76 private static final String PERSONAL_NUMBER = "personalNumber"; 77 private static final String POSSIBLE_LENGTHS = "possibleLengths"; 78 private static final String NATIONAL = "national"; 79 private static final String LOCAL_ONLY = "localOnly"; 80 private static final String PREFERRED_EXTN_PREFIX = "preferredExtnPrefix"; 81 private static final String PREFERRED_INTERNATIONAL_PREFIX = "preferredInternationalPrefix"; 82 private static final String PREMIUM_RATE = "premiumRate"; 83 private static final String SHARED_COST = "sharedCost"; 84 private static final String SHORT_CODE = "shortCode"; 85 private static final String SMS_SERVICES = "smsServices"; 86 private static final String STANDARD_RATE = "standardRate"; 87 private static final String TOLL_FREE = "tollFree"; 88 private static final String UAN = "uan"; 89 private static final String VOICEMAIL = "voicemail"; 90 private static final String VOIP = "voip"; 91 92 private static final Set<String> PHONE_NUMBER_DESCS_WITHOUT_MATCHING_TYPES = 93 new HashSet<String>(Arrays.asList(new String[]{NO_INTERNATIONAL_DIALLING})); 94 95 // Build the PhoneMetadataCollection from the input XML file. buildPhoneMetadataCollection(String inputXmlFile, boolean liteBuild, boolean specialBuild)96 public static PhoneMetadataCollection buildPhoneMetadataCollection(String inputXmlFile, 97 boolean liteBuild, boolean specialBuild) throws Exception { 98 DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); 99 DocumentBuilder builder = builderFactory.newDocumentBuilder(); 100 File xmlFile = new File(inputXmlFile); 101 Document document = builder.parse(xmlFile); 102 // TODO: Look for other uses of these constants and possibly pull them out into a separate 103 // constants file. 104 boolean isShortNumberMetadata = inputXmlFile.contains("ShortNumberMetadata"); 105 boolean isAlternateFormatsMetadata = inputXmlFile.contains("PhoneNumberAlternateFormats"); 106 return buildPhoneMetadataCollection(document, liteBuild, specialBuild, 107 isShortNumberMetadata, isAlternateFormatsMetadata); 108 } 109 110 // @VisibleForTesting buildPhoneMetadataCollection(Document document, boolean liteBuild, boolean specialBuild, boolean isShortNumberMetadata, boolean isAlternateFormatsMetadata)111 static PhoneMetadataCollection buildPhoneMetadataCollection(Document document, 112 boolean liteBuild, boolean specialBuild, boolean isShortNumberMetadata, 113 boolean isAlternateFormatsMetadata) throws Exception { 114 document.getDocumentElement().normalize(); 115 Element rootElement = document.getDocumentElement(); 116 NodeList territory = rootElement.getElementsByTagName("territory"); 117 PhoneMetadataCollection.Builder metadataCollection = PhoneMetadataCollection.newBuilder(); 118 int numOfTerritories = territory.getLength(); 119 // TODO: Infer filter from a single flag. 120 MetadataFilter metadataFilter = getMetadataFilter(liteBuild, specialBuild); 121 for (int i = 0; i < numOfTerritories; i++) { 122 Element territoryElement = (Element) territory.item(i); 123 String regionCode = ""; 124 // For the main metadata file this should always be set, but for other supplementary data 125 // files the country calling code may be all that is needed. 126 if (territoryElement.hasAttribute("id")) { 127 regionCode = territoryElement.getAttribute("id"); 128 } 129 PhoneMetadata.Builder metadata = loadCountryMetadata(regionCode, territoryElement, 130 isShortNumberMetadata, isAlternateFormatsMetadata); 131 metadataFilter.filterMetadata(metadata); 132 metadataCollection.addMetadata(metadata.build()); 133 } 134 return metadataCollection.build(); 135 } 136 137 // Build a mapping from a country calling code to the region codes which denote the country/region 138 // represented by that country code. In the case of multiple countries sharing a calling code, 139 // such as the NANPA countries, the one indicated with "isMainCountryForCode" in the metadata 140 // should be first. buildCountryCodeToRegionCodeMap( PhoneMetadataCollection metadataCollection)141 public static Map<Integer, List<String>> buildCountryCodeToRegionCodeMap( 142 PhoneMetadataCollection metadataCollection) { 143 Map<Integer, List<String>> countryCodeToRegionCodeMap = new TreeMap<Integer, List<String>>(); 144 for (PhoneMetadata metadata : metadataCollection.getMetadataList()) { 145 String regionCode = metadata.getId(); 146 int countryCode = metadata.getCountryCode(); 147 if (countryCodeToRegionCodeMap.containsKey(countryCode)) { 148 if (metadata.getMainCountryForCode()) { 149 countryCodeToRegionCodeMap.get(countryCode).add(0, regionCode); 150 } else { 151 countryCodeToRegionCodeMap.get(countryCode).add(regionCode); 152 } 153 } else { 154 // For most countries, there will be only one region code for the country calling code. 155 List<String> listWithRegionCode = new ArrayList<String>(1); 156 if (!regionCode.equals("")) { // For alternate formats, there are no region codes at all. 157 listWithRegionCode.add(regionCode); 158 } 159 countryCodeToRegionCodeMap.put(countryCode, listWithRegionCode); 160 } 161 } 162 return countryCodeToRegionCodeMap; 163 } 164 validateRE(String regex)165 private static String validateRE(String regex) { 166 return validateRE(regex, false); 167 } 168 169 // @VisibleForTesting validateRE(String regex, boolean removeWhitespace)170 static String validateRE(String regex, boolean removeWhitespace) { 171 // Removes all the whitespace and newline from the regexp. Not using pattern compile options to 172 // make it work across programming languages. 173 String compressedRegex = removeWhitespace ? regex.replaceAll("\\s", "") : regex; 174 Pattern.compile(compressedRegex); 175 // We don't ever expect to see | followed by a ) in our metadata - this would be an indication 176 // of a bug. If one wants to make something optional, we prefer ? to using an empty group. 177 int errorIndex = compressedRegex.indexOf("|)"); 178 if (errorIndex >= 0) { 179 logger.log(Level.SEVERE, "Error with original regex: " + regex 180 + "\n| should not be followed directly by ) in phone number regular expressions."); 181 throw new PatternSyntaxException("| followed by )", compressedRegex, errorIndex); 182 } 183 // return the regex if it is of correct syntax, i.e. compile did not fail with a 184 // PatternSyntaxException. 185 return compressedRegex; 186 } 187 188 /** 189 * Returns the national prefix of the provided country element. 190 */ 191 // @VisibleForTesting getNationalPrefix(Element element)192 static String getNationalPrefix(Element element) { 193 return element.hasAttribute(NATIONAL_PREFIX) ? element.getAttribute(NATIONAL_PREFIX) : ""; 194 } 195 196 // @VisibleForTesting loadTerritoryTagMetadata(String regionCode, Element element, String nationalPrefix)197 static PhoneMetadata.Builder loadTerritoryTagMetadata(String regionCode, Element element, 198 String nationalPrefix) { 199 PhoneMetadata.Builder metadata = PhoneMetadata.newBuilder(); 200 metadata.setId(regionCode); 201 if (element.hasAttribute(COUNTRY_CODE)) { 202 metadata.setCountryCode(Integer.parseInt(element.getAttribute(COUNTRY_CODE))); 203 } 204 if (element.hasAttribute(LEADING_DIGITS)) { 205 metadata.setLeadingDigits(validateRE(element.getAttribute(LEADING_DIGITS))); 206 } 207 if (element.hasAttribute(INTERNATIONAL_PREFIX)) { 208 metadata.setInternationalPrefix(validateRE(element.getAttribute(INTERNATIONAL_PREFIX))); 209 } 210 if (element.hasAttribute(PREFERRED_INTERNATIONAL_PREFIX)) { 211 metadata.setPreferredInternationalPrefix( 212 element.getAttribute(PREFERRED_INTERNATIONAL_PREFIX)); 213 } 214 if (element.hasAttribute(NATIONAL_PREFIX_FOR_PARSING)) { 215 metadata.setNationalPrefixForParsing( 216 validateRE(element.getAttribute(NATIONAL_PREFIX_FOR_PARSING), true)); 217 if (element.hasAttribute(NATIONAL_PREFIX_TRANSFORM_RULE)) { 218 metadata.setNationalPrefixTransformRule( 219 validateRE(element.getAttribute(NATIONAL_PREFIX_TRANSFORM_RULE))); 220 } 221 } 222 if (!nationalPrefix.isEmpty()) { 223 metadata.setNationalPrefix(nationalPrefix); 224 if (!metadata.hasNationalPrefixForParsing()) { 225 metadata.setNationalPrefixForParsing(nationalPrefix); 226 } 227 } 228 if (element.hasAttribute(PREFERRED_EXTN_PREFIX)) { 229 metadata.setPreferredExtnPrefix(element.getAttribute(PREFERRED_EXTN_PREFIX)); 230 } 231 if (element.hasAttribute(MAIN_COUNTRY_FOR_CODE)) { 232 metadata.setMainCountryForCode(true); 233 } 234 if (element.hasAttribute(MOBILE_NUMBER_PORTABLE_REGION)) { 235 metadata.setMobileNumberPortableRegion(true); 236 } 237 return metadata; 238 } 239 240 /** 241 * Extracts the pattern for international format. If there is no intlFormat, default to using the 242 * national format. If the intlFormat is set to "NA" the intlFormat should be ignored. 243 * 244 * @throws RuntimeException if multiple intlFormats have been encountered. 245 * @return whether an international number format is defined. 246 */ 247 // @VisibleForTesting loadInternationalFormat(PhoneMetadata.Builder metadata, Element numberFormatElement, NumberFormat nationalFormat)248 static boolean loadInternationalFormat(PhoneMetadata.Builder metadata, 249 Element numberFormatElement, 250 NumberFormat nationalFormat) { 251 NumberFormat.Builder intlFormat = NumberFormat.newBuilder(); 252 NodeList intlFormatPattern = numberFormatElement.getElementsByTagName(INTL_FORMAT); 253 boolean hasExplicitIntlFormatDefined = false; 254 255 if (intlFormatPattern.getLength() > 1) { 256 logger.log(Level.SEVERE, 257 "A maximum of one intlFormat pattern for a numberFormat element should be defined."); 258 String countryId = metadata.getId().length() > 0 ? metadata.getId() 259 : Integer.toString(metadata.getCountryCode()); 260 throw new RuntimeException("Invalid number of intlFormat patterns for country: " + countryId); 261 } else if (intlFormatPattern.getLength() == 0) { 262 // Default to use the same as the national pattern if none is defined. 263 intlFormat.mergeFrom(nationalFormat); 264 } else { 265 intlFormat.setPattern(numberFormatElement.getAttribute(PATTERN)); 266 setLeadingDigitsPatterns(numberFormatElement, intlFormat); 267 String intlFormatPatternValue = intlFormatPattern.item(0).getFirstChild().getNodeValue(); 268 if (!intlFormatPatternValue.equals("NA")) { 269 intlFormat.setFormat(intlFormatPatternValue); 270 } 271 hasExplicitIntlFormatDefined = true; 272 } 273 274 if (intlFormat.hasFormat()) { 275 metadata.addIntlNumberFormat(intlFormat.build()); 276 } 277 return hasExplicitIntlFormatDefined; 278 } 279 280 /** 281 * Extracts the pattern for the national format. 282 * 283 * @throws RuntimeException if multiple or no formats have been encountered. 284 */ 285 // @VisibleForTesting loadNationalFormat(PhoneMetadata.Builder metadata, Element numberFormatElement, NumberFormat.Builder format)286 static void loadNationalFormat(PhoneMetadata.Builder metadata, Element numberFormatElement, 287 NumberFormat.Builder format) { 288 setLeadingDigitsPatterns(numberFormatElement, format); 289 format.setPattern(validateRE(numberFormatElement.getAttribute(PATTERN))); 290 291 NodeList formatPattern = numberFormatElement.getElementsByTagName(FORMAT); 292 int numFormatPatterns = formatPattern.getLength(); 293 if (numFormatPatterns != 1) { 294 logger.log(Level.SEVERE, "One format pattern for a numberFormat element should be defined."); 295 String countryId = metadata.getId().length() > 0 ? metadata.getId() 296 : Integer.toString(metadata.getCountryCode()); 297 throw new RuntimeException("Invalid number of format patterns (" + numFormatPatterns 298 + ") for country: " + countryId); 299 } 300 format.setFormat(formatPattern.item(0).getFirstChild().getNodeValue()); 301 } 302 303 /** 304 * Extracts the available formats from the provided DOM element. If it does not contain any 305 * nationalPrefixFormattingRule, the one passed-in is retained; similarly for 306 * nationalPrefixOptionalWhenFormatting. The nationalPrefix, nationalPrefixFormattingRule and 307 * nationalPrefixOptionalWhenFormatting values are provided from the parent (territory) element. 308 */ 309 // @VisibleForTesting loadAvailableFormats(PhoneMetadata.Builder metadata, Element element, String nationalPrefix, String nationalPrefixFormattingRule, boolean nationalPrefixOptionalWhenFormatting)310 static void loadAvailableFormats(PhoneMetadata.Builder metadata, 311 Element element, String nationalPrefix, 312 String nationalPrefixFormattingRule, 313 boolean nationalPrefixOptionalWhenFormatting) { 314 String carrierCodeFormattingRule = ""; 315 if (element.hasAttribute(CARRIER_CODE_FORMATTING_RULE)) { 316 carrierCodeFormattingRule = validateRE( 317 getDomesticCarrierCodeFormattingRuleFromElement(element, nationalPrefix)); 318 } 319 NodeList numberFormatElements = element.getElementsByTagName(NUMBER_FORMAT); 320 boolean hasExplicitIntlFormatDefined = false; 321 322 int numOfFormatElements = numberFormatElements.getLength(); 323 if (numOfFormatElements > 0) { 324 for (int i = 0; i < numOfFormatElements; i++) { 325 Element numberFormatElement = (Element) numberFormatElements.item(i); 326 NumberFormat.Builder format = NumberFormat.newBuilder(); 327 328 if (numberFormatElement.hasAttribute(NATIONAL_PREFIX_FORMATTING_RULE)) { 329 format.setNationalPrefixFormattingRule( 330 getNationalPrefixFormattingRuleFromElement(numberFormatElement, nationalPrefix)); 331 } else if (!nationalPrefixFormattingRule.equals("")) { 332 format.setNationalPrefixFormattingRule(nationalPrefixFormattingRule); 333 } 334 if (numberFormatElement.hasAttribute(NATIONAL_PREFIX_OPTIONAL_WHEN_FORMATTING)) { 335 format.setNationalPrefixOptionalWhenFormatting( 336 Boolean.valueOf(numberFormatElement.getAttribute( 337 NATIONAL_PREFIX_OPTIONAL_WHEN_FORMATTING))); 338 } else if (format.getNationalPrefixOptionalWhenFormatting() 339 != nationalPrefixOptionalWhenFormatting) { 340 // Inherit from the parent field if it is not already the same as the default. 341 format.setNationalPrefixOptionalWhenFormatting(nationalPrefixOptionalWhenFormatting); 342 } 343 if (numberFormatElement.hasAttribute(CARRIER_CODE_FORMATTING_RULE)) { 344 format.setDomesticCarrierCodeFormattingRule(validateRE( 345 getDomesticCarrierCodeFormattingRuleFromElement(numberFormatElement, 346 nationalPrefix))); 347 } else if (!carrierCodeFormattingRule.equals("")) { 348 format.setDomesticCarrierCodeFormattingRule(carrierCodeFormattingRule); 349 } 350 loadNationalFormat(metadata, numberFormatElement, format); 351 metadata.addNumberFormat(format); 352 353 if (loadInternationalFormat(metadata, numberFormatElement, format.build())) { 354 hasExplicitIntlFormatDefined = true; 355 } 356 } 357 // Only a small number of regions need to specify the intlFormats in the xml. For the majority 358 // of countries the intlNumberFormat metadata is an exact copy of the national NumberFormat 359 // metadata. To minimize the size of the metadata file, we only keep intlNumberFormats that 360 // actually differ in some way to the national formats. 361 if (!hasExplicitIntlFormatDefined) { 362 metadata.clearIntlNumberFormat(); 363 } 364 } 365 } 366 367 // @VisibleForTesting setLeadingDigitsPatterns(Element numberFormatElement, NumberFormat.Builder format)368 static void setLeadingDigitsPatterns(Element numberFormatElement, NumberFormat.Builder format) { 369 NodeList leadingDigitsPatternNodes = numberFormatElement.getElementsByTagName(LEADING_DIGITS); 370 int numOfLeadingDigitsPatterns = leadingDigitsPatternNodes.getLength(); 371 if (numOfLeadingDigitsPatterns > 0) { 372 for (int i = 0; i < numOfLeadingDigitsPatterns; i++) { 373 format.addLeadingDigitsPattern( 374 validateRE((leadingDigitsPatternNodes.item(i)).getFirstChild().getNodeValue(), true)); 375 } 376 } 377 } 378 379 // @VisibleForTesting getNationalPrefixFormattingRuleFromElement(Element element, String nationalPrefix)380 static String getNationalPrefixFormattingRuleFromElement(Element element, 381 String nationalPrefix) { 382 String nationalPrefixFormattingRule = element.getAttribute(NATIONAL_PREFIX_FORMATTING_RULE); 383 // Replace $NP with national prefix and $FG with the first group ($1). 384 nationalPrefixFormattingRule = 385 nationalPrefixFormattingRule.replaceFirst("\\$NP", nationalPrefix) 386 .replaceFirst("\\$FG", "\\$1"); 387 return nationalPrefixFormattingRule; 388 } 389 390 // @VisibleForTesting getDomesticCarrierCodeFormattingRuleFromElement(Element element, String nationalPrefix)391 static String getDomesticCarrierCodeFormattingRuleFromElement(Element element, 392 String nationalPrefix) { 393 String carrierCodeFormattingRule = element.getAttribute(CARRIER_CODE_FORMATTING_RULE); 394 // Replace $FG with the first group ($1) and $NP with the national prefix. 395 carrierCodeFormattingRule = carrierCodeFormattingRule.replaceFirst("\\$FG", "\\$1") 396 .replaceFirst("\\$NP", nationalPrefix); 397 return carrierCodeFormattingRule; 398 } 399 400 /** 401 * Checks if the possible lengths provided as a sorted set are equal to the possible lengths 402 * stored already in the description pattern. Note that possibleLengths may be empty but must not 403 * be null, and the PhoneNumberDesc passed in should also not be null. 404 */ arePossibleLengthsEqual(TreeSet<Integer> possibleLengths, PhoneNumberDesc desc)405 private static boolean arePossibleLengthsEqual(TreeSet<Integer> possibleLengths, 406 PhoneNumberDesc desc) { 407 if (possibleLengths.size() != desc.getPossibleLengthCount()) { 408 return false; 409 } 410 // Note that both should be sorted already, and we know they are the same length. 411 int i = 0; 412 for (Integer length : possibleLengths) { 413 if (length != desc.getPossibleLength(i)) { 414 return false; 415 } 416 i++; 417 } 418 return true; 419 } 420 421 /** 422 * Processes a phone number description element from the XML file and returns it as a 423 * PhoneNumberDesc. If the description element is a fixed line or mobile number, the parent 424 * description will be used to fill in the whole element if necessary, or any components that are 425 * missing. For all other types, the parent description will only be used to fill in missing 426 * components if the type has a partial definition. For example, if no "tollFree" element exists, 427 * we assume there are no toll free numbers for that locale, and return a phone number description 428 * with no national number data and [-1] for the possible lengths. Note that the parent 429 * description must therefore already be processed before this method is called on any child 430 * elements. 431 * 432 * @param parentDesc a generic phone number description that will be used to fill in missing 433 * parts of the description, or null if this is the root node. This must be processed before 434 * this is run on any child elements. 435 * @param countryElement the XML element representing all the country information 436 * @param numberType the name of the number type, corresponding to the appropriate tag in the XML 437 * file with information about that type 438 * @return complete description of that phone number type 439 */ 440 // @VisibleForTesting processPhoneNumberDescElement(PhoneNumberDesc.Builder parentDesc, Element countryElement, String numberType)441 static PhoneNumberDesc.Builder processPhoneNumberDescElement(PhoneNumberDesc.Builder parentDesc, 442 Element countryElement, 443 String numberType) { 444 NodeList phoneNumberDescList = countryElement.getElementsByTagName(numberType); 445 PhoneNumberDesc.Builder numberDesc = PhoneNumberDesc.newBuilder(); 446 if (phoneNumberDescList.getLength() == 0) { 447 // -1 will never match a possible phone number length, so is safe to use to ensure this never 448 // matches. We don't leave it empty, since for compression reasons, we use the empty list to 449 // mean that the generalDesc possible lengths apply. 450 numberDesc.addPossibleLength(-1); 451 return numberDesc; 452 } 453 if (phoneNumberDescList.getLength() > 0) { 454 if (phoneNumberDescList.getLength() > 1) { 455 throw new RuntimeException( 456 String.format("Multiple elements with type %s found.", numberType)); 457 } 458 Element element = (Element) phoneNumberDescList.item(0); 459 if (parentDesc != null) { 460 // New way of handling possible number lengths. We don't do this for the general 461 // description, since these tags won't be present; instead we will calculate its values 462 // based on the values for all the other number type descriptions (see 463 // setPossibleLengthsGeneralDesc). 464 TreeSet<Integer> lengths = new TreeSet<Integer>(); 465 TreeSet<Integer> localOnlyLengths = new TreeSet<Integer>(); 466 populatePossibleLengthSets(element, lengths, localOnlyLengths); 467 setPossibleLengths(lengths, localOnlyLengths, parentDesc.build(), numberDesc); 468 } 469 470 NodeList validPattern = element.getElementsByTagName(NATIONAL_NUMBER_PATTERN); 471 if (validPattern.getLength() > 0) { 472 numberDesc.setNationalNumberPattern( 473 validateRE(validPattern.item(0).getFirstChild().getNodeValue(), true)); 474 } 475 476 NodeList exampleNumber = element.getElementsByTagName(EXAMPLE_NUMBER); 477 if (exampleNumber.getLength() > 0) { 478 numberDesc.setExampleNumber(exampleNumber.item(0).getFirstChild().getNodeValue()); 479 } 480 } 481 return numberDesc; 482 } 483 484 // @VisibleForTesting setRelevantDescPatterns(PhoneMetadata.Builder metadata, Element element, boolean isShortNumberMetadata)485 static void setRelevantDescPatterns(PhoneMetadata.Builder metadata, Element element, 486 boolean isShortNumberMetadata) { 487 PhoneNumberDesc.Builder generalDesc = processPhoneNumberDescElement(null, element, 488 GENERAL_DESC); 489 // Calculate the possible lengths for the general description. This will be based on the 490 // possible lengths of the child elements. 491 setPossibleLengthsGeneralDesc(generalDesc, metadata.getId(), element, isShortNumberMetadata); 492 metadata.setGeneralDesc(generalDesc); 493 494 if (!isShortNumberMetadata) { 495 // Set fields used by regular length phone numbers. 496 metadata.setFixedLine(processPhoneNumberDescElement(generalDesc, element, FIXED_LINE)); 497 metadata.setMobile(processPhoneNumberDescElement(generalDesc, element, MOBILE)); 498 metadata.setSharedCost(processPhoneNumberDescElement(generalDesc, element, SHARED_COST)); 499 metadata.setVoip(processPhoneNumberDescElement(generalDesc, element, VOIP)); 500 metadata.setPersonalNumber(processPhoneNumberDescElement(generalDesc, element, 501 PERSONAL_NUMBER)); 502 metadata.setPager(processPhoneNumberDescElement(generalDesc, element, PAGER)); 503 metadata.setUan(processPhoneNumberDescElement(generalDesc, element, UAN)); 504 metadata.setVoicemail(processPhoneNumberDescElement(generalDesc, element, VOICEMAIL)); 505 metadata.setNoInternationalDialling(processPhoneNumberDescElement(generalDesc, element, 506 NO_INTERNATIONAL_DIALLING)); 507 boolean mobileAndFixedAreSame = metadata.getMobile().getNationalNumberPattern() 508 .equals(metadata.getFixedLine().getNationalNumberPattern()); 509 if (metadata.getSameMobileAndFixedLinePattern() != mobileAndFixedAreSame) { 510 // Set this if it is not the same as the default. 511 metadata.setSameMobileAndFixedLinePattern(mobileAndFixedAreSame); 512 } 513 metadata.setTollFree(processPhoneNumberDescElement(generalDesc, element, TOLL_FREE)); 514 metadata.setPremiumRate(processPhoneNumberDescElement(generalDesc, element, PREMIUM_RATE)); 515 } else { 516 // Set fields used by short numbers. 517 metadata.setStandardRate(processPhoneNumberDescElement(generalDesc, element, STANDARD_RATE)); 518 metadata.setShortCode(processPhoneNumberDescElement(generalDesc, element, SHORT_CODE)); 519 metadata.setCarrierSpecific(processPhoneNumberDescElement(generalDesc, element, 520 CARRIER_SPECIFIC)); 521 metadata.setEmergency(processPhoneNumberDescElement(generalDesc, element, EMERGENCY)); 522 metadata.setTollFree(processPhoneNumberDescElement(generalDesc, element, TOLL_FREE)); 523 metadata.setPremiumRate(processPhoneNumberDescElement(generalDesc, element, PREMIUM_RATE)); 524 metadata.setSmsServices(processPhoneNumberDescElement(generalDesc, element, SMS_SERVICES)); 525 } 526 } 527 528 /** 529 * Parses a possible length string into a set of the integers that are covered. 530 * 531 * @param possibleLengthString a string specifying the possible lengths of phone numbers. Follows 532 * this syntax: ranges or elements are separated by commas, and ranges are specified in 533 * [min-max] notation, inclusive. For example, [3-5],7,9,[11-14] should be parsed to 534 * 3,4,5,7,9,11,12,13,14. 535 */ parsePossibleLengthStringToSet(String possibleLengthString)536 private static Set<Integer> parsePossibleLengthStringToSet(String possibleLengthString) { 537 if (possibleLengthString.length() == 0) { 538 throw new RuntimeException("Empty possibleLength string found."); 539 } 540 String[] lengths = possibleLengthString.split(","); 541 Set<Integer> lengthSet = new TreeSet<Integer>(); 542 for (int i = 0; i < lengths.length; i++) { 543 String lengthSubstring = lengths[i]; 544 if (lengthSubstring.length() == 0) { 545 throw new RuntimeException(String.format("Leading, trailing or adjacent commas in possible " 546 + "length string %s, these should only separate numbers or ranges.", 547 possibleLengthString)); 548 } else if (lengthSubstring.charAt(0) == '[') { 549 if (lengthSubstring.charAt(lengthSubstring.length() - 1) != ']') { 550 throw new RuntimeException(String.format("Missing end of range character in possible " 551 + "length string %s.", possibleLengthString)); 552 } 553 // Strip the leading and trailing [], and split on the -. 554 String[] minMax = lengthSubstring.substring(1, lengthSubstring.length() - 1).split("-"); 555 if (minMax.length != 2) { 556 throw new RuntimeException(String.format("Ranges must have exactly one - character: " 557 + "missing for %s.", possibleLengthString)); 558 } 559 int min = Integer.parseInt(minMax[0]); 560 int max = Integer.parseInt(minMax[1]); 561 // We don't even accept [6-7] since we prefer the shorter 6,7 variant; for a range to be in 562 // use the hyphen needs to replace at least one digit. 563 if (max - min < 2) { 564 throw new RuntimeException(String.format("The first number in a range should be two or " 565 + "more digits lower than the second. Culprit possibleLength string: %s", 566 possibleLengthString)); 567 } 568 for (int j = min; j <= max; j++) { 569 if (!lengthSet.add(j)) { 570 throw new RuntimeException(String.format("Duplicate length element found (%d) in " 571 + "possibleLength string %s", j, possibleLengthString)); 572 } 573 } 574 } else { 575 int length = Integer.parseInt(lengthSubstring); 576 if (!lengthSet.add(length)) { 577 throw new RuntimeException(String.format("Duplicate length element found (%d) in " 578 + "possibleLength string %s", length, possibleLengthString)); 579 } 580 } 581 } 582 return lengthSet; 583 } 584 585 /** 586 * Reads the possible lengths present in the metadata and splits them into two sets: one for 587 * full-length numbers, one for local numbers. 588 * 589 * @param data one or more phone number descriptions, represented as XML nodes 590 * @param lengths a set to which to add possible lengths of full phone numbers 591 * @param localOnlyLengths a set to which to add possible lengths of phone numbers only diallable 592 * locally (e.g. within a province) 593 */ populatePossibleLengthSets(Element data, TreeSet<Integer> lengths, TreeSet<Integer> localOnlyLengths)594 private static void populatePossibleLengthSets(Element data, TreeSet<Integer> lengths, 595 TreeSet<Integer> localOnlyLengths) { 596 NodeList possibleLengths = data.getElementsByTagName(POSSIBLE_LENGTHS); 597 for (int i = 0; i < possibleLengths.getLength(); i++) { 598 Element element = (Element) possibleLengths.item(i); 599 String nationalLengths = element.getAttribute(NATIONAL); 600 // We don't add to the phone metadata yet, since we want to sort length elements found under 601 // different nodes first, make sure there are no duplicates between them and that the 602 // localOnly lengths don't overlap with the others. 603 Set<Integer> thisElementLengths = parsePossibleLengthStringToSet(nationalLengths); 604 if (element.hasAttribute(LOCAL_ONLY)) { 605 String localLengths = element.getAttribute(LOCAL_ONLY); 606 Set<Integer> thisElementLocalOnlyLengths = parsePossibleLengthStringToSet(localLengths); 607 Set<Integer> intersection = new HashSet<Integer>(thisElementLengths); 608 intersection.retainAll(thisElementLocalOnlyLengths); 609 if (!intersection.isEmpty()) { 610 throw new RuntimeException(String.format( 611 "Possible length(s) found specified as a normal and local-only length: %s", 612 intersection)); 613 } 614 // We check again when we set these lengths on the metadata itself in setPossibleLengths 615 // that the elements in localOnly are not also in lengths. For e.g. the generalDesc, it 616 // might have a local-only length for one type that is a normal length for another type. We 617 // don't consider this an error, but we do want to remove the local-only lengths. 618 localOnlyLengths.addAll(thisElementLocalOnlyLengths); 619 } 620 // It is okay if at this time we have duplicates, because the same length might be possible 621 // for e.g. fixed-line and for mobile numbers, and this method operates potentially on 622 // multiple phoneNumberDesc XML elements. 623 lengths.addAll(thisElementLengths); 624 } 625 } 626 627 /** 628 * Sets possible lengths in the general description, derived from certain child elements. 629 */ 630 // @VisibleForTesting setPossibleLengthsGeneralDesc(PhoneNumberDesc.Builder generalDesc, String metadataId, Element data, boolean isShortNumberMetadata)631 static void setPossibleLengthsGeneralDesc(PhoneNumberDesc.Builder generalDesc, String metadataId, 632 Element data, boolean isShortNumberMetadata) { 633 TreeSet<Integer> lengths = new TreeSet<Integer>(); 634 TreeSet<Integer> localOnlyLengths = new TreeSet<Integer>(); 635 // The general description node should *always* be present if metadata for other types is 636 // present, aside from in some unit tests. 637 // (However, for e.g. formatting metadata in PhoneNumberAlternateFormats, no PhoneNumberDesc 638 // elements are present). 639 NodeList generalDescNodes = data.getElementsByTagName(GENERAL_DESC); 640 if (generalDescNodes.getLength() > 0) { 641 Element generalDescNode = (Element) generalDescNodes.item(0); 642 populatePossibleLengthSets(generalDescNode, lengths, localOnlyLengths); 643 if (!lengths.isEmpty() || !localOnlyLengths.isEmpty()) { 644 // We shouldn't have anything specified at the "general desc" level: we are going to 645 // calculate this ourselves from child elements. 646 throw new RuntimeException(String.format("Found possible lengths specified at general " 647 + "desc: this should be derived from child elements. Affected country: %s", 648 metadataId)); 649 } 650 } 651 if (!isShortNumberMetadata) { 652 // Make a copy here since we want to remove some nodes, but we don't want to do that on our 653 // actual data. 654 Element allDescData = (Element) data.cloneNode(true /* deep copy */); 655 for (String tag : PHONE_NUMBER_DESCS_WITHOUT_MATCHING_TYPES) { 656 NodeList nodesToRemove = allDescData.getElementsByTagName(tag); 657 if (nodesToRemove.getLength() > 0) { 658 // We check when we process phone number descriptions that there are only one of each 659 // type, so this is safe to do. 660 allDescData.removeChild(nodesToRemove.item(0)); 661 } 662 } 663 populatePossibleLengthSets(allDescData, lengths, localOnlyLengths); 664 } else { 665 // For short number metadata, we want to copy the lengths from the "short code" section only. 666 // This is because it's the more detailed validation pattern, it's not a sub-type of short 667 // codes. The other lengths will be checked later to see that they are a sub-set of these 668 // possible lengths. 669 NodeList shortCodeDescList = data.getElementsByTagName(SHORT_CODE); 670 if (shortCodeDescList.getLength() > 0) { 671 Element shortCodeDesc = (Element) shortCodeDescList.item(0); 672 populatePossibleLengthSets(shortCodeDesc, lengths, localOnlyLengths); 673 } 674 if (localOnlyLengths.size() > 0) { 675 throw new RuntimeException("Found local-only lengths in short-number metadata"); 676 } 677 } 678 setPossibleLengths(lengths, localOnlyLengths, null, generalDesc); 679 } 680 681 /** 682 * Sets the possible length fields in the metadata from the sets of data passed in. Checks that 683 * the length is covered by the "parent" phone number description element if one is present, and 684 * if the lengths are exactly the same as this, they are not filled in for efficiency reasons. 685 * 686 * @param parentDesc the "general description" element or null if desc is the generalDesc itself 687 * @param desc the PhoneNumberDesc object that we are going to set lengths for 688 */ setPossibleLengths(TreeSet<Integer> lengths, TreeSet<Integer> localOnlyLengths, PhoneNumberDesc parentDesc, PhoneNumberDesc.Builder desc)689 private static void setPossibleLengths(TreeSet<Integer> lengths, 690 TreeSet<Integer> localOnlyLengths, PhoneNumberDesc parentDesc, PhoneNumberDesc.Builder desc) { 691 // We clear these fields since the metadata tends to inherit from the parent element for other 692 // fields (via a mergeFrom). 693 desc.clearPossibleLength(); 694 desc.clearPossibleLengthLocalOnly(); 695 // Only add the lengths to this sub-type if they aren't exactly the same as the possible 696 // lengths in the general desc (for metadata size reasons). 697 if (parentDesc == null || !arePossibleLengthsEqual(lengths, parentDesc)) { 698 for (Integer length : lengths) { 699 if (parentDesc == null || parentDesc.getPossibleLengthList().contains(length)) { 700 desc.addPossibleLength(length); 701 } else { 702 // We shouldn't have possible lengths defined in a child element that are not covered by 703 // the general description. We check this here even though the general description is 704 // derived from child elements because it is only derived from a subset, and we need to 705 // ensure *all* child elements have a valid possible length. 706 throw new RuntimeException(String.format( 707 "Out-of-range possible length found (%d), parent lengths %s.", 708 length, parentDesc.getPossibleLengthList())); 709 } 710 } 711 } 712 // We check that the local-only length isn't also a normal possible length (only relevant for 713 // the general-desc, since within elements such as fixed-line we would throw an exception if we 714 // saw this) before adding it to the collection of possible local-only lengths. 715 for (Integer length : localOnlyLengths) { 716 if (!lengths.contains(length)) { 717 // We check it is covered by either of the possible length sets of the parent 718 // PhoneNumberDesc, because for example 7 might be a valid localOnly length for mobile, but 719 // a valid national length for fixedLine, so the generalDesc would have the 7 removed from 720 // localOnly. 721 if (parentDesc == null || parentDesc.getPossibleLengthLocalOnlyList().contains(length) 722 || parentDesc.getPossibleLengthList().contains(length)) { 723 desc.addPossibleLengthLocalOnly(length); 724 } else { 725 throw new RuntimeException(String.format( 726 "Out-of-range local-only possible length found (%d), parent length %s.", 727 length, parentDesc.getPossibleLengthLocalOnlyList())); 728 } 729 } 730 } 731 } 732 733 // @VisibleForTesting loadCountryMetadata(String regionCode, Element element, boolean isShortNumberMetadata, boolean isAlternateFormatsMetadata)734 static PhoneMetadata.Builder loadCountryMetadata(String regionCode, 735 Element element, 736 boolean isShortNumberMetadata, 737 boolean isAlternateFormatsMetadata) { 738 String nationalPrefix = getNationalPrefix(element); 739 PhoneMetadata.Builder metadata = loadTerritoryTagMetadata(regionCode, element, nationalPrefix); 740 String nationalPrefixFormattingRule = 741 getNationalPrefixFormattingRuleFromElement(element, nationalPrefix); 742 loadAvailableFormats(metadata, element, nationalPrefix, 743 nationalPrefixFormattingRule, 744 element.hasAttribute(NATIONAL_PREFIX_OPTIONAL_WHEN_FORMATTING)); 745 if (!isAlternateFormatsMetadata) { 746 // The alternate formats metadata does not need most of the patterns to be set. 747 setRelevantDescPatterns(metadata, element, isShortNumberMetadata); 748 } 749 return metadata; 750 } 751 752 /** 753 * Processes the custom build flags and gets a {@code MetadataFilter} which may be used to 754 * filter {@code PhoneMetadata} objects. Incompatible flag combinations throw RuntimeException. 755 * 756 * @param liteBuild The liteBuild flag value as given by the command-line 757 * @param specialBuild The specialBuild flag value as given by the command-line 758 */ 759 // @VisibleForTesting getMetadataFilter(boolean liteBuild, boolean specialBuild)760 static MetadataFilter getMetadataFilter(boolean liteBuild, boolean specialBuild) { 761 if (specialBuild) { 762 if (liteBuild) { 763 throw new RuntimeException("liteBuild and specialBuild may not both be set"); 764 } 765 return MetadataFilter.forSpecialBuild(); 766 } 767 if (liteBuild) { 768 return MetadataFilter.forLiteBuild(); 769 } 770 return MetadataFilter.emptyFilter(); 771 } 772 } 773