1 /* 2 * Copyright (C) 2011 The Libphonenumber Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.i18n.phonenumbers; 18 19 import com.google.i18n.phonenumbers.PhoneNumberUtil.Leniency; 20 import com.google.i18n.phonenumbers.PhoneNumberUtil.MatchType; 21 import com.google.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberFormat; 22 import com.google.i18n.phonenumbers.Phonemetadata.NumberFormat; 23 import com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadata; 24 import com.google.i18n.phonenumbers.Phonenumber.PhoneNumber.CountryCodeSource; 25 import com.google.i18n.phonenumbers.Phonenumber.PhoneNumber; 26 import com.google.i18n.phonenumbers.internal.RegexCache; 27 import com.google.i18n.phonenumbers.metadata.DefaultMetadataDependenciesProvider; 28 import java.lang.Character.UnicodeBlock; 29 import java.util.Iterator; 30 import java.util.NoSuchElementException; 31 import java.util.regex.Matcher; 32 import java.util.regex.Pattern; 33 34 /** 35 * A stateful class that finds and extracts telephone numbers from {@linkplain CharSequence text}. 36 * Instances can be created using the {@linkplain PhoneNumberUtil#findNumbers factory methods} in 37 * {@link PhoneNumberUtil}. 38 * 39 * <p>Vanity numbers (phone numbers using alphabetic digits such as <tt>1-800-SIX-FLAGS</tt> are 40 * not found. 41 * 42 * <p>This class is not thread-safe. 43 */ 44 final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> { 45 /** 46 * The phone number pattern used by {@link #find}, similar to 47 * {@code PhoneNumberUtil.VALID_PHONE_NUMBER}, but with the following differences: 48 * <ul> 49 * <li>All captures are limited in order to place an upper bound to the text matched by the 50 * pattern. 51 * <ul> 52 * <li>Leading punctuation / plus signs are limited. 53 * <li>Consecutive occurrences of punctuation are limited. 54 * <li>Number of digits is limited. 55 * </ul> 56 * <li>No whitespace is allowed at the start or end. 57 * <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported. 58 * </ul> 59 */ 60 private static final Pattern PATTERN; 61 /** 62 * Matches strings that look like publication pages. Example: 63 * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns. 64 * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre> 65 * 66 * The string "211-227 (2003)" is not a telephone number. 67 */ 68 private static final Pattern PUB_PAGES = Pattern.compile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}"); 69 70 /** 71 * Matches strings that look like dates using "/" as a separator. Examples: 3/10/2011, 31/10/96 or 72 * 08/31/95. 73 */ 74 private static final Pattern SLASH_SEPARATED_DATES = 75 Pattern.compile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}"); 76 77 /** 78 * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the 79 * trailing ":\d\d" -- that is covered by TIME_STAMPS_SUFFIX. 80 */ 81 private static final Pattern TIME_STAMPS = 82 Pattern.compile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$"); 83 private static final Pattern TIME_STAMPS_SUFFIX = Pattern.compile(":[0-5]\\d"); 84 85 /** 86 * Pattern to check that brackets match. Opening brackets should be closed within a phone number. 87 * This also checks that there is something inside the brackets. Having no brackets at all is also 88 * fine. 89 */ 90 private static final Pattern MATCHING_BRACKETS; 91 92 /** 93 * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are 94 * ordered according to specificity. For example, white-space is last since that is frequently 95 * used in numbers, not just to separate two numbers. We have separate patterns since we don't 96 * want to break up the phone-number-like text on more than one different kind of symbol at one 97 * time, although symbols of the same type (e.g. space) can be safely grouped together. 98 * 99 * Note that if there is a match, we will always check any text found up to the first match as 100 * well. 101 */ 102 private static final Pattern[] INNER_MATCHES = { 103 // Breaks on the slash - e.g. "651-234-2345/332-445-1234" 104 Pattern.compile("/+(.*)"), 105 // Note that the bracket here is inside the capturing group, since we consider it part of the 106 // phone number. Will match a pattern like "(650) 223 3345 (754) 223 3321". 107 Pattern.compile("(\\([^(]*)"), 108 // Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number." 109 // We require a space on either side of the hyphen for it to be considered a separator. 110 Pattern.compile("(?:\\p{Z}-|-\\p{Z})\\p{Z}*(.+)"), 111 // Various types of wide hyphens. Note we have decided not to enforce a space here, since it's 112 // possible that it's supposed to be used to break two numbers without spaces, and we haven't 113 // seen many instances of it used within a number. 114 Pattern.compile("[\u2012-\u2015\uFF0D]\\p{Z}*(.+)"), 115 // Breaks on a full stop - e.g. "12345. 332-445-1234 is my number." 116 Pattern.compile("\\.+\\p{Z}*([^.]+)"), 117 // Breaks on space - e.g. "3324451234 8002341234" 118 Pattern.compile("\\p{Z}+(\\P{Z}+)") 119 }; 120 121 /** 122 * Punctuation that may be at the start of a phone number - brackets and plus signs. 123 */ 124 private static final Pattern LEAD_CLASS; 125 126 static { 127 /* Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist 128 * to make the pattern more easily understood. */ 129 130 String openingParens = "(\\[\uFF08\uFF3B"; 131 String closingParens = ")\\]\uFF09\uFF3D"; 132 String nonParens = "[^" + openingParens + closingParens + "]"; 133 134 /* Limit on the number of pairs of brackets in a phone number. */ 135 String bracketPairLimit = limit(0, 3); 136 /* 137 * An opening bracket at the beginning may not be closed, but subsequent ones should be. It's 138 * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a 139 * closing bracket first. We limit the sets of brackets in a phone number to four. 140 */ 141 MATCHING_BRACKETS = Pattern.compile( 142 "(?:[" + openingParens + "])?" + "(?:" + nonParens + "+" + "[" + closingParens + "])?" 143 + nonParens + "+" 144 + "(?:[" + openingParens + "]" + nonParens + "+[" + closingParens + "])" + bracketPairLimit 145 + nonParens + "*"); 146 147 /* Limit on the number of leading (plus) characters. */ 148 String leadLimit = limit(0, 2); 149 /* Limit on the number of consecutive punctuation characters. */ 150 String punctuationLimit = limit(0, 4); 151 /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a 152 * single block, set high enough to accommodate the entire national number and the international 153 * country code. */ 154 int digitBlockLimit = 155 PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE; 156 /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some 157 * formats use spaces to separate each digit. */ 158 String blockLimit = limit(0, digitBlockLimit); 159 160 /* A punctuation sequence allowing white space. */ 161 String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit; 162 /* A digits block without punctuation. */ 163 String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit); 164 165 String leadClassChars = openingParens + PhoneNumberUtil.PLUS_CHARS; 166 String leadClass = "[" + leadClassChars + "]"; 167 LEAD_CLASS = Pattern.compile(leadClass); 168 169 /* Phone number pattern allowing optional punctuation. */ 170 PATTERN = Pattern.compile( 171 "(?:" + leadClass + punctuation + ")" + leadLimit 172 + digitSequence + "(?:" + punctuation + digitSequence + ")" + blockLimit 173 + "(?:" + PhoneNumberUtil.EXTN_PATTERNS_FOR_MATCHING + ")?", 174 PhoneNumberUtil.REGEX_FLAGS); 175 } 176 177 /** Returns a regular expression quantifier with an upper and lower limit. */ limit(int lower, int upper)178 private static String limit(int lower, int upper) { 179 if ((lower < 0) || (upper <= 0) || (upper < lower)) { 180 throw new IllegalArgumentException(); 181 } 182 return "{" + lower + "," + upper + "}"; 183 } 184 185 /** The potential states of a PhoneNumberMatcher. */ 186 private enum State { 187 NOT_READY, READY, DONE 188 } 189 190 /** The phone number utility. */ 191 private final PhoneNumberUtil phoneUtil; 192 /** The text searched for phone numbers. */ 193 private final CharSequence text; 194 /** 195 * The region (country) to assume for phone numbers without an international prefix, possibly 196 * null. 197 */ 198 private final String preferredRegion; 199 /** The degree of validation requested. */ 200 private final Leniency leniency; 201 /** The maximum number of retries after matching an invalid number. */ 202 private long maxTries; 203 204 /** The iteration tristate. */ 205 private State state = State.NOT_READY; 206 /** The last successful match, null unless in {@link State#READY}. */ 207 private PhoneNumberMatch lastMatch = null; 208 /** The next index to start searching at. Undefined in {@link State#DONE}. */ 209 private int searchIndex = 0; 210 211 // A cache for frequently used country-specific regular expressions. Set to 32 to cover ~2-3 212 // countries being used for the same doc with ~10 patterns for each country. Some pages will have 213 // a lot more countries in use, but typically fewer numbers for each so expanding the cache for 214 // that use-case won't have a lot of benefit. 215 private final RegexCache regexCache = new RegexCache(32); 216 217 /** 218 * Creates a new instance. See the factory methods in {@link PhoneNumberUtil} on how to obtain a 219 * new instance. 220 * 221 * @param util the phone number util to use 222 * @param text the character sequence that we will search, null for no text 223 * @param country the country to assume for phone numbers not written in international format 224 * (with a leading plus, or with the international dialing prefix of the specified region). 225 * May be null or "ZZ" if only numbers with a leading plus should be 226 * considered. 227 * @param leniency the leniency to use when evaluating candidate phone numbers 228 * @param maxTries the maximum number of invalid numbers to try before giving up on the text. 229 * This is to cover degenerate cases where the text has a lot of false positives in it. Must 230 * be {@code >= 0}. 231 */ PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency, long maxTries)232 PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency, 233 long maxTries) { 234 235 if ((util == null) || (leniency == null)) { 236 throw new NullPointerException(); 237 } 238 if (maxTries < 0) { 239 throw new IllegalArgumentException(); 240 } 241 this.phoneUtil = util; 242 this.text = (text != null) ? text : ""; 243 this.preferredRegion = country; 244 this.leniency = leniency; 245 this.maxTries = maxTries; 246 } 247 248 /** 249 * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex} 250 * that represents a phone number. Returns the next match, null if none was found. 251 * 252 * @param index the search index to start searching at 253 * @return the phone number match found, null if none can be found 254 */ find(int index)255 private PhoneNumberMatch find(int index) { 256 Matcher matcher = PATTERN.matcher(text); 257 while ((maxTries > 0) && matcher.find(index)) { 258 int start = matcher.start(); 259 CharSequence candidate = text.subSequence(start, matcher.end()); 260 261 // Check for extra numbers at the end. 262 // TODO: This is the place to start when trying to support extraction of multiple phone number 263 // from split notations (+41 79 123 45 67 / 68). 264 candidate = trimAfterFirstMatch(PhoneNumberUtil.SECOND_NUMBER_START_PATTERN, candidate); 265 266 PhoneNumberMatch match = extractMatch(candidate, start); 267 if (match != null) { 268 return match; 269 } 270 271 index = start + candidate.length(); 272 maxTries--; 273 } 274 275 return null; 276 } 277 278 /** 279 * Trims away any characters after the first match of {@code pattern} in {@code candidate}, 280 * returning the trimmed version. 281 */ trimAfterFirstMatch(Pattern pattern, CharSequence candidate)282 private static CharSequence trimAfterFirstMatch(Pattern pattern, CharSequence candidate) { 283 Matcher trailingCharsMatcher = pattern.matcher(candidate); 284 if (trailingCharsMatcher.find()) { 285 candidate = candidate.subSequence(0, trailingCharsMatcher.start()); 286 } 287 return candidate; 288 } 289 290 /** 291 * Helper method to determine if a character is a Latin-script letter or not. For our purposes, 292 * combining marks should also return true since we assume they have been added to a preceding 293 * Latin character. 294 */ 295 // @VisibleForTesting isLatinLetter(char letter)296 static boolean isLatinLetter(char letter) { 297 // Combining marks are a subset of non-spacing-mark. 298 if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) { 299 return false; 300 } 301 UnicodeBlock block = UnicodeBlock.of(letter); 302 return block.equals(UnicodeBlock.BASIC_LATIN) 303 || block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) 304 || block.equals(UnicodeBlock.LATIN_EXTENDED_A) 305 || block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) 306 || block.equals(UnicodeBlock.LATIN_EXTENDED_B) 307 || block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS); 308 } 309 isInvalidPunctuationSymbol(char character)310 private static boolean isInvalidPunctuationSymbol(char character) { 311 return character == '%' || Character.getType(character) == Character.CURRENCY_SYMBOL; 312 } 313 314 /** 315 * Attempts to extract a match from a {@code candidate} character sequence. 316 * 317 * @param candidate the candidate text that might contain a phone number 318 * @param offset the offset of {@code candidate} within {@link #text} 319 * @return the match found, null if none can be found 320 */ extractMatch(CharSequence candidate, int offset)321 private PhoneNumberMatch extractMatch(CharSequence candidate, int offset) { 322 // Skip a match that is more likely to be a date. 323 if (SLASH_SEPARATED_DATES.matcher(candidate).find()) { 324 return null; 325 } 326 327 // Skip potential time-stamps. 328 if (TIME_STAMPS.matcher(candidate).find()) { 329 String followingText = text.toString().substring(offset + candidate.length()); 330 if (TIME_STAMPS_SUFFIX.matcher(followingText).lookingAt()) { 331 return null; 332 } 333 } 334 335 // Try to come up with a valid match given the entire candidate. 336 PhoneNumberMatch match = parseAndVerify(candidate, offset); 337 if (match != null) { 338 return match; 339 } 340 341 // If that failed, try to find an "inner match" - there might be a phone number within this 342 // candidate. 343 return extractInnerMatch(candidate, offset); 344 } 345 346 /** 347 * Attempts to extract a match from {@code candidate} if the whole candidate does not qualify as a 348 * match. 349 * 350 * @param candidate the candidate text that might contain a phone number 351 * @param offset the current offset of {@code candidate} within {@link #text} 352 * @return the match found, null if none can be found 353 */ extractInnerMatch(CharSequence candidate, int offset)354 private PhoneNumberMatch extractInnerMatch(CharSequence candidate, int offset) { 355 for (Pattern possibleInnerMatch : INNER_MATCHES) { 356 Matcher groupMatcher = possibleInnerMatch.matcher(candidate); 357 boolean isFirstMatch = true; 358 while (groupMatcher.find() && maxTries > 0) { 359 if (isFirstMatch) { 360 // We should handle any group before this one too. 361 CharSequence group = trimAfterFirstMatch( 362 PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, 363 candidate.subSequence(0, groupMatcher.start())); 364 PhoneNumberMatch match = parseAndVerify(group, offset); 365 if (match != null) { 366 return match; 367 } 368 maxTries--; 369 isFirstMatch = false; 370 } 371 CharSequence group = trimAfterFirstMatch( 372 PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, groupMatcher.group(1)); 373 PhoneNumberMatch match = parseAndVerify(group, offset + groupMatcher.start(1)); 374 if (match != null) { 375 return match; 376 } 377 maxTries--; 378 } 379 } 380 return null; 381 } 382 383 /** 384 * Parses a phone number from the {@code candidate} using {@link PhoneNumberUtil#parse} and 385 * verifies it matches the requested {@link #leniency}. If parsing and verification succeed, a 386 * corresponding {@link PhoneNumberMatch} is returned, otherwise this method returns null. 387 * 388 * @param candidate the candidate match 389 * @param offset the offset of {@code candidate} within {@link #text} 390 * @return the parsed and validated phone number match, or null 391 */ parseAndVerify(CharSequence candidate, int offset)392 private PhoneNumberMatch parseAndVerify(CharSequence candidate, int offset) { 393 try { 394 // Check the candidate doesn't contain any formatting which would indicate that it really 395 // isn't a phone number. 396 if (!MATCHING_BRACKETS.matcher(candidate).matches() || PUB_PAGES.matcher(candidate).find()) { 397 return null; 398 } 399 400 // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded 401 // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def. 402 if (leniency.compareTo(Leniency.VALID) >= 0) { 403 // If the candidate is not at the start of the text, and does not start with phone-number 404 // punctuation, check the previous character. 405 if (offset > 0 && !LEAD_CLASS.matcher(candidate).lookingAt()) { 406 char previousChar = text.charAt(offset - 1); 407 // We return null if it is a latin letter or an invalid punctuation symbol. 408 if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) { 409 return null; 410 } 411 } 412 int lastCharIndex = offset + candidate.length(); 413 if (lastCharIndex < text.length()) { 414 char nextChar = text.charAt(lastCharIndex); 415 if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) { 416 return null; 417 } 418 } 419 } 420 421 PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion); 422 423 if (leniency.verify(number, candidate, phoneUtil, this)) { 424 // We used parseAndKeepRawInput to create this number, but for now we don't return the extra 425 // values parsed. TODO: stop clearing all values here and switch all users over 426 // to using rawInput() rather than the rawString() of PhoneNumberMatch. 427 number.clearCountryCodeSource(); 428 number.clearRawInput(); 429 number.clearPreferredDomesticCarrierCode(); 430 return new PhoneNumberMatch(offset, candidate.toString(), number); 431 } 432 } catch (NumberParseException e) { 433 // ignore and continue 434 } 435 return null; 436 } 437 438 /** 439 * Small helper interface such that the number groups can be checked according to different 440 * criteria, both for our default way of performing formatting and for any alternate formats we 441 * may want to check. 442 */ 443 interface NumberGroupingChecker { 444 /** 445 * Returns true if the groups of digits found in our candidate phone number match our 446 * expectations. 447 * 448 * @param number the original number we found when parsing 449 * @param normalizedCandidate the candidate number, normalized to only contain ASCII digits, 450 * but with non-digits (spaces etc) retained 451 * @param expectedNumberGroups the groups of digits that we would expect to see if we 452 * formatted this number 453 */ checkGroups(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] expectedNumberGroups)454 boolean checkGroups(PhoneNumberUtil util, PhoneNumber number, 455 StringBuilder normalizedCandidate, String[] expectedNumberGroups); 456 } 457 allNumberGroupsRemainGrouped(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] formattedNumberGroups)458 static boolean allNumberGroupsRemainGrouped(PhoneNumberUtil util, 459 PhoneNumber number, 460 StringBuilder normalizedCandidate, 461 String[] formattedNumberGroups) { 462 int fromIndex = 0; 463 if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) { 464 // First skip the country code if the normalized candidate contained it. 465 String countryCode = Integer.toString(number.getCountryCode()); 466 fromIndex = normalizedCandidate.indexOf(countryCode) + countryCode.length(); 467 } 468 // Check each group of consecutive digits are not broken into separate groupings in the 469 // {@code normalizedCandidate} string. 470 for (int i = 0; i < formattedNumberGroups.length; i++) { 471 // Fails if the substring of {@code normalizedCandidate} starting from {@code fromIndex} 472 // doesn't contain the consecutive digits in formattedNumberGroups[i]. 473 fromIndex = normalizedCandidate.indexOf(formattedNumberGroups[i], fromIndex); 474 if (fromIndex < 0) { 475 return false; 476 } 477 // Moves {@code fromIndex} forward. 478 fromIndex += formattedNumberGroups[i].length(); 479 if (i == 0 && fromIndex < normalizedCandidate.length()) { 480 // We are at the position right after the NDC. We get the region used for formatting 481 // information based on the country code in the phone number, rather than the number itself, 482 // as we do not need to distinguish between different countries with the same country 483 // calling code and this is faster. 484 String region = util.getRegionCodeForCountryCode(number.getCountryCode()); 485 if (util.getNddPrefixForRegion(region, true) != null 486 && Character.isDigit(normalizedCandidate.charAt(fromIndex))) { 487 // This means there is no formatting symbol after the NDC. In this case, we only 488 // accept the number if there is no formatting symbol at all in the number, except 489 // for extensions. This is only important for countries with national prefixes. 490 String nationalSignificantNumber = util.getNationalSignificantNumber(number); 491 return normalizedCandidate.substring(fromIndex - formattedNumberGroups[i].length()) 492 .startsWith(nationalSignificantNumber); 493 } 494 } 495 } 496 // The check here makes sure that we haven't mistakenly already used the extension to 497 // match the last group of the subscriber number. Note the extension cannot have 498 // formatting in-between digits. 499 return normalizedCandidate.substring(fromIndex).contains(number.getExtension()); 500 } 501 allNumberGroupsAreExactlyPresent(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] formattedNumberGroups)502 static boolean allNumberGroupsAreExactlyPresent(PhoneNumberUtil util, 503 PhoneNumber number, 504 StringBuilder normalizedCandidate, 505 String[] formattedNumberGroups) { 506 String[] candidateGroups = 507 PhoneNumberUtil.NON_DIGITS_PATTERN.split(normalizedCandidate.toString()); 508 // Set this to the last group, skipping it if the number has an extension. 509 int candidateNumberGroupIndex = 510 number.hasExtension() ? candidateGroups.length - 2 : candidateGroups.length - 1; 511 // First we check if the national significant number is formatted as a block. 512 // We use contains and not equals, since the national significant number may be present with 513 // a prefix such as a national number prefix, or the country code itself. 514 if (candidateGroups.length == 1 515 || candidateGroups[candidateNumberGroupIndex].contains( 516 util.getNationalSignificantNumber(number))) { 517 return true; 518 } 519 // Starting from the end, go through in reverse, excluding the first group, and check the 520 // candidate and number groups are the same. 521 for (int formattedNumberGroupIndex = (formattedNumberGroups.length - 1); 522 formattedNumberGroupIndex > 0 && candidateNumberGroupIndex >= 0; 523 formattedNumberGroupIndex--, candidateNumberGroupIndex--) { 524 if (!candidateGroups[candidateNumberGroupIndex].equals( 525 formattedNumberGroups[formattedNumberGroupIndex])) { 526 return false; 527 } 528 } 529 // Now check the first group. There may be a national prefix at the start, so we only check 530 // that the candidate group ends with the formatted number group. 531 return (candidateNumberGroupIndex >= 0 532 && candidateGroups[candidateNumberGroupIndex].endsWith(formattedNumberGroups[0])); 533 } 534 535 /** 536 * Helper method to get the national-number part of a number, formatted without any national 537 * prefix, and return it as a set of digit blocks that would be formatted together following 538 * standard formatting rules. 539 */ getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number)540 private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number) { 541 // This will be in the format +CC-DG1-DG2-DGX;ext=EXT where DG1..DGX represents groups of 542 // digits. 543 String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966); 544 // We remove the extension part from the formatted string before splitting it into different 545 // groups. 546 int endIndex = rfc3966Format.indexOf(';'); 547 if (endIndex < 0) { 548 endIndex = rfc3966Format.length(); 549 } 550 // The country-code will have a '-' following it. 551 int startIndex = rfc3966Format.indexOf('-') + 1; 552 return rfc3966Format.substring(startIndex, endIndex).split("-"); 553 } 554 555 /** 556 * Helper method to get the national-number part of a number, formatted without any national 557 * prefix, and return it as a set of digit blocks that should be formatted together according to 558 * the formatting pattern passed in. 559 */ getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number, NumberFormat formattingPattern)560 private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number, 561 NumberFormat formattingPattern) { 562 // If a format is provided, we format the NSN only, and split that according to the separator. 563 String nationalSignificantNumber = util.getNationalSignificantNumber(number); 564 return util.formatNsnUsingPattern(nationalSignificantNumber, 565 formattingPattern, PhoneNumberFormat.RFC3966).split("-"); 566 } 567 checkNumberGroupingIsValid( PhoneNumber number, CharSequence candidate, PhoneNumberUtil util, NumberGroupingChecker checker)568 boolean checkNumberGroupingIsValid( 569 PhoneNumber number, CharSequence candidate, PhoneNumberUtil util, 570 NumberGroupingChecker checker) { 571 StringBuilder normalizedCandidate = 572 PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */); 573 String[] formattedNumberGroups = getNationalNumberGroups(util, number); 574 if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { 575 return true; 576 } 577 // If this didn't pass, see if there are any alternate formats that match, and try them instead. 578 PhoneMetadata alternateFormats = 579 DefaultMetadataDependenciesProvider.getInstance() 580 .getAlternateFormatsMetadataSource() 581 .getFormattingMetadataForCountryCallingCode(number.getCountryCode()); 582 String nationalSignificantNumber = util.getNationalSignificantNumber(number); 583 if (alternateFormats != null) { 584 for (NumberFormat alternateFormat : alternateFormats.getNumberFormatList()) { 585 if (alternateFormat.getLeadingDigitsPatternCount() > 0) { 586 // There is only one leading digits pattern for alternate formats. 587 Pattern pattern = 588 regexCache.getPatternForRegex(alternateFormat.getLeadingDigitsPattern(0)); 589 if (!pattern.matcher(nationalSignificantNumber).lookingAt()) { 590 // Leading digits don't match; try another one. 591 continue; 592 } 593 } 594 formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat); 595 if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { 596 return true; 597 } 598 } 599 } 600 return false; 601 } 602 containsMoreThanOneSlashInNationalNumber(PhoneNumber number, String candidate)603 static boolean containsMoreThanOneSlashInNationalNumber(PhoneNumber number, String candidate) { 604 int firstSlashInBodyIndex = candidate.indexOf('/'); 605 if (firstSlashInBodyIndex < 0) { 606 // No slashes, this is okay. 607 return false; 608 } 609 // Now look for a second one. 610 int secondSlashInBodyIndex = candidate.indexOf('/', firstSlashInBodyIndex + 1); 611 if (secondSlashInBodyIndex < 0) { 612 // Only one slash, this is okay. 613 return false; 614 } 615 616 // If the first slash is after the country calling code, this is permitted. 617 boolean candidateHasCountryCode = 618 (number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN 619 || number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN); 620 if (candidateHasCountryCode 621 && PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(0, firstSlashInBodyIndex)) 622 .equals(Integer.toString(number.getCountryCode()))) { 623 // Any more slashes and this is illegal. 624 return candidate.substring(secondSlashInBodyIndex + 1).contains("/"); 625 } 626 return true; 627 } 628 containsOnlyValidXChars( PhoneNumber number, String candidate, PhoneNumberUtil util)629 static boolean containsOnlyValidXChars( 630 PhoneNumber number, String candidate, PhoneNumberUtil util) { 631 // The characters 'x' and 'X' can be (1) a carrier code, in which case they always precede the 632 // national significant number or (2) an extension sign, in which case they always precede the 633 // extension number. We assume a carrier code is more than 1 digit, so the first case has to 634 // have more than 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 'x' 635 // or 'X'. We ignore the character if it appears as the last character of the string. 636 for (int index = 0; index < candidate.length() - 1; index++) { 637 char charAtIndex = candidate.charAt(index); 638 if (charAtIndex == 'x' || charAtIndex == 'X') { 639 char charAtNextIndex = candidate.charAt(index + 1); 640 if (charAtNextIndex == 'x' || charAtNextIndex == 'X') { 641 // This is the carrier code case, in which the 'X's always precede the national 642 // significant number. 643 index++; 644 if (util.isNumberMatch(number, candidate.substring(index)) != MatchType.NSN_MATCH) { 645 return false; 646 } 647 // This is the extension sign case, in which the 'x' or 'X' should always precede the 648 // extension number. 649 } else if (!PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(index)).equals( 650 number.getExtension())) { 651 return false; 652 } 653 } 654 } 655 return true; 656 } 657 isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util)658 static boolean isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util) { 659 // First, check how we deduced the country code. If it was written in international format, then 660 // the national prefix is not required. 661 if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) { 662 return true; 663 } 664 String phoneNumberRegion = 665 util.getRegionCodeForCountryCode(number.getCountryCode()); 666 PhoneMetadata metadata = util.getMetadataForRegion(phoneNumberRegion); 667 if (metadata == null) { 668 return true; 669 } 670 // Check if a national prefix should be present when formatting this number. 671 String nationalNumber = util.getNationalSignificantNumber(number); 672 NumberFormat formatRule = 673 util.chooseFormattingPatternForNumber(metadata.getNumberFormatList(), nationalNumber); 674 // To do this, we check that a national prefix formatting rule was present and that it wasn't 675 // just the first-group symbol ($1) with punctuation. 676 if ((formatRule != null) && formatRule.getNationalPrefixFormattingRule().length() > 0) { 677 if (formatRule.getNationalPrefixOptionalWhenFormatting()) { 678 // The national-prefix is optional in these cases, so we don't need to check if it was 679 // present. 680 return true; 681 } 682 if (PhoneNumberUtil.formattingRuleHasFirstGroupOnly( 683 formatRule.getNationalPrefixFormattingRule())) { 684 // National Prefix not needed for this number. 685 return true; 686 } 687 // Normalize the remainder. 688 String rawInputCopy = PhoneNumberUtil.normalizeDigitsOnly(number.getRawInput()); 689 StringBuilder rawInput = new StringBuilder(rawInputCopy); 690 // Check if we found a national prefix and/or carrier code at the start of the raw input, and 691 // return the result. 692 return util.maybeStripNationalPrefixAndCarrierCode(rawInput, metadata, null); 693 } 694 return true; 695 } 696 697 @Override hasNext()698 public boolean hasNext() { 699 if (state == State.NOT_READY) { 700 lastMatch = find(searchIndex); 701 if (lastMatch == null) { 702 state = State.DONE; 703 } else { 704 searchIndex = lastMatch.end(); 705 state = State.READY; 706 } 707 } 708 return state == State.READY; 709 } 710 711 @Override next()712 public PhoneNumberMatch next() { 713 // Check the state and find the next match as a side-effect if necessary. 714 if (!hasNext()) { 715 throw new NoSuchElementException(); 716 } 717 718 // Don't retain that memory any longer than necessary. 719 PhoneNumberMatch result = lastMatch; 720 lastMatch = null; 721 state = State.NOT_READY; 722 return result; 723 } 724 725 /** 726 * Always throws {@link UnsupportedOperationException} as removal is not supported. 727 */ 728 @Override remove()729 public void remove() { 730 throw new UnsupportedOperationException(); 731 } 732 } 733