1 /* 2 * Copyright (C) 2011 The Libphonenumber Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.i18n.phonenumbers; 18 19 import com.android.i18n.phonenumbers.PhoneNumberUtil.Leniency; 20 import com.android.i18n.phonenumbers.PhoneNumberUtil.MatchType; 21 import com.android.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberFormat; 22 import com.android.i18n.phonenumbers.Phonemetadata.NumberFormat; 23 import com.android.i18n.phonenumbers.Phonemetadata.PhoneMetadata; 24 import com.android.i18n.phonenumbers.Phonenumber.PhoneNumber.CountryCodeSource; 25 import com.android.i18n.phonenumbers.Phonenumber.PhoneNumber; 26 27 import java.lang.Character.UnicodeBlock; 28 import java.util.Iterator; 29 import java.util.NoSuchElementException; 30 import java.util.regex.Matcher; 31 import java.util.regex.Pattern; 32 33 /** 34 * A stateful class that finds and extracts telephone numbers from {@linkplain CharSequence text}. 35 * Instances can be created using the {@linkplain PhoneNumberUtil#findNumbers factory methods} in 36 * {@link PhoneNumberUtil}. 37 * 38 * <p>Vanity numbers (phone numbers using alphabetic digits such as <tt>1-800-SIX-FLAGS</tt> are 39 * not found. 40 * 41 * <p>This class is not thread-safe. 42 */ 43 final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> { 44 /** 45 * The phone number pattern used by {@link #find}, similar to 46 * {@code PhoneNumberUtil.VALID_PHONE_NUMBER}, but with the following differences: 47 * <ul> 48 * <li>All captures are limited in order to place an upper bound to the text matched by the 49 * pattern. 50 * <ul> 51 * <li>Leading punctuation / plus signs are limited. 52 * <li>Consecutive occurrences of punctuation are limited. 53 * <li>Number of digits is limited. 54 * </ul> 55 * <li>No whitespace is allowed at the start or end. 56 * <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported. 57 * </ul> 58 */ 59 private static final Pattern PATTERN; 60 /** 61 * Matches strings that look like publication pages. Example: 62 * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns. 63 * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre> 64 * 65 * The string "211-227 (2003)" is not a telephone number. 66 */ 67 private static final Pattern PUB_PAGES = Pattern.compile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}"); 68 69 /** 70 * Matches strings that look like dates using "/" as a separator. Examples: 3/10/2011, 31/10/96 or 71 * 08/31/95. 72 */ 73 private static final Pattern SLASH_SEPARATED_DATES = 74 Pattern.compile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}"); 75 76 /** 77 * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the 78 * trailing ":\d\d" -- that is covered by TIME_STAMPS_SUFFIX. 79 */ 80 private static final Pattern TIME_STAMPS = 81 Pattern.compile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$"); 82 private static final Pattern TIME_STAMPS_SUFFIX = Pattern.compile(":[0-5]\\d"); 83 84 /** 85 * Pattern to check that brackets match. Opening brackets should be closed within a phone number. 86 * This also checks that there is something inside the brackets. Having no brackets at all is also 87 * fine. 88 */ 89 private static final Pattern MATCHING_BRACKETS; 90 91 /** 92 * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are 93 * ordered according to specificity. For example, white-space is last since that is frequently 94 * used in numbers, not just to separate two numbers. We have separate patterns since we don't 95 * want to break up the phone-number-like text on more than one different kind of symbol at one 96 * time, although symbols of the same type (e.g. space) can be safely grouped together. 97 * 98 * Note that if there is a match, we will always check any text found up to the first match as 99 * well. 100 */ 101 private static final Pattern[] INNER_MATCHES = { 102 // Breaks on the slash - e.g. "651-234-2345/332-445-1234" 103 Pattern.compile("/+(.*)"), 104 // Note that the bracket here is inside the capturing group, since we consider it part of the 105 // phone number. Will match a pattern like "(650) 223 3345 (754) 223 3321". 106 Pattern.compile("(\\([^(]*)"), 107 // Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number." 108 // We require a space on either side of the hyphen for it to be considered a separator. 109 Pattern.compile("(?:\\p{Z}-|-\\p{Z})\\p{Z}*(.+)"), 110 // Various types of wide hyphens. Note we have decided not to enforce a space here, since it's 111 // possible that it's supposed to be used to break two numbers without spaces, and we haven't 112 // seen many instances of it used within a number. 113 Pattern.compile("[\u2012-\u2015\uFF0D]\\p{Z}*(.+)"), 114 // Breaks on a full stop - e.g. "12345. 332-445-1234 is my number." 115 Pattern.compile("\\.+\\p{Z}*([^.]+)"), 116 // Breaks on space - e.g. "3324451234 8002341234" 117 Pattern.compile("\\p{Z}+(\\P{Z}+)") 118 }; 119 120 /** 121 * Punctuation that may be at the start of a phone number - brackets and plus signs. 122 */ 123 private static final Pattern LEAD_CLASS; 124 125 static { 126 /* Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist 127 * to make the pattern more easily understood. */ 128 129 String openingParens = "(\\[\uFF08\uFF3B"; 130 String closingParens = ")\\]\uFF09\uFF3D"; 131 String nonParens = "[^" + openingParens + closingParens + "]"; 132 133 /* Limit on the number of pairs of brackets in a phone number. */ 134 String bracketPairLimit = limit(0, 3); 135 /* 136 * An opening bracket at the beginning may not be closed, but subsequent ones should be. It's 137 * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a 138 * closing bracket first. We limit the sets of brackets in a phone number to four. 139 */ 140 MATCHING_BRACKETS = Pattern.compile( 141 "(?:[" + openingParens + "])?" + "(?:" + nonParens + "+" + "[" + closingParens + "])?" + 142 nonParens + "+" + 143 "(?:[" + openingParens + "]" + nonParens + "+[" + closingParens + "])" + bracketPairLimit + 144 nonParens + "*"); 145 146 /* Limit on the number of leading (plus) characters. */ 147 String leadLimit = limit(0, 2); 148 /* Limit on the number of consecutive punctuation characters. */ 149 String punctuationLimit = limit(0, 4); 150 /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a 151 * single block, set high enough to accommodate the entire national number and the international 152 * country code. */ 153 int digitBlockLimit = 154 PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE; 155 /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some 156 * formats use spaces to separate each digit. */ 157 String blockLimit = limit(0, digitBlockLimit); 158 159 /* A punctuation sequence allowing white space. */ 160 String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit; 161 /* A digits block without punctuation. */ 162 String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit); 163 164 String leadClassChars = openingParens + PhoneNumberUtil.PLUS_CHARS; 165 String leadClass = "[" + leadClassChars + "]"; 166 LEAD_CLASS = Pattern.compile(leadClass); 167 168 /* Phone number pattern allowing optional punctuation. */ 169 PATTERN = Pattern.compile( 170 "(?:" + leadClass + punctuation + ")" + leadLimit + 171 digitSequence + "(?:" + punctuation + digitSequence + ")" + blockLimit + 172 "(?:" + PhoneNumberUtil.EXTN_PATTERNS_FOR_MATCHING + ")?", 173 PhoneNumberUtil.REGEX_FLAGS); 174 } 175 176 /** Returns a regular expression quantifier with an upper and lower limit. */ limit(int lower, int upper)177 private static String limit(int lower, int upper) { 178 if ((lower < 0) || (upper <= 0) || (upper < lower)) { 179 throw new IllegalArgumentException(); 180 } 181 return "{" + lower + "," + upper + "}"; 182 } 183 184 /** The potential states of a PhoneNumberMatcher. */ 185 private enum State { 186 NOT_READY, READY, DONE 187 } 188 189 /** The phone number utility. */ 190 private final PhoneNumberUtil phoneUtil; 191 /** The text searched for phone numbers. */ 192 private final CharSequence text; 193 /** 194 * The region (country) to assume for phone numbers without an international prefix, possibly 195 * null. 196 */ 197 private final String preferredRegion; 198 /** The degree of validation requested. */ 199 private final Leniency leniency; 200 /** The maximum number of retries after matching an invalid number. */ 201 private long maxTries; 202 203 /** The iteration tristate. */ 204 private State state = State.NOT_READY; 205 /** The last successful match, null unless in {@link State#READY}. */ 206 private PhoneNumberMatch lastMatch = null; 207 /** The next index to start searching at. Undefined in {@link State#DONE}. */ 208 private int searchIndex = 0; 209 210 /** 211 * Creates a new instance. See the factory methods in {@link PhoneNumberUtil} on how to obtain a 212 * new instance. 213 * 214 * @param util the phone number util to use 215 * @param text the character sequence that we will search, null for no text 216 * @param country the country to assume for phone numbers not written in international format 217 * (with a leading plus, or with the international dialing prefix of the 218 * specified region). May be null or "ZZ" if only numbers with a 219 * leading plus should be considered. 220 * @param leniency the leniency to use when evaluating candidate phone numbers 221 * @param maxTries the maximum number of invalid numbers to try before giving up on the text. 222 * This is to cover degenerate cases where the text has a lot of false positives 223 * in it. Must be {@code >= 0}. 224 */ PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency, long maxTries)225 PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency, 226 long maxTries) { 227 228 if ((util == null) || (leniency == null)) { 229 throw new NullPointerException(); 230 } 231 if (maxTries < 0) { 232 throw new IllegalArgumentException(); 233 } 234 this.phoneUtil = util; 235 this.text = (text != null) ? text : ""; 236 this.preferredRegion = country; 237 this.leniency = leniency; 238 this.maxTries = maxTries; 239 } 240 241 /** 242 * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex} 243 * that represents a phone number. Returns the next match, null if none was found. 244 * 245 * @param index the search index to start searching at 246 * @return the phone number match found, null if none can be found 247 */ find(int index)248 private PhoneNumberMatch find(int index) { 249 Matcher matcher = PATTERN.matcher(text); 250 while ((maxTries > 0) && matcher.find(index)) { 251 int start = matcher.start(); 252 CharSequence candidate = text.subSequence(start, matcher.end()); 253 254 // Check for extra numbers at the end. 255 // TODO: This is the place to start when trying to support extraction of multiple phone number 256 // from split notations (+41 79 123 45 67 / 68). 257 candidate = trimAfterFirstMatch(PhoneNumberUtil.SECOND_NUMBER_START_PATTERN, candidate); 258 259 PhoneNumberMatch match = extractMatch(candidate, start); 260 if (match != null) { 261 return match; 262 } 263 264 index = start + candidate.length(); 265 maxTries--; 266 } 267 268 return null; 269 } 270 271 /** 272 * Trims away any characters after the first match of {@code pattern} in {@code candidate}, 273 * returning the trimmed version. 274 */ trimAfterFirstMatch(Pattern pattern, CharSequence candidate)275 private static CharSequence trimAfterFirstMatch(Pattern pattern, CharSequence candidate) { 276 Matcher trailingCharsMatcher = pattern.matcher(candidate); 277 if (trailingCharsMatcher.find()) { 278 candidate = candidate.subSequence(0, trailingCharsMatcher.start()); 279 } 280 return candidate; 281 } 282 283 /** 284 * Helper method to determine if a character is a Latin-script letter or not. For our purposes, 285 * combining marks should also return true since we assume they have been added to a preceding 286 * Latin character. 287 */ 288 // @VisibleForTesting isLatinLetter(char letter)289 static boolean isLatinLetter(char letter) { 290 // Combining marks are a subset of non-spacing-mark. 291 if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) { 292 return false; 293 } 294 UnicodeBlock block = UnicodeBlock.of(letter); 295 return block.equals(UnicodeBlock.BASIC_LATIN) || 296 block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) || 297 block.equals(UnicodeBlock.LATIN_EXTENDED_A) || 298 block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) || 299 block.equals(UnicodeBlock.LATIN_EXTENDED_B) || 300 block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS); 301 } 302 isInvalidPunctuationSymbol(char character)303 private static boolean isInvalidPunctuationSymbol(char character) { 304 return character == '%' || Character.getType(character) == Character.CURRENCY_SYMBOL; 305 } 306 307 /** 308 * Attempts to extract a match from a {@code candidate} character sequence. 309 * 310 * @param candidate the candidate text that might contain a phone number 311 * @param offset the offset of {@code candidate} within {@link #text} 312 * @return the match found, null if none can be found 313 */ extractMatch(CharSequence candidate, int offset)314 private PhoneNumberMatch extractMatch(CharSequence candidate, int offset) { 315 // Skip a match that is more likely to be a date. 316 if (SLASH_SEPARATED_DATES.matcher(candidate).find()) { 317 return null; 318 } 319 320 // Skip potential time-stamps. 321 if (TIME_STAMPS.matcher(candidate).find()) { 322 String followingText = text.toString().substring(offset + candidate.length()); 323 if (TIME_STAMPS_SUFFIX.matcher(followingText).lookingAt()) { 324 return null; 325 } 326 } 327 328 // Try to come up with a valid match given the entire candidate. 329 String rawString = candidate.toString(); 330 PhoneNumberMatch match = parseAndVerify(rawString, offset); 331 if (match != null) { 332 return match; 333 } 334 335 // If that failed, try to find an "inner match" - there might be a phone number within this 336 // candidate. 337 return extractInnerMatch(rawString, offset); 338 } 339 340 /** 341 * Attempts to extract a match from {@code candidate} if the whole candidate does not qualify as a 342 * match. 343 * 344 * @param candidate the candidate text that might contain a phone number 345 * @param offset the current offset of {@code candidate} within {@link #text} 346 * @return the match found, null if none can be found 347 */ extractInnerMatch(String candidate, int offset)348 private PhoneNumberMatch extractInnerMatch(String candidate, int offset) { 349 for (Pattern possibleInnerMatch : INNER_MATCHES) { 350 int rangeStart = 0; 351 Matcher groupMatcher = possibleInnerMatch.matcher(candidate); 352 boolean isFirstMatch = true; 353 while (groupMatcher.find() && maxTries > 0) { 354 if (isFirstMatch) { 355 // We should handle any group before this one too. 356 CharSequence group = trimAfterFirstMatch( 357 PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, 358 candidate.substring(0, groupMatcher.start())); 359 PhoneNumberMatch match = parseAndVerify(group.toString(), offset); 360 if (match != null) { 361 return match; 362 } 363 maxTries--; 364 isFirstMatch = false; 365 } 366 CharSequence group = trimAfterFirstMatch( 367 PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, groupMatcher.group(1)); 368 PhoneNumberMatch match = parseAndVerify(group.toString(), offset + groupMatcher.start(1)); 369 if (match != null) { 370 return match; 371 } 372 maxTries--; 373 } 374 } 375 return null; 376 } 377 378 /** 379 * Parses a phone number from the {@code candidate} using {@link PhoneNumberUtil#parse} and 380 * verifies it matches the requested {@link #leniency}. If parsing and verification succeed, a 381 * corresponding {@link PhoneNumberMatch} is returned, otherwise this method returns null. 382 * 383 * @param candidate the candidate match 384 * @param offset the offset of {@code candidate} within {@link #text} 385 * @return the parsed and validated phone number match, or null 386 */ parseAndVerify(String candidate, int offset)387 private PhoneNumberMatch parseAndVerify(String candidate, int offset) { 388 try { 389 // Check the candidate doesn't contain any formatting which would indicate that it really 390 // isn't a phone number. 391 if (!MATCHING_BRACKETS.matcher(candidate).matches() || PUB_PAGES.matcher(candidate).find()) { 392 return null; 393 } 394 395 // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded 396 // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def. 397 if (leniency.compareTo(Leniency.VALID) >= 0) { 398 // If the candidate is not at the start of the text, and does not start with phone-number 399 // punctuation, check the previous character. 400 if (offset > 0 && !LEAD_CLASS.matcher(candidate).lookingAt()) { 401 char previousChar = text.charAt(offset - 1); 402 // We return null if it is a latin letter or an invalid punctuation symbol. 403 if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) { 404 return null; 405 } 406 } 407 int lastCharIndex = offset + candidate.length(); 408 if (lastCharIndex < text.length()) { 409 char nextChar = text.charAt(lastCharIndex); 410 if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) { 411 return null; 412 } 413 } 414 } 415 416 PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion); 417 418 // Check Israel * numbers: these are a special case in that they are four-digit numbers that 419 // our library supports, but they can only be dialled with a leading *. Since we don't 420 // actually store or detect the * in our phone number library, this means in practice we 421 // detect most four digit numbers as being valid for Israel. We are considering moving these 422 // numbers to ShortNumberInfo instead, in which case this problem would go away, but in the 423 // meantime we want to restrict the false matches so we only allow these numbers if they are 424 // preceded by a star. We enforce this for all leniency levels even though these numbers are 425 // technically accepted by isPossibleNumber and isValidNumber since we consider it to be a 426 // deficiency in those methods that they accept these numbers without the *. 427 // TODO: Remove this or make it significantly less hacky once we've decided how to 428 // handle these short codes going forward in ShortNumberInfo. We could use the formatting 429 // rules for instance, but that would be slower. 430 if (phoneUtil.getRegionCodeForCountryCode(number.getCountryCode()).equals("IL") && 431 phoneUtil.getNationalSignificantNumber(number).length() == 4 && 432 (offset == 0 || (offset > 0 && text.charAt(offset - 1) != '*'))) { 433 // No match. 434 return null; 435 } 436 437 if (leniency.verify(number, candidate, phoneUtil)) { 438 // We used parseAndKeepRawInput to create this number, but for now we don't return the extra 439 // values parsed. TODO: stop clearing all values here and switch all users over 440 // to using rawInput() rather than the rawString() of PhoneNumberMatch. 441 number.clearCountryCodeSource(); 442 number.clearRawInput(); 443 number.clearPreferredDomesticCarrierCode(); 444 return new PhoneNumberMatch(offset, candidate, number); 445 } 446 } catch (NumberParseException e) { 447 // ignore and continue 448 } 449 return null; 450 } 451 452 /** 453 * Small helper interface such that the number groups can be checked according to different 454 * criteria, both for our default way of performing formatting and for any alternate formats we 455 * may want to check. 456 */ 457 interface NumberGroupingChecker { 458 /** 459 * Returns true if the groups of digits found in our candidate phone number match our 460 * expectations. 461 * 462 * @param number the original number we found when parsing 463 * @param normalizedCandidate the candidate number, normalized to only contain ASCII digits, 464 * but with non-digits (spaces etc) retained 465 * @param expectedNumberGroups the groups of digits that we would expect to see if we 466 * formatted this number 467 */ checkGroups(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] expectedNumberGroups)468 boolean checkGroups(PhoneNumberUtil util, PhoneNumber number, 469 StringBuilder normalizedCandidate, String[] expectedNumberGroups); 470 } 471 allNumberGroupsRemainGrouped(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] formattedNumberGroups)472 static boolean allNumberGroupsRemainGrouped(PhoneNumberUtil util, 473 PhoneNumber number, 474 StringBuilder normalizedCandidate, 475 String[] formattedNumberGroups) { 476 int fromIndex = 0; 477 if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) { 478 // First skip the country code if the normalized candidate contained it. 479 String countryCode = Integer.toString(number.getCountryCode()); 480 fromIndex = normalizedCandidate.indexOf(countryCode) + countryCode.length(); 481 } 482 // Check each group of consecutive digits are not broken into separate groupings in the 483 // {@code normalizedCandidate} string. 484 for (int i = 0; i < formattedNumberGroups.length; i++) { 485 // Fails if the substring of {@code normalizedCandidate} starting from {@code fromIndex} 486 // doesn't contain the consecutive digits in formattedNumberGroups[i]. 487 fromIndex = normalizedCandidate.indexOf(formattedNumberGroups[i], fromIndex); 488 if (fromIndex < 0) { 489 return false; 490 } 491 // Moves {@code fromIndex} forward. 492 fromIndex += formattedNumberGroups[i].length(); 493 if (i == 0 && fromIndex < normalizedCandidate.length()) { 494 // We are at the position right after the NDC. We get the region used for formatting 495 // information based on the country code in the phone number, rather than the number itself, 496 // as we do not need to distinguish between different countries with the same country 497 // calling code and this is faster. 498 String region = util.getRegionCodeForCountryCode(number.getCountryCode()); 499 if (util.getNddPrefixForRegion(region, true) != null && 500 Character.isDigit(normalizedCandidate.charAt(fromIndex))) { 501 // This means there is no formatting symbol after the NDC. In this case, we only 502 // accept the number if there is no formatting symbol at all in the number, except 503 // for extensions. This is only important for countries with national prefixes. 504 String nationalSignificantNumber = util.getNationalSignificantNumber(number); 505 return normalizedCandidate.substring(fromIndex - formattedNumberGroups[i].length()) 506 .startsWith(nationalSignificantNumber); 507 } 508 } 509 } 510 // The check here makes sure that we haven't mistakenly already used the extension to 511 // match the last group of the subscriber number. Note the extension cannot have 512 // formatting in-between digits. 513 return normalizedCandidate.substring(fromIndex).contains(number.getExtension()); 514 } 515 allNumberGroupsAreExactlyPresent(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] formattedNumberGroups)516 static boolean allNumberGroupsAreExactlyPresent(PhoneNumberUtil util, 517 PhoneNumber number, 518 StringBuilder normalizedCandidate, 519 String[] formattedNumberGroups) { 520 String[] candidateGroups = 521 PhoneNumberUtil.NON_DIGITS_PATTERN.split(normalizedCandidate.toString()); 522 // Set this to the last group, skipping it if the number has an extension. 523 int candidateNumberGroupIndex = 524 number.hasExtension() ? candidateGroups.length - 2 : candidateGroups.length - 1; 525 // First we check if the national significant number is formatted as a block. 526 // We use contains and not equals, since the national significant number may be present with 527 // a prefix such as a national number prefix, or the country code itself. 528 if (candidateGroups.length == 1 || 529 candidateGroups[candidateNumberGroupIndex].contains( 530 util.getNationalSignificantNumber(number))) { 531 return true; 532 } 533 // Starting from the end, go through in reverse, excluding the first group, and check the 534 // candidate and number groups are the same. 535 for (int formattedNumberGroupIndex = (formattedNumberGroups.length - 1); 536 formattedNumberGroupIndex > 0 && candidateNumberGroupIndex >= 0; 537 formattedNumberGroupIndex--, candidateNumberGroupIndex--) { 538 if (!candidateGroups[candidateNumberGroupIndex].equals( 539 formattedNumberGroups[formattedNumberGroupIndex])) { 540 return false; 541 } 542 } 543 // Now check the first group. There may be a national prefix at the start, so we only check 544 // that the candidate group ends with the formatted number group. 545 return (candidateNumberGroupIndex >= 0 && 546 candidateGroups[candidateNumberGroupIndex].endsWith(formattedNumberGroups[0])); 547 } 548 549 /** 550 * Helper method to get the national-number part of a number, formatted without any national 551 * prefix, and return it as a set of digit blocks that would be formatted together. 552 */ getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number, NumberFormat formattingPattern)553 private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number, 554 NumberFormat formattingPattern) { 555 if (formattingPattern == null) { 556 // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits. 557 String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966); 558 // We remove the extension part from the formatted string before splitting it into different 559 // groups. 560 int endIndex = rfc3966Format.indexOf(';'); 561 if (endIndex < 0) { 562 endIndex = rfc3966Format.length(); 563 } 564 // The country-code will have a '-' following it. 565 int startIndex = rfc3966Format.indexOf('-') + 1; 566 return rfc3966Format.substring(startIndex, endIndex).split("-"); 567 } else { 568 // We format the NSN only, and split that according to the separator. 569 String nationalSignificantNumber = util.getNationalSignificantNumber(number); 570 return util.formatNsnUsingPattern(nationalSignificantNumber, 571 formattingPattern, PhoneNumberFormat.RFC3966).split("-"); 572 } 573 } 574 checkNumberGroupingIsValid( PhoneNumber number, String candidate, PhoneNumberUtil util, NumberGroupingChecker checker)575 static boolean checkNumberGroupingIsValid( 576 PhoneNumber number, String candidate, PhoneNumberUtil util, NumberGroupingChecker checker) { 577 // TODO: Evaluate how this works for other locales (testing has been limited to NANPA regions) 578 // and optimise if necessary. 579 StringBuilder normalizedCandidate = 580 PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */); 581 String[] formattedNumberGroups = getNationalNumberGroups(util, number, null); 582 if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { 583 return true; 584 } 585 // If this didn't pass, see if there are any alternate formats, and try them instead. 586 PhoneMetadata alternateFormats = 587 MetadataManager.getAlternateFormatsForCountry(number.getCountryCode()); 588 if (alternateFormats != null) { 589 for (NumberFormat alternateFormat : alternateFormats.numberFormats()) { 590 formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat); 591 if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { 592 return true; 593 } 594 } 595 } 596 return false; 597 } 598 containsMoreThanOneSlashInNationalNumber(PhoneNumber number, String candidate)599 static boolean containsMoreThanOneSlashInNationalNumber(PhoneNumber number, String candidate) { 600 int firstSlashInBodyIndex = candidate.indexOf('/'); 601 if (firstSlashInBodyIndex < 0) { 602 // No slashes, this is okay. 603 return false; 604 } 605 // Now look for a second one. 606 int secondSlashInBodyIndex = candidate.indexOf('/', firstSlashInBodyIndex + 1); 607 if (secondSlashInBodyIndex < 0) { 608 // Only one slash, this is okay. 609 return false; 610 } 611 612 // If the first slash is after the country calling code, this is permitted. 613 boolean candidateHasCountryCode = 614 (number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN || 615 number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN); 616 if (candidateHasCountryCode && 617 PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(0, firstSlashInBodyIndex)) 618 .equals(Integer.toString(number.getCountryCode()))) { 619 // Any more slashes and this is illegal. 620 return candidate.substring(secondSlashInBodyIndex + 1).contains("/"); 621 } 622 return true; 623 } 624 containsOnlyValidXChars( PhoneNumber number, String candidate, PhoneNumberUtil util)625 static boolean containsOnlyValidXChars( 626 PhoneNumber number, String candidate, PhoneNumberUtil util) { 627 // The characters 'x' and 'X' can be (1) a carrier code, in which case they always precede the 628 // national significant number or (2) an extension sign, in which case they always precede the 629 // extension number. We assume a carrier code is more than 1 digit, so the first case has to 630 // have more than 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 'x' 631 // or 'X'. We ignore the character if it appears as the last character of the string. 632 for (int index = 0; index < candidate.length() - 1; index++) { 633 char charAtIndex = candidate.charAt(index); 634 if (charAtIndex == 'x' || charAtIndex == 'X') { 635 char charAtNextIndex = candidate.charAt(index + 1); 636 if (charAtNextIndex == 'x' || charAtNextIndex == 'X') { 637 // This is the carrier code case, in which the 'X's always precede the national 638 // significant number. 639 index++; 640 if (util.isNumberMatch(number, candidate.substring(index)) != MatchType.NSN_MATCH) { 641 return false; 642 } 643 // This is the extension sign case, in which the 'x' or 'X' should always precede the 644 // extension number. 645 } else if (!PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(index)).equals( 646 number.getExtension())) { 647 return false; 648 } 649 } 650 } 651 return true; 652 } 653 isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util)654 static boolean isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util) { 655 // First, check how we deduced the country code. If it was written in international format, then 656 // the national prefix is not required. 657 if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) { 658 return true; 659 } 660 String phoneNumberRegion = 661 util.getRegionCodeForCountryCode(number.getCountryCode()); 662 PhoneMetadata metadata = util.getMetadataForRegion(phoneNumberRegion); 663 if (metadata == null) { 664 return true; 665 } 666 // Check if a national prefix should be present when formatting this number. 667 String nationalNumber = util.getNationalSignificantNumber(number); 668 NumberFormat formatRule = 669 util.chooseFormattingPatternForNumber(metadata.numberFormats(), nationalNumber); 670 // To do this, we check that a national prefix formatting rule was present and that it wasn't 671 // just the first-group symbol ($1) with punctuation. 672 if ((formatRule != null) && formatRule.getNationalPrefixFormattingRule().length() > 0) { 673 if (formatRule.isNationalPrefixOptionalWhenFormatting()) { 674 // The national-prefix is optional in these cases, so we don't need to check if it was 675 // present. 676 return true; 677 } 678 if (PhoneNumberUtil.formattingRuleHasFirstGroupOnly( 679 formatRule.getNationalPrefixFormattingRule())) { 680 // National Prefix not needed for this number. 681 return true; 682 } 683 // Normalize the remainder. 684 String rawInputCopy = PhoneNumberUtil.normalizeDigitsOnly(number.getRawInput()); 685 StringBuilder rawInput = new StringBuilder(rawInputCopy); 686 // Check if we found a national prefix and/or carrier code at the start of the raw input, and 687 // return the result. 688 return util.maybeStripNationalPrefixAndCarrierCode(rawInput, metadata, null); 689 } 690 return true; 691 } 692 hasNext()693 public boolean hasNext() { 694 if (state == State.NOT_READY) { 695 lastMatch = find(searchIndex); 696 if (lastMatch == null) { 697 state = State.DONE; 698 } else { 699 searchIndex = lastMatch.end(); 700 state = State.READY; 701 } 702 } 703 return state == State.READY; 704 } 705 next()706 public PhoneNumberMatch next() { 707 // Check the state and find the next match as a side-effect if necessary. 708 if (!hasNext()) { 709 throw new NoSuchElementException(); 710 } 711 712 // Don't retain that memory any longer than necessary. 713 PhoneNumberMatch result = lastMatch; 714 lastMatch = null; 715 state = State.NOT_READY; 716 return result; 717 } 718 719 /** 720 * Always throws {@link UnsupportedOperationException} as removal is not supported. 721 */ remove()722 public void remove() { 723 throw new UnsupportedOperationException(); 724 } 725 } 726