• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 The Libphonenumber Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.i18n.phonenumbers;
18 
19 import com.android.i18n.phonenumbers.PhoneNumberUtil.Leniency;
20 import com.android.i18n.phonenumbers.PhoneNumberUtil.MatchType;
21 import com.android.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberFormat;
22 import com.android.i18n.phonenumbers.Phonemetadata.NumberFormat;
23 import com.android.i18n.phonenumbers.Phonemetadata.PhoneMetadata;
24 import com.android.i18n.phonenumbers.Phonenumber.PhoneNumber.CountryCodeSource;
25 import com.android.i18n.phonenumbers.Phonenumber.PhoneNumber;
26 
27 import java.lang.Character.UnicodeBlock;
28 import java.util.Iterator;
29 import java.util.NoSuchElementException;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 
33 /**
34  * A stateful class that finds and extracts telephone numbers from {@linkplain CharSequence text}.
35  * Instances can be created using the {@linkplain PhoneNumberUtil#findNumbers factory methods} in
36  * {@link PhoneNumberUtil}.
37  *
 * <p>Vanity numbers (phone numbers using alphabetic digits such as <tt>1-800-SIX-FLAGS</tt>) are
 * not found.
40  *
41  * <p>This class is not thread-safe.
42  *
43  * @author Tom Hofmann
44  */
45 final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
  /**
   * The phone number pattern used by {@link #find}, similar to
   * {@code PhoneNumberUtil.VALID_PHONE_NUMBER}, but with the following differences:
   * <ul>
   *   <li>All captures are limited in order to place an upper bound to the text matched by the
   *       pattern.
   * <ul>
   *   <li>Leading punctuation / plus signs are limited.
   *   <li>Consecutive occurrences of punctuation are limited.
   *   <li>Number of digits is limited.
   * </ul>
   *   <li>No whitespace is allowed at the start or end.
   *   <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported.
   * </ul>
   *
   * <p>Assigned in the static initializer of this class.
   */
  private static final Pattern PATTERN;
  /**
   * Matches strings that look like publication pages. Example:
   * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
   * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre>
   *
   * The string "211-227 (2003)" is not a telephone number.
   */
  private static final Pattern PUB_PAGES = Pattern.compile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}");

  /**
   * Matches strings that look like dates using "/" as a separator. Examples: 3/10/2011, 31/10/96 or
   * 08/31/95.
   */
  private static final Pattern SLASH_SEPARATED_DATES =
      Pattern.compile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}");

  /**
   * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the
   * trailing ":\d\d" -- that is covered by TIME_STAMPS_SUFFIX. The pattern is anchored to the end
   * of the input it is applied to.
   */
  private static final Pattern TIME_STAMPS =
      Pattern.compile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d [0-2]\\d$");
  /**
   * Matches the remainder of a timestamp: a colon followed by two digits, the first of which is in
   * the range 0-5. Applied to the text that follows a candidate matched by TIME_STAMPS.
   */
  private static final Pattern TIME_STAMPS_SUFFIX = Pattern.compile(":[0-5]\\d");

  /**
   * Pattern to check that brackets match. Opening brackets should be closed within a phone number.
   * This also checks that there is something inside the brackets. Having no brackets at all is also
   * fine.
   *
   * <p>Assigned in the static initializer of this class.
   */
  private static final Pattern MATCHING_BRACKETS;

  /**
   * Matches white-space, which may indicate the end of a phone number and the start of something
   * else (such as a neighbouring zip-code). If white-space is found, continues to match all
   * characters that are not typically used to start a phone number.
   *
   * <p>Assigned in the static initializer of this class.
   */
  private static final Pattern GROUP_SEPARATOR;

  /**
   * Punctuation that may be at the start of a phone number - brackets and plus signs.
   *
   * <p>Assigned in the static initializer of this class.
   */
  private static final Pattern LEAD_CLASS;
104 
  static {
    /* Builds the MATCHING_BRACKETS, LEAD_CLASS, GROUP_SEPARATOR and PATTERN regular expressions.
     * The building blocks below exist to make the patterns more easily understood. */

    /* Character-class contents (not yet wrapped in [...]): ASCII '(' and '[' plus their fullwidth
     * forms U+FF08 and U+FF3B, and the corresponding closing characters. */
    String openingParens = "(\\[\uFF08\uFF3B";
    String closingParens = ")\\]\uFF09\uFF3D";
    /* Any single character that is neither an opening nor a closing bracket. */
    String nonParens = "[^" + openingParens + closingParens + "]";

    /* Limit on the number of pairs of brackets in a phone number. */
    String bracketPairLimit = limit(0, 3);
    /*
     * An opening bracket at the beginning may not be closed, but subsequent ones should be.  It's
     * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a
     * closing bracket first. We limit the sets of brackets in a phone number to four: the optional
     * leading group plus up to three fully matched pairs (bracketPairLimit).
     */
    MATCHING_BRACKETS = Pattern.compile(
        "(?:[" + openingParens + "])?" + "(?:" + nonParens + "+" + "[" + closingParens + "])?" +
        nonParens + "+" +
        "(?:[" + openingParens + "]" + nonParens + "+[" + closingParens + "])" + bracketPairLimit +
        nonParens + "*");

    /* Limit on the number of leading (plus) characters. */
    String leadLimit = limit(0, 2);
    /* Limit on the number of consecutive punctuation characters. */
    String punctuationLimit = limit(0, 4);
    /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a
     * single block, set high enough to accommodate the entire national number and the international
     * country code. */
    int digitBlockLimit =
        PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE;
    /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some
     * formats use spaces to separate each digit. */
    String blockLimit = limit(0, digitBlockLimit);

    /* A punctuation sequence allowing white space. */
    String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit;
    /* A digits block without punctuation. */
    String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit);

    /* Characters that may lead a number: the opening brackets plus the plus-sign characters. */
    String leadClassChars = openingParens + PhoneNumberUtil.PLUS_CHARS;
    String leadClass = "[" + leadClassChars + "]";
    LEAD_CLASS = Pattern.compile(leadClass);
    /* One separator character (\p{Z}) followed by any run of characters that are neither lead
     * characters nor decimal digits. */
    GROUP_SEPARATOR = Pattern.compile("\\p{Z}" + "[^" + leadClassChars  + "\\p{Nd}]*");

    /* Phone number pattern allowing optional punctuation: a bounded number of lead characters, a
     * first digit block, a bounded number of punctuation-separated digit blocks, and an optional
     * extension. */
    PATTERN = Pattern.compile(
        "(?:" + leadClass + punctuation + ")" + leadLimit +
        digitSequence + "(?:" + punctuation + digitSequence + ")" + blockLimit +
        "(?:" + PhoneNumberUtil.EXTN_PATTERNS_FOR_MATCHING + ")?",
        PhoneNumberUtil.REGEX_FLAGS);
  }
156 
157   /** Returns a regular expression quantifier with an upper and lower limit. */
limit(int lower, int upper)158   private static String limit(int lower, int upper) {
159     if ((lower < 0) || (upper <= 0) || (upper < lower)) {
160       throw new IllegalArgumentException();
161     }
162     return "{" + lower + "," + upper + "}";
163   }
164 
  /**
   * The potential states of a PhoneNumberMatcher.
   * <ul>
   *   <li>{@code NOT_READY}: no match is buffered; {@link #hasNext} will run a search.
   *   <li>{@code READY}: a match is buffered in {@code lastMatch} and will be handed out by
   *       {@link #next}.
   *   <li>{@code DONE}: the last search found nothing; {@link #hasNext} returns false from now on.
   * </ul>
   */
  private enum State {
    NOT_READY, READY, DONE
  }
169 
  /** The phone number utility. */
  private final PhoneNumberUtil phoneUtil;
  /** The text searched for phone numbers. Never null; a null constructor argument becomes "". */
  private final CharSequence text;
  /**
   * The region (country) to assume for phone numbers without an international prefix, possibly
   * null.
   */
  private final String preferredRegion;
  /** The degree of validation requested. */
  private final Leniency leniency;
  /**
   * The remaining number of retries after matching an invalid number. Not final: it is
   * decremented by {@link #find} and {@link #extractInnerMatch} each time a candidate is rejected,
   * and searching stops once it is no longer positive.
   */
  private long maxTries;

  /** The iteration tristate. */
  private State state = State.NOT_READY;
  /** The last successful match, null unless in {@link State#READY}. */
  private PhoneNumberMatch lastMatch = null;
  /** The next index to start searching at. Undefined in {@link State#DONE}. */
  private int searchIndex = 0;
190 
191   /**
192    * Creates a new instance. See the factory methods in {@link PhoneNumberUtil} on how to obtain a
193    * new instance.
194    *
195    * @param util      the phone number util to use
196    * @param text      the character sequence that we will search, null for no text
197    * @param country   the country to assume for phone numbers not written in international format
198    *                  (with a leading plus, or with the international dialing prefix of the
199    *                  specified region). May be null or "ZZ" if only numbers with a
200    *                  leading plus should be considered.
201    * @param leniency  the leniency to use when evaluating candidate phone numbers
202    * @param maxTries  the maximum number of invalid numbers to try before giving up on the text.
203    *                  This is to cover degenerate cases where the text has a lot of false positives
204    *                  in it. Must be {@code >= 0}.
205    */
PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency, long maxTries)206   PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency,
207       long maxTries) {
208 
209     if ((util == null) || (leniency == null)) {
210       throw new NullPointerException();
211     }
212     if (maxTries < 0) {
213       throw new IllegalArgumentException();
214     }
215     this.phoneUtil = util;
216     this.text = (text != null) ? text : "";
217     this.preferredRegion = country;
218     this.leniency = leniency;
219     this.maxTries = maxTries;
220   }
221 
222   /**
223    * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex}
224    * that represents a phone number. Returns the next match, null if none was found.
225    *
226    * @param index  the search index to start searching at
227    * @return  the phone number match found, null if none can be found
228    */
find(int index)229   private PhoneNumberMatch find(int index) {
230     Matcher matcher = PATTERN.matcher(text);
231     while ((maxTries > 0) && matcher.find(index)) {
232       int start = matcher.start();
233       CharSequence candidate = text.subSequence(start, matcher.end());
234 
235       // Check for extra numbers at the end.
236       // TODO: This is the place to start when trying to support extraction of multiple phone number
237       // from split notations (+41 79 123 45 67 / 68).
238       candidate = trimAfterFirstMatch(PhoneNumberUtil.SECOND_NUMBER_START_PATTERN, candidate);
239 
240       PhoneNumberMatch match = extractMatch(candidate, start);
241       if (match != null) {
242         return match;
243       }
244 
245       index = start + candidate.length();
246       maxTries--;
247     }
248 
249     return null;
250   }
251 
252   /**
253    * Trims away any characters after the first match of {@code pattern} in {@code candidate},
254    * returning the trimmed version.
255    */
trimAfterFirstMatch(Pattern pattern, CharSequence candidate)256   private static CharSequence trimAfterFirstMatch(Pattern pattern, CharSequence candidate) {
257     Matcher trailingCharsMatcher = pattern.matcher(candidate);
258     if (trailingCharsMatcher.find()) {
259       candidate = candidate.subSequence(0, trailingCharsMatcher.start());
260     }
261     return candidate;
262   }
263 
264   /**
265    * Helper method to determine if a character is a Latin-script letter or not. For our purposes,
266    * combining marks should also return true since we assume they have been added to a preceding
267    * Latin character.
268    */
269   // @VisibleForTesting
isLatinLetter(char letter)270   static boolean isLatinLetter(char letter) {
271     // Combining marks are a subset of non-spacing-mark.
272     if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) {
273       return false;
274     }
275     UnicodeBlock block = UnicodeBlock.of(letter);
276     return block.equals(UnicodeBlock.BASIC_LATIN) ||
277         block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) ||
278         block.equals(UnicodeBlock.LATIN_EXTENDED_A) ||
279         block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) ||
280         block.equals(UnicodeBlock.LATIN_EXTENDED_B) ||
281         block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
282   }
283 
isInvalidPunctuationSymbol(char character)284   private static boolean isInvalidPunctuationSymbol(char character) {
285     return character == '%' || Character.getType(character) == Character.CURRENCY_SYMBOL;
286   }
287 
288   /**
289    * Attempts to extract a match from a {@code candidate} character sequence.
290    *
291    * @param candidate  the candidate text that might contain a phone number
292    * @param offset  the offset of {@code candidate} within {@link #text}
293    * @return  the match found, null if none can be found
294    */
extractMatch(CharSequence candidate, int offset)295   private PhoneNumberMatch extractMatch(CharSequence candidate, int offset) {
296     // Skip a match that is more likely a publication page reference or a date.
297     if (PUB_PAGES.matcher(candidate).find() || SLASH_SEPARATED_DATES.matcher(candidate).find()) {
298       return null;
299     }
300     // Skip potential time-stamps.
301     if (TIME_STAMPS.matcher(candidate).find()) {
302       String followingText = text.toString().substring(offset + candidate.length());
303       if (TIME_STAMPS_SUFFIX.matcher(followingText).lookingAt()) {
304         return null;
305       }
306     }
307 
308     // Try to come up with a valid match given the entire candidate.
309     String rawString = candidate.toString();
310     PhoneNumberMatch match = parseAndVerify(rawString, offset);
311     if (match != null) {
312       return match;
313     }
314 
315     // If that failed, try to find an "inner match" - there might be a phone number within this
316     // candidate.
317     return extractInnerMatch(rawString, offset);
318   }
319 
  /**
   * Attempts to extract a match from {@code candidate} if the whole candidate does not qualify as a
   * match. Up to three sub-candidates are tried in order: the first group alone, everything after
   * the first group, and everything before the last group. Every rejected sub-candidate decrements
   * {@code maxTries}.
   *
   * @param candidate  the candidate text that might contain a phone number
   * @param offset  the current offset of {@code candidate} within {@link #text}
   * @return  the match found, null if none can be found
   */
  private PhoneNumberMatch extractInnerMatch(String candidate, int offset) {
    // Try removing either the first or last "group" in the number and see if this gives a result.
    // We consider white space to be a possible indication of the start or end of the phone number.
    Matcher groupMatcher = GROUP_SEPARATOR.matcher(candidate);

    // Without any group separator there is nothing to split, so no inner match is possible.
    if (groupMatcher.find()) {
      // Try the first group by itself.
      CharSequence firstGroupOnly = candidate.substring(0, groupMatcher.start());
      firstGroupOnly = trimAfterFirstMatch(PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN,
                                           firstGroupOnly);
      PhoneNumberMatch match = parseAndVerify(firstGroupOnly.toString(), offset);
      if (match != null) {
        return match;
      }
      maxTries--;

      // The remainder starts where the first separator match ends.
      int withoutFirstGroupStart = groupMatcher.end();
      // Try the rest of the candidate without the first group.
      CharSequence withoutFirstGroup = candidate.substring(withoutFirstGroupStart);
      withoutFirstGroup = trimAfterFirstMatch(PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN,
                                              withoutFirstGroup);
      // The offset is shifted so the reported match position is relative to the full text.
      match = parseAndVerify(withoutFirstGroup.toString(), offset + withoutFirstGroupStart);
      if (match != null) {
        return match;
      }
      maxTries--;

      // The third attempt is only made while there is retry budget left.
      if (maxTries > 0) {
        int lastGroupStart = withoutFirstGroupStart;
        // Advance through the remaining separators; the last one found marks the last group.
        while (groupMatcher.find()) {
          // Find the last group.
          lastGroupStart = groupMatcher.start();
        }
        CharSequence withoutLastGroup = candidate.substring(0, lastGroupStart);
        withoutLastGroup = trimAfterFirstMatch(PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN,
                                               withoutLastGroup);
        if (withoutLastGroup.equals(firstGroupOnly)) {
          // If there are only two groups, then the group "without the last group" is the same as
          // the first group. In these cases, we don't want to re-check the number group, so we exit
          // already.
          return null;
        }
        match = parseAndVerify(withoutLastGroup.toString(), offset);
        if (match != null) {
          return match;
        }
        maxTries--;
      }
    }
    return null;
  }
379 
380   /**
381    * Parses a phone number from the {@code candidate} using {@link PhoneNumberUtil#parse} and
382    * verifies it matches the requested {@link #leniency}. If parsing and verification succeed, a
383    * corresponding {@link PhoneNumberMatch} is returned, otherwise this method returns null.
384    *
385    * @param candidate  the candidate match
386    * @param offset  the offset of {@code candidate} within {@link #text}
387    * @return  the parsed and validated phone number match, or null
388    */
parseAndVerify(String candidate, int offset)389   private PhoneNumberMatch parseAndVerify(String candidate, int offset) {
390     try {
391       // Check the candidate doesn't contain any formatting which would indicate that it really
392       // isn't a phone number.
393       if (!MATCHING_BRACKETS.matcher(candidate).matches()) {
394         return null;
395       }
396 
397       // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded
398       // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
399       if (leniency.compareTo(Leniency.VALID) >= 0) {
400         // If the candidate is not at the start of the text, and does not start with phone-number
401         // punctuation, check the previous character.
402         if (offset > 0 && !LEAD_CLASS.matcher(candidate).lookingAt()) {
403           char previousChar = text.charAt(offset - 1);
404           // We return null if it is a latin letter or an invalid punctuation symbol.
405           if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) {
406             return null;
407           }
408         }
409         int lastCharIndex = offset + candidate.length();
410         if (lastCharIndex < text.length()) {
411           char nextChar = text.charAt(lastCharIndex);
412           if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) {
413             return null;
414           }
415         }
416       }
417 
418       PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion);
419       if (leniency.verify(number, candidate, phoneUtil)) {
420         // We used parseAndKeepRawInput to create this number, but for now we don't return the extra
421         // values parsed. TODO: stop clearing all values here and switch all users over
422         // to using rawInput() rather than the rawString() of PhoneNumberMatch.
423         number.clearCountryCodeSource();
424         number.clearRawInput();
425         number.clearPreferredDomesticCarrierCode();
426         return new PhoneNumberMatch(offset, candidate, number);
427       }
428     } catch (NumberParseException e) {
429       // ignore and continue
430     }
431     return null;
432   }
433 
  /**
   * Small helper interface such that the number groups can be checked according to different
   * criteria, both for our default way of performing formatting and for any alternate formats we
   * may want to check.
   */
  interface NumberGroupingChecker {
    /**
     * Returns true if the groups of digits found in our candidate phone number match our
     * expectations.
     *
     * @param util  the phone number utility made available to the check
     * @param number  the original number we found when parsing
     * @param normalizedCandidate  the candidate number, normalized to only contain ASCII digits,
     *     but with non-digits (spaces etc) retained
     * @param expectedNumberGroups  the groups of digits that we would expect to see if we
     *     formatted this number
     * @return  true if the candidate's digit grouping is acceptable to this checker
     */
    boolean checkGroups(PhoneNumberUtil util, PhoneNumber number,
                        StringBuilder normalizedCandidate, String[] expectedNumberGroups);
  }
453 
allNumberGroupsRemainGrouped(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] formattedNumberGroups)454   static boolean allNumberGroupsRemainGrouped(PhoneNumberUtil util,
455                                               PhoneNumber number,
456                                               StringBuilder normalizedCandidate,
457                                               String[] formattedNumberGroups) {
458     int fromIndex = 0;
459     // Check each group of consecutive digits are not broken into separate groupings in the
460     // {@code normalizedCandidate} string.
461     for (int i = 0; i < formattedNumberGroups.length; i++) {
462       // Fails if the substring of {@code normalizedCandidate} starting from {@code fromIndex}
463       // doesn't contain the consecutive digits in formattedNumberGroups[i].
464       fromIndex = normalizedCandidate.indexOf(formattedNumberGroups[i], fromIndex);
465       if (fromIndex < 0) {
466         return false;
467       }
468       // Moves {@code fromIndex} forward.
469       fromIndex += formattedNumberGroups[i].length();
470       if (i == 0 && fromIndex < normalizedCandidate.length()) {
471         // We are at the position right after the NDC.
472         if (Character.isDigit(normalizedCandidate.charAt(fromIndex))) {
473           // This means there is no formatting symbol after the NDC. In this case, we only
474           // accept the number if there is no formatting symbol at all in the number, except
475           // for extensions.
476           String nationalSignificantNumber = util.getNationalSignificantNumber(number);
477           return normalizedCandidate.substring(fromIndex - formattedNumberGroups[i].length())
478               .startsWith(nationalSignificantNumber);
479         }
480       }
481     }
482     // The check here makes sure that we haven't mistakenly already used the extension to
483     // match the last group of the subscriber number. Note the extension cannot have
484     // formatting in-between digits.
485     return normalizedCandidate.substring(fromIndex).contains(number.getExtension());
486   }
487 
allNumberGroupsAreExactlyPresent(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] formattedNumberGroups)488   static boolean allNumberGroupsAreExactlyPresent(PhoneNumberUtil util,
489                                                   PhoneNumber number,
490                                                   StringBuilder normalizedCandidate,
491                                                   String[] formattedNumberGroups) {
492     String[] candidateGroups =
493         PhoneNumberUtil.NON_DIGITS_PATTERN.split(normalizedCandidate.toString());
494     // Set this to the last group, skipping it if the number has an extension.
495     int candidateNumberGroupIndex =
496         number.hasExtension() ? candidateGroups.length - 2 : candidateGroups.length - 1;
497     // First we check if the national significant number is formatted as a block.
498     // We use contains and not equals, since the national significant number may be present with
499     // a prefix such as a national number prefix, or the country code itself.
500     if (candidateGroups.length == 1 ||
501         candidateGroups[candidateNumberGroupIndex].contains(
502             util.getNationalSignificantNumber(number))) {
503       return true;
504     }
505     // Starting from the end, go through in reverse, excluding the first group, and check the
506     // candidate and number groups are the same.
507     for (int formattedNumberGroupIndex = (formattedNumberGroups.length - 1);
508          formattedNumberGroupIndex > 0 && candidateNumberGroupIndex >= 0;
509          formattedNumberGroupIndex--, candidateNumberGroupIndex--) {
510       if (!candidateGroups[candidateNumberGroupIndex].equals(
511           formattedNumberGroups[formattedNumberGroupIndex])) {
512         return false;
513       }
514     }
515     // Now check the first group. There may be a national prefix at the start, so we only check
516     // that the candidate group ends with the formatted number group.
517     return (candidateNumberGroupIndex >= 0 &&
518             candidateGroups[candidateNumberGroupIndex].endsWith(formattedNumberGroups[0]));
519   }
520 
521   /**
522    * Helper method to get the national-number part of a number, formatted without any national
523    * prefix, and return it as a set of digit blocks that would be formatted together.
524    */
getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number, NumberFormat formattingPattern)525   private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number,
526                                                   NumberFormat formattingPattern) {
527     if (formattingPattern == null) {
528       // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits.
529       String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966);
530       // We remove the extension part from the formatted string before splitting it into different
531       // groups.
532       int endIndex = rfc3966Format.indexOf(';');
533       if (endIndex < 0) {
534         endIndex = rfc3966Format.length();
535       }
536       // The country-code will have a '-' following it.
537       int startIndex = rfc3966Format.indexOf('-') + 1;
538       return rfc3966Format.substring(startIndex, endIndex).split("-");
539     } else {
540       // We format the NSN only, and split that according to the separator.
541       String nationalSignificantNumber = util.getNationalSignificantNumber(number);
542       return util.formatNsnUsingPattern(nationalSignificantNumber,
543                                         formattingPattern, PhoneNumberFormat.RFC3966).split("-");
544     }
545   }
546 
checkNumberGroupingIsValid( PhoneNumber number, String candidate, PhoneNumberUtil util, NumberGroupingChecker checker)547   static boolean checkNumberGroupingIsValid(
548       PhoneNumber number, String candidate, PhoneNumberUtil util, NumberGroupingChecker checker) {
549     // TODO: Evaluate how this works for other locales (testing has been limited to NANPA regions)
550     // and optimise if necessary.
551     StringBuilder normalizedCandidate =
552         PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */);
553     String[] formattedNumberGroups = getNationalNumberGroups(util, number, null);
554     if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
555       return true;
556     }
557     // If this didn't pass, see if there are any alternate formats, and try them instead.
558     PhoneMetadata alternateFormats =
559         MetadataManager.getAlternateFormatsForCountry(number.getCountryCode());
560     if (alternateFormats != null) {
561       for (NumberFormat alternateFormat : alternateFormats.numberFormats()) {
562         formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat);
563         if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
564           return true;
565         }
566       }
567     }
568     return false;
569   }
570 
containsMoreThanOneSlash(String candidate)571   static boolean containsMoreThanOneSlash(String candidate) {
572     int firstSlashIndex = candidate.indexOf('/');
573     return (firstSlashIndex > 0 && candidate.substring(firstSlashIndex + 1).contains("/"));
574   }
575 
containsOnlyValidXChars( PhoneNumber number, String candidate, PhoneNumberUtil util)576   static boolean containsOnlyValidXChars(
577       PhoneNumber number, String candidate, PhoneNumberUtil util) {
578     // The characters 'x' and 'X' can be (1) a carrier code, in which case they always precede the
579     // national significant number or (2) an extension sign, in which case they always precede the
580     // extension number. We assume a carrier code is more than 1 digit, so the first case has to
581     // have more than 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 'x'
582     // or 'X'. We ignore the character if it appears as the last character of the string.
583     for (int index = 0; index < candidate.length() - 1; index++) {
584       char charAtIndex = candidate.charAt(index);
585       if (charAtIndex == 'x' || charAtIndex == 'X') {
586         char charAtNextIndex = candidate.charAt(index + 1);
587         if (charAtNextIndex == 'x' || charAtNextIndex == 'X') {
588           // This is the carrier code case, in which the 'X's always precede the national
589           // significant number.
590           index++;
591           if (util.isNumberMatch(number, candidate.substring(index)) != MatchType.NSN_MATCH) {
592             return false;
593           }
594         // This is the extension sign case, in which the 'x' or 'X' should always precede the
595         // extension number.
596         } else if (!PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(index)).equals(
597             number.getExtension())) {
598             return false;
599         }
600       }
601     }
602     return true;
603   }
604 
isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util)605   static boolean isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util) {
606     // First, check how we deduced the country code. If it was written in international format, then
607     // the national prefix is not required.
608     if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) {
609       return true;
610     }
611     String phoneNumberRegion =
612         util.getRegionCodeForCountryCode(number.getCountryCode());
613     PhoneMetadata metadata = util.getMetadataForRegion(phoneNumberRegion);
614     if (metadata == null) {
615       return true;
616     }
617     // Check if a national prefix should be present when formatting this number.
618     String nationalNumber = util.getNationalSignificantNumber(number);
619     NumberFormat formatRule =
620         util.chooseFormattingPatternForNumber(metadata.numberFormats(), nationalNumber);
621     // To do this, we check that a national prefix formatting rule was present and that it wasn't
622     // just the first-group symbol ($1) with punctuation.
623     if ((formatRule != null) && formatRule.getNationalPrefixFormattingRule().length() > 0) {
624       if (formatRule.isNationalPrefixOptionalWhenFormatting()) {
625         // The national-prefix is optional in these cases, so we don't need to check if it was
626         // present.
627         return true;
628       }
629       // Remove the first-group symbol.
630       String candidateNationalPrefixRule = formatRule.getNationalPrefixFormattingRule();
631       // We assume that the first-group symbol will never be _before_ the national prefix.
632       candidateNationalPrefixRule =
633           candidateNationalPrefixRule.substring(0, candidateNationalPrefixRule.indexOf("$1"));
634       candidateNationalPrefixRule =
635           PhoneNumberUtil.normalizeDigitsOnly(candidateNationalPrefixRule);
636       if (candidateNationalPrefixRule.length() == 0) {
637         // National Prefix not needed for this number.
638         return true;
639       }
640       // Normalize the remainder.
641       String rawInputCopy = PhoneNumberUtil.normalizeDigitsOnly(number.getRawInput());
642       StringBuilder rawInput = new StringBuilder(rawInputCopy);
643       // Check if we found a national prefix and/or carrier code at the start of the raw input, and
644       // return the result.
645       return util.maybeStripNationalPrefixAndCarrierCode(rawInput, metadata, null);
646     }
647     return true;
648   }
649 
hasNext()650   public boolean hasNext() {
651     if (state == State.NOT_READY) {
652       lastMatch = find(searchIndex);
653       if (lastMatch == null) {
654         state = State.DONE;
655       } else {
656         searchIndex = lastMatch.end();
657         state = State.READY;
658       }
659     }
660     return state == State.READY;
661   }
662 
next()663   public PhoneNumberMatch next() {
664     // Check the state and find the next match as a side-effect if necessary.
665     if (!hasNext()) {
666       throw new NoSuchElementException();
667     }
668 
669     // Don't retain that memory any longer than necessary.
670     PhoneNumberMatch result = lastMatch;
671     lastMatch = null;
672     state = State.NOT_READY;
673     return result;
674   }
675 
676   /**
677    * Always throws {@link UnsupportedOperationException} as removal is not supported.
678    */
remove()679   public void remove() {
680     throw new UnsupportedOperationException();
681   }
682 }
683