• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 The Libphonenumber Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.i18n.phonenumbers;
18 
19 import com.google.i18n.phonenumbers.PhoneNumberUtil.Leniency;
20 import com.google.i18n.phonenumbers.PhoneNumberUtil.MatchType;
21 import com.google.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberFormat;
22 import com.google.i18n.phonenumbers.Phonemetadata.NumberFormat;
23 import com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadata;
24 import com.google.i18n.phonenumbers.Phonenumber.PhoneNumber.CountryCodeSource;
25 import com.google.i18n.phonenumbers.Phonenumber.PhoneNumber;
26 import com.google.i18n.phonenumbers.internal.RegexCache;
27 import com.google.i18n.phonenumbers.metadata.DefaultMetadataDependenciesProvider;
28 import java.lang.Character.UnicodeBlock;
29 import java.util.Iterator;
30 import java.util.NoSuchElementException;
31 import java.util.regex.Matcher;
32 import java.util.regex.Pattern;
33 
34 /**
35  * A stateful class that finds and extracts telephone numbers from {@linkplain CharSequence text}.
36  * Instances can be created using the {@linkplain PhoneNumberUtil#findNumbers factory methods} in
37  * {@link PhoneNumberUtil}.
38  *
39  * <p>Vanity numbers (phone numbers using alphabetic digits such as <tt>1-800-SIX-FLAGS</tt>) are
40  * not found.
41  *
42  * <p>This class is not thread-safe.
43  */
44 final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
45   /**
46    * The phone number pattern used by {@link #find}, similar to
47    * {@code PhoneNumberUtil.VALID_PHONE_NUMBER}, but with the following differences:
48    * <ul>
49    *   <li>All captures are limited in order to place an upper bound to the text matched by the
50    *       pattern.
51    * <ul>
52    *   <li>Leading punctuation / plus signs are limited.
53    *   <li>Consecutive occurrences of punctuation are limited.
54    *   <li>Number of digits is limited.
55    * </ul>
56    *   <li>No whitespace is allowed at the start or end.
57    *   <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported.
58    * </ul>
59    */
60   private static final Pattern PATTERN;
61   /**
62    * Matches strings that look like publication pages. Example:
63    * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
64    * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre>
65    *
66    * The string "211-227 (2003)" is not a telephone number.
67    */
68   private static final Pattern PUB_PAGES = Pattern.compile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}");
69 
70   /**
71    * Matches strings that look like dates using "/" as a separator. Examples: 3/10/2011, 31/10/96 or
72    * 08/31/95.
73    */
74   private static final Pattern SLASH_SEPARATED_DATES =
75       Pattern.compile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}");
76 
77   /**
78    * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the
79    * trailing ":\d\d" -- that is covered by TIME_STAMPS_SUFFIX.
80    */
81   private static final Pattern TIME_STAMPS =
82       Pattern.compile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$");
83   private static final Pattern TIME_STAMPS_SUFFIX = Pattern.compile(":[0-5]\\d");
84 
85   /**
86    * Pattern to check that brackets match. Opening brackets should be closed within a phone number.
87    * This also checks that there is something inside the brackets. Having no brackets at all is also
88    * fine.
89    */
90   private static final Pattern MATCHING_BRACKETS;
91 
92   /**
93    * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are
94    * ordered according to specificity. For example, white-space is last since that is frequently
95    * used in numbers, not just to separate two numbers. We have separate patterns since we don't
96    * want to break up the phone-number-like text on more than one different kind of symbol at one
97    * time, although symbols of the same type (e.g. space) can be safely grouped together.
98    *
99    * Note that if there is a match, we will always check any text found up to the first match as
100    * well.
101    */
102   private static final Pattern[] INNER_MATCHES = {
103       // Breaks on the slash - e.g. "651-234-2345/332-445-1234"
104       Pattern.compile("/+(.*)"),
105       // Note that the bracket here is inside the capturing group, since we consider it part of the
106       // phone number. Will match a pattern like "(650) 223 3345 (754) 223 3321".
107       Pattern.compile("(\\([^(]*)"),
108       // Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number."
109       // We require a space on either side of the hyphen for it to be considered a separator.
110       Pattern.compile("(?:\\p{Z}-|-\\p{Z})\\p{Z}*(.+)"),
111       // Various types of wide hyphens. Note we have decided not to enforce a space here, since it's
112       // possible that it's supposed to be used to break two numbers without spaces, and we haven't
113       // seen many instances of it used within a number.
114       Pattern.compile("[\u2012-\u2015\uFF0D]\\p{Z}*(.+)"),
115       // Breaks on a full stop - e.g. "12345. 332-445-1234 is my number."
116       Pattern.compile("\\.+\\p{Z}*([^.]+)"),
117       // Breaks on space - e.g. "3324451234 8002341234"
118       Pattern.compile("\\p{Z}+(\\P{Z}+)")
119   };
120 
121   /**
122    * Punctuation that may be at the start of a phone number - brackets and plus signs.
123    */
124   private static final Pattern LEAD_CLASS;
125 
126   static {
127     /* Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist
128      * to make the pattern more easily understood. */
129 
130     String openingParens = "(\\[\uFF08\uFF3B";
131     String closingParens = ")\\]\uFF09\uFF3D";
132     String nonParens = "[^" + openingParens + closingParens + "]";
133 
134     /* Limit on the number of pairs of brackets in a phone number. */
135     String bracketPairLimit = limit(0, 3);
136     /*
137      * An opening bracket at the beginning may not be closed, but subsequent ones should be.  It's
138      * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a
139      * closing bracket first. We limit the sets of brackets in a phone number to four.
140      */
141     MATCHING_BRACKETS = Pattern.compile(
142         "(?:[" + openingParens + "])?" + "(?:" + nonParens + "+" + "[" + closingParens + "])?"
143         + nonParens + "+"
144         + "(?:[" + openingParens + "]" + nonParens + "+[" + closingParens + "])" + bracketPairLimit
145         + nonParens + "*");
146 
147     /* Limit on the number of leading (plus) characters. */
148     String leadLimit = limit(0, 2);
149     /* Limit on the number of consecutive punctuation characters. */
150     String punctuationLimit = limit(0, 4);
151     /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a
152      * single block, set high enough to accommodate the entire national number and the international
153      * country code. */
154     int digitBlockLimit =
155         PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE;
156     /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some
157      * formats use spaces to separate each digit. */
158     String blockLimit = limit(0, digitBlockLimit);
159 
160     /* A punctuation sequence allowing white space. */
161     String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit;
162     /* A digits block without punctuation. */
163     String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit);
164 
165     String leadClassChars = openingParens + PhoneNumberUtil.PLUS_CHARS;
166     String leadClass = "[" + leadClassChars + "]";
167     LEAD_CLASS = Pattern.compile(leadClass);
168 
169     /* Phone number pattern allowing optional punctuation. */
170     PATTERN = Pattern.compile(
171         "(?:" + leadClass + punctuation + ")" + leadLimit
172         + digitSequence + "(?:" + punctuation + digitSequence + ")" + blockLimit
173         + "(?:" + PhoneNumberUtil.EXTN_PATTERNS_FOR_MATCHING + ")?",
174         PhoneNumberUtil.REGEX_FLAGS);
175   }
176 
177   /** Returns a regular expression quantifier with an upper and lower limit. */
limit(int lower, int upper)178   private static String limit(int lower, int upper) {
179     if ((lower < 0) || (upper <= 0) || (upper < lower)) {
180       throw new IllegalArgumentException();
181     }
182     return "{" + lower + "," + upper + "}";
183   }
184 
185   /** The potential states of a PhoneNumberMatcher. */
186   private enum State {
187     NOT_READY, READY, DONE
188   }
189 
190   /** The phone number utility. */
191   private final PhoneNumberUtil phoneUtil;
192   /** The text searched for phone numbers. */
193   private final CharSequence text;
194   /**
195    * The region (country) to assume for phone numbers without an international prefix, possibly
196    * null.
197    */
198   private final String preferredRegion;
199   /** The degree of validation requested. */
200   private final Leniency leniency;
201   /** The maximum number of retries after matching an invalid number. */
202   private long maxTries;
203 
204   /** The iteration tristate. */
205   private State state = State.NOT_READY;
206   /** The last successful match, null unless in {@link State#READY}. */
207   private PhoneNumberMatch lastMatch = null;
208   /** The next index to start searching at. Undefined in {@link State#DONE}. */
209   private int searchIndex = 0;
210 
211   // A cache for frequently used country-specific regular expressions. Set to 32 to cover ~2-3
212   // countries being used for the same doc with ~10 patterns for each country. Some pages will have
213   // a lot more countries in use, but typically fewer numbers for each so expanding the cache for
214   // that use-case won't have a lot of benefit.
215   private final RegexCache regexCache = new RegexCache(32);
216 
/**
 * Creates a new instance. See the factory methods in {@link PhoneNumberUtil} on how to obtain a
 * new instance.
 *
 * @param util  the phone number util to use, must not be null
 * @param text  the character sequence that we will search, null for no text (treated as the
 *     empty string)
 * @param country  the country to assume for phone numbers not written in international format
 *     (with a leading plus, or with the international dialing prefix of the specified region).
 *     May be null or "ZZ" if only numbers with a leading plus should be
 *     considered.
 * @param leniency  the leniency to use when evaluating candidate phone numbers, must not be null
 * @param maxTries  the maximum number of invalid numbers to try before giving up on the text.
 *     This is to cover degenerate cases where the text has a lot of false positives in it. Must
 *     be {@code >= 0}.
 * @throws NullPointerException  if {@code util} or {@code leniency} is null
 * @throws IllegalArgumentException  if {@code maxTries} is negative
 */
PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency,
    long maxTries) {
  // Same exception types as before, but each one now says which argument was rejected.
  if (util == null) {
    throw new NullPointerException("util must not be null");
  }
  if (leniency == null) {
    throw new NullPointerException("leniency must not be null");
  }
  if (maxTries < 0) {
    throw new IllegalArgumentException("maxTries must be >= 0 but was " + maxTries);
  }
  this.phoneUtil = util;
  // A null text is searched as if it were empty.
  this.text = (text != null) ? text : "";
  this.preferredRegion = country;
  this.leniency = leniency;
  this.maxTries = maxTries;
}
247 
248   /**
249    * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex}
250    * that represents a phone number. Returns the next match, null if none was found.
251    *
252    * @param index  the search index to start searching at
253    * @return  the phone number match found, null if none can be found
254    */
find(int index)255   private PhoneNumberMatch find(int index) {
256     Matcher matcher = PATTERN.matcher(text);
257     while ((maxTries > 0) && matcher.find(index)) {
258       int start = matcher.start();
259       CharSequence candidate = text.subSequence(start, matcher.end());
260 
261       // Check for extra numbers at the end.
262       // TODO: This is the place to start when trying to support extraction of multiple phone number
263       // from split notations (+41 79 123 45 67 / 68).
264       candidate = trimAfterFirstMatch(PhoneNumberUtil.SECOND_NUMBER_START_PATTERN, candidate);
265 
266       PhoneNumberMatch match = extractMatch(candidate, start);
267       if (match != null) {
268         return match;
269       }
270 
271       index = start + candidate.length();
272       maxTries--;
273     }
274 
275     return null;
276   }
277 
278   /**
279    * Trims away any characters after the first match of {@code pattern} in {@code candidate},
280    * returning the trimmed version.
281    */
trimAfterFirstMatch(Pattern pattern, CharSequence candidate)282   private static CharSequence trimAfterFirstMatch(Pattern pattern, CharSequence candidate) {
283     Matcher trailingCharsMatcher = pattern.matcher(candidate);
284     if (trailingCharsMatcher.find()) {
285       candidate = candidate.subSequence(0, trailingCharsMatcher.start());
286     }
287     return candidate;
288   }
289 
290   /**
291    * Helper method to determine if a character is a Latin-script letter or not. For our purposes,
292    * combining marks should also return true since we assume they have been added to a preceding
293    * Latin character.
294    */
295   // @VisibleForTesting
isLatinLetter(char letter)296   static boolean isLatinLetter(char letter) {
297     // Combining marks are a subset of non-spacing-mark.
298     if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) {
299       return false;
300     }
301     UnicodeBlock block = UnicodeBlock.of(letter);
302     return block.equals(UnicodeBlock.BASIC_LATIN)
303         || block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT)
304         || block.equals(UnicodeBlock.LATIN_EXTENDED_A)
305         || block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL)
306         || block.equals(UnicodeBlock.LATIN_EXTENDED_B)
307         || block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
308   }
309 
isInvalidPunctuationSymbol(char character)310   private static boolean isInvalidPunctuationSymbol(char character) {
311     return character == '%' || Character.getType(character) == Character.CURRENCY_SYMBOL;
312   }
313 
314   /**
315    * Attempts to extract a match from a {@code candidate} character sequence.
316    *
317    * @param candidate  the candidate text that might contain a phone number
318    * @param offset  the offset of {@code candidate} within {@link #text}
319    * @return  the match found, null if none can be found
320    */
extractMatch(CharSequence candidate, int offset)321   private PhoneNumberMatch extractMatch(CharSequence candidate, int offset) {
322     // Skip a match that is more likely to be a date.
323     if (SLASH_SEPARATED_DATES.matcher(candidate).find()) {
324       return null;
325     }
326 
327     // Skip potential time-stamps.
328     if (TIME_STAMPS.matcher(candidate).find()) {
329       String followingText = text.toString().substring(offset + candidate.length());
330       if (TIME_STAMPS_SUFFIX.matcher(followingText).lookingAt()) {
331         return null;
332       }
333     }
334 
335     // Try to come up with a valid match given the entire candidate.
336     PhoneNumberMatch match = parseAndVerify(candidate, offset);
337     if (match != null) {
338       return match;
339     }
340 
341     // If that failed, try to find an "inner match" - there might be a phone number within this
342     // candidate.
343     return extractInnerMatch(candidate, offset);
344   }
345 
346   /**
347    * Attempts to extract a match from {@code candidate} if the whole candidate does not qualify as a
348    * match.
349    *
350    * @param candidate  the candidate text that might contain a phone number
351    * @param offset  the current offset of {@code candidate} within {@link #text}
352    * @return  the match found, null if none can be found
353    */
extractInnerMatch(CharSequence candidate, int offset)354   private PhoneNumberMatch extractInnerMatch(CharSequence candidate, int offset) {
355     for (Pattern possibleInnerMatch : INNER_MATCHES) {
356       Matcher groupMatcher = possibleInnerMatch.matcher(candidate);
357       boolean isFirstMatch = true;
358       while (groupMatcher.find() && maxTries > 0) {
359         if (isFirstMatch) {
360           // We should handle any group before this one too.
361           CharSequence group = trimAfterFirstMatch(
362               PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN,
363               candidate.subSequence(0, groupMatcher.start()));
364           PhoneNumberMatch match = parseAndVerify(group, offset);
365           if (match != null) {
366             return match;
367           }
368           maxTries--;
369           isFirstMatch = false;
370         }
371         CharSequence group = trimAfterFirstMatch(
372             PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, groupMatcher.group(1));
373         PhoneNumberMatch match = parseAndVerify(group, offset + groupMatcher.start(1));
374         if (match != null) {
375           return match;
376         }
377         maxTries--;
378       }
379     }
380     return null;
381   }
382 
383   /**
384    * Parses a phone number from the {@code candidate} using {@link PhoneNumberUtil#parse} and
385    * verifies it matches the requested {@link #leniency}. If parsing and verification succeed, a
386    * corresponding {@link PhoneNumberMatch} is returned, otherwise this method returns null.
387    *
388    * @param candidate  the candidate match
389    * @param offset  the offset of {@code candidate} within {@link #text}
390    * @return  the parsed and validated phone number match, or null
391    */
parseAndVerify(CharSequence candidate, int offset)392   private PhoneNumberMatch parseAndVerify(CharSequence candidate, int offset) {
393     try {
394       // Check the candidate doesn't contain any formatting which would indicate that it really
395       // isn't a phone number.
396       if (!MATCHING_BRACKETS.matcher(candidate).matches() || PUB_PAGES.matcher(candidate).find()) {
397         return null;
398       }
399 
400       // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded
401       // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
402       if (leniency.compareTo(Leniency.VALID) >= 0) {
403         // If the candidate is not at the start of the text, and does not start with phone-number
404         // punctuation, check the previous character.
405         if (offset > 0 && !LEAD_CLASS.matcher(candidate).lookingAt()) {
406           char previousChar = text.charAt(offset - 1);
407           // We return null if it is a latin letter or an invalid punctuation symbol.
408           if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) {
409             return null;
410           }
411         }
412         int lastCharIndex = offset + candidate.length();
413         if (lastCharIndex < text.length()) {
414           char nextChar = text.charAt(lastCharIndex);
415           if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) {
416             return null;
417           }
418         }
419       }
420 
421       PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion);
422 
423       if (leniency.verify(number, candidate, phoneUtil, this)) {
424         // We used parseAndKeepRawInput to create this number, but for now we don't return the extra
425         // values parsed. TODO: stop clearing all values here and switch all users over
426         // to using rawInput() rather than the rawString() of PhoneNumberMatch.
427         number.clearCountryCodeSource();
428         number.clearRawInput();
429         number.clearPreferredDomesticCarrierCode();
430         return new PhoneNumberMatch(offset, candidate.toString(), number);
431       }
432     } catch (NumberParseException e) {
433       // ignore and continue
434     }
435     return null;
436   }
437 
438   /**
439    * Small helper interface such that the number groups can be checked according to different
440    * criteria, both for our default way of performing formatting and for any alternate formats we
441    * may want to check.
442    */
443   interface NumberGroupingChecker {
444     /**
445      * Returns true if the groups of digits found in our candidate phone number match our
446      * expectations.
447      *
448      * @param number  the original number we found when parsing
449      * @param normalizedCandidate  the candidate number, normalized to only contain ASCII digits,
450      *     but with non-digits (spaces etc) retained
451      * @param expectedNumberGroups  the groups of digits that we would expect to see if we
452      *     formatted this number
453      */
checkGroups(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] expectedNumberGroups)454     boolean checkGroups(PhoneNumberUtil util, PhoneNumber number,
455                         StringBuilder normalizedCandidate, String[] expectedNumberGroups);
456   }
457 
allNumberGroupsRemainGrouped(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] formattedNumberGroups)458   static boolean allNumberGroupsRemainGrouped(PhoneNumberUtil util,
459                                               PhoneNumber number,
460                                               StringBuilder normalizedCandidate,
461                                               String[] formattedNumberGroups) {
462     int fromIndex = 0;
463     if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) {
464       // First skip the country code if the normalized candidate contained it.
465       String countryCode = Integer.toString(number.getCountryCode());
466       fromIndex = normalizedCandidate.indexOf(countryCode) + countryCode.length();
467     }
468     // Check each group of consecutive digits are not broken into separate groupings in the
469     // {@code normalizedCandidate} string.
470     for (int i = 0; i < formattedNumberGroups.length; i++) {
471       // Fails if the substring of {@code normalizedCandidate} starting from {@code fromIndex}
472       // doesn't contain the consecutive digits in formattedNumberGroups[i].
473       fromIndex = normalizedCandidate.indexOf(formattedNumberGroups[i], fromIndex);
474       if (fromIndex < 0) {
475         return false;
476       }
477       // Moves {@code fromIndex} forward.
478       fromIndex += formattedNumberGroups[i].length();
479       if (i == 0 && fromIndex < normalizedCandidate.length()) {
480         // We are at the position right after the NDC. We get the region used for formatting
481         // information based on the country code in the phone number, rather than the number itself,
482         // as we do not need to distinguish between different countries with the same country
483         // calling code and this is faster.
484         String region = util.getRegionCodeForCountryCode(number.getCountryCode());
485         if (util.getNddPrefixForRegion(region, true) != null
486             && Character.isDigit(normalizedCandidate.charAt(fromIndex))) {
487           // This means there is no formatting symbol after the NDC. In this case, we only
488           // accept the number if there is no formatting symbol at all in the number, except
489           // for extensions. This is only important for countries with national prefixes.
490           String nationalSignificantNumber = util.getNationalSignificantNumber(number);
491           return normalizedCandidate.substring(fromIndex - formattedNumberGroups[i].length())
492               .startsWith(nationalSignificantNumber);
493         }
494       }
495     }
496     // The check here makes sure that we haven't mistakenly already used the extension to
497     // match the last group of the subscriber number. Note the extension cannot have
498     // formatting in-between digits.
499     return normalizedCandidate.substring(fromIndex).contains(number.getExtension());
500   }
501 
allNumberGroupsAreExactlyPresent(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] formattedNumberGroups)502   static boolean allNumberGroupsAreExactlyPresent(PhoneNumberUtil util,
503                                                   PhoneNumber number,
504                                                   StringBuilder normalizedCandidate,
505                                                   String[] formattedNumberGroups) {
506     String[] candidateGroups =
507         PhoneNumberUtil.NON_DIGITS_PATTERN.split(normalizedCandidate.toString());
508     // Set this to the last group, skipping it if the number has an extension.
509     int candidateNumberGroupIndex =
510         number.hasExtension() ? candidateGroups.length - 2 : candidateGroups.length - 1;
511     // First we check if the national significant number is formatted as a block.
512     // We use contains and not equals, since the national significant number may be present with
513     // a prefix such as a national number prefix, or the country code itself.
514     if (candidateGroups.length == 1
515         || candidateGroups[candidateNumberGroupIndex].contains(
516             util.getNationalSignificantNumber(number))) {
517       return true;
518     }
519     // Starting from the end, go through in reverse, excluding the first group, and check the
520     // candidate and number groups are the same.
521     for (int formattedNumberGroupIndex = (formattedNumberGroups.length - 1);
522          formattedNumberGroupIndex > 0 && candidateNumberGroupIndex >= 0;
523          formattedNumberGroupIndex--, candidateNumberGroupIndex--) {
524       if (!candidateGroups[candidateNumberGroupIndex].equals(
525           formattedNumberGroups[formattedNumberGroupIndex])) {
526         return false;
527       }
528     }
529     // Now check the first group. There may be a national prefix at the start, so we only check
530     // that the candidate group ends with the formatted number group.
531     return (candidateNumberGroupIndex >= 0
532         && candidateGroups[candidateNumberGroupIndex].endsWith(formattedNumberGroups[0]));
533   }
534 
535   /**
536    * Helper method to get the national-number part of a number, formatted without any national
537    * prefix, and return it as a set of digit blocks that would be formatted together following
538    * standard formatting rules.
539    */
getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number)540   private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number) {
541     // This will be in the format +CC-DG1-DG2-DGX;ext=EXT where DG1..DGX represents groups of
542     // digits.
543     String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966);
544     // We remove the extension part from the formatted string before splitting it into different
545     // groups.
546     int endIndex = rfc3966Format.indexOf(';');
547     if (endIndex < 0) {
548       endIndex = rfc3966Format.length();
549     }
550     // The country-code will have a '-' following it.
551     int startIndex = rfc3966Format.indexOf('-') + 1;
552     return rfc3966Format.substring(startIndex, endIndex).split("-");
553   }
554 
555   /**
556    * Helper method to get the national-number part of a number, formatted without any national
557    * prefix, and return it as a set of digit blocks that should be formatted together according to
558    * the formatting pattern passed in.
559    */
getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number, NumberFormat formattingPattern)560   private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number,
561                                                   NumberFormat formattingPattern) {
562     // If a format is provided, we format the NSN only, and split that according to the separator.
563     String nationalSignificantNumber = util.getNationalSignificantNumber(number);
564     return util.formatNsnUsingPattern(nationalSignificantNumber,
565                                       formattingPattern, PhoneNumberFormat.RFC3966).split("-");
566   }
567 
checkNumberGroupingIsValid( PhoneNumber number, CharSequence candidate, PhoneNumberUtil util, NumberGroupingChecker checker)568   boolean checkNumberGroupingIsValid(
569       PhoneNumber number, CharSequence candidate, PhoneNumberUtil util,
570       NumberGroupingChecker checker) {
571     StringBuilder normalizedCandidate =
572         PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */);
573     String[] formattedNumberGroups = getNationalNumberGroups(util, number);
574     if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
575       return true;
576     }
577     // If this didn't pass, see if there are any alternate formats that match, and try them instead.
578     PhoneMetadata alternateFormats =
579         DefaultMetadataDependenciesProvider.getInstance()
580             .getAlternateFormatsMetadataSource()
581               .getFormattingMetadataForCountryCallingCode(number.getCountryCode());
582     String nationalSignificantNumber = util.getNationalSignificantNumber(number);
583     if (alternateFormats != null) {
584       for (NumberFormat alternateFormat : alternateFormats.getNumberFormatList()) {
585         if (alternateFormat.getLeadingDigitsPatternCount() > 0) {
586           // There is only one leading digits pattern for alternate formats.
587           Pattern pattern =
588               regexCache.getPatternForRegex(alternateFormat.getLeadingDigitsPattern(0));
589           if (!pattern.matcher(nationalSignificantNumber).lookingAt()) {
590             // Leading digits don't match; try another one.
591             continue;
592           }
593         }
594         formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat);
595         if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
596           return true;
597         }
598       }
599     }
600     return false;
601   }
602 
containsMoreThanOneSlashInNationalNumber(PhoneNumber number, String candidate)603   static boolean containsMoreThanOneSlashInNationalNumber(PhoneNumber number, String candidate) {
604     int firstSlashInBodyIndex = candidate.indexOf('/');
605     if (firstSlashInBodyIndex < 0) {
606       // No slashes, this is okay.
607       return false;
608     }
609     // Now look for a second one.
610     int secondSlashInBodyIndex = candidate.indexOf('/', firstSlashInBodyIndex + 1);
611     if (secondSlashInBodyIndex < 0) {
612       // Only one slash, this is okay.
613       return false;
614     }
615 
616     // If the first slash is after the country calling code, this is permitted.
617     boolean candidateHasCountryCode =
618         (number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN
619          || number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN);
620     if (candidateHasCountryCode
621         && PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(0, firstSlashInBodyIndex))
622             .equals(Integer.toString(number.getCountryCode()))) {
623       // Any more slashes and this is illegal.
624       return candidate.substring(secondSlashInBodyIndex + 1).contains("/");
625     }
626     return true;
627   }
628 
containsOnlyValidXChars( PhoneNumber number, String candidate, PhoneNumberUtil util)629   static boolean containsOnlyValidXChars(
630       PhoneNumber number, String candidate, PhoneNumberUtil util) {
631     // The characters 'x' and 'X' can be (1) a carrier code, in which case they always precede the
632     // national significant number or (2) an extension sign, in which case they always precede the
633     // extension number. We assume a carrier code is more than 1 digit, so the first case has to
634     // have more than 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 'x'
635     // or 'X'. We ignore the character if it appears as the last character of the string.
636     for (int index = 0; index < candidate.length() - 1; index++) {
637       char charAtIndex = candidate.charAt(index);
638       if (charAtIndex == 'x' || charAtIndex == 'X') {
639         char charAtNextIndex = candidate.charAt(index + 1);
640         if (charAtNextIndex == 'x' || charAtNextIndex == 'X') {
641           // This is the carrier code case, in which the 'X's always precede the national
642           // significant number.
643           index++;
644           if (util.isNumberMatch(number, candidate.substring(index)) != MatchType.NSN_MATCH) {
645             return false;
646           }
647         // This is the extension sign case, in which the 'x' or 'X' should always precede the
648         // extension number.
649         } else if (!PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(index)).equals(
650             number.getExtension())) {
651           return false;
652         }
653       }
654     }
655     return true;
656   }
657 
isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util)658   static boolean isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util) {
659     // First, check how we deduced the country code. If it was written in international format, then
660     // the national prefix is not required.
661     if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) {
662       return true;
663     }
664     String phoneNumberRegion =
665         util.getRegionCodeForCountryCode(number.getCountryCode());
666     PhoneMetadata metadata = util.getMetadataForRegion(phoneNumberRegion);
667     if (metadata == null) {
668       return true;
669     }
670     // Check if a national prefix should be present when formatting this number.
671     String nationalNumber = util.getNationalSignificantNumber(number);
672     NumberFormat formatRule =
673         util.chooseFormattingPatternForNumber(metadata.getNumberFormatList(), nationalNumber);
674     // To do this, we check that a national prefix formatting rule was present and that it wasn't
675     // just the first-group symbol ($1) with punctuation.
676     if ((formatRule != null) && formatRule.getNationalPrefixFormattingRule().length() > 0) {
677       if (formatRule.getNationalPrefixOptionalWhenFormatting()) {
678         // The national-prefix is optional in these cases, so we don't need to check if it was
679         // present.
680         return true;
681       }
682       if (PhoneNumberUtil.formattingRuleHasFirstGroupOnly(
683           formatRule.getNationalPrefixFormattingRule())) {
684         // National Prefix not needed for this number.
685         return true;
686       }
687       // Normalize the remainder.
688       String rawInputCopy = PhoneNumberUtil.normalizeDigitsOnly(number.getRawInput());
689       StringBuilder rawInput = new StringBuilder(rawInputCopy);
690       // Check if we found a national prefix and/or carrier code at the start of the raw input, and
691       // return the result.
692       return util.maybeStripNationalPrefixAndCarrierCode(rawInput, metadata, null);
693     }
694     return true;
695   }
696 
697   @Override
hasNext()698   public boolean hasNext() {
699     if (state == State.NOT_READY) {
700       lastMatch = find(searchIndex);
701       if (lastMatch == null) {
702         state = State.DONE;
703       } else {
704         searchIndex = lastMatch.end();
705         state = State.READY;
706       }
707     }
708     return state == State.READY;
709   }
710 
711   @Override
next()712   public PhoneNumberMatch next() {
713     // Check the state and find the next match as a side-effect if necessary.
714     if (!hasNext()) {
715       throw new NoSuchElementException();
716     }
717 
718     // Don't retain that memory any longer than necessary.
719     PhoneNumberMatch result = lastMatch;
720     lastMatch = null;
721     state = State.NOT_READY;
722     return result;
723   }
724 
725   /**
726    * Always throws {@link UnsupportedOperationException} as removal is not supported.
727    */
728   @Override
remove()729   public void remove() {
730     throw new UnsupportedOperationException();
731   }
732 }
733