• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 The Libphonenumber Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.i18n.phonenumbers;
18 
19 import com.android.i18n.phonenumbers.PhoneNumberUtil.Leniency;
20 import com.android.i18n.phonenumbers.PhoneNumberUtil.MatchType;
21 import com.android.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberFormat;
22 import com.android.i18n.phonenumbers.Phonemetadata.NumberFormat;
23 import com.android.i18n.phonenumbers.Phonemetadata.PhoneMetadata;
24 import com.android.i18n.phonenumbers.Phonenumber.PhoneNumber.CountryCodeSource;
25 import com.android.i18n.phonenumbers.Phonenumber.PhoneNumber;
26 
27 import java.lang.Character.UnicodeBlock;
28 import java.util.Iterator;
29 import java.util.NoSuchElementException;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 
33 /**
34  * A stateful class that finds and extracts telephone numbers from {@linkplain CharSequence text}.
35  * Instances can be created using the {@linkplain PhoneNumberUtil#findNumbers factory methods} in
36  * {@link PhoneNumberUtil}.
37  *
38  * <p>Vanity numbers (phone numbers using alphabetic digits such as <tt>1-800-SIX-FLAGS</tt>) are
39  * not found.
40  *
41  * <p>This class is not thread-safe.
42  */
43 final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
44   /**
45    * The phone number pattern used by {@link #find}, similar to
46    * {@code PhoneNumberUtil.VALID_PHONE_NUMBER}, but with the following differences:
47    * <ul>
48    *   <li>All captures are limited in order to place an upper bound to the text matched by the
49    *       pattern.
50    * <ul>
51    *   <li>Leading punctuation / plus signs are limited.
52    *   <li>Consecutive occurrences of punctuation are limited.
53    *   <li>Number of digits is limited.
54    * </ul>
55    *   <li>No whitespace is allowed at the start or end.
56    *   <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported.
57    * </ul>
58    */
59   private static final Pattern PATTERN;
60   /**
61    * Matches strings that look like publication pages. Example:
62    * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
63    * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre>
64    *
65    * The string "211-227 (2003)" is not a telephone number.
66    */
67   private static final Pattern PUB_PAGES = Pattern.compile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}");
68 
69   /**
70    * Matches strings that look like dates using "/" as a separator. Examples: 3/10/2011, 31/10/96 or
71    * 08/31/95.
72    */
73   private static final Pattern SLASH_SEPARATED_DATES =
74       Pattern.compile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}");
75 
76   /**
77    * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the
78    * trailing ":\d\d" -- that is covered by TIME_STAMPS_SUFFIX.
79    */
80   private static final Pattern TIME_STAMPS =
81       Pattern.compile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$");
82   private static final Pattern TIME_STAMPS_SUFFIX = Pattern.compile(":[0-5]\\d");
83 
84   /**
85    * Pattern to check that brackets match. Opening brackets should be closed within a phone number.
86    * This also checks that there is something inside the brackets. Having no brackets at all is also
87    * fine.
88    */
89   private static final Pattern MATCHING_BRACKETS;
90 
91   /**
92    * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are
93    * ordered according to specificity. For example, white-space is last since that is frequently
94    * used in numbers, not just to separate two numbers. We have separate patterns since we don't
95    * want to break up the phone-number-like text on more than one different kind of symbol at one
96    * time, although symbols of the same type (e.g. space) can be safely grouped together.
97    *
98    * Note that if there is a match, we will always check any text found up to the first match as
99    * well.
100    */
101   private static final Pattern[] INNER_MATCHES = {
102       // Breaks on the slash - e.g. "651-234-2345/332-445-1234"
103       Pattern.compile("/+(.*)"),
104       // Note that the bracket here is inside the capturing group, since we consider it part of the
105       // phone number. Will match a pattern like "(650) 223 3345 (754) 223 3321".
106       Pattern.compile("(\\([^(]*)"),
107       // Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number."
108       // We require a space on either side of the hyphen for it to be considered a separator.
109       Pattern.compile("(?:\\p{Z}-|-\\p{Z})\\p{Z}*(.+)"),
110       // Various types of wide hyphens. Note we have decided not to enforce a space here, since it's
111       // possible that it's supposed to be used to break two numbers without spaces, and we haven't
112       // seen many instances of it used within a number.
113       Pattern.compile("[\u2012-\u2015\uFF0D]\\p{Z}*(.+)"),
114       // Breaks on a full stop - e.g. "12345. 332-445-1234 is my number."
115       Pattern.compile("\\.+\\p{Z}*([^.]+)"),
116       // Breaks on space - e.g. "3324451234 8002341234"
117       Pattern.compile("\\p{Z}+(\\P{Z}+)")
118   };
119 
120   /**
121    * Punctuation that may be at the start of a phone number - brackets and plus signs.
122    */
123   private static final Pattern LEAD_CLASS;
124 
125   static {
126     /* Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist
127      * to make the pattern more easily understood. */
128 
129     String openingParens = "(\\[\uFF08\uFF3B";
130     String closingParens = ")\\]\uFF09\uFF3D";
131     String nonParens = "[^" + openingParens + closingParens + "]";
132 
133     /* Limit on the number of pairs of brackets in a phone number. */
134     String bracketPairLimit = limit(0, 3);
135     /*
136      * An opening bracket at the beginning may not be closed, but subsequent ones should be.  It's
137      * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a
138      * closing bracket first. We limit the sets of brackets in a phone number to four.
139      */
140     MATCHING_BRACKETS = Pattern.compile(
141         "(?:[" + openingParens + "])?" + "(?:" + nonParens + "+" + "[" + closingParens + "])?" +
142         nonParens + "+" +
143         "(?:[" + openingParens + "]" + nonParens + "+[" + closingParens + "])" + bracketPairLimit +
144         nonParens + "*");
145 
146     /* Limit on the number of leading (plus) characters. */
147     String leadLimit = limit(0, 2);
148     /* Limit on the number of consecutive punctuation characters. */
149     String punctuationLimit = limit(0, 4);
150     /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a
151      * single block, set high enough to accommodate the entire national number and the international
152      * country code. */
153     int digitBlockLimit =
154         PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE;
155     /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some
156      * formats use spaces to separate each digit. */
157     String blockLimit = limit(0, digitBlockLimit);
158 
159     /* A punctuation sequence allowing white space. */
160     String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit;
161     /* A digits block without punctuation. */
162     String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit);
163 
164     String leadClassChars = openingParens + PhoneNumberUtil.PLUS_CHARS;
165     String leadClass = "[" + leadClassChars + "]";
166     LEAD_CLASS = Pattern.compile(leadClass);
167 
168     /* Phone number pattern allowing optional punctuation. */
169     PATTERN = Pattern.compile(
170         "(?:" + leadClass + punctuation + ")" + leadLimit +
171         digitSequence + "(?:" + punctuation + digitSequence + ")" + blockLimit +
172         "(?:" + PhoneNumberUtil.EXTN_PATTERNS_FOR_MATCHING + ")?",
173         PhoneNumberUtil.REGEX_FLAGS);
174   }
175 
176   /** Returns a regular expression quantifier with an upper and lower limit. */
limit(int lower, int upper)177   private static String limit(int lower, int upper) {
178     if ((lower < 0) || (upper <= 0) || (upper < lower)) {
179       throw new IllegalArgumentException();
180     }
181     return "{" + lower + "," + upper + "}";
182   }
183 
184   /** The potential states of a PhoneNumberMatcher. */
185   private enum State {
186     NOT_READY, READY, DONE
187   }
188 
189   /** The phone number utility. */
190   private final PhoneNumberUtil phoneUtil;
191   /** The text searched for phone numbers. */
192   private final CharSequence text;
193   /**
194    * The region (country) to assume for phone numbers without an international prefix, possibly
195    * null.
196    */
197   private final String preferredRegion;
198   /** The degree of validation requested. */
199   private final Leniency leniency;
200   /** The maximum number of retries after matching an invalid number. */
201   private long maxTries;
202 
203   /** The iteration tristate. */
204   private State state = State.NOT_READY;
205   /** The last successful match, null unless in {@link State#READY}. */
206   private PhoneNumberMatch lastMatch = null;
207   /** The next index to start searching at. Undefined in {@link State#DONE}. */
208   private int searchIndex = 0;
209 
210   /**
211    * Creates a new instance. See the factory methods in {@link PhoneNumberUtil} on how to obtain a
212    * new instance.
213    *
214    * @param util      the phone number util to use
215    * @param text      the character sequence that we will search, null for no text
216    * @param country   the country to assume for phone numbers not written in international format
217    *                  (with a leading plus, or with the international dialing prefix of the
218    *                  specified region). May be null or "ZZ" if only numbers with a
219    *                  leading plus should be considered.
220    * @param leniency  the leniency to use when evaluating candidate phone numbers
221    * @param maxTries  the maximum number of invalid numbers to try before giving up on the text.
222    *                  This is to cover degenerate cases where the text has a lot of false positives
223    *                  in it. Must be {@code >= 0}.
224    */
PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency, long maxTries)225   PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency,
226       long maxTries) {
227 
228     if ((util == null) || (leniency == null)) {
229       throw new NullPointerException();
230     }
231     if (maxTries < 0) {
232       throw new IllegalArgumentException();
233     }
234     this.phoneUtil = util;
235     this.text = (text != null) ? text : "";
236     this.preferredRegion = country;
237     this.leniency = leniency;
238     this.maxTries = maxTries;
239   }
240 
241   /**
242    * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex}
243    * that represents a phone number. Returns the next match, null if none was found.
244    *
245    * @param index  the search index to start searching at
246    * @return  the phone number match found, null if none can be found
247    */
find(int index)248   private PhoneNumberMatch find(int index) {
249     Matcher matcher = PATTERN.matcher(text);
250     while ((maxTries > 0) && matcher.find(index)) {
251       int start = matcher.start();
252       CharSequence candidate = text.subSequence(start, matcher.end());
253 
254       // Check for extra numbers at the end.
255       // TODO: This is the place to start when trying to support extraction of multiple phone number
256       // from split notations (+41 79 123 45 67 / 68).
257       candidate = trimAfterFirstMatch(PhoneNumberUtil.SECOND_NUMBER_START_PATTERN, candidate);
258 
259       PhoneNumberMatch match = extractMatch(candidate, start);
260       if (match != null) {
261         return match;
262       }
263 
264       index = start + candidate.length();
265       maxTries--;
266     }
267 
268     return null;
269   }
270 
271   /**
272    * Trims away any characters after the first match of {@code pattern} in {@code candidate},
273    * returning the trimmed version.
274    */
trimAfterFirstMatch(Pattern pattern, CharSequence candidate)275   private static CharSequence trimAfterFirstMatch(Pattern pattern, CharSequence candidate) {
276     Matcher trailingCharsMatcher = pattern.matcher(candidate);
277     if (trailingCharsMatcher.find()) {
278       candidate = candidate.subSequence(0, trailingCharsMatcher.start());
279     }
280     return candidate;
281   }
282 
283   /**
284    * Helper method to determine if a character is a Latin-script letter or not. For our purposes,
285    * combining marks should also return true since we assume they have been added to a preceding
286    * Latin character.
287    */
288   // @VisibleForTesting
isLatinLetter(char letter)289   static boolean isLatinLetter(char letter) {
290     // Combining marks are a subset of non-spacing-mark.
291     if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) {
292       return false;
293     }
294     UnicodeBlock block = UnicodeBlock.of(letter);
295     return block.equals(UnicodeBlock.BASIC_LATIN) ||
296         block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) ||
297         block.equals(UnicodeBlock.LATIN_EXTENDED_A) ||
298         block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) ||
299         block.equals(UnicodeBlock.LATIN_EXTENDED_B) ||
300         block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
301   }
302 
isInvalidPunctuationSymbol(char character)303   private static boolean isInvalidPunctuationSymbol(char character) {
304     return character == '%' || Character.getType(character) == Character.CURRENCY_SYMBOL;
305   }
306 
307   /**
308    * Attempts to extract a match from a {@code candidate} character sequence.
309    *
310    * @param candidate  the candidate text that might contain a phone number
311    * @param offset  the offset of {@code candidate} within {@link #text}
312    * @return  the match found, null if none can be found
313    */
extractMatch(CharSequence candidate, int offset)314   private PhoneNumberMatch extractMatch(CharSequence candidate, int offset) {
315     // Skip a match that is more likely to be a date.
316     if (SLASH_SEPARATED_DATES.matcher(candidate).find()) {
317       return null;
318     }
319 
320     // Skip potential time-stamps.
321     if (TIME_STAMPS.matcher(candidate).find()) {
322       String followingText = text.toString().substring(offset + candidate.length());
323       if (TIME_STAMPS_SUFFIX.matcher(followingText).lookingAt()) {
324         return null;
325       }
326     }
327 
328     // Try to come up with a valid match given the entire candidate.
329     String rawString = candidate.toString();
330     PhoneNumberMatch match = parseAndVerify(rawString, offset);
331     if (match != null) {
332       return match;
333     }
334 
335     // If that failed, try to find an "inner match" - there might be a phone number within this
336     // candidate.
337     return extractInnerMatch(rawString, offset);
338   }
339 
340   /**
341    * Attempts to extract a match from {@code candidate} if the whole candidate does not qualify as a
342    * match.
343    *
344    * @param candidate  the candidate text that might contain a phone number
345    * @param offset  the current offset of {@code candidate} within {@link #text}
346    * @return  the match found, null if none can be found
347    */
extractInnerMatch(String candidate, int offset)348   private PhoneNumberMatch extractInnerMatch(String candidate, int offset) {
349     for (Pattern possibleInnerMatch : INNER_MATCHES) {
350       int rangeStart = 0;
351       Matcher groupMatcher = possibleInnerMatch.matcher(candidate);
352       boolean isFirstMatch = true;
353       while (groupMatcher.find() && maxTries > 0) {
354         if (isFirstMatch) {
355           // We should handle any group before this one too.
356           CharSequence group = trimAfterFirstMatch(
357               PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN,
358               candidate.substring(0, groupMatcher.start()));
359           PhoneNumberMatch match = parseAndVerify(group.toString(), offset);
360           if (match != null) {
361             return match;
362           }
363           maxTries--;
364           isFirstMatch = false;
365         }
366         CharSequence group = trimAfterFirstMatch(
367             PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, groupMatcher.group(1));
368         PhoneNumberMatch match = parseAndVerify(group.toString(), offset + groupMatcher.start(1));
369         if (match != null) {
370           return match;
371         }
372         maxTries--;
373       }
374     }
375     return null;
376   }
377 
378   /**
379    * Parses a phone number from the {@code candidate} using {@link PhoneNumberUtil#parse} and
380    * verifies it matches the requested {@link #leniency}. If parsing and verification succeed, a
381    * corresponding {@link PhoneNumberMatch} is returned, otherwise this method returns null.
382    *
383    * @param candidate  the candidate match
384    * @param offset  the offset of {@code candidate} within {@link #text}
385    * @return  the parsed and validated phone number match, or null
386    */
parseAndVerify(String candidate, int offset)387   private PhoneNumberMatch parseAndVerify(String candidate, int offset) {
388     try {
389       // Check the candidate doesn't contain any formatting which would indicate that it really
390       // isn't a phone number.
391       if (!MATCHING_BRACKETS.matcher(candidate).matches() || PUB_PAGES.matcher(candidate).find()) {
392         return null;
393       }
394 
395       // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded
396       // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
397       if (leniency.compareTo(Leniency.VALID) >= 0) {
398         // If the candidate is not at the start of the text, and does not start with phone-number
399         // punctuation, check the previous character.
400         if (offset > 0 && !LEAD_CLASS.matcher(candidate).lookingAt()) {
401           char previousChar = text.charAt(offset - 1);
402           // We return null if it is a latin letter or an invalid punctuation symbol.
403           if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) {
404             return null;
405           }
406         }
407         int lastCharIndex = offset + candidate.length();
408         if (lastCharIndex < text.length()) {
409           char nextChar = text.charAt(lastCharIndex);
410           if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) {
411             return null;
412           }
413         }
414       }
415 
416       PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion);
417 
418       // Check Israel * numbers: these are a special case in that they are four-digit numbers that
419       // our library supports, but they can only be dialled with a leading *. Since we don't
420       // actually store or detect the * in our phone number library, this means in practice we
421       // detect most four digit numbers as being valid for Israel. We are considering moving these
422       // numbers to ShortNumberInfo instead, in which case this problem would go away, but in the
423       // meantime we want to restrict the false matches so we only allow these numbers if they are
424       // preceded by a star. We enforce this for all leniency levels even though these numbers are
425       // technically accepted by isPossibleNumber and isValidNumber since we consider it to be a
426       // deficiency in those methods that they accept these numbers without the *.
427       // TODO: Remove this or make it significantly less hacky once we've decided how to
428       // handle these short codes going forward in ShortNumberInfo. We could use the formatting
429       // rules for instance, but that would be slower.
430       if (phoneUtil.getRegionCodeForCountryCode(number.getCountryCode()).equals("IL") &&
431           phoneUtil.getNationalSignificantNumber(number).length() == 4 &&
432           (offset == 0 || (offset > 0 && text.charAt(offset - 1) != '*'))) {
433         // No match.
434         return null;
435       }
436 
437       if (leniency.verify(number, candidate, phoneUtil)) {
438         // We used parseAndKeepRawInput to create this number, but for now we don't return the extra
439         // values parsed. TODO: stop clearing all values here and switch all users over
440         // to using rawInput() rather than the rawString() of PhoneNumberMatch.
441         number.clearCountryCodeSource();
442         number.clearRawInput();
443         number.clearPreferredDomesticCarrierCode();
444         return new PhoneNumberMatch(offset, candidate, number);
445       }
446     } catch (NumberParseException e) {
447       // ignore and continue
448     }
449     return null;
450   }
451 
452   /**
453    * Small helper interface such that the number groups can be checked according to different
454    * criteria, both for our default way of performing formatting and for any alternate formats we
455    * may want to check.
456    */
457   interface NumberGroupingChecker {
458     /**
459      * Returns true if the groups of digits found in our candidate phone number match our
460      * expectations.
461      *
462      * @param number  the original number we found when parsing
463      * @param normalizedCandidate  the candidate number, normalized to only contain ASCII digits,
464      *     but with non-digits (spaces etc) retained
465      * @param expectedNumberGroups  the groups of digits that we would expect to see if we
466      *     formatted this number
467      */
checkGroups(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] expectedNumberGroups)468     boolean checkGroups(PhoneNumberUtil util, PhoneNumber number,
469                         StringBuilder normalizedCandidate, String[] expectedNumberGroups);
470   }
471 
allNumberGroupsRemainGrouped(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] formattedNumberGroups)472   static boolean allNumberGroupsRemainGrouped(PhoneNumberUtil util,
473                                               PhoneNumber number,
474                                               StringBuilder normalizedCandidate,
475                                               String[] formattedNumberGroups) {
476     int fromIndex = 0;
477     if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) {
478       // First skip the country code if the normalized candidate contained it.
479       String countryCode = Integer.toString(number.getCountryCode());
480       fromIndex = normalizedCandidate.indexOf(countryCode) + countryCode.length();
481     }
482     // Check each group of consecutive digits are not broken into separate groupings in the
483     // {@code normalizedCandidate} string.
484     for (int i = 0; i < formattedNumberGroups.length; i++) {
485       // Fails if the substring of {@code normalizedCandidate} starting from {@code fromIndex}
486       // doesn't contain the consecutive digits in formattedNumberGroups[i].
487       fromIndex = normalizedCandidate.indexOf(formattedNumberGroups[i], fromIndex);
488       if (fromIndex < 0) {
489         return false;
490       }
491       // Moves {@code fromIndex} forward.
492       fromIndex += formattedNumberGroups[i].length();
493       if (i == 0 && fromIndex < normalizedCandidate.length()) {
494         // We are at the position right after the NDC. We get the region used for formatting
495         // information based on the country code in the phone number, rather than the number itself,
496         // as we do not need to distinguish between different countries with the same country
497         // calling code and this is faster.
498         String region = util.getRegionCodeForCountryCode(number.getCountryCode());
499         if (util.getNddPrefixForRegion(region, true) != null &&
500             Character.isDigit(normalizedCandidate.charAt(fromIndex))) {
501           // This means there is no formatting symbol after the NDC. In this case, we only
502           // accept the number if there is no formatting symbol at all in the number, except
503           // for extensions. This is only important for countries with national prefixes.
504           String nationalSignificantNumber = util.getNationalSignificantNumber(number);
505           return normalizedCandidate.substring(fromIndex - formattedNumberGroups[i].length())
506               .startsWith(nationalSignificantNumber);
507         }
508       }
509     }
510     // The check here makes sure that we haven't mistakenly already used the extension to
511     // match the last group of the subscriber number. Note the extension cannot have
512     // formatting in-between digits.
513     return normalizedCandidate.substring(fromIndex).contains(number.getExtension());
514   }
515 
allNumberGroupsAreExactlyPresent(PhoneNumberUtil util, PhoneNumber number, StringBuilder normalizedCandidate, String[] formattedNumberGroups)516   static boolean allNumberGroupsAreExactlyPresent(PhoneNumberUtil util,
517                                                   PhoneNumber number,
518                                                   StringBuilder normalizedCandidate,
519                                                   String[] formattedNumberGroups) {
520     String[] candidateGroups =
521         PhoneNumberUtil.NON_DIGITS_PATTERN.split(normalizedCandidate.toString());
522     // Set this to the last group, skipping it if the number has an extension.
523     int candidateNumberGroupIndex =
524         number.hasExtension() ? candidateGroups.length - 2 : candidateGroups.length - 1;
525     // First we check if the national significant number is formatted as a block.
526     // We use contains and not equals, since the national significant number may be present with
527     // a prefix such as a national number prefix, or the country code itself.
528     if (candidateGroups.length == 1 ||
529         candidateGroups[candidateNumberGroupIndex].contains(
530             util.getNationalSignificantNumber(number))) {
531       return true;
532     }
533     // Starting from the end, go through in reverse, excluding the first group, and check the
534     // candidate and number groups are the same.
535     for (int formattedNumberGroupIndex = (formattedNumberGroups.length - 1);
536          formattedNumberGroupIndex > 0 && candidateNumberGroupIndex >= 0;
537          formattedNumberGroupIndex--, candidateNumberGroupIndex--) {
538       if (!candidateGroups[candidateNumberGroupIndex].equals(
539           formattedNumberGroups[formattedNumberGroupIndex])) {
540         return false;
541       }
542     }
543     // Now check the first group. There may be a national prefix at the start, so we only check
544     // that the candidate group ends with the formatted number group.
545     return (candidateNumberGroupIndex >= 0 &&
546             candidateGroups[candidateNumberGroupIndex].endsWith(formattedNumberGroups[0]));
547   }
548 
549   /**
550    * Helper method to get the national-number part of a number, formatted without any national
551    * prefix, and return it as a set of digit blocks that would be formatted together.
552    */
getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number, NumberFormat formattingPattern)553   private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number,
554                                                   NumberFormat formattingPattern) {
555     if (formattingPattern == null) {
556       // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits.
557       String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966);
558       // We remove the extension part from the formatted string before splitting it into different
559       // groups.
560       int endIndex = rfc3966Format.indexOf(';');
561       if (endIndex < 0) {
562         endIndex = rfc3966Format.length();
563       }
564       // The country-code will have a '-' following it.
565       int startIndex = rfc3966Format.indexOf('-') + 1;
566       return rfc3966Format.substring(startIndex, endIndex).split("-");
567     } else {
568       // We format the NSN only, and split that according to the separator.
569       String nationalSignificantNumber = util.getNationalSignificantNumber(number);
570       return util.formatNsnUsingPattern(nationalSignificantNumber,
571                                         formattingPattern, PhoneNumberFormat.RFC3966).split("-");
572     }
573   }
574 
checkNumberGroupingIsValid( PhoneNumber number, String candidate, PhoneNumberUtil util, NumberGroupingChecker checker)575   static boolean checkNumberGroupingIsValid(
576       PhoneNumber number, String candidate, PhoneNumberUtil util, NumberGroupingChecker checker) {
577     // TODO: Evaluate how this works for other locales (testing has been limited to NANPA regions)
578     // and optimise if necessary.
579     StringBuilder normalizedCandidate =
580         PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */);
581     String[] formattedNumberGroups = getNationalNumberGroups(util, number, null);
582     if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
583       return true;
584     }
585     // If this didn't pass, see if there are any alternate formats, and try them instead.
586     PhoneMetadata alternateFormats =
587         MetadataManager.getAlternateFormatsForCountry(number.getCountryCode());
588     if (alternateFormats != null) {
589       for (NumberFormat alternateFormat : alternateFormats.numberFormats()) {
590         formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat);
591         if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
592           return true;
593         }
594       }
595     }
596     return false;
597   }
598 
containsMoreThanOneSlashInNationalNumber(PhoneNumber number, String candidate)599   static boolean containsMoreThanOneSlashInNationalNumber(PhoneNumber number, String candidate) {
600     int firstSlashInBodyIndex = candidate.indexOf('/');
601     if (firstSlashInBodyIndex < 0) {
602       // No slashes, this is okay.
603       return false;
604     }
605     // Now look for a second one.
606     int secondSlashInBodyIndex = candidate.indexOf('/', firstSlashInBodyIndex + 1);
607     if (secondSlashInBodyIndex < 0) {
608       // Only one slash, this is okay.
609       return false;
610     }
611 
612     // If the first slash is after the country calling code, this is permitted.
613     boolean candidateHasCountryCode =
614         (number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN ||
615          number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN);
616     if (candidateHasCountryCode &&
617         PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(0, firstSlashInBodyIndex))
618             .equals(Integer.toString(number.getCountryCode()))) {
619       // Any more slashes and this is illegal.
620       return candidate.substring(secondSlashInBodyIndex + 1).contains("/");
621     }
622     return true;
623   }
624 
containsOnlyValidXChars( PhoneNumber number, String candidate, PhoneNumberUtil util)625   static boolean containsOnlyValidXChars(
626       PhoneNumber number, String candidate, PhoneNumberUtil util) {
627     // The characters 'x' and 'X' can be (1) a carrier code, in which case they always precede the
628     // national significant number or (2) an extension sign, in which case they always precede the
629     // extension number. We assume a carrier code is more than 1 digit, so the first case has to
630     // have more than 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 'x'
631     // or 'X'. We ignore the character if it appears as the last character of the string.
632     for (int index = 0; index < candidate.length() - 1; index++) {
633       char charAtIndex = candidate.charAt(index);
634       if (charAtIndex == 'x' || charAtIndex == 'X') {
635         char charAtNextIndex = candidate.charAt(index + 1);
636         if (charAtNextIndex == 'x' || charAtNextIndex == 'X') {
637           // This is the carrier code case, in which the 'X's always precede the national
638           // significant number.
639           index++;
640           if (util.isNumberMatch(number, candidate.substring(index)) != MatchType.NSN_MATCH) {
641             return false;
642           }
643         // This is the extension sign case, in which the 'x' or 'X' should always precede the
644         // extension number.
645         } else if (!PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(index)).equals(
646             number.getExtension())) {
647           return false;
648         }
649       }
650     }
651     return true;
652   }
653 
isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util)654   static boolean isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util) {
655     // First, check how we deduced the country code. If it was written in international format, then
656     // the national prefix is not required.
657     if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) {
658       return true;
659     }
660     String phoneNumberRegion =
661         util.getRegionCodeForCountryCode(number.getCountryCode());
662     PhoneMetadata metadata = util.getMetadataForRegion(phoneNumberRegion);
663     if (metadata == null) {
664       return true;
665     }
666     // Check if a national prefix should be present when formatting this number.
667     String nationalNumber = util.getNationalSignificantNumber(number);
668     NumberFormat formatRule =
669         util.chooseFormattingPatternForNumber(metadata.numberFormats(), nationalNumber);
670     // To do this, we check that a national prefix formatting rule was present and that it wasn't
671     // just the first-group symbol ($1) with punctuation.
672     if ((formatRule != null) && formatRule.getNationalPrefixFormattingRule().length() > 0) {
673       if (formatRule.isNationalPrefixOptionalWhenFormatting()) {
674         // The national-prefix is optional in these cases, so we don't need to check if it was
675         // present.
676         return true;
677       }
678       if (PhoneNumberUtil.formattingRuleHasFirstGroupOnly(
679           formatRule.getNationalPrefixFormattingRule())) {
680         // National Prefix not needed for this number.
681         return true;
682       }
683       // Normalize the remainder.
684       String rawInputCopy = PhoneNumberUtil.normalizeDigitsOnly(number.getRawInput());
685       StringBuilder rawInput = new StringBuilder(rawInputCopy);
686       // Check if we found a national prefix and/or carrier code at the start of the raw input, and
687       // return the result.
688       return util.maybeStripNationalPrefixAndCarrierCode(rawInput, metadata, null);
689     }
690     return true;
691   }
692 
hasNext()693   public boolean hasNext() {
694     if (state == State.NOT_READY) {
695       lastMatch = find(searchIndex);
696       if (lastMatch == null) {
697         state = State.DONE;
698       } else {
699         searchIndex = lastMatch.end();
700         state = State.READY;
701       }
702     }
703     return state == State.READY;
704   }
705 
next()706   public PhoneNumberMatch next() {
707     // Check the state and find the next match as a side-effect if necessary.
708     if (!hasNext()) {
709       throw new NoSuchElementException();
710     }
711 
712     // Don't retain that memory any longer than necessary.
713     PhoneNumberMatch result = lastMatch;
714     lastMatch = null;
715     state = State.NOT_READY;
716     return result;
717   }
718 
719   /**
720    * Always throws {@link UnsupportedOperationException} as removal is not supported.
721    */
remove()722   public void remove() {
723     throw new UnsupportedOperationException();
724   }
725 }
726