• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Libphonenumber Authors.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.i18n.phonenumbers.metadata.regex;
18 
19 import static com.google.common.base.CharMatcher.whitespace;
20 import static com.google.common.truth.Truth.assertThat;
21 import static com.google.i18n.phonenumbers.metadata.regex.RegexGenerator.basic;
22 import static java.util.stream.Collectors.joining;
23 
24 import com.google.common.collect.ImmutableList;
25 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
26 import com.google.i18n.phonenumbers.metadata.RangeTree;
27 import java.util.Arrays;
28 import java.util.List;
29 import org.junit.Test;
30 import org.junit.runner.RunWith;
31 import org.junit.runners.JUnit4;
32 
33 @RunWith(JUnit4.class)
34 public class RegexGeneratorTest {
35   @Test
testSimple()36   public void testSimple() {
37     assertRegex(basic(), ranges("123xxx"), "123\\d{3}");
38     // This could be improved to "..." rather than ".{3}" saving 1 char, probably not worth it.
39     assertRegex(basic().withDotMatch(), ranges("123xxx"), "123.{3}");
40   }
41 
42   @Test
testVariableLength()43   public void testVariableLength() {
44     assertRegex(basic(), ranges("123xxx", "123xxxx", "123xxxxx", "123xxxxxx"), "123\\d{3,6}");
45   }
46 
47   @Test
testTailOptimization()48   public void testTailOptimization() {
49     RangeTree dfa = ranges("123xxx", "123xxxx", "145xxx");
50     assertRegex(basic(), dfa, "1(?:23\\d{3,4}|45\\d{3})");
51     assertRegex(basic().withTailOptimization(), dfa, "1(?:23\\d?|45)\\d{3}");
52   }
53 
54   @Test
testDfaFactorization()55   public void testDfaFactorization() {
56     // Essentially create a "thin" wedge of specific non-determinism with the shorter (5-digit)
57     // numbers which prevents the larger ranges from being contiguous in the DFA.
58     RangeTree dfa = ranges("1234x", "1256x", "[0-4]xxxxxx", "[0-4]xxxxxxx");
59     assertRegex(basic(), dfa,
60         "[02-4]\\d{6,7}|",
61         "1(?:[013-9]\\d{5,6}|",
62         "2(?:[0-246-9]\\d{4,5}|",
63         "3(?:[0-35-9]\\d{3,4}|4\\d(?:\\d{2,3})?)|",
64         "5(?:[0-57-9]\\d{3,4}|6\\d(?:\\d{2,3})?)))");
65     assertRegex(basic().withDfaFactorization(), dfa, "[0-4]\\d{6,7}|12(?:34|56)\\d");
66   }
67 
68   @Test
testSubgroupOptimization()69   public void testSubgroupOptimization() {
70     // The subgraph of "everything except 95, 96 and 100" (this appears in China leading digits).
71     RangeTree postgraph = ranges("[02-8]", "1[1-9]", "10[1-9]", "9[0-47-9]");
72     RangeTree pregraph = ranges("123", "234", "345", "456", "567");
73 
74     // Cross product of pre and post paths.
75     RangeTree subgraph = RangeTree.from(
76         pregraph.asRangeSpecifications().stream()
77             .flatMap(a -> postgraph.asRangeSpecifications().stream().map(a::extendBy)));
78 
79     // Union in other paths to trigger repetition in the "basic" case.
80     RangeTree rest = ranges("128xx", "238xx", "348xx", "458xx", "568xx");
81     RangeTree dfa = rest.union(subgraph);
82 
83     assertRegex(basic(), dfa,
84         "12(?:3(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
85         "23(?:4(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
86         "34(?:5(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
87         "45(?:6(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
88         "56(?:7(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)");
89 
90     assertRegex(basic().withSubgroupOptimization(), dfa,
91         "(?:12|23|34|45|56)8\\d\\d|",
92         "(?:123|234|345|456|567)(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])");
93   }
94 
95   @Test
testRegression_bug_65250963()96   public void testRegression_bug_65250963() {
97     RangeTree dfa = ranges(
98         "1387",
99         "1697",
100         "1524",
101         "1539",
102         "1768",
103         "1946");
104     assertRegex(basic(), dfa,
105         "1(?:",
106         "  (?:",
107         "    38|",
108         "    69",
109         "  )7|",
110         "  5(?:",
111         "    24|",
112         "    39",
113         "  )|",
114         "  768|",
115         "  946",
116         ")");
117   }
118 
119   @Test
testRegression_bug_68929642()120   public void testRegression_bug_68929642() {
121     assertMatches(
122         "1\\d{6}(?:\\d{2})?",
123         ImmutableList.of("1234567", "123456789"),
124         ImmutableList.of("12345678"),
125         "1xxx_xxx", "1xx_xxx_xxx");
126 
127     assertMatches(
128         "1\\d{6}[0-7]?",
129         ImmutableList.of("1234567", "12345670"),
130         ImmutableList.of("123456", "123456700"),
131         "1xxx_xxx", "1x_xxx_xx[0-7]");
132 
133     assertMatches(
134         "\\d\\d?",
135         ImmutableList.of("1", "12"),
136         ImmutableList.of("", "123"),
137         "x", "xx");
138 
139     assertMatches(
140         "\\d{1,3}",
141         ImmutableList.of("1", "12", "123"),
142         ImmutableList.of("", "1234"),
143         "x", "xx", "xxx");
144 
145     assertMatches(
146         "\\d(?:\\d{3}(?:\\d{2})?)?",
147         ImmutableList.of("1", "1234", "123456"),
148         ImmutableList.of("", "12", "123", "12345", "1234567"),
149         "x", "xxxx", "xxx_xxx");
150 
151     assertMatches(
152         "(?:\\d\\d(?:\\d(?:\\d{2,4})?)?)?",
153         ImmutableList.of("", "12", "123", "12345", "123456", "1234567"),
154         ImmutableList.of("1", "1234", "12345678"),
155         "", "xx", "xxx", "xx_xxx", "xxx_xxx", "xxxx_xxx");
156 
157     assertMatches(
158         "(?:\\d{2})?",
159         ImmutableList.of("", "12"),
160         ImmutableList.of("1", "123"),
161         "", "xx");
162 
163     assertMatches(
164         "\\d?",
165         ImmutableList.of("", "1"),
166         ImmutableList.of("12"),
167         "", "x");
168   }
169 
170   // This does not check that the generated regex is the same as the input, but it does test some
171   // positive/negative matching cases against both and verifies that the DFA for both are equal.
assertMatches( String pattern, List<String> matchNumbers, List<String> noMatchNumbers, String... specs)172   private static void assertMatches(
173       String pattern, List<String> matchNumbers, List<String> noMatchNumbers, String... specs) {
174     String regex = basic().toRegex(ranges(specs));
175     assertThat(regex).isEqualTo(pattern);
176 
177     // Test the given positive/negative match numbers and expect the same behaviour from both.
178     for (String number : matchNumbers) {
179       assertThat(number).matches(pattern);
180       assertThat(number).matches(regex);
181     }
182     for (String number : noMatchNumbers) {
183       assertThat(number).doesNotMatch(pattern);
184       assertThat(number).doesNotMatch(regex);
185     }
186   }
187 
assertRegex(RegexGenerator generator, RangeTree dfa, String... lines)188   private static void assertRegex(RegexGenerator generator, RangeTree dfa, String... lines) {
189     String regex = generator.toRegex(dfa);
190     String expected = Arrays.stream(lines).map(whitespace()::removeFrom).collect(joining());
191     assertThat(regex).isEqualTo(expected);
192   }
193 
ranges(String... specs)194   private static RangeTree ranges(String... specs) {
195     return RangeTree.from(Arrays.stream(specs).map(RangeSpecification::parse));
196   }
197 }
198