/*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.i18n.phonenumbers.metadata.regex;

import static com.google.common.base.CharMatcher.whitespace;
import static com.google.common.truth.Truth.assertThat;
import static com.google.i18n.phonenumbers.metadata.regex.RegexGenerator.basic;
import static java.util.stream.Collectors.joining;

import com.google.common.collect.ImmutableList;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
 * Tests for {@link RegexGenerator}. Each case builds a {@link RangeTree} from textual range
 * specifications and compares the generated regular expression with an expected string.
 */
@RunWith(JUnit4.class)
public class RegexGeneratorTest {
  @Test
  public void testSimple() {
    assertRegex(basic(), ranges("123xxx"), "123\\d{3}");
    // This could be improved to "..." rather than ".{3}" saving 1 char, probably not worth it.
    assertRegex(basic().withDotMatch(), ranges("123xxx"), "123.{3}");
  }

  @Test
  public void testVariableLength() {
    assertRegex(basic(), ranges("123xxx", "123xxxx", "123xxxxx", "123xxxxxx"), "123\\d{3,6}");
  }

  @Test
  public void testTailOptimization() {
    // The same ranges are rendered twice; with tail optimization enabled the expected regex has
    // the trailing \d{3} written once, after the group, instead of once per alternative.
    RangeTree dfa = ranges("123xxx", "123xxxx", "145xxx");
    assertRegex(basic(), dfa, "1(?:23\\d{3,4}|45\\d{3})");
    assertRegex(basic().withTailOptimization(), dfa, "1(?:23\\d?|45)\\d{3}");
  }

  @Test
  public void testDfaFactorization() {
    // Essentially create a "thin" wedge of specific non-determinism with the shorter (5-digit)
    // numbers which prevents the larger ranges from being contiguous in the DFA.
    RangeTree dfa = ranges("1234x", "1256x", "[0-4]xxxxxx", "[0-4]xxxxxxx");
    assertRegex(basic(), dfa,
        "[02-4]\\d{6,7}|",
        "1(?:[013-9]\\d{5,6}|",
        "2(?:[0-246-9]\\d{4,5}|",
        "3(?:[0-35-9]\\d{3,4}|4\\d(?:\\d{2,3})?)|",
        "5(?:[0-57-9]\\d{3,4}|6\\d(?:\\d{2,3})?)))");
    assertRegex(basic().withDfaFactorization(), dfa, "[0-4]\\d{6,7}|12(?:34|56)\\d");
  }

  @Test
  public void testSubgroupOptimization() {
    // The subgraph of "everything except 95, 96 and 100" (this appears in China leading digits).
    RangeTree postgraph = ranges("[02-8]", "1[1-9]", "10[1-9]", "9[0-47-9]");
    RangeTree pregraph = ranges("123", "234", "345", "456", "567");

    // Cross product of pre and post paths.
    RangeTree subgraph = RangeTree.from(
        pregraph.asRangeSpecifications().stream()
            .flatMap(a -> postgraph.asRangeSpecifications().stream().map(a::extendBy)));

    // Union in other paths to trigger repetition in the "basic" case.
    RangeTree rest = ranges("128xx", "238xx", "348xx", "458xx", "568xx");
    RangeTree dfa = rest.union(subgraph);

    assertRegex(basic(), dfa,
        "12(?:3(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
        "23(?:4(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
        "34(?:5(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
        "45(?:6(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
        "56(?:7(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)");

    assertRegex(basic().withSubgroupOptimization(), dfa,
        "(?:12|23|34|45|56)8\\d\\d|",
        "(?:123|234|345|456|567)(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])");
  }

  @Test
  public void testRegression_bug_65250963() {
    RangeTree dfa = ranges(
        "1387",
        "1697",
        "1524",
        "1539",
        "1768",
        "1946");
    // The expected regex is split over several lines for readability; assertRegex() removes all
    // whitespace from each line before comparing, so the spaces below are not significant.
    assertRegex(basic(), dfa,
        "1(?:",
        " (?:",
        " 38|",
        " 69",
        " )7|",
        " 5(?:",
        " 24|",
        " 39",
        " )|",
        " 768|",
        " 946",
        ")");
  }

  @Test
  public void testRegression_bug_68929642() {
    assertMatches(
        "1\\d{6}(?:\\d{2})?",
        ImmutableList.of("1234567", "123456789"),
        ImmutableList.of("12345678"),
        "1xxx_xxx", "1xx_xxx_xxx");

    assertMatches(
        "1\\d{6}[0-7]?",
        ImmutableList.of("1234567", "12345670"),
        ImmutableList.of("123456", "123456700"),
        "1xxx_xxx", "1x_xxx_xx[0-7]");

    assertMatches(
        "\\d\\d?",
        ImmutableList.of("1", "12"),
        ImmutableList.of("", "123"),
        "x", "xx");

    assertMatches(
        "\\d{1,3}",
        ImmutableList.of("1", "12", "123"),
        ImmutableList.of("", "1234"),
        "x", "xx", "xxx");

    assertMatches(
        "\\d(?:\\d{3}(?:\\d{2})?)?",
        ImmutableList.of("1", "1234", "123456"),
        ImmutableList.of("", "12", "123", "12345", "1234567"),
        "x", "xxxx", "xxx_xxx");

    assertMatches(
        "(?:\\d\\d(?:\\d(?:\\d{2,4})?)?)?",
        ImmutableList.of("", "12", "123", "12345", "123456", "1234567"),
        ImmutableList.of("1", "1234", "12345678"),
        "", "xx", "xxx", "xx_xxx", "xxx_xxx", "xxxx_xxx");

    assertMatches(
        "(?:\\d{2})?",
        ImmutableList.of("", "12"),
        ImmutableList.of("1", "123"),
        "", "xx");

    assertMatches(
        "\\d?",
        ImmutableList.of("", "1"),
        ImmutableList.of("12"),
        "", "x");
  }

  /**
   * Asserts that the regex generated by {@code basic()} for {@code specs} is exactly
   * {@code pattern}, then checks the sample numbers against both strings.
   *
   * @param pattern the expected regular expression
   * @param matchNumbers inputs that must match
   * @param noMatchNumbers inputs that must not match
   * @param specs range specifications from which the regex is generated
   */
  private static void assertMatches(
      String pattern, List<String> matchNumbers, List<String> noMatchNumbers, String... specs) {
    String regex = basic().toRegex(ranges(specs));
    assertThat(regex).isEqualTo(pattern);

    // Test the given positive/negative match numbers and expect the same behaviour from both.
    // The two strings are already known to be equal at this point, so checking both only guards
    // against the expected pattern itself being wrong for the sample numbers.
    for (String number : matchNumbers) {
      assertThat(number).matches(pattern);
      assertThat(number).matches(regex);
    }
    for (String number : noMatchNumbers) {
      assertThat(number).doesNotMatch(pattern);
      assertThat(number).doesNotMatch(regex);
    }
  }

  /**
   * Asserts that {@code generator} renders {@code dfa} as the expected regex, where the expected
   * regex is the concatenation of {@code lines} after all whitespace has been removed from each.
   */
  private static void assertRegex(RegexGenerator generator, RangeTree dfa, String... lines) {
    String regex = generator.toRegex(dfa);
    String expected = Arrays.stream(lines).map(whitespace()::removeFrom).collect(joining());
    assertThat(regex).isEqualTo(expected);
  }

  /** Parses each spec with {@link RangeSpecification#parse} and builds a tree from the results. */
  private static RangeTree ranges(String... specs) {
    return RangeTree.from(Arrays.stream(specs).map(RangeSpecification::parse));
  }
}