1 /* 2 * Copyright (C) 2017 The Libphonenumber Authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler; 18 19 import static com.google.common.collect.ImmutableList.toImmutableList; 20 import static com.google.common.truth.Truth.assertWithMessage; 21 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK; 22 import static java.lang.Integer.bitCount; 23 import static java.lang.Integer.lowestOneBit; 24 import static java.lang.Integer.numberOfTrailingZeros; 25 26 import com.google.common.collect.Multimap; 27 import com.google.common.collect.MultimapBuilder; 28 import com.google.common.collect.SetMultimap; 29 import com.google.i18n.phonenumbers.internal.finitestatematcher.compiler.RegressionTestProto; 30 import com.google.i18n.phonenumbers.internal.finitestatematcher.compiler.RegressionTestProto.TestCase; 31 import com.google.i18n.phonenumbers.internal.finitestatematcher.compiler.RegressionTestProto.Tests; 32 import com.google.i18n.phonenumbers.metadata.DigitSequence; 33 import com.google.i18n.phonenumbers.metadata.RangeSpecification; 34 import com.google.i18n.phonenumbers.metadata.RangeTree; 35 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge; 36 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode; 37 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor; 38 import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher; 39 import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result; 40 import com.google.protobuf.ByteString; 41 import com.google.protobuf.TextFormat; 42 import java.io.IOException; 43 import java.io.InputStream; 44 import java.io.InputStreamReader; 45 import java.io.PrintWriter; 46 import java.io.StringWriter; 47 import java.nio.charset.StandardCharsets; 48 import java.util.HashSet; 49 import java.util.List; 50 import java.util.Set; 51 import org.junit.Test; 52 import org.junit.runner.RunWith; 53 import org.junit.runners.JUnit4; 54 55 @RunWith(JUnit4.class) 56 public class CompilerRegressionTest { 57 // Tests that the compiler produces the expected output, byte-for-byte. 58 @Test testCompiledBytesEqualExpectedMatcherBytes()59 public void testCompiledBytesEqualExpectedMatcherBytes() throws IOException { 60 StringWriter buffer = new StringWriter(); 61 PrintWriter errors = new PrintWriter(buffer); 62 try (InputStream data = 63 CompilerRegressionTest.class.getResourceAsStream("regression_test_data.textpb")) { 64 Tests.Builder tests = RegressionTestProto.Tests.newBuilder(); 65 TextFormat.merge(new InputStreamReader(data, StandardCharsets.UTF_8), tests); 66 for (TestCase tc : tests.getTestCaseList()) { 67 byte[] actual = MatcherCompiler.compile(ranges(tc.getRangeList())); 68 byte[] expected = combine(tc.getExpectedList()); 69 int diffIndex = indexOfDiff(actual, expected); 70 if (!tc.getShouldFail()) { 71 if (diffIndex != -1) { 72 errors.format("FAILED [%s]: First difference at index %d\n", tc.getName(), diffIndex); 73 errors.format("Actual : %s\n", formatPbSnippet(actual, diffIndex, 20)); 74 errors.format("Expected: %s\n", formatPbSnippet(expected, diffIndex, 20)); 75 writeGoldenPbOutput(actual, errors); 76 } 77 } else { 78 if (diffIndex == -1) { 79 errors.format("FAILED [%s]: Expected difference, but got none\n", tc.getName()); 80 } 81 } 82 } 83 } 84 String errorMessage = buffer.toString(); 85 if (!errorMessage.isEmpty()) { 86 assertWithMessage(errorMessage).fail(); 87 } 88 } 89 90 // Test that the matcher behaves correctly with respect to the input ranges using the expected 91 // byte sequences. If this test fails, then the matcher implementation is doing something wrong, 92 // or the expected bytes were generated incorrectly (either by hand or from the compiler). 93 // 94 // IMPORTANT: This test tests that the expected bytes (rather than the compiled bytes) match the 95 // numbers in the ranges. This avoids the risk of any bugs in both the matcher and compiler 96 // somehow cancelling each other out. However this also means that this test depends on the 97 // equality test above for validity (i.e. this test can pass even if the matcher compiler is 98 // broken, so it should not be run in isolation when debugging). 99 @Test testExpectedMatcherBytesMatchRanges()100 public void testExpectedMatcherBytesMatchRanges() throws IOException { 101 try (InputStream data = 102 CompilerRegressionTest.class.getResourceAsStream("regression_test_data.textpb")) { 103 RegressionTestProto.Tests.Builder tests = RegressionTestProto.Tests.newBuilder(); 104 TextFormat.merge(new InputStreamReader(data, StandardCharsets.UTF_8), tests); 105 for (TestCase tc : tests.getTestCaseList()) { 106 RangeTree ranges = ranges(tc.getRangeList()); 107 // If we compiled the ranges here, we could risk a situation where the compiled bytes were 108 // broken but the compiler had a corresponding bug that cancelled it out. This test only 109 // tests the matcher behaviour, whereas the test above only tests the compiler behaviour. 110 DigitSequenceMatcher matcher = DigitSequenceMatcher.create(combine(tc.getExpectedList())); 111 Multimap<Result, DigitSequence> numbers = buildTestNumbers(ranges); 112 if (!tc.getShouldFail()) { 113 testExpectedMatch(tc.getName(), matcher, numbers); 114 } else { 115 testExpectedFailure(tc.getName(), matcher, numbers); 116 } 117 } 118 } 119 } 120 testExpectedMatch(String testName, DigitSequenceMatcher matcher, Multimap<Result, DigitSequence> numbers)121 private static void testExpectedMatch(String testName, DigitSequenceMatcher matcher, 122 Multimap<Result, DigitSequence> numbers) { 123 for (Result expectedResult : Result.values()) { 124 for (DigitSequence s : numbers.get(expectedResult)) { 125 Result result = matcher.match(new Sequence(s)); 126 assertWithMessage("FAILED [%s]: Sequence %s", testName, s) 127 .that(result).isEqualTo(expectedResult); 128 } 129 } 130 } 131 testExpectedFailure(String testName, DigitSequenceMatcher matcher, Multimap<Result, DigitSequence> numbers)132 private static void testExpectedFailure(String testName, DigitSequenceMatcher matcher, 133 Multimap<Result, DigitSequence> numbers) { 134 for (Result expectedResult : Result.values()) { 135 for (DigitSequence s : numbers.get(expectedResult)) { 136 Result result = matcher.match(new Sequence(s)); 137 if (result != expectedResult) { 138 return; 139 } 140 } 141 } 142 assertWithMessage("FAILED [%s]: Expected at least one failure", testName).fail(); 143 } 144 145 // Magic number: DigitSequences cannot be longer than 18 digits at the moment, so a check is 146 // needed to prevent us trying to make a longer-than-allowed sequences in tests. This only 147 // happens in the case of a terminal node, since non-terminal paths must be < 17 digits long. 148 // If the allowed digits increases, this value can be modified or left as-is. 149 private static final int MAX_SEQUENCE_LENGTH = 18; 150 151 // Trivial adapter from the metadata DigitSequence to the matcher's lightweight sequence. 152 private static final class Sequence implements DigitSequenceMatcher.DigitSequence { 153 private final DigitSequence seq; 154 private int index = 0; 155 Sequence(DigitSequence seq)156 Sequence(DigitSequence seq) { 157 this.seq = seq; 158 } 159 160 @Override hasNext()161 public boolean hasNext() { 162 return index < seq.length(); 163 } 164 165 @Override next()166 public int next() { 167 return seq.getDigit(index++); 168 } 169 } 170 171 // Returns a RangeTree for the list of RangeSpecification strings. ranges(List<String> specs)172 RangeTree ranges(List<String> specs) { 173 return RangeTree.from(specs.stream().map(RangeSpecification::parse).collect(toImmutableList())); 174 } 175 176 // Builds a map of numbers for the given RangeTree to test every branching point in the DFA. 177 // All paths combinations are generated exactly once to give coverage. This does use pseudo 178 // random numbers to pick random digits from masks, but it should not be flaky. If it _ever_ 179 // fails then it implies a serious problem with the matcher compiler or matcher implementation. buildTestNumbers(RangeTree ranges)180 private static Multimap<Result, DigitSequence> buildTestNumbers(RangeTree ranges) { 181 SetMultimap<Result, DigitSequence> numbers = 182 MultimapBuilder.enumKeys(Result.class).treeSetValues().build(); 183 Set<DfaNode> visited = new HashSet<>(); 184 ranges.accept(new Visitor(RangeSpecification.empty(), numbers, visited)); 185 return numbers; 186 } 187 188 /** 189 * Visitor to generate a targeted set of test numbers from a range tree DFA, which should 190 * exercise every instruction in the corresponding matcher data. These numbers should ensure 191 * that every "branch" (including early terminations) is taken at least once. Where digits 192 * should be equivalent (i.e. both x & y have the same effect) they are chosen randomly, since 193 * otherwise you would need to generate billions of numbers to cover every possible combination. 194 */ 195 private static final class Visitor implements DfaVisitor { 196 private final RangeSpecification sourcePath; 197 private final SetMultimap<Result, DigitSequence> numbers; 198 private final Set<DfaNode> visited; 199 private int outEdgesMask = 0; 200 Visitor(RangeSpecification sourcePath, SetMultimap<Result, DigitSequence> numbers, Set<DfaNode> visited)201 Visitor(RangeSpecification sourcePath, 202 SetMultimap<Result, DigitSequence> numbers, 203 Set<DfaNode> visited) { 204 this.sourcePath = sourcePath; 205 this.numbers = numbers; 206 this.visited = visited; 207 } 208 209 @Override visit(DfaNode source, DfaEdge edge, DfaNode target)210 public void visit(DfaNode source, DfaEdge edge, DfaNode target) { 211 // Record the current outgoing edge mask. 212 int mask = edge.getDigitMask(); 213 outEdgesMask |= mask; 214 // Get the current path and add a test number for it. 215 RangeSpecification path = sourcePath.extendByMask(mask); 216 numbers.put(target.canTerminate() ? Result.MATCHED : Result.TOO_SHORT, sequenceIn(path)); 217 // Avoid recursing into nodes we've already visited. This avoids generating many (hundreds) 218 // of test numbers for nodes which are reachable in many ways (via many path prefixes). This 219 // is an optional check and could be removed, but for testing larger ranges it seems to make 220 // a difference in test time. DFA node/instruction coverage should be unaffected by this. 221 if (visited.contains(target)) { 222 return; 223 } 224 visited.add(target); 225 // Recurse into the next level with a new visitor starting from our path (it's okay to visit 226 // the terminal node here since it does nothing and leaves the out edges mask zero). 227 Visitor childVisitor = new Visitor(path, numbers, visited); 228 target.accept(childVisitor); 229 // After recursion, find out which of our target's out-edges cannot be reached. 230 int unreachableMask = ~childVisitor.outEdgesMask & ALL_DIGITS_MASK; 231 if (unreachableMask != 0 && path.length() < MAX_SEQUENCE_LENGTH) { 232 // Create a path which cannot be reached directly from our target node. If this is the 233 // terminal node then we create a path that's too long, otherwise it's just invalid. 234 Result expected = target.equals(RangeTree.getTerminal()) ? Result.TOO_LONG : Result.INVALID; 235 numbers.put(expected, sequenceIn(path.extendByMask(unreachableMask))); 236 } 237 } 238 } 239 240 // Returns a pseudo randomly chosen sequence from the given path. sequenceIn(RangeSpecification path)241 private static final DigitSequence sequenceIn(RangeSpecification path) { 242 DigitSequence seq = DigitSequence.empty(); 243 for (int n = 0; n < path.length(); n++) { 244 int mask = path.getBitmask(n); 245 // A random number M in [0..BitCount), not the bit itself. 246 // E.g. mask = 0011010011 ==> (0 <= maskBit < 5) (allowed digits are {0,1,4,6,7}) 247 int maskBit = (int) (bitCount(mask) * Math.random()); 248 // Mask out the M lower bits which come before the randomly selected one. 249 // E.g. maskBit = 3 ==> mask = 0011000000 (3 lower bits cleared) 250 while (maskBit > 0) { 251 mask &= ~lowestOneBit(mask); 252 maskBit--; 253 } 254 // Extend the sequence by the digit value of the randomly selected bit. 255 // E.g. mask = 0011000000 ==> digit = 6 (randomly chosen from the allowed digits). 256 seq = seq.extendBy(numberOfTrailingZeros(mask)); 257 } 258 return seq; 259 } 260 261 // Combines multiple ByteStrings into a single byte[] (we allow splitting in the regression test 262 // file for readability. combine(List<ByteString> bytes)263 private static byte[] combine(List<ByteString> bytes) { 264 int size = bytes.stream().mapToInt(ByteString::size).sum(); 265 byte[] out = new byte[size]; 266 int offset = 0; 267 for (ByteString b : bytes) { 268 b.copyTo(out, offset); 269 offset += b.size(); 270 } 271 return out; 272 } 273 274 // Return the index of the first difference, or -1 is the byte arrays are the same. indexOfDiff(byte[] a, byte[] b)275 private static int indexOfDiff(byte[] a, byte[] b) { 276 int length = Math.min(a.length, b.length); 277 for (int n = 0; n < length; n++) { 278 if (a[n] != b[n]) { 279 return n; 280 } 281 } 282 return (a.length == length && b.length == length) ? -1 : length; 283 } 284 285 // Formats a subset of the bytes as a human readable snippet using C-style hex escaping (which 286 // is compatible with the regression test data). formatPbSnippet(byte[] bytes, int start, int length)287 private static String formatPbSnippet(byte[] bytes, int start, int length) { 288 StringBuilder out = new StringBuilder(); 289 if (start > 0) { 290 out.append("..."); 291 } 292 appendBytes(out, bytes, start, length); 293 if (start + length < bytes.length) { 294 out.append("..."); 295 } 296 return out.toString(); 297 } 298 299 // Writes bytes such that they can be cut & pasted into a regression test file as new golden data. writeGoldenPbOutput(byte[] bytes, PrintWriter errors)300 private static void writeGoldenPbOutput(byte[] bytes, PrintWriter errors) { 301 errors.println("Golden Data:"); 302 StringBuilder out = new StringBuilder(); 303 for (int start = 0; start < bytes.length; start += 20) { 304 errors.format(" expected: \"%s\"\n", appendBytes(out, bytes, start, 20)); 305 out.setLength(0); 306 } 307 } 308 309 // Appends a set of bytes in C-style hex format (e.g. \xHH). appendBytes(StringBuilder out, byte[] bytes, int start, int length)310 private static StringBuilder appendBytes(StringBuilder out, byte[] bytes, int start, int length) { 311 int end = Math.min(start + length, bytes.length); 312 for (int n = start; n < end; n++) { 313 out.append(String.format("\\x%02x", bytes[n] & 0xFF)); 314 } 315 return out; 316 } 317 } 318