• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Libphonenumber Authors.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;
18 
19 import static com.google.common.collect.ImmutableList.toImmutableList;
20 import static com.google.common.truth.Truth.assertWithMessage;
21 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
22 import static java.lang.Integer.bitCount;
23 import static java.lang.Integer.lowestOneBit;
24 import static java.lang.Integer.numberOfTrailingZeros;
25 
26 import com.google.common.collect.Multimap;
27 import com.google.common.collect.MultimapBuilder;
28 import com.google.common.collect.SetMultimap;
29 import com.google.i18n.phonenumbers.internal.finitestatematcher.compiler.RegressionTestProto;
30 import com.google.i18n.phonenumbers.internal.finitestatematcher.compiler.RegressionTestProto.TestCase;
31 import com.google.i18n.phonenumbers.internal.finitestatematcher.compiler.RegressionTestProto.Tests;
32 import com.google.i18n.phonenumbers.metadata.DigitSequence;
33 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
34 import com.google.i18n.phonenumbers.metadata.RangeTree;
35 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
36 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
37 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
38 import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher;
39 import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result;
40 import com.google.protobuf.ByteString;
41 import com.google.protobuf.TextFormat;
42 import java.io.IOException;
43 import java.io.InputStream;
44 import java.io.InputStreamReader;
45 import java.io.PrintWriter;
46 import java.io.StringWriter;
47 import java.nio.charset.StandardCharsets;
48 import java.util.HashSet;
49 import java.util.List;
50 import java.util.Set;
51 import org.junit.Test;
52 import org.junit.runner.RunWith;
53 import org.junit.runners.JUnit4;
54 
55 @RunWith(JUnit4.class)
56 public class CompilerRegressionTest {
57   // Tests that the compiler produces the expected output, byte-for-byte.
58   @Test
testCompiledBytesEqualExpectedMatcherBytes()59   public void testCompiledBytesEqualExpectedMatcherBytes() throws IOException {
60     StringWriter buffer = new StringWriter();
61     PrintWriter errors = new PrintWriter(buffer);
62     try (InputStream data =
63         CompilerRegressionTest.class.getResourceAsStream("regression_test_data.textpb")) {
64       Tests.Builder tests = RegressionTestProto.Tests.newBuilder();
65       TextFormat.merge(new InputStreamReader(data, StandardCharsets.UTF_8), tests);
66       for (TestCase tc : tests.getTestCaseList()) {
67         byte[] actual = MatcherCompiler.compile(ranges(tc.getRangeList()));
68         byte[] expected = combine(tc.getExpectedList());
69         int diffIndex = indexOfDiff(actual, expected);
70         if (!tc.getShouldFail()) {
71           if (diffIndex != -1) {
72             errors.format("FAILED [%s]: First difference at index %d\n", tc.getName(), diffIndex);
73             errors.format("Actual  : %s\n", formatPbSnippet(actual, diffIndex, 20));
74             errors.format("Expected: %s\n", formatPbSnippet(expected, diffIndex, 20));
75             writeGoldenPbOutput(actual, errors);
76           }
77         } else {
78           if (diffIndex == -1) {
79             errors.format("FAILED [%s]: Expected difference, but got none\n", tc.getName());
80           }
81         }
82       }
83     }
84     String errorMessage = buffer.toString();
85     if (!errorMessage.isEmpty()) {
86       assertWithMessage(errorMessage).fail();
87     }
88   }
89 
90   // Test that the matcher behaves correctly with respect to the input ranges using the expected
91   // byte sequences. If this test fails, then the matcher implementation is doing something wrong,
92   // or the expected bytes were generated incorrectly (either by hand or from the compiler).
93   //
94   // IMPORTANT: This test tests that the expected bytes (rather than the compiled bytes) match the
95   // numbers in the ranges. This avoids the risk of any bugs in both the matcher and compiler
96   // somehow cancelling each other out. However this also means that this test depends on the
97   // equality test above for validity (i.e. this test can pass even if the matcher compiler is
98   // broken, so it should not be run in isolation when debugging).
99   @Test
testExpectedMatcherBytesMatchRanges()100   public void testExpectedMatcherBytesMatchRanges() throws IOException {
101     try (InputStream data =
102         CompilerRegressionTest.class.getResourceAsStream("regression_test_data.textpb")) {
103       RegressionTestProto.Tests.Builder tests = RegressionTestProto.Tests.newBuilder();
104       TextFormat.merge(new InputStreamReader(data, StandardCharsets.UTF_8), tests);
105       for (TestCase tc : tests.getTestCaseList()) {
106         RangeTree ranges = ranges(tc.getRangeList());
107         // If we compiled the ranges here, we could risk a situation where the compiled bytes were
108         // broken but the compiler had a corresponding bug that cancelled it out. This test only
109         // tests the matcher behaviour, whereas the test above only tests the compiler behaviour.
110         DigitSequenceMatcher matcher = DigitSequenceMatcher.create(combine(tc.getExpectedList()));
111         Multimap<Result, DigitSequence> numbers = buildTestNumbers(ranges);
112         if (!tc.getShouldFail()) {
113           testExpectedMatch(tc.getName(), matcher, numbers);
114         } else {
115           testExpectedFailure(tc.getName(), matcher, numbers);
116         }
117       }
118     }
119   }
120 
testExpectedMatch(String testName, DigitSequenceMatcher matcher, Multimap<Result, DigitSequence> numbers)121   private static void testExpectedMatch(String testName, DigitSequenceMatcher matcher,
122       Multimap<Result, DigitSequence> numbers) {
123     for (Result expectedResult : Result.values()) {
124       for (DigitSequence s : numbers.get(expectedResult)) {
125         Result result = matcher.match(new Sequence(s));
126         assertWithMessage("FAILED [%s]: Sequence %s", testName, s)
127             .that(result).isEqualTo(expectedResult);
128       }
129     }
130   }
131 
testExpectedFailure(String testName, DigitSequenceMatcher matcher, Multimap<Result, DigitSequence> numbers)132   private static void testExpectedFailure(String testName, DigitSequenceMatcher matcher,
133       Multimap<Result, DigitSequence> numbers) {
134     for (Result expectedResult : Result.values()) {
135       for (DigitSequence s : numbers.get(expectedResult)) {
136         Result result = matcher.match(new Sequence(s));
137         if (result != expectedResult) {
138           return;
139         }
140       }
141     }
142     assertWithMessage("FAILED [%s]: Expected at least one failure", testName).fail();
143   }
144 
145   // Magic number: DigitSequences cannot be longer than 18 digits at the moment, so a check is
146   // needed to prevent us trying to make a longer-than-allowed sequences in tests. This only
147   // happens in the case of a terminal node, since non-terminal paths must be < 17 digits long.
148   // If the allowed digits increases, this value can be modified or left as-is.
149   private static final int MAX_SEQUENCE_LENGTH = 18;
150 
151   // Trivial adapter from the metadata DigitSequence to the matcher's lightweight sequence.
152   private static final class Sequence implements DigitSequenceMatcher.DigitSequence {
153     private final DigitSequence seq;
154     private int index = 0;
155 
Sequence(DigitSequence seq)156     Sequence(DigitSequence seq) {
157       this.seq = seq;
158     }
159 
160     @Override
hasNext()161     public boolean hasNext() {
162       return index < seq.length();
163     }
164 
165     @Override
next()166     public int next() {
167       return seq.getDigit(index++);
168     }
169   }
170 
171   // Returns a RangeTree for the list of RangeSpecification strings.
ranges(List<String> specs)172   RangeTree ranges(List<String> specs) {
173     return RangeTree.from(specs.stream().map(RangeSpecification::parse).collect(toImmutableList()));
174   }
175 
176   // Builds a map of numbers for the given RangeTree to test every branching point in the DFA.
177   // All paths combinations are generated exactly once to give coverage. This does use pseudo
178   // random numbers to pick random digits from masks, but it should not be flaky. If it _ever_
179   // fails then it implies a serious problem with the matcher compiler or matcher implementation.
buildTestNumbers(RangeTree ranges)180   private static Multimap<Result, DigitSequence> buildTestNumbers(RangeTree ranges) {
181     SetMultimap<Result, DigitSequence> numbers =
182         MultimapBuilder.enumKeys(Result.class).treeSetValues().build();
183     Set<DfaNode> visited = new HashSet<>();
184     ranges.accept(new Visitor(RangeSpecification.empty(), numbers, visited));
185     return numbers;
186   }
187 
188   /**
189    * Visitor to generate a targeted set of test numbers from a range tree DFA, which should
190    * exercise every instruction in the corresponding matcher data. These numbers should ensure
191    * that every "branch" (including early terminations) is taken at least once. Where digits
192    * should be equivalent (i.e. both x & y have the same effect) they are chosen randomly, since
193    * otherwise you would need to generate billions of numbers to cover every possible combination.
194    */
195   private static final class Visitor implements DfaVisitor {
196     private final RangeSpecification sourcePath;
197     private final SetMultimap<Result, DigitSequence> numbers;
198     private final Set<DfaNode> visited;
199     private int outEdgesMask = 0;
200 
Visitor(RangeSpecification sourcePath, SetMultimap<Result, DigitSequence> numbers, Set<DfaNode> visited)201     Visitor(RangeSpecification sourcePath,
202         SetMultimap<Result, DigitSequence> numbers,
203         Set<DfaNode> visited) {
204       this.sourcePath = sourcePath;
205       this.numbers = numbers;
206       this.visited = visited;
207     }
208 
209     @Override
visit(DfaNode source, DfaEdge edge, DfaNode target)210     public void visit(DfaNode source, DfaEdge edge, DfaNode target) {
211       // Record the current outgoing edge mask.
212       int mask = edge.getDigitMask();
213       outEdgesMask |= mask;
214       // Get the current path and add a test number for it.
215       RangeSpecification path = sourcePath.extendByMask(mask);
216       numbers.put(target.canTerminate() ? Result.MATCHED : Result.TOO_SHORT, sequenceIn(path));
217       // Avoid recursing into nodes we've already visited. This avoids generating many (hundreds)
218       // of test numbers for nodes which are reachable in many ways (via many path prefixes). This
219       // is an optional check and could be removed, but for testing larger ranges it seems to make
220       // a difference in test time. DFA node/instruction coverage should be unaffected by this.
221       if (visited.contains(target)) {
222         return;
223       }
224       visited.add(target);
225       // Recurse into the next level with a new visitor starting from our path (it's okay to visit
226       // the terminal node here since it does nothing and leaves the out edges mask zero).
227       Visitor childVisitor = new Visitor(path, numbers, visited);
228       target.accept(childVisitor);
229       // After recursion, find out which of our target's out-edges cannot be reached.
230       int unreachableMask = ~childVisitor.outEdgesMask & ALL_DIGITS_MASK;
231       if (unreachableMask != 0 && path.length() < MAX_SEQUENCE_LENGTH) {
232         // Create a path which cannot be reached directly from our target node. If this is the
233         // terminal node then we create a path that's too long, otherwise it's just invalid.
234         Result expected = target.equals(RangeTree.getTerminal()) ? Result.TOO_LONG : Result.INVALID;
235         numbers.put(expected, sequenceIn(path.extendByMask(unreachableMask)));
236       }
237     }
238   }
239 
240   // Returns a pseudo randomly chosen sequence from the given path.
sequenceIn(RangeSpecification path)241   private static final DigitSequence sequenceIn(RangeSpecification path) {
242     DigitSequence seq = DigitSequence.empty();
243     for (int n = 0; n < path.length(); n++) {
244       int mask = path.getBitmask(n);
245       // A random number M in [0..BitCount), not the bit itself.
246       // E.g. mask = 0011010011 ==> (0 <= maskBit < 5) (allowed digits are {0,1,4,6,7})
247       int maskBit = (int) (bitCount(mask) * Math.random());
248       // Mask out the M lower bits which come before the randomly selected one.
249       // E.g. maskBit = 3 ==> mask = 0011000000 (3 lower bits cleared)
250       while (maskBit > 0) {
251         mask &= ~lowestOneBit(mask);
252         maskBit--;
253       }
254       // Extend the sequence by the digit value of the randomly selected bit.
255       // E.g. mask = 0011000000 ==> digit = 6 (randomly chosen from the allowed digits).
256       seq = seq.extendBy(numberOfTrailingZeros(mask));
257     }
258     return seq;
259   }
260 
261   // Combines multiple ByteStrings into a single byte[] (we allow splitting in the regression test
262   // file for readability.
combine(List<ByteString> bytes)263   private static byte[] combine(List<ByteString> bytes) {
264     int size = bytes.stream().mapToInt(ByteString::size).sum();
265     byte[] out = new byte[size];
266     int offset = 0;
267     for (ByteString b : bytes) {
268       b.copyTo(out, offset);
269       offset += b.size();
270     }
271     return out;
272   }
273 
274   // Return the index of the first difference, or -1 is the byte arrays are the same.
indexOfDiff(byte[] a, byte[] b)275   private static int indexOfDiff(byte[] a, byte[] b) {
276     int length = Math.min(a.length, b.length);
277     for (int n = 0; n < length; n++) {
278       if (a[n] != b[n]) {
279         return n;
280       }
281     }
282     return (a.length == length && b.length == length) ? -1 : length;
283   }
284 
285   // Formats a subset of the bytes as a human readable snippet using C-style hex escaping (which
286   // is compatible with the regression test data).
formatPbSnippet(byte[] bytes, int start, int length)287   private static String formatPbSnippet(byte[] bytes, int start, int length) {
288     StringBuilder out = new StringBuilder();
289     if (start > 0) {
290       out.append("...");
291     }
292     appendBytes(out, bytes, start, length);
293     if (start + length < bytes.length) {
294       out.append("...");
295     }
296     return out.toString();
297   }
298 
299   // Writes bytes such that they can be cut & pasted into a regression test file as new golden data.
writeGoldenPbOutput(byte[] bytes, PrintWriter errors)300   private static void writeGoldenPbOutput(byte[] bytes, PrintWriter errors) {
301     errors.println("Golden Data:");
302     StringBuilder out = new StringBuilder();
303     for (int start = 0; start < bytes.length; start += 20) {
304       errors.format("  expected: \"%s\"\n", appendBytes(out, bytes, start, 20));
305       out.setLength(0);
306     }
307   }
308 
309   // Appends a set of bytes in C-style hex format (e.g. \xHH).
appendBytes(StringBuilder out, byte[] bytes, int start, int length)310   private static StringBuilder appendBytes(StringBuilder out, byte[] bytes, int start, int length) {
311     int end = Math.min(start + length, bytes.length);
312     for (int n = start; n < end; n++) {
313       out.append(String.format("\\x%02x", bytes[n] & 0xFF));
314     }
315     return out;
316   }
317 }
318