1 /* 2 * Copyright (C) 2017 The Libphonenumber Authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.google.i18n.phonenumbers.metadata; 17 18 import static com.google.common.base.Preconditions.checkNotNull; 19 import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.REQUIRE_EQUAL_EDGES; 20 21 import com.google.common.collect.ImmutableList; 22 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge; 23 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode; 24 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor; 25 import java.util.ArrayList; 26 import java.util.List; 27 28 /** 29 * Factor a range tree into a sequence of trees which attempts to minimize overall complexity in 30 * the face of non-determinism. This can be used to reduce the size of any generated regular 31 * expressions. 32 */ 33 public final class RangeTreeFactorizer { 34 /** Strategies to control how merging is achieved when building factors.*/ 35 public enum MergeStrategy { 36 /** 37 * Edges are only merged if they accept exactly the same set of digits. If the existing factor 38 * contains "[0-5]" it will not be merged with the candidate edge "[0-8]". 39 */ 40 REQUIRE_EQUAL_EDGES, 41 /** 42 * Edges can be merged if the candidate edge accepts more digits than the existing edge. If the 43 * existing factor contains "[0-5]" and the candidate edge is "[0-8]", the candidate edge is 44 * split so that "[0-5]" is merged as normal and an additional edge "[6-8]" is branched off. 45 */ 46 ALLOW_EDGE_SPLITTING, 47 } 48 49 /** 50 * Factors the given range tree. 51 * <p> 52 * Paths are processed longest-first, and a path belongs in particular "factor" if it can be 53 * added without "causing a split" in the existing factor. For example, given an existing factor 54 * {@code {"12[3-6]x", "45xx"}}: 55 * <ul> 56 * <li>The path "12[3-6]" can be added, since it is a prefix of one of the existing paths in 57 * the DFA. 58 * <li>The path "13xx" can be added since it forms a new branch in the DFA, which does not 59 * affect any existing branches ("13..." is disjoint with "12..."). 60 * <li>The path "12[34]" cannot be added since it would "split" the existing path 61 * "12[3-6]x" in the DFA ("[34]" is a subset of "[3-6]"). " 62 * <li>Depending on the merge strategy, the path "12[0-6]x" might be added ("[0-6]" is a 63 * superset of "[3-6]"). See {@link MergeStrategy} for more information. 64 * </ul> 65 */ factor(RangeTree ranges, MergeStrategy strategy)66 public static ImmutableList<RangeTree> factor(RangeTree ranges, MergeStrategy strategy) { 67 // If only one length on all paths, the DFA is already "factored". 68 if (ranges.getLengths().size() == 1) { 69 return ImmutableList.of(ranges); 70 } 71 List<RangeTree> factors = new ArrayList<>(); 72 // Start with the "naive" factors (splitting by length) from longest to shortest. 73 for (int n : ranges.getLengths().descendingSet()) { 74 factors.add(ranges.intersect(RangeTree.from(RangeSpecification.any(n)))); 75 } 76 // Now attempt to merge as much of each of the shorter factors as possible into the longer ones. 77 // In each loop we subsume a candidate factor into previous factors, either in whole or in part. 78 int index = 1; 79 while (index < factors.size()) { 80 // Merge (as much as possible) each "naive" factor into earlier factors. 81 RangeTree r = factors.get(index); 82 for (int n = 0; n < index && !r.isEmpty(); n++) { 83 RangeTree merged = new RangeTreeFactorizer(factors.get(n), strategy).mergeFrom(r); 84 factors.set(n, merged); 85 // Calculate the ranges which haven't yet been merged into any earlier factor. 86 r = r.subtract(merged); 87 } 88 if (r.isEmpty()) { 89 // All ranges merged, so remove the original factor (index now references the next factor). 90 factors.remove(index); 91 } else { 92 // We have some un-factorable ranges which are kept to start a new factor. 93 factors.set(index, r); 94 index++; 95 } 96 } 97 return ImmutableList.copyOf(factors); 98 } 99 100 // This is modified as paths are added. 101 private RangeTree factor; 102 private final MergeStrategy strategy; 103 RangeTreeFactorizer(RangeTree factor, MergeStrategy strategy)104 RangeTreeFactorizer(RangeTree factor, MergeStrategy strategy) { 105 this.factor = checkNotNull(factor); 106 this.strategy = strategy; 107 } 108 mergeFrom(RangeTree ranges)109 RangeTree mergeFrom(RangeTree ranges) { 110 recursivelyMerge(ranges.getInitial(), factor.getInitial(), RangeSpecification.empty()); 111 return factor; 112 } 113 recursivelyMerge(DfaNode srcNode, DfaNode dstNode, RangeSpecification path)114 void recursivelyMerge(DfaNode srcNode, DfaNode dstNode, RangeSpecification path) { 115 if (srcNode.canTerminate()) { 116 factor = factor.union(RangeTree.from(path)); 117 } else { 118 srcNode.accept(new FactoringVisitor(dstNode, path)); 119 } 120 } 121 122 private final class FactoringVisitor implements DfaVisitor { 123 private final RangeSpecification path; 124 private final DfaNode dstNode; 125 126 // True if we encountered a situation when an edge we are merging (srcMask) has a partial 127 // overlap with the existing edge (dstMask) (e.g. merging "[0-6]" into "[4-9]"). This is 128 // distinct from the case where the existing edge is a subset of the edge being merged (e.g. 129 // merging "[0-6]" into "[2-4]", where the edge being merged can be split into "[0156]" and 130 // "[2-4]"). In either strategy, a partial overlap will prevent merging. 131 private boolean partialOverlap = false; 132 133 // Records the union of all edge ranges visited for the current node. This is used to determine 134 // the remaining edges that must be added after visiting the existing factor (especially in the 135 // case of ALLOW_EDGE_SPLITTING). 136 private int allDstMask = 0; 137 FactoringVisitor(DfaNode dstNode, RangeSpecification path)138 FactoringVisitor(DfaNode dstNode, RangeSpecification path) { 139 this.dstNode = dstNode; 140 this.path = path; 141 } 142 143 @Override visit(DfaNode source, DfaEdge srcEdge, DfaNode srcTarget)144 public void visit(DfaNode source, DfaEdge srcEdge, DfaNode srcTarget) { 145 int srcMask = srcEdge.getDigitMask(); 146 dstNode.accept((s, dstEdge, dstTarget) -> { 147 int dstMask = dstEdge.getDigitMask(); 148 if ((strategy == REQUIRE_EQUAL_EDGES) ? (dstMask == srcMask) : (dstMask & ~srcMask) == 0) { 149 // The set of digits accepted by the edge being merged (mask) is equal-to or a superset 150 // of the digits of the edge in the factor we are merging into. The path is extended by 151 // the destination edge because during recursion we only follow paths already in the 152 // factor. 153 recursivelyMerge(srcTarget, dstTarget, path.extendByMask(dstMask)); 154 } else { 155 partialOverlap |= (dstMask & srcMask) != 0; 156 } 157 allDstMask |= dstMask; 158 }); 159 if (!partialOverlap) { 160 // Work out the digits that weren't in any of the edges of the factor we were processing 161 // and merge the sub-tree under that edge into the current factor. For REQUIRE_EQUAL_EDGES 162 // the extraMask is always either srcMask or 0 (since the edge was either added in full, 163 // or disjoint with all the existing edges). For ALLOW_EDGE_SPLITTING it's the remaining 164 // range that wasn't merged with any of the existing paths. 165 int extraMask = srcMask & ~allDstMask; 166 if (extraMask != 0) { 167 new MergingVisitor(path).recurse(srcTarget, extraMask); 168 } 169 } 170 } 171 } 172 173 private final class MergingVisitor implements DfaVisitor { 174 private final RangeSpecification path; 175 MergingVisitor(RangeSpecification path)176 MergingVisitor(RangeSpecification path) { 177 this.path = checkNotNull(path); 178 } 179 recurse(DfaNode node, int mask)180 void recurse(DfaNode node, int mask) { 181 RangeSpecification newPath = path.extendByMask(mask); 182 if (node.canTerminate()) { 183 factor = factor.union(RangeTree.from(newPath)); 184 } else { 185 node.accept(new MergingVisitor(newPath)); 186 } 187 } 188 189 @Override visit(DfaNode source, DfaEdge edge, DfaNode target)190 public void visit(DfaNode source, DfaEdge edge, DfaNode target) { 191 recurse(target, edge.getDigitMask()); 192 } 193 } 194 } 195