1 /* 2 * Copyright (C) 2017 The Libphonenumber Authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.google.i18n.phonenumbers.metadata.table; 17 18 import static com.google.common.base.Preconditions.checkArgument; 19 import static com.google.common.base.Preconditions.checkNotNull; 20 import static com.google.common.base.Preconditions.checkState; 21 import static com.google.common.collect.ImmutableList.toImmutableList; 22 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK; 23 import static java.lang.Integer.numberOfTrailingZeros; 24 import static java.util.Comparator.comparing; 25 26 import com.google.auto.value.AutoValue; 27 import com.google.common.collect.ImmutableList; 28 import com.google.common.collect.ImmutableSortedSet; 29 import com.google.i18n.phonenumbers.metadata.DigitSequence; 30 import com.google.i18n.phonenumbers.metadata.RangeSpecification; 31 import com.google.i18n.phonenumbers.metadata.RangeTree; 32 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge; 33 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode; 34 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor; 35 import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer; 36 import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy; 37 import java.util.ArrayList; 38 import java.util.Comparator; 39 import java.util.List; 40 import java.util.NavigableSet; 41 import java.util.Set; 42 43 /** 44 * A range key is somewhat similar to a {@link RangeSpecification}, except that it can encode 45 * multiple possible lengths for a single range prefix. Range keys are particularly useful as 46 * unique "row keys" when representing range trees as tabular data. 47 */ 48 @AutoValue 49 public abstract class RangeKey { 50 /** 51 * Order by prefix first and then minimum length. For row keys representing disjoint ranges, this 52 * will be a total ordering (since the comparison is really with the "shortest" digit sequence in 53 * the ranges, which must be distinct for disjoint ranges). 54 */ 55 public static final Comparator<RangeKey> ORDERING = 56 comparing(RangeKey::getPrefix, comparing(s -> s.min().toString())) 57 .thenComparing(RangeKey::getLengths, comparing(NavigableSet::first)); 58 59 /** 60 * Creates a range key representing ranges with a prefix of some set of lengths. The prefix must 61 * not be longer than the possible lengths and cannot end with an "any" edge (i.e. "x"). 62 */ create(RangeSpecification prefix, Set<Integer> lengths)63 public static RangeKey create(RangeSpecification prefix, Set<Integer> lengths) { 64 checkArgument(prefix.length() == 0 || prefix.getBitmask(prefix.length() - 1) != ALL_DIGITS_MASK, 65 "prefix cannot end with an 'any' edge: %s", prefix); 66 ImmutableSortedSet<Integer> sorted = ImmutableSortedSet.copyOf(lengths); 67 checkArgument(sorted.first() >= prefix.length(), 68 "lengths cannot be shorter than the prefix: %s - %s", prefix, lengths); 69 return new AutoValue_RangeKey(prefix, sorted); 70 } 71 72 /** 73 * Decomposes the given range tree into a sorted sequence of keys, representing the same digit 74 * sequences. The resulting keys form a disjoint covering of the original range set, and no 75 * two keys will contain the same prefix (but prefixes of keys may overlap, even if the ranges 76 * they ultimately represent do not). The resulting sequence is ordered by {@link #ORDERING}. 77 */ decompose(RangeTree tree)78 public static ImmutableList<RangeKey> decompose(RangeTree tree) { 79 List<RangeKey> keys = new ArrayList<>(); 80 // The ALLOW_EDGE_SPLITTING strategy works best for the case of generating row keys because it 81 // helps avoid having the same sequence appear in multiple rows. Note however than even this 82 // strategy isn't perfect, and partially overlapping ranges with different lengths can still 83 // cause issues. For example, 851 appears as a prefix for 2 rows in the following (real world) 84 // example. 85 // prefix=85[1-9], length=10 86 // prefix=8[57]1, length=11 87 // However a given digit sequence will still only appear in (at most) one range key based on 88 // its length. 89 for (RangeTree f : RangeTreeFactorizer.factor(tree, MergeStrategy.ALLOW_EDGE_SPLITTING)) { 90 KeyVisitor.visit(f, keys); 91 } 92 return ImmutableList.sortedCopyOf(ORDERING, keys); 93 } 94 95 // A recursive descent visitor that splits range keys from the visited tree on the upward phase 96 // of visitation. After finding the terminal node, the visitor tries to strip as much of the 97 // trailing "any" path as possible, to leave the prefix. Note that the visitor can never start 98 // another downward visitation while its processing the "any" paths, because if it walks up 99 // through an "any" path, the node it reaches cannot have any other edges coming from it (the 100 // "any" path is all the possible edges). 101 private static class KeyVisitor implements DfaVisitor { visit(RangeTree tree, List<RangeKey> keys)102 private static void visit(RangeTree tree, List<RangeKey> keys) { 103 KeyVisitor v = new KeyVisitor(keys); 104 tree.accept(v); 105 // We may still need to emit a key for ranges with "any" paths that reach the root node. 106 int lengthMask = v.lengthMask; 107 // Shouldn't happen for phone numbers, since it implies the existence of "zero length" digit 108 // sequences. 109 if (tree.getInitial().canTerminate()) { 110 lengthMask |= 1; 111 } 112 if (lengthMask != 0) { 113 // Use the empty specification as a prefix since the ranges are defined purely by length. 114 keys.add(new AutoValue_RangeKey(RangeSpecification.empty(), buildLengths(lengthMask))); 115 } 116 } 117 118 // Collection of extracted keys. 119 private final List<RangeKey> keys; 120 // Current path from the root of the tree being visited. 121 private RangeSpecification path = RangeSpecification.empty(); 122 // Non-zero when we are in the "upward" phase of visitation, processing trailing "any" paths. 123 // When zero we are either in a "downward" phase or traversing up without stripping paths. 124 private int lengthMask = 0; 125 KeyVisitor(List<RangeKey> keys)126 private KeyVisitor(List<RangeKey> keys) { 127 this.keys = checkNotNull(keys); 128 } 129 130 @Override visit(DfaNode source, DfaEdge edge, DfaNode target)131 public void visit(DfaNode source, DfaEdge edge, DfaNode target) { 132 checkState(lengthMask == 0, 133 "during downward tree traversal, length mask should be zero (was %s)", lengthMask); 134 RangeSpecification oldPath = path; 135 path = path.extendByMask(edge.getDigitMask()); 136 if (target.equals(RangeTree.getTerminal())) { 137 lengthMask = (1 << path.length()); 138 // We might emit the key immediately for ranges without trailing paths (e.g. "1234"). 139 maybeEmitKey(); 140 } else { 141 target.accept(this); 142 // If we see a terminating node, we are either adding a new possible length to an existing 143 // key or starting to process a new key (we don't know and it doesn't matter providing we 144 // capture the current length in the mask). 145 if (target.canTerminate()) { 146 lengthMask |= (1 << path.length()); 147 } 148 maybeEmitKey(); 149 } 150 path = oldPath; 151 } 152 153 // Conditionally emits a key for the current path prefix and possible lengths if we've found 154 // the "end" of an "any" path (e.g. we have possible lengths and the edge above us is not an 155 // "any" path). maybeEmitKey()156 private void maybeEmitKey() { 157 if (lengthMask != 0 && path.getBitmask(path.length() - 1) != ALL_DIGITS_MASK) { 158 keys.add(new AutoValue_RangeKey(path, buildLengths(lengthMask))); 159 lengthMask = 0; 160 } 161 } 162 } 163 164 /** 165 * Returns the prefix for this range key. All digit sequences matches by this key are of the 166 * form {@code "<prefix>xxxx"} for some number of "any" edges. This prefix can be "empty" for 167 * ranges such as {@code "xxxx"}. 168 */ getPrefix()169 public abstract RangeSpecification getPrefix(); 170 171 /** 172 * Returns the possible lengths for digit sequences matched by this key. The returned set is 173 * never empty. 174 */ getLengths()175 public abstract ImmutableSortedSet<Integer> getLengths(); 176 177 /** 178 * Converts the range key into a sequence of range specifications, ordered by length. The 179 * returned set is never empty. 180 */ asRangeSpecifications()181 public final ImmutableList<RangeSpecification> asRangeSpecifications() { 182 RangeSpecification s = getPrefix(); 183 return getLengths().stream() 184 .map(n -> s.extendByLength(n - s.length())) 185 .collect(toImmutableList()); 186 } 187 asRangeTree()188 public final RangeTree asRangeTree() { 189 RangeSpecification s = getPrefix(); 190 return RangeTree.from(getLengths().stream().map(n -> s.extendByLength(n - s.length()))); 191 } 192 193 /* 194 * Checks if the RangeKey contains a range represented by the given prefix and length. 195 */ contains(DigitSequence prefix, Integer length)196 public boolean contains(DigitSequence prefix, Integer length) { 197 return asRangeSpecifications().stream() 198 .anyMatch( 199 specification -> 200 specification.matches( 201 prefix.extendBy(DigitSequence.zeros(length - prefix.length())))); 202 } 203 buildLengths(int lengthMask)204 private static ImmutableSortedSet<Integer> buildLengths(int lengthMask) { 205 checkArgument(lengthMask != 0); 206 ImmutableSortedSet.Builder<Integer> lengths = ImmutableSortedSet.naturalOrder(); 207 do { 208 int length = numberOfTrailingZeros(lengthMask); 209 lengths.add(length); 210 // Clear each bit as we go. 211 lengthMask &= ~(1 << length); 212 } while (lengthMask != 0); 213 return lengths.build(); 214 } 215 } 216