• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Libphonenumber Authors.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.google.i18n.phonenumbers.metadata.table;
17 
18 import static com.google.common.base.Preconditions.checkArgument;
19 import static com.google.common.base.Preconditions.checkNotNull;
20 import static com.google.common.base.Preconditions.checkState;
21 import static com.google.common.collect.ImmutableList.toImmutableList;
22 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
23 import static java.lang.Integer.numberOfTrailingZeros;
24 import static java.util.Comparator.comparing;
25 
26 import com.google.auto.value.AutoValue;
27 import com.google.common.collect.ImmutableList;
28 import com.google.common.collect.ImmutableSortedSet;
29 import com.google.i18n.phonenumbers.metadata.DigitSequence;
30 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
31 import com.google.i18n.phonenumbers.metadata.RangeTree;
32 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
33 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
34 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
35 import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer;
36 import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy;
37 import java.util.ArrayList;
38 import java.util.Comparator;
39 import java.util.List;
40 import java.util.NavigableSet;
41 import java.util.Set;
42 
43 /**
44  * A range key is somewhat similar to a {@link RangeSpecification}, except that it can encode
45  * multiple possible lengths for a single range prefix. Range keys are particularly useful as
46  * unique "row keys" when representing range trees as tabular data.
47  */
48 @AutoValue
49 public abstract class RangeKey {
50   /**
51    * Order by prefix first and then minimum length. For row keys representing disjoint ranges, this
52    * will be a total ordering (since the comparison is really with the "shortest" digit sequence in
53    * the ranges, which must be distinct for disjoint ranges).
54    */
55   public static final Comparator<RangeKey> ORDERING =
56       comparing(RangeKey::getPrefix, comparing(s -> s.min().toString()))
57           .thenComparing(RangeKey::getLengths, comparing(NavigableSet::first));
58 
59   /**
60    * Creates a range key representing ranges with a prefix of some set of lengths. The prefix must
61    * not be longer than the possible lengths and cannot end with an "any" edge (i.e. "x").
62    */
create(RangeSpecification prefix, Set<Integer> lengths)63   public static RangeKey create(RangeSpecification prefix, Set<Integer> lengths) {
64     checkArgument(prefix.length() == 0 || prefix.getBitmask(prefix.length() - 1) != ALL_DIGITS_MASK,
65         "prefix cannot end with an 'any' edge: %s", prefix);
66     ImmutableSortedSet<Integer> sorted = ImmutableSortedSet.copyOf(lengths);
67     checkArgument(sorted.first() >= prefix.length(),
68         "lengths cannot be shorter than the prefix: %s - %s", prefix, lengths);
69     return new AutoValue_RangeKey(prefix, sorted);
70   }
71 
72   /**
73    * Decomposes the given range tree into a sorted sequence of keys, representing the same digit
74    * sequences. The resulting keys form a disjoint covering of the original range set, and no
75    * two keys will contain the same prefix (but prefixes of keys may overlap, even if the ranges
76    * they ultimately represent do not). The resulting sequence is ordered by {@link #ORDERING}.
77    */
decompose(RangeTree tree)78   public static ImmutableList<RangeKey> decompose(RangeTree tree) {
79     List<RangeKey> keys = new ArrayList<>();
80     // The ALLOW_EDGE_SPLITTING strategy works best for the case of generating row keys because it
81     // helps avoid having the same sequence appear in multiple rows. Note however than even this
82     // strategy isn't perfect, and partially overlapping ranges with different lengths can still
83     // cause issues. For example, 851 appears as a prefix for 2 rows in the following (real world)
84     // example.
85     //   prefix=85[1-9], length=10
86     //   prefix=8[57]1,  length=11
87     // However a given digit sequence will still only appear in (at most) one range key based on
88     // its length.
89     for (RangeTree f : RangeTreeFactorizer.factor(tree, MergeStrategy.ALLOW_EDGE_SPLITTING)) {
90       KeyVisitor.visit(f, keys);
91     }
92     return ImmutableList.sortedCopyOf(ORDERING, keys);
93   }
94 
95   // A recursive descent visitor that splits range keys from the visited tree on the upward phase
96   // of visitation. After finding the terminal node, the visitor tries to strip as much of the
97   // trailing "any" path as possible, to leave the prefix. Note that the visitor can never start
98   // another downward visitation while its processing the "any" paths, because if it walks up
99   // through an "any" path, the node it reaches cannot have any other edges coming from it (the
100   // "any" path is all the possible edges).
101   private static class KeyVisitor implements DfaVisitor {
visit(RangeTree tree, List<RangeKey> keys)102     private static void visit(RangeTree tree, List<RangeKey> keys) {
103       KeyVisitor v = new KeyVisitor(keys);
104       tree.accept(v);
105       // We may still need to emit a key for ranges with "any" paths that reach the root node.
106       int lengthMask = v.lengthMask;
107       // Shouldn't happen for phone numbers, since it implies the existence of "zero length" digit
108       // sequences.
109       if (tree.getInitial().canTerminate()) {
110         lengthMask |= 1;
111       }
112       if (lengthMask != 0) {
113         // Use the empty specification as a prefix since the ranges are defined purely by length.
114         keys.add(new AutoValue_RangeKey(RangeSpecification.empty(), buildLengths(lengthMask)));
115       }
116     }
117 
118     // Collection of extracted keys.
119     private final List<RangeKey> keys;
120     // Current path from the root of the tree being visited.
121     private RangeSpecification path = RangeSpecification.empty();
122     // Non-zero when we are in the "upward" phase of visitation, processing trailing "any" paths.
123     // When zero we are either in a "downward" phase or traversing up without stripping paths.
124     private int lengthMask = 0;
125 
KeyVisitor(List<RangeKey> keys)126     private KeyVisitor(List<RangeKey> keys) {
127       this.keys = checkNotNull(keys);
128     }
129 
130     @Override
visit(DfaNode source, DfaEdge edge, DfaNode target)131     public void visit(DfaNode source, DfaEdge edge, DfaNode target) {
132       checkState(lengthMask == 0,
133           "during downward tree traversal, length mask should be zero (was %s)", lengthMask);
134       RangeSpecification oldPath = path;
135       path = path.extendByMask(edge.getDigitMask());
136       if (target.equals(RangeTree.getTerminal())) {
137         lengthMask = (1 << path.length());
138         // We might emit the key immediately for ranges without trailing paths (e.g. "1234").
139         maybeEmitKey();
140       } else {
141         target.accept(this);
142         // If we see a terminating node, we are either adding a new possible length to an existing
143         // key or starting to process a new key (we don't know and it doesn't matter providing we
144         // capture the current length in the mask).
145         if (target.canTerminate()) {
146           lengthMask |= (1 << path.length());
147         }
148         maybeEmitKey();
149       }
150       path = oldPath;
151     }
152 
153     // Conditionally emits a key for the current path prefix and possible lengths if we've found
154     // the "end" of an "any" path (e.g. we have possible lengths and the edge above us is not an
155     // "any" path).
maybeEmitKey()156     private void maybeEmitKey() {
157       if (lengthMask != 0 && path.getBitmask(path.length() - 1) != ALL_DIGITS_MASK) {
158         keys.add(new AutoValue_RangeKey(path, buildLengths(lengthMask)));
159         lengthMask = 0;
160       }
161     }
162   }
163 
164   /**
165    * Returns the prefix for this range key. All digit sequences matches by this key are of the
166    * form {@code "<prefix>xxxx"} for some number of "any" edges. This prefix can be "empty" for
167    * ranges such as {@code "xxxx"}.
168    */
getPrefix()169   public abstract RangeSpecification getPrefix();
170 
171   /**
172    * Returns the possible lengths for digit sequences matched by this key.  The returned set is
173    * never empty.
174    */
getLengths()175   public abstract ImmutableSortedSet<Integer> getLengths();
176 
177   /**
178    * Converts the range key into a sequence of range specifications, ordered by length. The
179    * returned set is never empty.
180    */
asRangeSpecifications()181   public final ImmutableList<RangeSpecification> asRangeSpecifications() {
182     RangeSpecification s = getPrefix();
183     return getLengths().stream()
184         .map(n -> s.extendByLength(n - s.length()))
185         .collect(toImmutableList());
186   }
187 
asRangeTree()188   public final RangeTree asRangeTree() {
189     RangeSpecification s = getPrefix();
190     return RangeTree.from(getLengths().stream().map(n -> s.extendByLength(n - s.length())));
191   }
192 
193   /*
194    * Checks if the RangeKey contains a range represented by the given prefix and length.
195    */
contains(DigitSequence prefix, Integer length)196   public boolean contains(DigitSequence prefix, Integer length) {
197     return asRangeSpecifications().stream()
198         .anyMatch(
199             specification ->
200                 specification.matches(
201                     prefix.extendBy(DigitSequence.zeros(length - prefix.length()))));
202   }
203 
buildLengths(int lengthMask)204   private static ImmutableSortedSet<Integer> buildLengths(int lengthMask) {
205     checkArgument(lengthMask != 0);
206     ImmutableSortedSet.Builder<Integer> lengths = ImmutableSortedSet.naturalOrder();
207     do {
208       int length = numberOfTrailingZeros(lengthMask);
209       lengths.add(length);
210       // Clear each bit as we go.
211       lengthMask &= ~(1 << length);
212     } while (lengthMask != 0);
213     return lengths.build();
214   }
215 }
216