• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Libphonenumber Authors.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.i18n.phonenumbers.metadata.regex;
18 
19 import static com.google.common.base.Preconditions.checkArgument;
20 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
21 
22 import com.google.common.base.Preconditions;
23 import com.google.common.collect.ImmutableList;
24 import com.google.common.collect.ImmutableSortedSet;
25 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
26 import java.util.Collection;
27 import java.util.List;
28 import java.util.Set;
29 import java.util.stream.Collectors;
30 import java.util.stream.Stream;
31 
32 /**
33  * Value type for edges in NFA graphs of phone number regular expressions. Outside this package,
34  * this type is mainly used for examining NFA graphs which represent a regular expression,
35  * generated via {@link RangeTreeConverter#toNfaGraph}..
36  *
37  * <p>Note that the ordering of edges is carefully designed to attempt to replicate as much of the
38  * existing intuition about ordering in regular expressions as possible. This should result in any
39  * generated expressions being as close to existing hand edited expressions as possible.
40  */
41 public abstract class Edge implements Comparable<Edge> {
42   /** API for visiting composite edges; see also {@link #accept(Visitor)}. */
43   public interface Visitor {
44     /** Visits a leaf node simple edge. */
visit(SimpleEdge edge)45     void visit(SimpleEdge edge);
46     /**
47      * Visits a composited sequence of edges. Note that sequences only ever contain disjunctions or
48      * simple edges, but never other sequences. For edges "a", "b", "c", this represents the
49      * concatenated edge "abc".
50      */
visitSequence(List<Edge> edges)51     void visitSequence(List<Edge> edges);
52     /**
53      * Visits a disjunction of parallel edges. Note that disjunctions only ever contain sequences
54      * or simple edges, but never other disjunctions. For edges "a", "b", "c", this represents the
55      * disjunctive group "(a|b|c)".
56      */
visitGroup(Set<Edge> edges, boolean isOptional)57     void visitGroup(Set<Edge> edges, boolean isOptional);
58   }
59 
60   // The singleton epsilon edge.
61   private static final SimpleEdge EPSILON = new SimpleEdge();
62   // The singleton edge matching any digit (i.e. 'x' or '\d').
63   private static final SimpleEdge ANY = new SimpleEdge(ALL_DIGITS_MASK, false);
64   // The singleton edge optionally matching any digit (i.e. 'x?' or '\d?').
65   private static final SimpleEdge OPTIONAL_ANY = ANY.optional();
66 
67   /** Returns an edge which accepts digits 0 to 9 according tothe bits set in the given mask. */
fromMask(int digitMask)68   public static SimpleEdge fromMask(int digitMask) {
69     return digitMask == ALL_DIGITS_MASK ? ANY : new SimpleEdge(digitMask, false);
70   }
71 
72   /**
73    * Returns the epsilon edge which accepts zero length input and transitions immediately. This
74    * edge should only ever appear parallel to other edges, and not as the only transition between
75    * two nodes.
76    */
epsilon()77   public static SimpleEdge epsilon() {
78     return EPSILON;
79   }
80 
81   /** Returns the edge which accepts any digit {@code [0-9]}. */
any()82   public static SimpleEdge any() {
83     return ANY;
84   }
85 
86   /** Returns the edge which optionally accepts any digit {@code [0-9]}. */
optionalAny()87   public static SimpleEdge optionalAny() {
88     return OPTIONAL_ANY;
89   }
90 
91   /**
92    * Returns the ordered concatenation of the given edges. If either edge is a concatenation, it
93    * is first expanded, so that the resulting edge contains only simple edges or disjunctions.
94    */
concatenation(Edge lhs, Edge rhs)95   public static Edge concatenation(Edge lhs, Edge rhs) {
96     checkArgument(!lhs.equals(EPSILON) && !rhs.equals(EPSILON), "cannot concatenate epsilon edges");
97     // Don't make concatenations of concatenations; flatten them out so you only have singletons
98     // or disjunctions. This is equivalent to writing "xyz" instead of "x(yz)".
99     List<Edge> edges = Stream.of(lhs, rhs)
100         .flatMap(
101             e -> (e instanceof Concatenation) ? ((Concatenation) e).edges.stream() : Stream.of(e))
102         .collect(Collectors.toList());
103     return new Concatenation(edges);
104   }
105 
106   /**
107    * Returns the disjunction of the given edges. If either edge is already a concatenation, it
108    * is first expanded, so that the resulting edge contains only simple edges or disjunctions.
109    */
disjunction(Collection<Edge> edges)110   public static Edge disjunction(Collection<Edge> edges) {
111     // Don't make disjunctions of disjunctions; flatten them out so you only have singletons,
112     // concatenations or epsilon. This is equivalent to writing "(x|y|z)" instead of "(x|(y|z))".
113     List<Edge> allEdges = edges.stream()
114         .flatMap(
115             e -> (e instanceof Disjunction) ? ((Disjunction) e).edges.stream() : Stream.of(e))
116         .sorted()
117         .distinct()
118         .collect(Collectors.toList());
119     // There should only ever be one epsilon when we make a disjunction (disjunctions are made when
120     // subgraphs collapse and each subgraph should only have one epsilon to make it optional).
121     // Epsilons sort to-the-left of everything, so if there is an epsilon it must be the first edge.
122     boolean isOptional = allEdges.get(0) == EPSILON;
123     if (isOptional) {
124       allEdges = allEdges.subList(1, allEdges.size());
125     }
126     Preconditions.checkState(!allEdges.contains(EPSILON));
127     return new Disjunction(allEdges, isOptional);
128   }
129 
130   /** An edge optionally matching a single input token, or the epsilon transition. */
131   public static final class SimpleEdge extends Edge {
132     private final int digitMask;
133     private final boolean isOptional;
134 
135     // Constructor for singleton epsilon edge.
SimpleEdge()136     private SimpleEdge() {
137       this.digitMask = 0;
138       // An optional epsilon makes no real sense.
139       this.isOptional = false;
140     }
141 
SimpleEdge(int digitMask, boolean isOptional)142     private SimpleEdge(int digitMask, boolean isOptional) {
143       checkArgument(digitMask > 0 && digitMask < (1 << 10), "invalid bit mask %s", digitMask);
144       this.digitMask = digitMask;
145       this.isOptional = isOptional;
146     }
147 
148     /** Returns the mask of digits accepted by this edge. */
getDigitMask()149     public int getDigitMask() {
150       return digitMask;
151     }
152 
153     /** Returns whether this edge is optional. */
isOptional()154     public boolean isOptional() {
155       return isOptional;
156     }
157 
158     /** Returns an optional version of this, non-optional edge. */
optional()159     public SimpleEdge optional() {
160       Preconditions.checkState(digitMask != 0, "cannot make epsilon optional");
161       Preconditions.checkState(!isOptional, "edge already optional");
162       return new SimpleEdge(digitMask, true);
163     }
164 
165     @Override
accept(Visitor visitor)166     public void accept(Visitor visitor) {
167       visitor.visit(this);
168     }
169 
170     @Override
equals(Object obj)171     public boolean equals(Object obj) {
172       return (obj instanceof SimpleEdge) && digitMask == ((SimpleEdge) obj).digitMask;
173     }
174 
175     @Override
hashCode()176     public int hashCode() {
177       return digitMask;
178     }
179 
180     @Override
compareTo(Edge rhs)181     public int compareTo(Edge rhs) {
182       if (rhs instanceof SimpleEdge) {
183         return compare((SimpleEdge) rhs);
184       } else {
185         // Composite types know how to compare themselves to SimpleEdges, so delegate to them but
186         // remember to invert the result since we are reversing the comparison order.
187         return -rhs.compareTo(this);
188       }
189     }
190 
compare(SimpleEdge rhs)191     private int compare(SimpleEdge rhs) {
192       if (isOptional != rhs.isOptional) {
193         // Optional edges sort to-the-right of non-optional things.
194         return isOptional ? 1 : -1;
195       }
196       if (digitMask == rhs.digitMask) {
197         return 0;
198       }
199       if (digitMask == 0 || rhs.digitMask == 0) {
200         // Epsilon sorts to-the-left of everything.
201         return digitMask == 0 ? -1 : 1;
202       }
203       // Unlike many other places where range specifications are used, we cannot guarantee the
204       // ranges are disjoint here, so we sort on the reversed bitmask to favour the lowest set bit.
205       // This sorts 'x' ([0-9]) to the left of everything, and epsilon to the right of everything.
206       // I.e. "x" < "0", "0" < "1", "[0-3]" < "[0-2]", "9" < epsilon.
207       //
208       // Remember to logical-shift back down to avoid negative values.
209       int reverseLhsMask = (Integer.reverse(digitMask) >>> 22);
210       int reverseRhsMask = (Integer.reverse(rhs.digitMask) >>> 22);
211       // Compare in the opposite order, so the largest reversed value is ordered "to the left".
212       return Integer.compare(reverseRhsMask, reverseLhsMask);
213     }
214   }
215 
216   // A sequence of edges (disjunctions or simple edges).
217   private static final class Concatenation extends Edge {
218     private final ImmutableList<Edge> edges;
219 
Concatenation(Collection<Edge> edges)220     private Concatenation(Collection<Edge> edges) {
221       this.edges = ImmutableList.copyOf(edges);
222     }
223 
224     @Override
accept(Visitor visitor)225     public void accept(Visitor visitor) {
226       visitor.visitSequence(edges);
227     }
228 
229     @Override
equals(Object obj)230     public boolean equals(Object obj) {
231       return (obj instanceof Concatenation) && edges.equals(((Concatenation) obj).edges);
232     }
233 
234     @Override
hashCode()235     public int hashCode() {
236       return edges.hashCode();
237     }
238 
239     @Override
compareTo(Edge rhs)240     public int compareTo(Edge rhs) {
241       if (rhs instanceof Concatenation) {
242         return compareEdges(edges, ((Concatenation) rhs).edges);
243       } else {
244         // Compare our first edge to the non-concatenation. If this compares as equal, order the
245         // concatenation between simple edges and disjunctions to break the tie and avoid implying
246         // that a concatenation and a non-concatenation are equal.
247         int comparison = -rhs.compareTo(edges.get(0));
248         return comparison != 0 ? comparison : (rhs instanceof SimpleEdge ? 1 : -1);
249       }
250     }
251   }
252 
253   // A disjunctive group of edges (sequences or simple edges).
254   private static final class Disjunction extends Edge {
255     private final ImmutableSortedSet<Edge> edges;
256     private final boolean isOptional;
257 
Disjunction(Collection<Edge> edges, boolean isOptional)258     private Disjunction(Collection<Edge> edges, boolean isOptional) {
259       checkArgument(!edges.isEmpty());
260       this.edges = ImmutableSortedSet.copyOf(edges);
261       this.isOptional = isOptional;
262     }
263 
264     @Override
accept(Visitor visitor)265     public void accept(Visitor visitor) {
266       visitor.visitGroup(edges, isOptional);
267     }
268 
269     @Override
equals(Object obj)270     public boolean equals(Object obj) {
271       return (obj instanceof Disjunction) && edges.equals(((Disjunction) obj).edges);
272     }
273 
274     @Override
hashCode()275     public int hashCode() {
276       // Negate bits here to be different from Concatenation.
277       return ~edges.hashCode();
278     }
279 
280     @Override
compareTo(Edge rhs)281     public int compareTo(Edge rhs) {
282       if (rhs instanceof Disjunction) {
283         return compareEdges(edges.asList(), ((Disjunction) rhs).edges.asList());
284       } else {
285         // Compare our first edge to the non-disjunction. If this compares as equal, order the
286         // disjunction to the right of the other edge to break the tie and avoid implying that
287         // a disjunction and a non-disjunction are equal.
288         int comparison = -rhs.compareTo(edges.asList().get(0));
289         return comparison == 0 ? 1 : comparison;
290       }
291     }
292   }
293 
294   /**
295    * Accepts a visitor on this edge, visiting any sub-edges from which it is composed. This is a
296    * double-dispatch visitor to avoid anyone processing edges needing to know about specific types.
297    * Only the immediate edge is visited and the visitor is then responsible for visiting child
298    * edges.
299    */
accept(Visitor visitor)300   public abstract void accept(Visitor visitor);
301 
302   // Compare lists according to elements, and tie break on length if different. This is effectively
303   // a lexicographical ordering.
compareEdges(ImmutableList<Edge> lhs, ImmutableList<Edge> rhs)304   private static int compareEdges(ImmutableList<Edge> lhs, ImmutableList<Edge> rhs) {
305     int minSize = Math.min(lhs.size(), rhs.size());
306     for (int n = 0; n < minSize; n++) {
307       int compared = lhs.get(n).compareTo(rhs.get(n));
308       if (compared != 0) {
309         return compared;
310       }
311     }
312     return Integer.compare(lhs.size(), rhs.size());
313   }
314 
315   @Override
toString()316   public String toString() {
317     StringBuilder out = new StringBuilder();
318     accept(new Visitor() {
319       @Override
320       public void visit(SimpleEdge e) {
321         if (e.equals(Edge.epsilon())) {
322           // Epsilon cannot be optional.
323           out.append("e");
324         } else {
325           int m = e.getDigitMask();
326           out.append(m == ALL_DIGITS_MASK ? "x" : RangeSpecification.toString(m));
327           if (e.isOptional()) {
328             out.append('?');
329           }
330         }
331       }
332 
333       @Override
334       public void visitSequence(List<Edge> edges) {
335         edges.forEach(e -> e.accept(this));
336       }
337 
338       @Override
339       public void visitGroup(Set<Edge> edges, boolean isOptional) {
340         out.append("(");
341         edges.forEach(e -> {
342           e.accept(this);
343           out.append("|");
344         });
345         out.setLength(out.length() - 1);
346         out.append(isOptional ? ")?" : ")");
347       }
348     });
349     return out.toString();
350   }
351 }
352