• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Libphonenumber Authors.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.i18n.phonenumbers.metadata.regex;
18 
19 import static com.google.common.base.Preconditions.checkState;
20 
21 import com.google.common.graph.ElementOrder;
22 import com.google.common.graph.MutableValueGraph;
23 import com.google.common.graph.ValueGraph;
24 import com.google.common.graph.ValueGraphBuilder;
25 import com.google.i18n.phonenumbers.metadata.RangeTree;
26 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
27 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
28 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
29 import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
30 import java.util.HashMap;
31 import java.util.Map;
32 
33 /**
34  * Converts DFA {@link RangeTree}s to NFA {@link ValueGraph}s. The resulting graph has almost
35  * exactly the same node and edge structure as the original DFA, with the following exceptions:
36  * <ol>
37  *   <li>Nodes which could optionally terminate now have 'epsilon' edges connecting them to the
38  *   terminal node.
39  *   <li>If an optionally terminating node connects directly to the terminal node, then a special
40  *   "optional edge" is used (this is because the {@link ValueGraph} structure allows only one
41  *   value for each edge, so you can't have an epsilon edge that goes between the same source and
42  *   target as other edge).
43  * </ol>
44  */
45 public final class RangeTreeConverter {
46   /**
47    * Returns the directed NFA graph representation of a {@link RangeTree}. The returned graph is
48    * not a DFA and may contain epsilon transitions. Nodes are assigned in visitation order, except
49    * for the initial and terminal nodes which are always present in the graph.
50    */
toNfaGraph(RangeTree ranges)51   public static ValueGraph<Node, SimpleEdge> toNfaGraph(RangeTree ranges) {
52     NfaVisitor visitor = new NfaVisitor(ranges.getInitial());
53     ranges.accept(visitor);
54     return visitor.graph;
55   }
56 
57   private static class NfaVisitor implements DfaVisitor {
58     private final MutableValueGraph<Node, SimpleEdge> graph = ValueGraphBuilder
59         .directed()
60         .allowsSelfLoops(false)
61         // Stable ordering should help keep any generated structures (regex, graph files) stable.
62         .nodeOrder(ElementOrder.<Node>natural())
63         .build();
64     // Map of nodes added to the new graph (keyed by the corresponding DFA node).
65     private final Map<DfaNode, Node> nodeMap = new HashMap<>();
66     // The last node we added.
67     private Node lastAdded;
68 
NfaVisitor(DfaNode initial)69     private NfaVisitor(DfaNode initial) {
70       // Add initial and terminal nodes first (there's always exactly one of each).
71       graph.addNode(Node.INITIAL);
72       graph.addNode(Node.TERMINAL);
73       // During visitation we check only target nodes to add epsilon edges, but we may also need
74       // to add an epsilon from the very top if the DFA can match the empty input.
75       if (initial.canTerminate()) {
76         graph.putEdgeValue(Node.INITIAL, Node.TERMINAL, Edge.epsilon());
77       }
78       nodeMap.put(initial, Node.INITIAL);
79       nodeMap.put(RangeTree.getTerminal(), Node.TERMINAL);
80       lastAdded = Node.TERMINAL;
81     }
82 
83     @Override
visit(DfaNode dfaSource, DfaEdge dfaEdge, DfaNode dfaTarget)84     public void visit(DfaNode dfaSource, DfaEdge dfaEdge, DfaNode dfaTarget) {
85       SimpleEdge simpleEdge = Edge.fromMask(dfaEdge.getDigitMask());
86       Node source = nodeMap.get(dfaSource);
87       Node target = getTarget(dfaTarget);
88       boolean wasNewNode = graph.addNode(target);
89       // The only chance of an existing edge is if an epsilon was already added immediately before
90       // visiting this edge. This can only occur if (target == TERMINAL) however.
91       SimpleEdge epsilon = graph.putEdgeValue(source, target, simpleEdge);
92       if (epsilon != null) {
93         checkState(target.equals(Node.TERMINAL) && epsilon.equals(Edge.epsilon()),
94             "unexpected edge during visitation: %s -- %s --> %s", source, epsilon, target);
95         // Re-add the edge, but this time make it optional (because that's what epsilon means).
96         graph.putEdgeValue(source, target, simpleEdge.optional());
97       }
98       // Only recurse if the target node was newly added to the graph in this visitation.
99       if (wasNewNode) {
100         // The TERMINAL node is always in the map so (target != TERMINAL) here. This means we
101         // never risk adding a loop in the graph. The epsilon may end up being swapped out for
102         // an optional edge when we visit the dfaTarget, but that's fine.
103         if (dfaTarget.canTerminate()) {
104           graph.putEdgeValue(target, Node.TERMINAL, Edge.epsilon());
105         }
106         dfaTarget.accept(this);
107       }
108     }
109 
110     // Gets or creates a new target node, adding it to the node map (but not to the graph itself).
getTarget(DfaNode gnode)111     private Node getTarget(DfaNode gnode) {
112       Node target = nodeMap.get(gnode);
113       if (target != null) {
114         return target;
115       }
116       lastAdded = lastAdded.createNext();
117       nodeMap.put(gnode, lastAdded);
118       return lastAdded;
119     }
120   }
121 
RangeTreeConverter()122   private RangeTreeConverter() {}
123 }
124