1 /* 2 * Copyright (C) 2017 The Libphonenumber Authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.i18n.phonenumbers.metadata.regex; 18 19 import static com.google.common.base.Preconditions.checkState; 20 21 import com.google.common.graph.ElementOrder; 22 import com.google.common.graph.MutableValueGraph; 23 import com.google.common.graph.ValueGraph; 24 import com.google.common.graph.ValueGraphBuilder; 25 import com.google.i18n.phonenumbers.metadata.RangeTree; 26 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge; 27 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode; 28 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor; 29 import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge; 30 import java.util.HashMap; 31 import java.util.Map; 32 33 /** 34 * Converts DFA {@link RangeTree}s to NFA {@link ValueGraph}s. The resulting graph has almost 35 * exactly the same node and edge structure as the original DFA, with the following exceptions: 36 * <ol> 37 * <li>Nodes which could optionally terminate now have 'epsilon' edges connecting them to the 38 * terminal node. 39 * <li>If an optionally terminating node connects directly to the terminal node, then a special 40 * "optional edge" is used (this is because the {@link ValueGraph} structure allows only one 41 * value for each edge, so you can't have an epsilon edge that goes between the same source and 42 * target as other edge). 43 * </ol> 44 */ 45 public final class RangeTreeConverter { 46 /** 47 * Returns the directed NFA graph representation of a {@link RangeTree}. The returned graph is 48 * not a DFA and may contain epsilon transitions. Nodes are assigned in visitation order, except 49 * for the initial and terminal nodes which are always present in the graph. 50 */ toNfaGraph(RangeTree ranges)51 public static ValueGraph<Node, SimpleEdge> toNfaGraph(RangeTree ranges) { 52 NfaVisitor visitor = new NfaVisitor(ranges.getInitial()); 53 ranges.accept(visitor); 54 return visitor.graph; 55 } 56 57 private static class NfaVisitor implements DfaVisitor { 58 private final MutableValueGraph<Node, SimpleEdge> graph = ValueGraphBuilder 59 .directed() 60 .allowsSelfLoops(false) 61 // Stable ordering should help keep any generated structures (regex, graph files) stable. 62 .nodeOrder(ElementOrder.<Node>natural()) 63 .build(); 64 // Map of nodes added to the new graph (keyed by the corresponding DFA node). 65 private final Map<DfaNode, Node> nodeMap = new HashMap<>(); 66 // The last node we added. 67 private Node lastAdded; 68 NfaVisitor(DfaNode initial)69 private NfaVisitor(DfaNode initial) { 70 // Add initial and terminal nodes first (there's always exactly one of each). 71 graph.addNode(Node.INITIAL); 72 graph.addNode(Node.TERMINAL); 73 // During visitation we check only target nodes to add epsilon edges, but we may also need 74 // to add an epsilon from the very top if the DFA can match the empty input. 75 if (initial.canTerminate()) { 76 graph.putEdgeValue(Node.INITIAL, Node.TERMINAL, Edge.epsilon()); 77 } 78 nodeMap.put(initial, Node.INITIAL); 79 nodeMap.put(RangeTree.getTerminal(), Node.TERMINAL); 80 lastAdded = Node.TERMINAL; 81 } 82 83 @Override visit(DfaNode dfaSource, DfaEdge dfaEdge, DfaNode dfaTarget)84 public void visit(DfaNode dfaSource, DfaEdge dfaEdge, DfaNode dfaTarget) { 85 SimpleEdge simpleEdge = Edge.fromMask(dfaEdge.getDigitMask()); 86 Node source = nodeMap.get(dfaSource); 87 Node target = getTarget(dfaTarget); 88 boolean wasNewNode = graph.addNode(target); 89 // The only chance of an existing edge is if an epsilon was already added immediately before 90 // visiting this edge. This can only occur if (target == TERMINAL) however. 91 SimpleEdge epsilon = graph.putEdgeValue(source, target, simpleEdge); 92 if (epsilon != null) { 93 checkState(target.equals(Node.TERMINAL) && epsilon.equals(Edge.epsilon()), 94 "unexpected edge during visitation: %s -- %s --> %s", source, epsilon, target); 95 // Re-add the edge, but this time make it optional (because that's what epsilon means). 96 graph.putEdgeValue(source, target, simpleEdge.optional()); 97 } 98 // Only recurse if the target node was newly added to the graph in this visitation. 99 if (wasNewNode) { 100 // The TERMINAL node is always in the map so (target != TERMINAL) here. This means we 101 // never risk adding a loop in the graph. The epsilon may end up being swapped out for 102 // an optional edge when we visit the dfaTarget, but that's fine. 103 if (dfaTarget.canTerminate()) { 104 graph.putEdgeValue(target, Node.TERMINAL, Edge.epsilon()); 105 } 106 dfaTarget.accept(this); 107 } 108 } 109 110 // Gets or creates a new target node, adding it to the node map (but not to the graph itself). getTarget(DfaNode gnode)111 private Node getTarget(DfaNode gnode) { 112 Node target = nodeMap.get(gnode); 113 if (target != null) { 114 return target; 115 } 116 lastAdded = lastAdded.createNext(); 117 nodeMap.put(gnode, lastAdded); 118 return lastAdded; 119 } 120 } 121 RangeTreeConverter()122 private RangeTreeConverter() {} 123 } 124