• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011, Mike Samuel
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions
6 // are met:
7 //
8 // Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // Neither the name of the OWASP nor the names of its contributors may
14 // be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 // POSSIBILITY OF SUCH DAMAGE.
28 
29 package org.owasp.html;
30 
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.List;
34 import java.util.Map;
35 import java.util.TreeMap;
36 
37 /**
38  * A trie used to separate punctuation tokens in a run of non-whitespace
39  * characters by preferring the longest punctuation string possible in a
40  * greedy left-to-right scan.
41  *
42  * @author Mike Samuel <mikesamuel@gmail.com>
43  */
44 final class Trie {
45   private final char[] childMap;
46   private final Trie[] children;
47   private final boolean terminal;
48   private final int value;
49 
50   /**
51    * @param elements not empty, non null.
52    */
Trie(Map<String, Integer> elements)53   public Trie(Map<String, Integer> elements) {
54     this(sortedUniqEntries(elements), 0);
55   }
56 
Trie(List<Map.Entry<String, Integer>> elements, int depth)57   private Trie(List<Map.Entry<String, Integer>> elements, int depth) {
58     this(elements, depth, 0, elements.size());
59   }
60 
61   /**
62    * @param elements not empty, non null.  Not modified.
63    * @param depth the depth in the tree.
64    * @param start an index into punctuationStrings of the first string in this
65    *   subtree.
66    * @param end an index into punctuationStrings past the last string in this
67    *   subtree.
68    */
Trie( List<Map.Entry<String, Integer>> elements, int depth, int start, int end)69   private Trie(
70       List<Map.Entry<String, Integer>> elements, int depth,
71       int start, int end) {
72     this.terminal = depth == elements.get(start).getKey().length();
73     if (this.terminal) {
74       this.value = elements.get(start).getValue();
75       if (start + 1 == end) {  // base case
76         this.childMap = ZERO_CHARS;
77         this.children = ZERO_TRIES;
78         return;
79       } else {
80         ++start;
81       }
82     } else {
83       this.value = Integer.MAX_VALUE;
84     }
85     int childCount = 0;
86     {
87       int last = -1;
88       for (int i = start; i < end; ++i) {
89         char ch = elements.get(i).getKey().charAt(depth);
90         if (ch != last) {
91           ++childCount;
92           last = ch;
93         }
94       }
95     }
96     this.childMap = new char[childCount];
97     this.children = new Trie[childCount];
98     int childStart = start;
99     int childIndex = 0;
100     char lastCh = elements.get(start).getKey().charAt(depth);
101     for (int i = start + 1; i < end; ++i) {
102       char ch = elements.get(i).getKey().charAt(depth);
103       if (ch != lastCh) {
104         childMap[childIndex] = lastCh;
105         children[childIndex++] = new Trie(
106           elements, depth + 1, childStart, i);
107         childStart = i;
108         lastCh = ch;
109       }
110     }
111     childMap[childIndex] = lastCh;
112     children[childIndex++] = new Trie(elements, depth + 1, childStart, end);
113   }
114 
115   /** Does this node correspond to a complete string in the input set. */
isTerminal()116   public boolean isTerminal() { return terminal; }
117 
getValue()118   public int getValue() { return value; }
119 
120   /**
121    * The child corresponding to the given character.
122    * @return null if no such trie.
123    */
lookup(char ch)124   public Trie lookup(char ch) {
125     int i = Arrays.binarySearch(childMap, ch);
126     return i >= 0 ? children[i] : null;
127   }
128 
129   /**
130    * The descendant of this trie corresponding to the string for this trie
131    * appended with s.
132    * @param s non null.
133    * @return null if no such trie.
134    */
lookup(CharSequence s)135   public Trie lookup(CharSequence s) {
136     Trie t = this;
137     for (int i = 0, n = s.length(); i < n; ++i) {
138       t = t.lookup(s.charAt(i));
139       if (null == t) { break; }
140     }
141     return t;
142   }
143 
contains(char ch)144   public boolean contains(char ch) {
145     return Arrays.binarySearch(childMap, ch) >= 0;
146   }
147 
sortedUniqEntries( Map<String, T> m)148   private static <T> List<Map.Entry<String, T>> sortedUniqEntries(
149       Map<String, T> m) {
150     return new ArrayList<Map.Entry<String, T>>(
151         new TreeMap<String, T>(m).entrySet());
152   }
153 
154   private static final char[] ZERO_CHARS = new char[0];
155   private static final Trie[] ZERO_TRIES = new Trie[0];
156 
157   /**
158    * Append all strings s such that {@code this.lookup(s).isTerminal()} to the
159    * given list in lexical order.
160    */
toStringList(List<String> strings)161   public void toStringList(List<String> strings) {
162     toStringList("", strings);
163   }
164 
toStringList(String prefix, List<String> strings)165   private void toStringList(String prefix, List<String> strings) {
166     if (terminal) { strings.add(prefix); }
167     for (int i = 0, n = childMap.length; i < n; ++i) {
168       children[i].toStringList(prefix + childMap[i], strings);
169     }
170   }
171 
172   @Override
toString()173   public String toString() {
174     StringBuilder sb = new StringBuilder();
175     toStringBuilder(0, sb);
176     return sb.toString();
177   }
178 
toStringBuilder(int depth, StringBuilder sb)179   private void toStringBuilder(int depth, StringBuilder sb) {
180     sb.append(terminal ? "terminal" : "nonterminal");
181     ++depth;
182     for (int i = 0; i < childMap.length; ++i) {
183       sb.append('\n');
184       for (int d = 0; d < depth; ++d) {
185         sb.append('\t');
186       }
187       sb.append('\'').append(childMap[i]).append("' ");
188       children[i].toStringBuilder(depth, sb);
189     }
190   }
191 }
192